-rw-r--r--Documentation/DMA-API-HOWTO.txt37
-rw-r--r--Documentation/DMA-API.txt8
-rw-r--r--Documentation/arm/small_task_packing.txt136
-rw-r--r--Documentation/arm64/booting.txt26
-rw-r--r--Documentation/arm64/memory.txt44
-rw-r--r--Documentation/arm64/tagged-pointers.txt34
-rw-r--r--Documentation/devicetree/bindings/arm/arch_timer.txt59
-rw-r--r--Documentation/devicetree/bindings/arm/cci.txt172
-rw-r--r--Documentation/devicetree/bindings/arm/gic.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt3
-rw-r--r--Documentation/devicetree/bindings/arm/rtsm-dcscb.txt19
-rw-r--r--Documentation/devicetree/bindings/mailbox/mailbox.txt38
-rw-r--r--Documentation/devicetree/bindings/mfd/vexpress-spc.txt35
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt52
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal.txt595
-rw-r--r--Documentation/kernel-parameters.txt24
-rw-r--r--Documentation/mailbox.txt122
-rw-r--r--Documentation/pinctrl.txt11
-rw-r--r--Documentation/thermal/sysfs-api.txt5
-rw-r--r--Documentation/virtual/kvm/api.txt186
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic.txt83
-rw-r--r--Documentation/virtual/kvm/devices/vfio.txt22
-rw-r--r--Documentation/virtual/kvm/locking.txt8
-rw-r--r--MAINTAINERS18
-rw-r--r--arch/arc/kernel/devtree.c2
-rw-r--r--arch/arc/mm/init.c5
-rw-r--r--arch/arm/Kconfig137
-rw-r--r--arch/arm/Makefile1
-rw-r--r--arch/arm/boot/compressed/head.S9
-rw-r--r--arch/arm/boot/dts/Makefile9
-rw-r--r--arch/arm/boot/dts/clcd-panels.dtsi52
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts159
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts165
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts177
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts171
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts183
-rw-r--r--arch/arm/boot/dts/rtsm_ve-motherboard.dtsi231
-rw-r--r--arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts244
-rw-r--r--arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts358
-rw-r--r--arch/arm/boot/dts/vexpress-v2m-rs1.dtsi1
-rw-r--r--arch/arm/boot/dts/vexpress-v2m.dtsi1
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts4
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts168
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca5s.dts4
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca9.dts4
-rw-r--r--arch/arm/common/Makefile4
-rw-r--r--arch/arm/common/bL_switcher.c864
-rw-r--r--arch/arm/common/bL_switcher_dummy_if.c71
-rw-r--r--arch/arm/common/mcpm_entry.c12
-rw-r--r--arch/arm/common/mcpm_head.S18
-rw-r--r--arch/arm/include/asm/arch_timer.h50
-rw-r--r--arch/arm/include/asm/assembler.h19
-rw-r--r--arch/arm/include/asm/atomic.h26
-rw-r--r--arch/arm/include/asm/bL_switcher.h83
-rw-r--r--arch/arm/include/asm/barrier.h32
-rw-r--r--arch/arm/include/asm/bug.h10
-rw-r--r--arch/arm/include/asm/cacheflush.h46
-rw-r--r--arch/arm/include/asm/cp15.h14
-rw-r--r--arch/arm/include/asm/cputype.h35
-rw-r--r--arch/arm/include/asm/dma-contiguous.h3
-rw-r--r--arch/arm/include/asm/elf.h2
-rw-r--r--arch/arm/include/asm/ftrace.h10
-rw-r--r--arch/arm/include/asm/hardirq.h2
-rw-r--r--arch/arm/include/asm/hardware/coresight.h8
-rw-r--r--arch/arm/include/asm/hardware/debug-pl01x.S2
-rw-r--r--arch/arm/include/asm/hugetlb-3level.h71
-rw-r--r--arch/arm/include/asm/hugetlb.h84
-rw-r--r--arch/arm/include/asm/kgdb.h3
-rw-r--r--arch/arm/include/asm/kvm_arm.h12
-rw-r--r--arch/arm/include/asm/kvm_asm.h26
-rw-r--r--arch/arm/include/asm/kvm_emulate.h75
-rw-r--r--arch/arm/include/asm/kvm_host.h50
-rw-r--r--arch/arm/include/asm/kvm_mmu.h69
-rw-r--r--arch/arm/include/asm/kvm_psci.h6
-rw-r--r--arch/arm/include/asm/kvm_vgic.h220
-rw-r--r--arch/arm/include/asm/mach/arch.h5
-rw-r--r--arch/arm/include/asm/mcpm.h8
-rw-r--r--arch/arm/include/asm/memory.h16
-rw-r--r--arch/arm/include/asm/mmu.h2
-rw-r--r--arch/arm/include/asm/pgtable-3level-hwdef.h4
-rw-r--r--arch/arm/include/asm/pgtable-3level.h90
-rw-r--r--arch/arm/include/asm/pgtable.h5
-rw-r--r--arch/arm/include/asm/pmu.h12
-rw-r--r--arch/arm/include/asm/psci.h21
-rw-r--r--arch/arm/include/asm/smp.h2
-rw-r--r--arch/arm/include/asm/smp_scu.h2
-rw-r--r--arch/arm/include/asm/tlb.h6
-rw-r--r--arch/arm/include/asm/tlbflush.h2
-rw-r--r--arch/arm/include/asm/topology.h34
-rw-r--r--arch/arm/include/uapi/asm/hwcap.h3
-rw-r--r--arch/arm/include/uapi/asm/kvm.h43
-rw-r--r--arch/arm/kernel/Makefile8
-rw-r--r--arch/arm/kernel/asm-offsets.c15
-rw-r--r--arch/arm/kernel/devtree.c2
-rw-r--r--arch/arm/kernel/entry-armv.S5
-rw-r--r--arch/arm/kernel/entry-common.S4
-rw-r--r--arch/arm/kernel/head.S13
-rw-r--r--arch/arm/kernel/hw_breakpoint.c3
-rw-r--r--arch/arm/kernel/module.c57
-rw-r--r--arch/arm/kernel/perf_event.c19
-rw-r--r--arch/arm/kernel/perf_event_cpu.c181
-rw-r--r--arch/arm/kernel/perf_event_v7.c57
-rw-r--r--arch/arm/kernel/psci.c49
-rw-r--r--arch/arm/kernel/psci_smp.c84
-rw-r--r--arch/arm/kernel/setup.c25
-rw-r--r--arch/arm/kernel/signal.c24
-rw-r--r--arch/arm/kernel/sigreturn_codes.S80
-rw-r--r--arch/arm/kernel/sleep.S33
-rw-r--r--arch/arm/kernel/smp.c32
-rw-r--r--arch/arm/kernel/smp_scu.c14
-rw-r--r--arch/arm/kernel/smp_twd.c24
-rw-r--r--arch/arm/kernel/topology.c135
-rw-r--r--arch/arm/kernel/traps.c24
-rw-r--r--arch/arm/kvm/Kconfig9
-rw-r--r--arch/arm/kvm/Makefile10
-rw-r--r--arch/arm/kvm/arm.c173
-rw-r--r--arch/arm/kvm/coproc.c292
-rw-r--r--arch/arm/kvm/coproc.h14
-rw-r--r--arch/arm/kvm/coproc_a15.c119
-rw-r--r--arch/arm/kvm/coproc_a7.c54
-rw-r--r--arch/arm/kvm/emulate.c2
-rw-r--r--arch/arm/kvm/guest.c115
-rw-r--r--arch/arm/kvm/handle_exit.c35
-rw-r--r--arch/arm/kvm/init.S6
-rw-r--r--arch/arm/kvm/interrupts.S32
-rw-r--r--arch/arm/kvm/interrupts_head.S69
-rw-r--r--arch/arm/kvm/mmio.c95
-rw-r--r--arch/arm/kvm/mmu.c488
-rw-r--r--arch/arm/kvm/psci.c263
-rw-r--r--arch/arm/kvm/reset.c27
-rw-r--r--arch/arm/kvm/trace.h7
-rw-r--r--arch/arm/kvm/vgic.c1499
-rw-r--r--arch/arm/lib/copy_template.S36
-rw-r--r--arch/arm/lib/csumpartialcopygeneric.S96
-rw-r--r--arch/arm/lib/io-readsl.S12
-rw-r--r--arch/arm/lib/io-writesl.S12
-rw-r--r--arch/arm/lib/memmove.S36
-rw-r--r--arch/arm/lib/uaccess.S192
-rw-r--r--arch/arm/mach-exynos/mach-exynos5-dt.c26
-rw-r--r--arch/arm/mach-highbank/Kconfig1
-rw-r--r--arch/arm/mach-highbank/highbank.c1
-rw-r--r--arch/arm/mach-ixp4xx/Kconfig4
-rw-r--r--arch/arm/mach-mvebu/Kconfig1
-rw-r--r--arch/arm/mach-mvebu/coherency_ll.S3
-rw-r--r--arch/arm/mach-mvebu/headsmp.S3
-rw-r--r--arch/arm/mach-vexpress/Kconfig21
-rw-r--r--arch/arm/mach-vexpress/Makefile8
-rw-r--r--arch/arm/mach-vexpress/core.h2
-rw-r--r--arch/arm/mach-vexpress/dcscb.c236
-rw-r--r--arch/arm/mach-vexpress/dcscb_setup.S38
-rw-r--r--arch/arm/mach-vexpress/include/mach/tc2.h10
-rw-r--r--arch/arm/mach-vexpress/platsmp.c22
-rw-r--r--arch/arm/mach-vexpress/tc2_pm.c277
-rw-r--r--arch/arm/mach-vexpress/tc2_pm_psci.c173
-rw-r--r--arch/arm/mach-vexpress/tc2_pm_setup.S68
-rw-r--r--arch/arm/mach-vexpress/v2m.c29
-rw-r--r--arch/arm/mach-virt/Makefile1
-rw-r--r--arch/arm/mach-virt/platsmp.c50
-rw-r--r--arch/arm/mach-virt/virt.c3
-rw-r--r--arch/arm/mm/Kconfig6
-rw-r--r--arch/arm/mm/Makefile1
-rw-r--r--arch/arm/mm/abort-ev6.S5
-rw-r--r--arch/arm/mm/alignment.c9
-rw-r--r--arch/arm/mm/cache-v7.S14
-rw-r--r--arch/arm/mm/dma-mapping.c8
-rw-r--r--arch/arm/mm/fault.c13
-rw-r--r--arch/arm/mm/flush.c25
-rw-r--r--arch/arm/mm/fsr-3level.c4
-rw-r--r--arch/arm/mm/hugetlbpage.c101
-rw-r--r--arch/arm/mm/idmap.c5
-rw-r--r--arch/arm/mm/init.c2
-rw-r--r--arch/arm/mm/proc-v6.S4
-rw-r--r--arch/arm/mm/proc-v7.S4
-rw-r--r--arch/arm/net/bpf_jit_32.c6
-rw-r--r--arch/arm/plat-samsung/s5p-dev-mfc.c4
-rw-r--r--arch/arm/plat-versatile/headsmp.S4
-rw-r--r--arch/arm64/Kconfig226
-rw-r--r--arch/arm64/Kconfig.debug14
-rw-r--r--arch/arm64/Makefile12
-rw-r--r--arch/arm64/boot/dts/Makefile5
-rw-r--r--arch/arm64/boot/dts/apm-mustang.dts30
-rw-r--r--arch/arm64/boot/dts/apm-storm.dtsi398
-rw-r--r--arch/arm64/boot/dts/clcd-panels.dtsi52
-rw-r--r--arch/arm64/boot/dts/fvp-base-gicv2-psci.dts294
-rw-r--r--arch/arm64/boot/dts/juno.dts498
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-aemv8a.dts51
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi9
-rw-r--r--arch/arm64/configs/defconfig23
-rw-r--r--arch/arm64/crypto/Kconfig53
-rw-r--r--arch/arm64/crypto/Makefile38
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S222
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c297
-rw-r--r--arch/arm64/crypto/aes-ce-cipher.c155
-rw-r--r--arch/arm64/crypto/aes-ce.S133
-rw-r--r--arch/arm64/crypto/aes-glue.c446
-rw-r--r--arch/arm64/crypto/aes-modes.S532
-rw-r--r--arch/arm64/crypto/aes-neon.S382
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S95
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c155
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S153
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c174
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S156
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c255
-rw-r--r--arch/arm64/include/asm/Kbuild4
-rw-r--r--arch/arm64/include/asm/arch_timer.h65
-rw-r--r--arch/arm64/include/asm/assembler.h31
-rw-r--r--arch/arm64/include/asm/atomic.h55
-rw-r--r--arch/arm64/include/asm/bL_switcher.h54
-rw-r--r--arch/arm64/include/asm/barrier.h65
-rw-r--r--arch/arm64/include/asm/cacheflush.h23
-rw-r--r--arch/arm64/include/asm/cmpxchg.h54
-rw-r--r--arch/arm64/include/asm/compat.h21
-rw-r--r--arch/arm64/include/asm/cpu_ops.h65
-rw-r--r--arch/arm64/include/asm/cpufeature.h29
-rw-r--r--arch/arm64/include/asm/cputype.h32
-rw-r--r--arch/arm64/include/asm/debug-monitors.h111
-rw-r--r--arch/arm64/include/asm/device.h3
-rw-r--r--arch/arm64/include/asm/dma-contiguous.h28
-rw-r--r--arch/arm64/include/asm/dma-mapping.h25
-rw-r--r--arch/arm64/include/asm/efi.h14
-rw-r--r--arch/arm64/include/asm/elf.h21
-rw-r--r--arch/arm64/include/asm/esr.h2
-rw-r--r--arch/arm64/include/asm/fixmap.h67
-rw-r--r--arch/arm64/include/asm/ftrace.h59
-rw-r--r--arch/arm64/include/asm/futex.h11
-rw-r--r--arch/arm64/include/asm/hardirq.h2
-rw-r--r--arch/arm64/include/asm/hugetlb.h117
-rw-r--r--arch/arm64/include/asm/hwcap.h20
-rw-r--r--arch/arm64/include/asm/insn.h113
-rw-r--r--arch/arm64/include/asm/io.h11
-rw-r--r--arch/arm64/include/asm/irq.h1
-rw-r--r--arch/arm64/include/asm/irqflags.h23
-rw-r--r--arch/arm64/include/asm/jump_label.h52
-rw-r--r--arch/arm64/include/asm/kgdb.h84
-rw-r--r--arch/arm64/include/asm/kvm_arm.h259
-rw-r--r--arch/arm64/include/asm/kvm_asm.h141
-rw-r--r--arch/arm64/include/asm/kvm_coproc.h57
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h268
-rw-r--r--arch/arm64/include/asm/kvm_host.h251
-rw-r--r--arch/arm64/include/asm/kvm_mmio.h59
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h157
-rw-r--r--arch/arm64/include/asm/kvm_psci.h (renamed from arch/arm64/include/asm/sigcontext.h)26
-rw-r--r--arch/arm64/include/asm/memory.h20
-rw-r--r--arch/arm64/include/asm/mmu.h6
-rw-r--r--arch/arm64/include/asm/mmu_context.h6
-rw-r--r--arch/arm64/include/asm/page.h9
-rw-r--r--arch/arm64/include/asm/percpu.h49
-rw-r--r--arch/arm64/include/asm/pgtable-2level-hwdef.h4
-rw-r--r--arch/arm64/include/asm/pgtable-2level-types.h2
-rw-r--r--arch/arm64/include/asm/pgtable-3level-types.h2
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h38
-rw-r--r--arch/arm64/include/asm/pgtable.h284
-rw-r--r--arch/arm64/include/asm/proc-fns.h3
-rw-r--r--arch/arm64/include/asm/processor.h5
-rw-r--r--arch/arm64/include/asm/psci.h23
-rw-r--r--arch/arm64/include/asm/ptrace.h13
-rw-r--r--arch/arm64/include/asm/smp.h15
-rw-r--r--arch/arm64/include/asm/smp_plat.h13
-rw-r--r--arch/arm64/include/asm/spinlock.h93
-rw-r--r--arch/arm64/include/asm/spinlock_types.h10
-rw-r--r--arch/arm64/include/asm/suspend.h27
-rw-r--r--arch/arm64/include/asm/syscall.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h60
-rw-r--r--arch/arm64/include/asm/thread_info.h13
-rw-r--r--arch/arm64/include/asm/timex.h6
-rw-r--r--arch/arm64/include/asm/tlb.h6
-rw-r--r--arch/arm64/include/asm/tlbflush.h51
-rw-r--r--arch/arm64/include/asm/topology.h70
-rw-r--r--arch/arm64/include/asm/uaccess.h43
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/virt.h20
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h94
-rw-r--r--arch/arm64/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm64/include/uapi/asm/byteorder.h4
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h7
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h201
-rw-r--r--arch/arm64/include/uapi/asm/perf_regs.h40
-rw-r--r--arch/arm64/kernel/Makefile20
-rw-r--r--arch/arm64/kernel/arm64ksyms.c10
-rw-r--r--arch/arm64/kernel/asm-offsets.c57
-rw-r--r--arch/arm64/kernel/cpu_ops.c99
-rw-r--r--arch/arm64/kernel/cputable.c2
-rw-r--r--arch/arm64/kernel/debug-monitors.c166
-rw-r--r--arch/arm64/kernel/early_printk.c8
-rw-r--r--arch/arm64/kernel/efi-entry.S109
-rw-r--r--arch/arm64/kernel/efi-stub.c79
-rw-r--r--arch/arm64/kernel/efi.c469
-rw-r--r--arch/arm64/kernel/entry-ftrace.S218
-rw-r--r--arch/arm64/kernel/entry.S42
-rw-r--r--arch/arm64/kernel/fpsimd.c63
-rw-r--r--arch/arm64/kernel/ftrace.c177
-rw-r--r--arch/arm64/kernel/head.S297
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c205
-rw-r--r--arch/arm64/kernel/hyp-stub.S1
-rw-r--r--arch/arm64/kernel/insn.c304
-rw-r--r--arch/arm64/kernel/irq.c61
-rw-r--r--arch/arm64/kernel/jump_label.c58
-rw-r--r--arch/arm64/kernel/kgdb.c336
-rw-r--r--arch/arm64/kernel/kuser32.S55
-rw-r--r--arch/arm64/kernel/module.c156
-rw-r--r--arch/arm64/kernel/perf_event.c187
-rw-r--r--arch/arm64/kernel/perf_regs.c46
-rw-r--r--arch/arm64/kernel/process.c56
-rw-r--r--arch/arm64/kernel/psci.c196
-rw-r--r--arch/arm64/kernel/ptrace.c157
-rw-r--r--arch/arm64/kernel/return_address.c55
-rw-r--r--arch/arm64/kernel/setup.c190
-rw-r--r--arch/arm64/kernel/signal.c29
-rw-r--r--arch/arm64/kernel/signal32.c37
-rw-r--r--arch/arm64/kernel/sleep.S184
-rw-r--r--arch/arm64/kernel/smp.c258
-rw-r--r--arch/arm64/kernel/smp_psci.c53
-rw-r--r--arch/arm64/kernel/smp_spin_table.c60
-rw-r--r--arch/arm64/kernel/stacktrace.c4
-rw-r--r--arch/arm64/kernel/suspend.c140
-rw-r--r--arch/arm64/kernel/sys32.S22
-rw-r--r--arch/arm64/kernel/time.c6
-rw-r--r--arch/arm64/kernel/topology.c590
-rw-r--r--arch/arm64/kernel/traps.c5
-rw-r--r--arch/arm64/kernel/vdso.c81
-rw-r--r--arch/arm64/kernel/vdso/Makefile4
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S70
-rw-r--r--arch/arm64/kvm/Kconfig63
-rw-r--r--arch/arm64/kvm/Makefile27
-rw-r--r--arch/arm64/kvm/emulate.c158
-rw-r--r--arch/arm64/kvm/guest.c359
-rw-r--r--arch/arm64/kvm/handle_exit.c133
-rw-r--r--arch/arm64/kvm/hyp-init.S116
-rw-r--r--arch/arm64/kvm/hyp.S1274
-rw-r--r--arch/arm64/kvm/inject_fault.c203
-rw-r--r--arch/arm64/kvm/regmap.c168
-rw-r--r--arch/arm64/kvm/reset.c112
-rw-r--r--arch/arm64/kvm/sys_regs.c1528
-rw-r--r--arch/arm64/kvm/sys_regs.h140
-rw-r--r--arch/arm64/kvm/sys_regs_generic_v8.c100
-rw-r--r--arch/arm64/kvm/vgic-v2-switch.S133
-rw-r--r--arch/arm64/kvm/vgic-v3-switch.S267
-rw-r--r--arch/arm64/lib/Makefile8
-rw-r--r--arch/arm64/lib/bitops.S3
-rw-r--r--arch/arm64/lib/strncpy_from_user.S50
-rw-r--r--arch/arm64/lib/strnlen_user.S47
-rw-r--r--arch/arm64/mm/Makefile3
-rw-r--r--arch/arm64/mm/cache.S98
-rw-r--r--arch/arm64/mm/copypage.c2
-rw-r--r--arch/arm64/mm/dma-mapping.c310
-rw-r--r--arch/arm64/mm/fault.c21
-rw-r--r--arch/arm64/mm/flush.c8
-rw-r--r--arch/arm64/mm/hugetlbpage.c70
-rw-r--r--arch/arm64/mm/init.c52
-rw-r--r--arch/arm64/mm/ioremap.c96
-rw-r--r--arch/arm64/mm/mm.h1
-rw-r--r--arch/arm64/mm/mmu.c172
-rw-r--r--arch/arm64/mm/pgd.c11
-rw-r--r--arch/arm64/mm/proc-macros.S3
-rw-r--r--arch/arm64/mm/proc.S93
-rw-r--r--arch/arm64/mm/tlb.S71
-rw-r--r--arch/blackfin/include/asm/ftrace.h11
-rw-r--r--arch/c6x/kernel/devicetree.c3
-rw-r--r--arch/ia64/include/asm/kvm_host.h15
-rw-r--r--arch/ia64/kernel/efi.c59
-rw-r--r--arch/ia64/kvm/Kconfig1
-rw-r--r--arch/ia64/kvm/Makefile7
-rw-r--r--arch/ia64/kvm/kvm-ia64.c37
-rw-r--r--arch/metag/mm/init.c5
-rw-r--r--arch/microblaze/kernel/prom.c11
-rw-r--r--arch/mips/include/asm/kvm_host.h16
-rw-r--r--arch/mips/kernel/prom.c3
-rw-r--r--arch/mips/kvm/kvm_mips.c9
-rw-r--r--arch/mips/mm/init.c1
-rw-r--r--arch/openrisc/kernel/prom.c3
-rw-r--r--arch/parisc/include/asm/ftrace.h10
-rw-r--r--arch/powerpc/include/asm/kvm_host.h13
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h6
-rw-r--r--arch/powerpc/include/asm/prom.h3
-rw-r--r--arch/powerpc/kernel/fadump.c4
-rw-r--r--arch/powerpc/kernel/prom.c71
-rw-r--r--arch/powerpc/kernel/rtas.c2
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/Makefile13
-rw-r--r--arch/powerpc/kvm/book3s_hv.c2
-rw-r--r--arch/powerpc/kvm/booke.c4
-rw-r--r--arch/powerpc/kvm/mpic.c4
-rw-r--r--arch/powerpc/kvm/powerpc.c33
-rw-r--r--arch/powerpc/mm/hash_utils_64.c22
-rw-r--r--arch/powerpc/platforms/52xx/efika.c4
-rw-r--r--arch/powerpc/platforms/chrp/setup.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal.c12
-rw-r--r--arch/powerpc/platforms/pseries/setup.c4
-rw-r--r--arch/s390/include/asm/kvm_host.h17
-rw-r--r--arch/s390/kvm/Makefile3
-rw-r--r--arch/s390/kvm/kvm-s390.c42
-rw-r--r--arch/s390/mm/init.c1
-rw-r--r--arch/sh/include/asm/ftrace.h10
-rw-r--r--arch/x86/Kconfig6
-rw-r--r--arch/x86/boot/compressed/eboot.c121
-rw-r--r--arch/x86/boot/compressed/eboot.h52
-rw-r--r--arch/x86/include/asm/dma-contiguous.h1
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h15
-rw-r--r--arch/x86/include/uapi/asm/kvm.h6
-rw-r--r--arch/x86/kernel/devicetree.c3
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/kvm/Kconfig2
-rw-r--r--arch/x86/kvm/Makefile13
-rw-r--r--arch/x86/kvm/cpuid.c57
-rw-r--r--arch/x86/kvm/cpuid.h5
-rw-r--r--arch/x86/kvm/mmu.c6
-rw-r--r--arch/x86/kvm/paging_tmpl.h20
-rw-r--r--arch/x86/kvm/svm.c4
-rw-r--r--arch/x86/kvm/vmx.c4
-rw-r--r--arch/x86/kvm/x86.c50
-rw-r--r--arch/x86/mm/hugetlbpage.c187
-rw-r--r--arch/x86/platform/efi/efi.c149
-rw-r--r--arch/xtensa/include/asm/ftrace.h33
-rw-r--r--arch/xtensa/kernel/setup.c40
-rw-r--r--block/blk-core.c3
-rw-r--r--block/blk-ioc.c3
-rw-r--r--block/genhd.c12
-rw-r--r--crypto/Kconfig4
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/ablk_helper.c150
-rw-r--r--crypto/blkcipher.c81
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile2
-rw-r--r--drivers/base/Kconfig28
-rw-r--r--drivers/base/Makefile2
-rw-r--r--drivers/base/cpu.c52
-rw-r--r--drivers/base/devres.c31
-rw-r--r--drivers/base/dma-contiguous.c119
-rw-r--r--drivers/base/power/main.c6
-rw-r--r--drivers/bus/Kconfig7
-rw-r--r--drivers/bus/Makefile2
-rw-r--r--drivers/bus/arm-cci.c947
-rw-r--r--drivers/clk/Kconfig2
-rw-r--r--drivers/clk/clk.c6
-rw-r--r--drivers/clk/versatile/Makefile2
-rw-r--r--drivers/clk/versatile/clk-vexpress-osc.c4
-rw-r--r--drivers/clk/versatile/clk-vexpress-spc.c131
-rw-r--r--drivers/clocksource/Kconfig15
-rw-r--r--drivers/clocksource/arm_arch_timer.c491
-rw-r--r--drivers/cpufreq/Kconfig2
-rw-r--r--drivers/cpufreq/Kconfig.arm11
-rw-r--r--drivers/cpufreq/Makefile1
-rw-r--r--drivers/cpufreq/arm_big_little.c422
-rw-r--r--drivers/cpufreq/arm_big_little.h17
-rw-r--r--drivers/cpufreq/cpufreq.c130
-rw-r--r--drivers/cpufreq/cpufreq_stats.c56
-rw-r--r--drivers/cpufreq/highbank-cpufreq.c2
-rw-r--r--drivers/cpufreq/vexpress_big_little.c86
-rw-r--r--drivers/cpuidle/Kconfig14
-rw-r--r--drivers/cpuidle/Kconfig.arm6413
-rw-r--r--drivers/cpuidle/Makefile6
-rw-r--r--drivers/cpuidle/arm_big_little.c183
-rw-r--r--drivers/cpuidle/cpuidle-arm64.c159
-rw-r--r--drivers/cpuidle/cpuidle-calxeda.c14
-rw-r--r--drivers/cpuidle/of_idle_states.c274
-rw-r--r--drivers/cpuidle/of_idle_states.h8
-rw-r--r--drivers/extcon/extcon-adc-jack.c3
-rw-r--r--drivers/extcon/extcon-gpio.c2
-rw-r--r--drivers/firmware/efi/Kconfig7
-rw-r--r--drivers/firmware/efi/arm-stub.c278
-rw-r--r--drivers/firmware/efi/efi-pstore.c150
-rw-r--r--drivers/firmware/efi/efi-stub-helper.c634
-rw-r--r--drivers/firmware/efi/efi.c227
-rw-r--r--drivers/firmware/efi/efivars.c209
-rw-r--r--drivers/firmware/efi/fdt.c275
-rw-r--r--drivers/firmware/efi/vars.c42
-rw-r--r--drivers/gator/Kconfig39
-rw-r--r--drivers/gator/LICENSE339
-rw-r--r--drivers/gator/Makefile79
-rw-r--r--drivers/gator/gator.h152
-rw-r--r--drivers/gator/gator_annotate.c189
-rw-r--r--drivers/gator/gator_annotate_kernel.c200
-rw-r--r--drivers/gator/gator_backtrace.c208
-rw-r--r--drivers/gator/gator_buffer.c171
-rw-r--r--drivers/gator/gator_buffer_write.c83
-rw-r--r--drivers/gator/gator_cookies.c446
-rw-r--r--drivers/gator/gator_events_armv6.c234
-rw-r--r--drivers/gator/gator_events_armv7.c314
-rw-r--r--drivers/gator/gator_events_block.c160
-rw-r--r--drivers/gator/gator_events_irq.c163
-rw-r--r--drivers/gator/gator_events_l2c-310.c208
-rw-r--r--drivers/gator/gator_events_mali_4xx.c622
-rw-r--r--drivers/gator/gator_events_mali_4xx.h18
-rw-r--r--drivers/gator/gator_events_mali_common.c72
-rw-r--r--drivers/gator/gator_events_mali_common.h77
-rw-r--r--drivers/gator/gator_events_mali_midgard.c562
-rw-r--r--drivers/gator/gator_events_mali_midgard_hw.c977
-rw-r--r--drivers/gator/gator_events_mali_midgard_hw_test.c55
-rw-r--r--drivers/gator/gator_events_meminfo.c470
-rw-r--r--drivers/gator/gator_events_mmapped.c209
-rw-r--r--drivers/gator/gator_events_net.c172
-rw-r--r--drivers/gator/gator_events_perf_pmu.c574
-rw-r--r--drivers/gator/gator_events_sched.c113
-rw-r--r--drivers/gator/gator_events_scorpion.c674
-rw-r--r--drivers/gator/gator_fs.c370
-rw-r--r--drivers/gator/gator_hrtimer_gator.c80
-rw-r--r--drivers/gator/gator_iks.c197
-rw-r--r--drivers/gator/gator_main.c1491
-rw-r--r--drivers/gator/gator_marshaling.c371
-rw-r--r--drivers/gator/gator_trace_gpu.c321
-rw-r--r--drivers/gator/gator_trace_power.c192
-rw-r--r--drivers/gator/gator_trace_sched.c321
-rw-r--r--drivers/gator/mali/mali_kbase_gator_api.h219
-rw-r--r--drivers/gator/mali/mali_mjollnir_profiling_gator_api.h159
-rw-r--r--drivers/gator/mali/mali_utgard_profiling_gator_api.h197
-rw-r--r--drivers/gator/mali_midgard.mk39
-rw-r--r--drivers/hwmon/lm75.c35
-rw-r--r--drivers/hwmon/tmp102.c19
-rw-r--r--drivers/irqchip/Kconfig5
-rw-r--r--drivers/irqchip/Makefile3
-rw-r--r--drivers/irqchip/irq-gic-common.c115
-rw-r--r--drivers/irqchip/irq-gic-common.h29
-rw-r--r--drivers/irqchip/irq-gic-v3.c692
-rw-r--r--drivers/irqchip/irq-gic.c340
-rw-r--r--drivers/mailbox/Makefile4
-rw-r--r--drivers/mailbox/mailbox.c467
-rw-r--r--drivers/mailbox/pl320-ipc.c2
-rw-r--r--drivers/mfd/Kconfig10
-rw-r--r--drivers/mfd/Makefile1
-rw-r--r--drivers/mfd/vexpress-config.c61
-rw-r--r--drivers/mfd/vexpress-spc.c633
-rw-r--r--drivers/mfd/vexpress-sysreg.c12
-rw-r--r--drivers/net/ethernet/smsc/Kconfig2
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c2
-rw-r--r--drivers/net/ethernet/smsc/smc91x.h6
-rw-r--r--drivers/net/phy/phy.c9
-rw-r--r--drivers/of/Kconfig7
-rw-r--r--drivers/of/Makefile3
-rw-r--r--drivers/of/base.c121
-rw-r--r--drivers/of/fdt.c477
-rw-r--r--drivers/of/of_reserved_mem.c217
-rw-r--r--drivers/pinctrl/Makefile2
-rw-r--r--drivers/pinctrl/pinconf-generic.c204
-rw-r--r--drivers/pinctrl/pinconf.c233
-rw-r--r--drivers/pinctrl/pinconf.h6
-rw-r--r--drivers/pinctrl/pinctrl-utils.c141
-rw-r--r--drivers/pinctrl/pinctrl-utils.h43
-rw-r--r--drivers/power/reset/Kconfig3
-rw-r--r--drivers/regulator/core.c5
-rw-r--r--drivers/thermal/Kconfig23
-rw-r--r--drivers/thermal/Makefile4
-rw-r--r--drivers/thermal/cpu_cooling.c81
-rw-r--r--drivers/thermal/of-thermal.c850
-rw-r--r--drivers/thermal/step_wise.c6
-rw-r--r--drivers/thermal/thermal_core.c384
-rw-r--r--drivers/thermal/thermal_core.h9
-rw-r--r--drivers/thermal/thermal_hwmon.c269
-rw-r--r--drivers/thermal/thermal_hwmon.h49
-rw-r--r--drivers/tty/serial/8250/8250_early.c138
-rw-r--r--drivers/tty/serial/8250/Kconfig1
-rw-r--r--drivers/tty/serial/Kconfig7
-rw-r--r--drivers/tty/serial/Makefile2
-rw-r--r--drivers/tty/serial/altera_uart.c2
-rw-r--r--drivers/tty/serial/earlycon.c152
-rw-r--r--drivers/tty/serial/samsung.c5
-rw-r--r--drivers/usb/Kconfig69
-rw-r--r--drivers/usb/host/Kconfig10
-rw-r--r--drivers/usb/host/Makefile1
-rw-r--r--drivers/usb/host/ehci-h20ahb.c341
-rw-r--r--drivers/usb/host/ehci-hcd.c11
-rw-r--r--drivers/usb/host/ohci-hcd.c19
-rw-r--r--drivers/usb/phy/Kconfig2
-rw-r--r--drivers/usb/phy/phy-ulpi.c1
-rw-r--r--drivers/video/Kconfig23
-rw-r--r--drivers/video/Makefile4
-rw-r--r--drivers/video/amba-clcd.c290
-rw-r--r--drivers/video/arm-hdlcd.c844
-rw-r--r--drivers/video/console/Kconfig2
-rw-r--r--drivers/video/console/fbcon.c2
-rw-r--r--drivers/video/vexpress-dvi.c220
-rw-r--r--fs/compat_binfmt_elf.c5
-rw-r--r--include/asm-generic/dma-contiguous.h28
-rw-r--r--include/asm-generic/early_ioremap.h42
-rw-r--r--include/asm-generic/fixmap.h97
-rw-r--r--include/asm-generic/rwsem.h10
-rw-r--r--include/asm-generic/simd.h14
-rw-r--r--include/asm-generic/vmlinux.lds.h11
-rw-r--r--include/asm-generic/word-at-a-time.h4
-rw-r--r--include/clocksource/arm_arch_timer.h20
-rw-r--r--include/crypto/ablk_helper.h31
-rw-r--r--include/crypto/algapi.h9
-rw-r--r--include/dt-bindings/thermal/thermal.h17
-rw-r--r--include/kvm/arm_arch_timer.h (renamed from arch/arm/include/asm/kvm_arch_timer.h)18
-rw-r--r--include/kvm/arm_vgic.h370
-rw-r--r--include/linux/arm-cci.h61
-rw-r--r--include/linux/arm-hdlcd.h122
-rw-r--r--include/linux/clk-provider.h1
-rw-r--r--include/linux/cpu.h1
-rw-r--r--include/linux/cpu_cooling.h25
-rw-r--r--include/linux/cpufeature.h60
-rw-r--r--include/linux/cpufreq.h12
-rw-r--r--include/linux/cpuidle.h1
-rw-r--r--include/linux/device.h23
-rw-r--r--include/linux/dma-contiguous.h64
-rw-r--r--include/linux/dma-mapping.h24
-rw-r--r--include/linux/efi.h380
-rw-r--r--include/linux/ftrace.h34
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/hugetlb.h4
-rw-r--r--include/linux/irqchip/arm-gic-v3.h200
-rw-r--r--include/linux/irqchip/arm-gic.h31
-rw-r--r--include/linux/irqdesc.h8
-rw-r--r--include/linux/kvm_host.h108
-rw-r--r--include/linux/kvm_types.h14
-rw-r--r--include/linux/mailbox_client.h46
-rw-r--r--include/linux/mailbox_controller.h135
-rw-r--r--include/linux/mm.h3
-rw-r--r--include/linux/mod_devicetable.h9
-rw-r--r--include/linux/of.h14
-rw-r--r--include/linux/of_fdt.h17
-rw-r--r--include/linux/of_reserved_mem.h53
-rw-r--r--include/linux/pinctrl/consumer.h43
-rw-r--r--include/linux/pinctrl/pinconf-generic.h69
-rw-r--r--include/linux/pinctrl/pinconf.h8
-rw-r--r--include/linux/pinctrl/pinctrl.h2
-rw-r--r--include/linux/pl320-ipc.h (renamed from include/linux/mailbox.h)0
-rw-r--r--include/linux/sched.h21
-rw-r--r--include/linux/serial_core.h16
-rw-r--r--include/linux/thermal.h40
-rw-r--r--include/linux/vexpress.h62
-rw-r--r--include/linux/vmstat.h2
-rw-r--r--include/linux/workqueue.h35
-rw-r--r--include/sound/soc-dpcm.h22
-rw-r--r--include/sound/soc.h1
-rw-r--r--include/trace/events/arm-ipi.h100
-rw-r--r--include/trace/events/kvm.h18
-rw-r--r--include/trace/events/power_cpu_migrate.h67
-rw-r--r--include/trace/events/sched.h274
-rw-r--r--include/trace/events/smp.h90
-rw-r--r--include/uapi/linux/Kbuild1
-rw-r--r--include/uapi/linux/elf-em.h2
-rw-r--r--include/uapi/linux/kvm.h46
-rw-r--r--include/uapi/linux/psci.h90
-rw-r--r--kernel/auditfilter.c2
-rw-r--r--kernel/irq/irqdesc.c29
-rw-r--r--kernel/power/Kconfig20
-rw-r--r--kernel/sched/core.c38
-rw-r--r--kernel/sched/debug.c9
-rw-r--r--kernel/sched/fair.c1858
-rw-r--r--kernel/sched/sched.h14
-rw-r--r--kernel/smp.c12
-rw-r--r--kernel/workqueue.c26
-rw-r--r--lib/Makefile3
-rw-r--r--lib/fdt_empty_tree.c2
-rw-r--r--linaro/configs/android.conf42
-rw-r--r--linaro/configs/arndale.conf66
-rw-r--r--linaro/configs/big-LITTLE-IKS.conf5
-rw-r--r--linaro/configs/big-LITTLE-MP.conf12
-rw-r--r--linaro/configs/bigendian.conf4
-rw-r--r--linaro/configs/debug.conf1
-rw-r--r--linaro/configs/distribution.conf49
-rw-r--r--linaro/configs/highbank.conf40
-rw-r--r--linaro/configs/kvm-guest.conf11
-rw-r--r--linaro/configs/kvm-host.conf11
-rw-r--r--linaro/configs/linaro-base.conf118
-rw-r--r--linaro/configs/omap4.conf194
-rw-r--r--linaro/configs/preempt-rt.conf4
l---------linaro/configs/ubuntu-minimal.conf1
-rw-r--r--linaro/configs/vexpress-tuning.conf1
-rw-r--r--linaro/configs/vexpress.conf63
-rw-r--r--linaro/configs/vexpress64.conf55
-rw-r--r--linaro/configs/xen.conf7
-rw-r--r--mm/Kconfig27
-rw-r--r--mm/Makefile1
-rw-r--r--mm/early_ioremap.c245
-rw-r--r--mm/hugetlb.c531
-rw-r--r--mm/memory.c2
-rw-r--r--mm/vmstat.c95
-rw-r--r--scripts/Makefile.lib1
-rw-r--r--scripts/gcc-goto.sh2
-rw-r--r--scripts/mod/devicetable-offsets.c3
-rw-r--r--scripts/mod/file2alias.c10
-rw-r--r--scripts/recordmcount.c7
-rwxr-xr-xscripts/recordmcount.pl5
-rw-r--r--scripts/sortextable.c5
-rw-r--r--sound/soc/soc-compress.c314
-rw-r--r--sound/soc/soc-jack.c2
-rw-r--r--sound/soc/soc-pcm.c34
-rw-r--r--tools/gator/daemon/Android.mk76
-rw-r--r--tools/gator/daemon/AnnotateListener.cpp69
-rw-r--r--tools/gator/daemon/AnnotateListener.h31
-rw-r--r--tools/gator/daemon/Application.mk3
-rw-r--r--tools/gator/daemon/Buffer.cpp428
-rw-r--r--tools/gator/daemon/Buffer.h106
-rw-r--r--tools/gator/daemon/CCNDriver.cpp295
-rw-r--r--tools/gator/daemon/CCNDriver.h43
-rw-r--r--tools/gator/daemon/CPUFreqDriver.cpp58
-rw-r--r--tools/gator/daemon/CPUFreqDriver.h34
-rw-r--r--tools/gator/daemon/CapturedXML.cpp145
-rw-r--r--tools/gator/daemon/CapturedXML.h26
-rw-r--r--tools/gator/daemon/Child.cpp392
-rw-r--r--tools/gator/daemon/Child.h33
-rw-r--r--tools/gator/daemon/Command.cpp172
-rw-r--r--tools/gator/daemon/Command.h14
-rw-r--r--tools/gator/daemon/Config.h28
-rw-r--r--tools/gator/daemon/ConfigurationXML.cpp217
-rw-r--r--tools/gator/daemon/ConfigurationXML.h38
-rw-r--r--tools/gator/daemon/Counter.h65
-rw-r--r--tools/gator/daemon/DiskIODriver.cpp125
-rw-r--r--tools/gator/daemon/DiskIODriver.h39
-rw-r--r--tools/gator/daemon/Driver.cpp96
-rw-r--r--tools/gator/daemon/Driver.h118
-rw-r--r--tools/gator/daemon/DriverSource.cpp322
-rw-r--r--tools/gator/daemon/DriverSource.h57
-rw-r--r--tools/gator/daemon/DynBuf.cpp139
-rw-r--r--tools/gator/daemon/DynBuf.h52
-rw-r--r--tools/gator/daemon/EventsXML.cpp76
-rw-r--r--tools/gator/daemon/EventsXML.h21
-rw-r--r--tools/gator/daemon/ExternalSource.cpp260
-rw-r--r--tools/gator/daemon/ExternalSource.h53
-rw-r--r--tools/gator/daemon/FSDriver.cpp158
-rw-r--r--tools/gator/daemon/FSDriver.h29
-rw-r--r--tools/gator/daemon/Fifo.cpp127
-rw-r--r--tools/gator/daemon/Fifo.h48
-rw-r--r--tools/gator/daemon/FtraceDriver.cpp118
-rw-r--r--tools/gator/daemon/FtraceDriver.h31
-rw-r--r--tools/gator/daemon/FtraceSource.cpp158
-rw-r--r--tools/gator/daemon/FtraceSource.h43
-rw-r--r--tools/gator/daemon/HwmonDriver.cpp245
-rw-r--r--tools/gator/daemon/HwmonDriver.h31
-rw-r--r--tools/gator/daemon/KMod.cpp119
-rw-r--r--tools/gator/daemon/KMod.h32
-rw-r--r--tools/gator/daemon/LocalCapture.cpp131
-rw-r--r--tools/gator/daemon/LocalCapture.h26
-rw-r--r--tools/gator/daemon/Logging.cpp78
-rw-r--r--tools/gator/daemon/Logging.h36
-rw-r--r--tools/gator/daemon/Makefile22
-rw-r--r--tools/gator/daemon/Makefile_aarch6412
-rw-r--r--tools/gator/daemon/MaliVideoDriver.cpp191
-rw-r--r--tools/gator/daemon/MaliVideoDriver.h45
-rw-r--r--tools/gator/daemon/MemInfoDriver.cpp93
-rw-r--r--tools/gator/daemon/MemInfoDriver.h37
-rw-r--r--tools/gator/daemon/Monitor.cpp82
-rw-r--r--tools/gator/daemon/Monitor.h33
-rw-r--r--tools/gator/daemon/NetDriver.cpp129
-rw-r--r--tools/gator/daemon/NetDriver.h39
-rw-r--r--tools/gator/daemon/OlySocket.cpp324
-rw-r--r--tools/gator/daemon/OlySocket.h64
-rw-r--r--tools/gator/daemon/OlyUtility.cpp227
-rw-r--r--tools/gator/daemon/OlyUtility.h42
-rw-r--r--tools/gator/daemon/PerfBuffer.cpp158
-rw-r--r--tools/gator/daemon/PerfBuffer.h41
-rw-r--r--tools/gator/daemon/PerfDriver.cpp351
-rw-r--r--tools/gator/daemon/PerfDriver.h57
-rw-r--r--tools/gator/daemon/PerfGroup.cpp244
-rw-r--r--tools/gator/daemon/PerfGroup.h65
-rw-r--r--tools/gator/daemon/PerfSource.cpp449
-rw-r--r--tools/gator/daemon/PerfSource.h55
-rw-r--r--tools/gator/daemon/Proc.cpp312
-rw-r--r--tools/gator/daemon/Proc.h21
-rw-r--r--tools/gator/daemon/Sender.cpp129
-rw-r--r--tools/gator/daemon/Sender.h42
-rw-r--r--tools/gator/daemon/SessionData.cpp261
-rw-r--r--tools/gator/daemon/SessionData.h102
-rw-r--r--tools/gator/daemon/SessionXML.cpp111
-rw-r--r--tools/gator/daemon/SessionXML.h42
-rw-r--r--tools/gator/daemon/Setup.cpp232
-rw-r--r--tools/gator/daemon/Setup.h18
-rw-r--r--tools/gator/daemon/Source.cpp33
-rw-r--r--tools/gator/daemon/Source.h40
-rw-r--r--tools/gator/daemon/StreamlineSetup.cpp272
-rw-r--r--tools/gator/daemon/StreamlineSetup.h50
-rw-r--r--tools/gator/daemon/UEvent.cpp77
-rw-r--r--tools/gator/daemon/UEvent.h36
-rw-r--r--tools/gator/daemon/UserSpaceSource.cpp94
-rw-r--r--tools/gator/daemon/UserSpaceSource.h38
-rw-r--r--tools/gator/daemon/c++.cpp40
-rw-r--r--tools/gator/daemon/common.mk52
-rw-r--r--tools/gator/daemon/defaults.xml84
-rw-r--r--tools/gator/daemon/escape.c75
-rw-r--r--tools/gator/daemon/events-ARM11.xml39
-rw-r--r--tools/gator/daemon/events-ARM11MPCore.xml26
-rw-r--r--tools/gator/daemon/events-CCI-400.xml98
-rw-r--r--tools/gator/daemon/events-CCN-504.xml113
-rw-r--r--tools/gator/daemon/events-Cortex-A15.xml68
-rw-r--r--tools/gator/daemon/events-Cortex-A17.xml86
-rw-r--r--tools/gator/daemon/events-Cortex-A5.xml36
-rw-r--r--tools/gator/daemon/events-Cortex-A53.xml87
-rw-r--r--tools/gator/daemon/events-Cortex-A57.xml87
-rw-r--r--tools/gator/daemon/events-Cortex-A7.xml43
-rw-r--r--tools/gator/daemon/events-Cortex-A8.xml52
-rw-r--r--tools/gator/daemon/events-Cortex-A9.xml65
-rw-r--r--tools/gator/daemon/events-Filesystem.xml11
-rw-r--r--tools/gator/daemon/events-Krait-architected.xml22
-rw-r--r--tools/gator/daemon/events-L2C-310.xml18
-rw-r--r--tools/gator/daemon/events-Linux.xml17
-rw-r--r--tools/gator/daemon/events-Mali-4xx.xml245
-rw-r--r--tools/gator/daemon/events-Mali-Midgard.xml46
-rw-r--r--tools/gator/daemon/events-Mali-Midgard_hw.xml91
-rw-r--r--tools/gator/daemon/events-Mali-T60x_hw.xml108
-rw-r--r--tools/gator/daemon/events-Mali-T62x_hw.xml109
-rw-r--r--tools/gator/daemon/events-Mali-T72x_hw.xml95
-rw-r--r--tools/gator/daemon/events-Mali-T76x_hw.xml108
-rw-r--r--tools/gator/daemon/events-Mali-V500.xml30
-rw-r--r--tools/gator/daemon/events-Perf-Hardware.xml12
-rw-r--r--tools/gator/daemon/events-Scorpion.xml107
-rw-r--r--tools/gator/daemon/events-ScorpionMP.xml90
-rw-r--r--tools/gator/daemon/events-ftrace.xml7
-rw-r--r--tools/gator/daemon/events_footer.xml1
-rw-r--r--tools/gator/daemon/events_header.xml2
-rw-r--r--tools/gator/daemon/k/perf_event.3.12.h792
l---------tools/gator/daemon/k/perf_event.h1
-rw-r--r--tools/gator/daemon/libsensors/COPYING.LGPL502
-rw-r--r--tools/gator/daemon/libsensors/access.c561
-rw-r--r--tools/gator/daemon/libsensors/access.h33
-rw-r--r--tools/gator/daemon/libsensors/conf-lex.c2881
-rw-r--r--tools/gator/daemon/libsensors/conf-lex.l372
-rw-r--r--tools/gator/daemon/libsensors/conf-parse.c2042
-rw-r--r--tools/gator/daemon/libsensors/conf-parse.h84
-rw-r--r--tools/gator/daemon/libsensors/conf-parse.y347
-rw-r--r--tools/gator/daemon/libsensors/conf.h34
-rw-r--r--tools/gator/daemon/libsensors/data.c278
-rw-r--r--tools/gator/daemon/libsensors/data.h184
-rw-r--r--tools/gator/daemon/libsensors/error.c92
-rw-r--r--tools/gator/daemon/libsensors/error.h74
-rw-r--r--tools/gator/daemon/libsensors/general.c85
-rw-r--r--tools/gator/daemon/libsensors/general.h39
-rw-r--r--tools/gator/daemon/libsensors/init.c341
-rw-r--r--tools/gator/daemon/libsensors/init.h28
-rw-r--r--tools/gator/daemon/libsensors/scanner.h32
-rw-r--r--tools/gator/daemon/libsensors/sensors.h311
-rw-r--r--tools/gator/daemon/libsensors/sysfs.c926
-rw-r--r--tools/gator/daemon/libsensors/sysfs.h43
-rw-r--r--tools/gator/daemon/libsensors/version.h1
-rw-r--r--tools/gator/daemon/main.cpp584
-rw-r--r--tools/gator/daemon/mxml/COPYING507
-rw-r--r--tools/gator/daemon/mxml/config.h96
-rw-r--r--tools/gator/daemon/mxml/mxml-attr.c314
-rw-r--r--tools/gator/daemon/mxml/mxml-entity.c449
-rw-r--r--tools/gator/daemon/mxml/mxml-file.c3056
-rw-r--r--tools/gator/daemon/mxml/mxml-get.c452
-rw-r--r--tools/gator/daemon/mxml/mxml-index.c659
-rw-r--r--tools/gator/daemon/mxml/mxml-node.c788
-rw-r--r--tools/gator/daemon/mxml/mxml-private.c323
-rw-r--r--tools/gator/daemon/mxml/mxml-private.h50
-rw-r--r--tools/gator/daemon/mxml/mxml-search.c280
-rw-r--r--tools/gator/daemon/mxml/mxml-set.c337
-rw-r--r--tools/gator/daemon/mxml/mxml-string.c469
-rw-r--r--tools/gator/daemon/mxml/mxml.h332
-rw-r--r--tools/lib/lk/Makefile3
-rw-r--r--virt/kvm/Kconfig10
-rw-r--r--virt/kvm/arm/arch_timer.c (renamed from arch/arm/kvm/arch_timer.c)68
-rw-r--r--virt/kvm/arm/vgic-v2.c265
-rw-r--r--virt/kvm/arm/vgic-v3.c247
-rw-r--r--virt/kvm/arm/vgic.c2464
-rw-r--r--virt/kvm/async_pf.c60
-rw-r--r--virt/kvm/eventfd.c154
-rw-r--r--virt/kvm/ioapic.c2
-rw-r--r--virt/kvm/ioapic.h1
-rw-r--r--virt/kvm/iommu.c12
-rw-r--r--virt/kvm/irq_comm.c41
-rw-r--r--virt/kvm/irqchip.c107
-rw-r--r--virt/kvm/kvm_main.c388
-rw-r--r--virt/kvm/vfio.c228
855 files changed, 91851 insertions, 7504 deletions
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index 14129f149a75..5e983031cc11 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -101,14 +101,23 @@ style to do this even if your device holds the default setting,
because this shows that you did think about these issues wrt. your
device.
-The query is performed via a call to dma_set_mask():
+The query is performed via a call to dma_set_mask_and_coherent():
- int dma_set_mask(struct device *dev, u64 mask);
+ int dma_set_mask_and_coherent(struct device *dev, u64 mask);
-The query for consistent allocations is performed via a call to
-dma_set_coherent_mask():
+which will query the mask for both streaming and coherent APIs together.
+If you have some special requirements, then the following two separate
+queries can be used instead:
- int dma_set_coherent_mask(struct device *dev, u64 mask);
+ The query for streaming mappings is performed via a call to
+ dma_set_mask():
+
+ int dma_set_mask(struct device *dev, u64 mask);
+
+ The query for consistent allocations is performed via a call
+ to dma_set_coherent_mask():
+
+ int dma_set_coherent_mask(struct device *dev, u64 mask);
Here, dev is a pointer to the device struct of your device, and mask
is a bit mask describing which bits of an address your device
@@ -137,7 +146,7 @@ exactly why.
The standard 32-bit addressing device would do something like this:
- if (dma_set_mask(dev, DMA_BIT_MASK(32))) {
+ if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
printk(KERN_WARNING
"mydev: No suitable DMA available.\n");
goto ignore_this_device;
@@ -171,22 +180,20 @@ the case would look like this:
int using_dac, consistent_using_dac;
- if (!dma_set_mask(dev, DMA_BIT_MASK(64))) {
+ if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
using_dac = 1;
consistent_using_dac = 1;
- dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
- } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
+ } else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
using_dac = 0;
consistent_using_dac = 0;
- dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
} else {
printk(KERN_WARNING
"mydev: No suitable DMA available.\n");
goto ignore_this_device;
}
-dma_set_coherent_mask() will always be able to set the same or a
-smaller mask as dma_set_mask(). However for the rare case that a
+The coherent mask will always be able to set the same or a
+smaller mask as the streaming mask. However, for the rare case that a
device driver only uses consistent allocations, one would have to
check the return value from dma_set_coherent_mask().
@@ -199,9 +206,9 @@ address you might do something like:
goto ignore_this_device;
}
-When dma_set_mask() is successful, and returns zero, the kernel saves
-away this mask you have provided. The kernel will use this
-information later when you make DMA mappings.
+When dma_set_mask() or dma_set_mask_and_coherent() is successful, and
+returns zero, the kernel saves away this mask you have provided. The
+kernel will use this information later when you make DMA mappings.
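
Pulling the calls above together, the whole mask-setting step usually
collapses to a few lines at probe time. A minimal sketch, not taken from
the patch (the mydev_dma_setup() name is invented for illustration):

    #include <linux/device.h>
    #include <linux/dma-mapping.h>

    static int mydev_dma_setup(struct device *dev)
    {
        /* Prefer 64-bit DMA and fall back to 32-bit, setting the
         * streaming and coherent masks together as described above. */
        if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
            return 0;
        if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
            return 0;

        dev_warn(dev, "No suitable DMA available\n");
        return -EIO;
    }
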
There is a case which we are aware of at this time, which is worth
mentioning in this documentation. If your device supports multiple
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 78a6c569d204..e865279cec58 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -142,6 +142,14 @@ internal API for use by the platform than an external API for use by
driver writers.
int
+dma_set_mask_and_coherent(struct device *dev, u64 mask)
+
+Checks to see if the mask is possible and updates the device
+streaming and coherent DMA mask parameters if it is.
+
+Returns: 0 if successful and a negative error if not.
+
+int
dma_set_mask(struct device *dev, u64 mask)
Checks to see if the mask is possible and updates the device
diff --git a/Documentation/arm/small_task_packing.txt b/Documentation/arm/small_task_packing.txt
new file mode 100644
index 000000000000..43f0a8b80234
--- /dev/null
+++ b/Documentation/arm/small_task_packing.txt
@@ -0,0 +1,136 @@
+Small Task Packing in the big.LITTLE MP Reference Patch Set
+
+What is small task packing?
+----
+Simply that the scheduler will fit as many small tasks on a single CPU
+as possible before using other CPUs. A small task is defined as one
+whose tracked load is less than 90% of a NICE_0 task. This is a change
+from the usual behavior since the scheduler will normally use an idle
+CPU for a waking task unless that task is considered cache hot.
+
+
+How is it implemented?
+----
+Since all small tasks must wake up relatively frequently, the main
+requirement for packing small tasks is to select a partly-busy CPU when
+waking rather than looking for an idle CPU. We use the tracked load of
+the CPU runqueue to determine how heavily loaded each CPU is and the
+tracked load of the task to determine if it will fit on the CPU. We
+always start with the lowest-numbered CPU in a sched domain and stop
+looking when we find a CPU with enough space for the task.
+
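The selection therefore amounts to a first-fit scan of the domain. A
minimal sketch of the idea; cpu_tracked_load(), task_tracked_load() and
the prev_cpu fallback are stand-ins for the patch set's real symbols and
policy, not its actual code:

    /* First-fit scan over a sched domain, lowest-numbered CPU first. */
    static int find_packing_cpu(struct sched_domain *sd,
                                struct task_struct *p, int prev_cpu)
    {
        int cpu;

        for_each_cpu(cpu, sched_domain_span(sd)) {
            /* Stop at the first CPU with enough space for the task. */
            if (cpu_tracked_load(cpu) + task_tracked_load(p) <= packing_limit)
                return cpu;
        }
        return prev_cpu;    /* nothing had room; assumed fallback */
    }
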
+Some further tweaks are necessary to suppress load balancing when the
+CPU is not fully loaded, otherwise the scheduler attempts to spread
+tasks evenly across the domain.
+
+
+How does it interact with the HMP patches?
+----
+Firstly, we only enable packing on the little domain. The intent is that
+the big domain spreads tasks amongst the available CPUs
+one-task-per-CPU. The little domain, however, attempts to use as
+little power as possible while servicing its tasks.
+
+Secondly, since we offload big tasks onto little CPUs in order to try
+to devote one CPU to each task, we have a threshold above which we do
+not try to pack a task and instead will select an idle CPU if possible.
+This maintains maximum forward progress for busy tasks temporarily
+demoted from big CPUs.
+
+
+Can the behaviour be tuned?
+----
+Yes, the load level of a 'full' CPU can be easily modified in the source
+and is exposed through sysfs as /sys/kernel/hmp/packing_limit to be
+changed at runtime. The presence of the packing behaviour is controlled
+by CONFIG_SCHED_HMP_LITTLE_PACKING and can be disabled at run-time
+using /sys/kernel/hmp/packing_enable.
+The definition of a small task is hard coded as 90% of NICE_0_LOAD
+and cannot be modified at run time.
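
For illustration, both knobs can be driven from userspace like any other
sysfs attribute. A minimal sketch — the value 650 is arbitrary, and
packing_enable is assumed to take 0/1 like most boolean sysfs knobs:

    #include <stdio.h>

    int main(void)
    {
        FILE *f;

        /* Raise the 'full CPU' load level; 650 is just an example. */
        f = fopen("/sys/kernel/hmp/packing_limit", "w");
        if (!f)
            return 1;
        fprintf(f, "650\n");
        fclose(f);

        /* Make sure packing is enabled. */
        f = fopen("/sys/kernel/hmp/packing_enable", "w");
        if (!f)
            return 1;
        fprintf(f, "1\n");
        fclose(f);

        return 0;
    }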
+
+
+Why do I need to tune it?
+----
+The optimal configuration is likely to be different depending upon the
+design and manufacturing of your SoC.
+
+In the main, there are two system effects from enabling small task
+packing.
+
+1. CPU operating point may increase
+2. wakeup latency of tasks may be increased
+
+There are also likely to be secondary effects from loading one CPU
+rather than spreading tasks.
+
+Note that all of these system effects are dependent upon the workload
+under consideration.
+
+
+CPU Operating Point
+----
+The primary impact of loading one CPU with a number of light tasks is to
+increase the compute requirement of that CPU since it is no longer idle
+as often. Increased compute requirement causes an increase in the
+frequency of the CPU through CPUfreq.
+
+Consider this example:
+We have a system with 3 CPUs which can operate at any frequency between
+350MHz and 1GHz. The system has 6 tasks which would each produce 10%
+load at 1GHz. The scheduler has frequency-invariant load scaling
+enabled. Our DVFS governor aims for 80% utilization at the chosen
+frequency.
+
+Without task packing, these tasks will be spread out amongst all CPUs
+such that each has 2. This will produce roughly 20% system load, and
+the frequency of the package will remain at 350MHz.
+
+With task packing set to the default packing_limit, all of these tasks
+will sit on one CPU and require a package frequency of ~750MHz to reach
+80% utilization (0.75 = 0.6 / 0.8).
+
+When a package operates on a single frequency domain, all CPUs in that
+package share frequency and voltage.
+
+Depending upon the SoC implementation there can be a significant amount
+of energy lost to leakage from idle CPUs. The decision about how
+loaded a CPU must be to be considered 'full' is therefore controllable
+through sysfs (/sys/kernel/hmp/packing_limit) and directly in the code.
+
+Continuing the example, let's set packing_limit to 450, which means we
+will pack tasks until the total load of all running tasks >= 450. In
+practice, this is very similar to a 55%-idle 1GHz CPU.
+
+Now we are only able to place 4 tasks on CPU0, and two will overflow
+onto CPU1. CPU0 will have a load of 40% and CPU1 will have a load of
+20%. In order to still hit 80% utilization, CPU0 now needs to
+operate at (0.5 = 0.4 / 0.8) 500MHz. This is a higher operating point
+than in the non-packing case, but in exchange CPU2 is no longer needed
+and can be power-gated.
+
+In order to use less energy, the saving from power-gating CPU2 must be
+more than the energy spent running CPU0 for the extra cycles. This
+depends upon the SoC implementation.
+
+This is obviously a contrived example requiring all the tasks to
+be runnable at the same time, but it illustrates the point.
+
+
+Wakeup Latency
+----
+This is an unavoidable consequence of trying to pack tasks together
+rather than giving them a CPU each. If you cannot find an acceptable
+level of wakeup latency, you should turn packing off.
+
+Cyclictest is a good test application for determining the added latency
+when configuring packing.
+
+
+Why is it turned off for the VersatileExpress V2P_CA15A7 CoreTile?
+----
+Simply, this core tile only has power gating for the whole A7 package.
+When small task packing is enabled, all our low-energy use cases
+normally fit onto one A7 CPU. We therefore end up with 2 mostly-idle
+CPUs and one mostly-busy CPU. This decreases the amount of time
+available where the whole package is idle and can be turned off.
+
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 9c4d388daddc..1b0c968098aa 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -68,13 +68,23 @@ Image target is available instead.
Requirement: MANDATORY
-The decompressed kernel image contains a 32-byte header as follows:
+The decompressed kernel image contains a 64-byte header as follows:
- u32 magic = 0x14000008; /* branch to stext, little-endian */
- u32 res0 = 0; /* reserved */
+ u32 code0; /* Executable code */
+ u32 code1; /* Executable code */
u64 text_offset; /* Image load offset */
+ u64 res0 = 0; /* reserved */
u64 res1 = 0; /* reserved */
u64 res2 = 0; /* reserved */
+ u64 res3 = 0; /* reserved */
+ u64 res4 = 0; /* reserved */
+ u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */
+ u32 res5 = 0; /* reserved */
+
+
+Header notes:
+
+- code0/code1 are responsible for branching to stext.
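
Expressed as a C structure for clarity, the 64-byte header above would
look like the sketch below; the kernel actually emits this layout from
assembly, and the struct name here is invented:

    #include <linux/types.h>

    struct arm64_image_header {        /* name is made up */
        __le32  code0;          /* Executable code */
        __le32  code1;          /* Executable code */
        __le64  text_offset;    /* Image load offset */
        __le64  res0;           /* reserved */
        __le64  res1;           /* reserved */
        __le64  res2;           /* reserved */
        __le64  res3;           /* reserved */
        __le64  res4;           /* reserved */
        __le32  magic;          /* 0x644d5241, "ARM\x64", little endian */
        __le32  res5;           /* reserved */
    };                          /* 64 bytes in total */
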
The image must be placed at the specified offset (currently 0x80000)
from the start of the system RAM and called there. The start of the
@@ -101,8 +111,14 @@ Before jumping into the kernel, the following conditions must be met:
- Caches, MMUs
The MMU must be off.
Instruction cache may be on or off.
- Data cache must be off and invalidated.
- External caches (if present) must be configured and disabled.
+ The address range corresponding to the loaded kernel image must be
+ cleaned to the PoC. In the presence of a system cache or other
+ coherent masters with caches enabled, this will typically require
+ cache maintenance by VA rather than set/way operations.
+ System caches which respect the architected cache maintenance by VA
+ operations must be configured and may be enabled.
+ System caches which do not respect architected cache maintenance by VA
+ operations (not recommended) must be configured and disabled.
- Architected timers
CNTFRQ must be programmed with the timer frequency.
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 5f583af0a6e1..d50fa618371b 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -21,7 +21,7 @@ The swapper_pgd_dir address is written to TTBR1 and never written to
TTBR0.
-AArch64 Linux memory layout:
+AArch64 Linux memory layout with 4KB pages:
Start End Size Use
-----------------------------------------------------------------------
@@ -35,17 +35,46 @@ ffffffbc00000000 ffffffbdffffffff 8GB vmemmap
ffffffbe00000000 ffffffbffbbfffff ~8GB [guard, future vmemmap]
-ffffffbffbc00000 ffffffbffbdfffff 2MB earlyprintk device
+ffffffbffa000000 ffffffbffaffffff 16MB PCI I/O space
-ffffffbffbe00000 ffffffbffbe0ffff 64KB PCI I/O space
+ffffffbffb000000 ffffffbffbbfffff 12MB [guard]
-ffffffbbffff0000 ffffffbcffffffff ~2MB [guard]
+ffffffbffbc00000 ffffffbffbdfffff 2MB fixed mappings
+
+ffffffbffbe00000 ffffffbffbffffff 2MB [guard]
ffffffbffc000000 ffffffbfffffffff 64MB modules
ffffffc000000000 ffffffffffffffff 256GB kernel logical memory map
+AArch64 Linux memory layout with 64KB pages:
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000000000000000 000003ffffffffff 4TB user
+
+fffffc0000000000 fffffdfbfffeffff ~2TB vmalloc
+
+fffffdfbffff0000 fffffdfbffffffff 64KB [guard page]
+
+fffffdfc00000000 fffffdfdffffffff 8GB vmemmap
+
+fffffdfe00000000 fffffdfffbbfffff ~8GB [guard, future vmemmap]
+
+fffffdfffa000000 fffffdfffaffffff 16MB PCI I/O space
+
+fffffdfffb000000 fffffdfffbbfffff 12MB [guard]
+
+fffffdfffbc00000 fffffdfffbdfffff 2MB fixed mappings
+
+fffffdfffbe00000 fffffdfffbffffff 2MB [guard]
+
+fffffdfffc000000 fffffdffffffffff 64MB modules
+
+fffffe0000000000 ffffffffffffffff 2TB kernel logical memory map
+
+
Translation table lookup with 4KB pages:
+--------+--------+--------+--------+--------+--------+--------+--------+
@@ -73,3 +102,10 @@ Translation table lookup with 64KB pages:
| | +--------------------------> [41:29] L2 index (only 38:29 used)
| +-------------------------------> [47:42] L1 index (not used)
+-------------------------------------------------> [63] TTBR0/1
+
+When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
+offset from the kernel VA (top 24 bits of the kernel VA set to zero):
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000004000000000 0000007fffffffff 256GB kernel objects mapped in HYP
diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt
new file mode 100644
index 000000000000..d9995f1f51b3
--- /dev/null
+++ b/Documentation/arm64/tagged-pointers.txt
@@ -0,0 +1,34 @@
+ Tagged virtual addresses in AArch64 Linux
+ =========================================
+
+Author: Will Deacon <will.deacon@arm.com>
+Date : 12 June 2013
+
+This document briefly describes the provision of tagged virtual
+addresses in the AArch64 translation system and their potential uses
+in AArch64 Linux.
+
+The kernel configures the translation tables so that translations made
+via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
+the virtual address ignored by the translation hardware. This frees up
+this byte for application use, with the following caveats:
+
+ (1) The kernel requires that all user addresses passed to EL1
+ are tagged with tag 0x00. This means that any syscall
+ parameters containing user virtual addresses *must* have
+ their top byte cleared before trapping to the kernel.
+
+ (2) Non-zero tags are not preserved when delivering signals.
+ This means that signal handlers in applications making use
+ of tags cannot rely on the tag information for user virtual
+ addresses being maintained for fields inside siginfo_t.
+ One exception to this rule is for signals raised in response
+ to watchpoint debug exceptions, where the tag information
+ will be preserved.
+
+ (3) Special care should be taken when using tagged pointers,
+ since it is likely that C compilers will not hazard two
+ virtual addresses differing only in the upper byte.
+
+The architecture prevents the use of a tagged PC, so the upper byte will
+be set to a sign-extension of bit 55 on exception return.
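
To make caveat (1) concrete, here is a minimal userspace sketch for
AArch64: memory accesses may go through a tagged pointer, but the tag
must be cleared before the address is passed to a syscall. The
tag_ptr()/untag_ptr() helpers are invented for illustration:

    #include <stdint.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define TAG_SHIFT 56

    static void *tag_ptr(void *p, uint8_t tag)
    {
        return (void *)((uintptr_t)p | ((uintptr_t)tag << TAG_SHIFT));
    }

    static void *untag_ptr(void *p)
    {
        return (void *)((uintptr_t)p & ~(0xffUL << TAG_SHIFT));
    }

    int main(void)
    {
        char *buf = malloc(64);
        char *tagged = tag_ptr(buf, 0x5a);

        /* Loads and stores through the tagged pointer are fine: the
         * hardware ignores bits 63:56 for TTBR0 translations. */
        tagged[0] = 'x';

        /* But the tag must be 0x00 before the kernel sees the address. */
        write(STDOUT_FILENO, untag_ptr(tagged), 1);

        free(buf);
        return 0;
    }
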
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index 20746e5abe6f..06fc7602593a 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -1,10 +1,14 @@
* ARM architected timer
-ARM cores may have a per-core architected timer, which provides per-cpu timers.
+ARM cores may have a per-core architected timer, which provides per-cpu timers,
+or a memory mapped architected timer, which provides up to 8 frames with a
+physical and optional virtual timer per frame.
-The timer is attached to a GIC to deliver its per-processor interrupts.
+The per-core architected timer is attached to a GIC to deliver its
+per-processor interrupts via PPIs. The memory mapped timer is attached to a GIC
+to deliver its interrupts via SPIs.
-** Timer node properties:
+** CP15 Timer node properties:
- compatible : Should at least contain one of
"arm,armv7-timer"
@@ -26,3 +30,52 @@ Example:
<1 10 0xf08>;
clock-frequency = <100000000>;
};
+
+** Memory mapped timer node properties:
+
+- compatible : Should at least contain "arm,armv7-timer-mem".
+
+- clock-frequency : The frequency of the main counter, in Hz. Optional.
+
+- reg : The control frame base address.
+
+Note that #address-cells, #size-cells, and ranges shall be present to ensure
+the CPU can address a frame's registers.
+
+A timer node has up to 8 frame sub-nodes, each with the following properties:
+
+- frame-number: 0 to 7.
+
+- interrupts : Interrupt list for physical and virtual timers in that order.
+ The virtual timer interrupt is optional.
+
+- reg : The first and second view base addresses in that order. The second view
+ base address is optional.
+
+- status : "disabled" indicates the frame is not available for use. Optional.
+
+Example:
+
+ timer@f0000000 {
+ compatible = "arm,armv7-timer-mem";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ reg = <0xf0000000 0x1000>;
+ clock-frequency = <50000000>;
+
+ frame@f0001000 {
+ frame-number = <0>;
+ interrupts = <0 13 0x8>,
+ <0 14 0x8>;
+ reg = <0xf0001000 0x1000>,
+ <0xf0002000 0x1000>;
+ };
+
+ frame@f0003000 {
+ frame-number = <1>;
+ interrupts = <0 15 0x8>;
+ reg = <0xf0003000 0x1000>;
+ status = "disabled";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt
new file mode 100644
index 000000000000..92d36e2aa877
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cci.txt
@@ -0,0 +1,172 @@
+=======================================================
+ARM CCI cache coherent interconnect binding description
+=======================================================
+
+ARM multi-cluster systems maintain intra-cluster coherency through a
+cache coherent interconnect (CCI) that is capable of monitoring bus
+transactions and managing coherency, TLB invalidations and memory barriers.
+
+It allows snooping and distributed virtual memory message broadcast across
+clusters, through a memory-mapped interface, with a global control register
+space and multiple sets of interface control registers, one per slave
+interface.
+
+Bindings for the CCI node follow the ePAPR standard, available from:
+
+www.power.org/documentation/epapr-version-1-1/
+
+with the addition of the bindings described in this document which are
+specific to ARM.
+
+* CCI interconnect node
+
+ Description: Describes a CCI cache coherent Interconnect component
+
+ Node name must be "cci".
+ Node's parent must be the root node /, and the address space visible
+ through the CCI interconnect is the same as the one seen from the
+ root node (i.e. from the CPUs' perspective, as per the DT standard).
+ Every CCI node has to define the following properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be set to
+ "arm,cci-400"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies base physical
+ address of CCI control registers common to all
+ interfaces.
+
+ - ranges:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Follow rules in the ePAPR for
+ hierarchical bus addressing. CCI interfaces
+ addresses refer to the parent node addressing
+ scheme to declare their register bases.
+
+ CCI interconnect node can define the following child nodes:
+
+ - CCI control interface nodes
+
+ Node name must be "slave-if".
+ Parent node must be CCI interconnect node.
+
+ A CCI control interface node must contain the following
+ properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be set to
+ "arm,cci-400-ctrl-if"
+
+ - interface-type:
+ Usage: required
+ Value type: <string>
+ Definition: must be set to one of {"ace", "ace-lite"}
+ depending on the interface type the node
+ represents.
+
+ - reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the base address and size of the
+ corresponding interface programming
+ registers.
+
+* CCI interconnect bus masters
+
+ Description: masters in the device tree connected to a CCI port
+ (inclusive of CPUs and their cpu nodes).
+
+ A CCI interconnect bus master node must contain the following
+ properties:
+
+ - cci-control-port:
+ Usage: required
+ Value type: <phandle>
+ Definition: a phandle containing the CCI control interface node
+ the master is connected to.
+
+Example:
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x0>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x1>;
+ };
+
+ CPU2: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x100>;
+ };
+
+ CPU3: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x101>;
+ };
+
+ };
+
+ dma0: dma@3000000 {
+ compatible = "arm,pl330", "arm,primecell";
+ cci-control-port = <&cci_control0>;
+ reg = <0x0 0x3000000 0x0 0x1000>;
+ interrupts = <10>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x6000>;
+
+ cci_control0: slave-if@1000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace-lite";
+ reg = <0x1000 0x1000>;
+ };
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+This CCI node corresponds to a CCI component whose control registers sit
+at address 0x000000002c090000.
+CCI slave interface @0x000000002c091000 is connected to dma controller dma0.
+CCI slave interface @0x000000002c094000 is connected to CPUs {CPU0, CPU1};
+CCI slave interface @0x000000002c095000 is connected to CPUs {CPU2, CPU3};
diff --git a/Documentation/devicetree/bindings/arm/gic.txt b/Documentation/devicetree/bindings/arm/gic.txt
index 3dfb0c0384f5..535774577238 100644
--- a/Documentation/devicetree/bindings/arm/gic.txt
+++ b/Documentation/devicetree/bindings/arm/gic.txt
@@ -49,6 +49,11 @@ Optional
regions, used when the GIC doesn't have banked registers. The offset is
cpu-offset * cpu-nr.
+- arm,routable-irqs : Total number of gic irq inputs which are not directly
+ connected from the peripherals, but are routed dynamically
+ by a crossbar/multiplexer preceding the GIC. The GIC irq
+ input line is assigned dynamically when the corresponding
+ peripheral's crossbar line is mapped.
Example:
intc: interrupt-controller@fff11000 {
@@ -56,6 +61,7 @@ Example:
#interrupt-cells = <3>;
#address-cells = <1>;
interrupt-controller;
+ arm,routable-irqs = <160>;
reg = <0xfff11000 0x1000>,
<0xfff10100 0x100>;
};
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 343781b9f246..4ce82d045a6b 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -16,6 +16,9 @@ Required properties:
"arm,arm1176-pmu"
"arm,arm1136-pmu"
- interrupts : 1 combined interrupt or 1 per core.
+- cluster : a phandle to the cluster to which it belongs.
+	     If there is more than one cluster with the same CPU type
+	     then there should be a separate PMU node per cluster.
Example:
diff --git a/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
new file mode 100644
index 000000000000..3b8fbf3c00c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
@@ -0,0 +1,19 @@
+ARM Dual Cluster System Configuration Block
+-------------------------------------------
+
+The Dual Cluster System Configuration Block (DCSCB) provides basic
+functionality for controlling clocks, resets and configuration pins in
+the Dual Cluster System implemented by the Real-Time System Model (RTSM).
+
+Required properties:
+
+- compatible : should be "arm,rtsm,dcscb"
+
+- reg : physical base address and the size of the registers window
+
+Example:
+
+ dcscb@60000000 {
+ compatible = "arm,rtsm,dcscb";
+ reg = <0x60000000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt
new file mode 100644
index 000000000000..1a2cd3d266db
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt
@@ -0,0 +1,38 @@
+* Generic Mailbox Controller and client driver bindings
+
+Generic binding to provide a way for Mailbox controller drivers to
+assign appropriate mailbox channel to client drivers.
+
+* Mailbox Controller
+
+Required property:
+- #mbox-cells: Must be at least 1. Number of cells in a mailbox
+ specifier.
+
+Example:
+ mailbox: mailbox {
+ ...
+ #mbox-cells = <1>;
+ };
+
+
+* Mailbox Client
+
+Required property:
+- mboxes: List of phandle and mailbox channel specifiers.
+
+Optional property:
+- mbox-names: List of identifier strings for each mailbox channel
+		required by the client. The use of this property
+		is discouraged in favor of requesting a mailbox by
+		its index in the 'mboxes' list; instead, platforms
+		may define legible channel indices in DT headers.
+
+Example:
+ pwr_cntrl: power {
+ ...
+ mbox-names = "pwr-ctrl", "rpc";
+ mboxes = <&mailbox 0
+ &mailbox 1>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/vexpress-spc.txt b/Documentation/devicetree/bindings/mfd/vexpress-spc.txt
new file mode 100644
index 000000000000..1d71dc2ff151
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/vexpress-spc.txt
@@ -0,0 +1,35 @@
+* ARM Versatile Express Serial Power Controller device tree bindings
+
+Latest ARM development boards implement a power management interface (serial
+power controller - SPC) that is capable of managing power/voltage and
+operating point transitions, through a memory mapped register interface.
+
+On testchips like TC2 it also provides a configuration interface that can
+be used to read/write values which cannot be read/written through simple
+memory mapped reads/writes.
+
+- spc node
+
+ - compatible:
+ Usage: required
+ Value type: <stringlist>
+ Definition: must be
+ "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc"
+ - reg:
+ Usage: required
+		Value type: <prop-encoded-array>
+ Definition: A standard property that specifies the base address
+ and the size of the SPC address space
+ - interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: SPC interrupt configuration. A standard property
+ that follows ePAPR interrupts specifications
+
+Example:
+
+spc: spc@7fff0000 {
+ compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc";
+ reg = <0 0x7FFF0000 0 0x1000>;
+ interrupts = <0 95 4>;
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
index c95ea8278f87..b275be49a546 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -126,3 +126,55 @@ device; they may be grandchildren, for example. Whether this is legal, and
whether there is any interaction between the child and intermediate parent
nodes, is again defined entirely by the binding for the individual pin
controller device.
+
+== Using generic pinconfig options ==
+
+Generic pinconfig parameters can be used by defining a separate node containing
+the applicable parameters (and optional values), like:
+
+pcfg_pull_up: pcfg_pull_up {
+ bias-pull-up;
+ drive-strength = <20>;
+};
+
+This node should then be referenced in the appropriate pinctrl node as a phandle
+and parsed in the driver using the pinconf_generic_parse_dt_config function.
+
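+For instance, a minimal driver-side sketch, assuming the three-argument
+form of pinconf_generic_parse_dt_config and a device_node 'np' that
+points at a node like pcfg_pull_up above (error handling abbreviated):
+
+	unsigned long *configs;
+	unsigned int nconfigs;
+	int ret;
+
+	/* Collect all generic parameters set in the node */
+	ret = pinconf_generic_parse_dt_config(np, &configs, &nconfigs);
+	if (ret)
+		return ret;
+	/* ... use 'configs' with the pinconf core, then kfree(configs) ... */
+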
+Supported configuration parameters are:
+
+bias-disable - disable any pin bias
+bias-high-impedance - high impedance mode ("third-state", "floating")
+bias-bus-hold - latch weakly
+bias-pull-up - pull up the pin
+bias-pull-down - pull down the pin
+bias-pull-pin-default - use pin-default pull state
+drive-push-pull - drive actively high and low
+drive-open-drain - drive with open drain
+drive-open-source - drive with open source
+drive-strength - sink or source at most X mA
+input-enable - enable input on pin (no effect on output)
+input-disable - disable input on pin (no effect on output)
+input-schmitt-enable - enable schmitt-trigger mode
+input-schmitt-disable - disable schmitt-trigger mode
+input-debounce		- debounce mode with debounce time X
+low-power-enable - enable low power mode
+low-power-disable - disable low power mode
+output-low - set the pin to output mode with low level
+output-high - set the pin to output mode with high level
+slew-rate - set the slew rate
+
+Arguments for parameters:
+
+- bias-pull-up, -down and -pin-default take as optional argument 0 to disable
+  the pull or, on hardware supporting it, the pull strength in Ohm. bias-disable
+  will also disable any active pull.
+
+- drive-strength takes as argument the target strength in mA.
+
+- input-debounce takes the debounce time in usec as argument
+ or 0 to disable debouncing
+
+All parameters not listed here do not take an argument.
+
+More in-depth documentation on these parameters can be found in
+<include/linux/pinctrl/pinconf-generic.h>
diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
new file mode 100644
index 000000000000..f5db6b72a36f
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -0,0 +1,595 @@
+* Thermal Framework Device Tree descriptor
+
+This file describes a generic binding to provide a way of
+defining hardware thermal structure using device tree.
+A thermal structure includes thermal zones and their components,
+such as trip points, polling intervals, sensors and cooling devices
+binding descriptors.
+
+The target of device tree thermal descriptors is to describe only
+the hardware thermal aspects. The thermal device tree bindings are
+not about how the system must be controlled or which algorithm or policy
+must be put in place.
+
+There are five types of nodes involved to describe thermal bindings:
+- thermal sensors: devices which may be used to take temperature
+ measurements.
+- cooling devices: devices which may be used to dissipate heat.
+- trip points: describe key temperatures at which cooling is recommended. The
+ set of points should be chosen based on hardware limits.
+- cooling maps: used to describe links between trip points and cooling devices;
+- thermal zones: used to describe thermal data within the hardware;
+
+The following is a description of each of these node types.
+
+* Thermal sensor devices
+
+Thermal sensor devices are nodes providing temperature sensing capabilities on
+thermal zones. Typical devices are I2C ADC converters and bandgaps. These are
+nodes providing temperature data to thermal zones. Thermal sensor devices may
+control one or more internal sensors.
+
+Required property:
+- #thermal-sensor-cells: Used to provide sensor device specific information
+ Type: unsigned while referring to it. Typically 0 on thermal sensor
+ Size: one cell nodes with only one sensor, and at least 1 on nodes
+ with several internal sensors, in order
+ to identify uniquely the sensor instances within
+ the IC. See thermal zone binding for more details
+ on how consumers refer to sensor devices.
+
+* Cooling device nodes
+
+Cooling devices are nodes providing control on power dissipation. There
+are essentially two ways to provide control on power dissipation. First
+is by means of regulating device performance, which is known as passive
+cooling. A typical passive cooling is a CPU that has dynamic voltage and
+frequency scaling (DVFS), and uses lower frequencies as cooling states.
+Second is by means of activating devices in order to remove
+the dissipated heat, which is known as active cooling, e.g. regulating
+fan speeds. In both cases, cooling devices shall have a way to determine
+their current cooling state.
+
+Any cooling device has a range of cooling states (i.e. different levels
+of heat dissipation). For example a fan's cooling states correspond to
+the different fan speeds possible. Cooling states are referred to by
+single unsigned integers, where larger numbers mean greater heat
+dissipation. The precise set of cooling states associated with a device
+(as referred to by the cooling-min-state and cooling-max-state
+properties) should be defined in a particular device's binding.
+For more examples of cooling devices, refer to the example sections below.
+
+Required properties:
+- cooling-min-state: An integer indicating the smallest
+ Type: unsigned cooling state accepted. Typically 0.
+ Size: one cell
+
+- cooling-max-state: An integer indicating the largest
+ Type: unsigned cooling state accepted.
+ Size: one cell
+
+- #cooling-cells: Used to provide cooling device specific information
+ Type: unsigned while referring to it. Must be at least 2, in order
+ Size: one cell to specify minimum and maximum cooling state used
+ in the reference. The first cell is the minimum
+ cooling state requested and the second cell is
+ the maximum cooling state requested in the reference.
+ See Cooling device maps section below for more details
+ on how consumers refer to cooling devices.
+
+* Trip points
+
+The trip node is a node to describe a point in the temperature domain
+at which the system takes an action. This node describes just the point,
+not the action.
+
+Required properties:
+- temperature: An integer indicating the trip temperature level,
+ Type: signed in millicelsius.
+ Size: one cell
+
+- hysteresis: A low hysteresis value on temperature property (above).
+ Type: unsigned This is a relative value, in millicelsius.
+ Size: one cell
+
+- type: a string containing the trip type. Expected values are:
+ "active": A trip point to enable active cooling
+ "passive": A trip point to enable passive cooling
+ "hot": A trip point to notify emergency
+ "critical": Hardware not reliable.
+ Type: string
+
+* Cooling device maps
+
+The cooling device maps node is a node to describe how cooling devices
+get assigned to trip points of the zone. The cooling devices are expected
+to be loaded in the target system.
+
+Required properties:
+- cooling-device: A phandle of a cooling device with its specifier,
+ Type: phandle + referring to which cooling device is used in this
+ cooling specifier binding. In the cooling specifier, the first cell
+ is the minimum cooling state and the second cell
+ is the maximum cooling state used in this map.
+- trip: A phandle of a trip point node within the same thermal
+ Type: phandle of zone.
+ trip point node
+
+Optional property:
+- contribution: The cooling contribution to the thermal zone of the
+ Type: unsigned referred cooling device at the referred trip point.
+ Size: one cell The contribution is a ratio of the sum
+ of all cooling contributions within a thermal zone.
+
+Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle
+limit specifier means:
+(i) - minimum state allowed for minimum cooling state used in the reference.
+(ii) - maximum state allowed for maximum cooling state used in the reference.
+Refer to include/dt-bindings/thermal/thermal.h for definition of this constant.
+
+* Thermal zone nodes
+
+The thermal zone node is the node containing all the required info
+for describing a thermal zone, including its cooling device bindings. The
+thermal zone node must contain, apart from its own properties, one sub-node
+containing trip nodes and one sub-node containing all the zone cooling maps.
+
+Required properties:
+- polling-delay: The maximum number of milliseconds to wait between polls
+ Type: unsigned when checking this thermal zone.
+ Size: one cell
+
+- polling-delay-passive: The maximum number of milliseconds to wait
+ Type: unsigned between polls when performing passive cooling.
+ Size: one cell
+
+- thermal-sensors: A list of thermal sensor phandles and sensor specifier
+ Type: list of used while monitoring the thermal zone.
+ phandles + sensor
+ specifier
+
+- trips: A sub-node which is a container of only trip point nodes
+ Type: sub-node required to describe the thermal zone.
+
+- cooling-maps: A sub-node which is a container of only cooling device
+ Type: sub-node map nodes, used to describe the relation between trips
+ and cooling devices.
+
+Optional property:
+- coefficients: An array of integers (one signed cell) containing
+ Type: array coefficients to compose a linear relation between
+ Elem size: one cell the sensors listed in the thermal-sensors property.
+  Elem type: signed	Coefficients default to 1, in case this property
+			is not specified. A simple linear polynomial is used:
+			Z = c0 * x0 + c1 * x1 + ... + c(n-1) * x(n-1) + cn.
+
+ The coefficients are ordered and they match with sensors
+ by means of sensor ID. Additional coefficients are
+ interpreted as constant offset.
+
+Note: The delay properties are bound to the maximum dT/dt (temperature
+derivative over time) in two situations for a thermal zone:
+(i) - when passive cooling is activated (polling-delay-passive); and
+(ii) - when the zone just needs to be monitored (polling-delay) or
+when active cooling is activated.
+
+The maximum dT/dt is highly bound to hardware power consumption and dissipation
+capability. The delays should be chosen to account for said max dT/dt,
+such that a device does not cross several trip boundaries unexpectedly
+between polls. Choosing the right polling delays shall avoid having the
+device in temperature ranges that may damage the silicon structures and
+reduce silicon lifetime.
+
+* The thermal-zones node
+
+The "thermal-zones" node is a container for all thermal zone nodes. It shall
+contain only sub-nodes describing thermal zones as in the section
+"Thermal zone nodes". The "thermal-zones" node appears under "/".
+
+* Examples
+
+Below are several examples on how to use thermal data descriptors
+using device tree bindings:
+
+(a) - CPU thermal zone
+
+The CPU thermal zone example below describes how to setup one thermal zone
+using one single sensor as temperature source and many cooling devices and
+power dissipation control sources.
+
+#include <dt-bindings/thermal/thermal.h>
+
+cpus {
+ /*
+ * Here is an example of describing a cooling device for a DVFS
+ * capable CPU. The CPU node describes its four OPPs.
+ * The cooling states possible are 0..3, and they are
+ * used as OPP indexes. The minimum cooling state is 0, which means
+ * all four OPPs can be available to the system. The maximum
+	 * cooling state is 3, which means only the lowest OPP (198MHz@0.85V)
+ * can be available in the system.
+ */
+ cpu0: cpu@0 {
+ ...
+ operating-points = <
+ /* kHz uV */
+ 970000 1200000
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+ cooling-min-state = <0>;
+ cooling-max-state = <3>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
+ ...
+};
+
+&i2c1 {
+ ...
+ /*
+ * A simple fan controller which supports 10 speeds of operation
+ * (represented as 0-9).
+ */
+ fan0: fan@0x48 {
+ ...
+ cooling-min-state = <0>;
+ cooling-max-state = <9>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
+};
+
+ocp {
+ ...
+ /*
+ * A simple IC with a single bandgap temperature sensor.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ thermal-sensors = <&bandgap0>;
+
+ trips {
+			cpu_alert0: cpu-alert0 {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "active";
+ };
+			cpu_alert1: cpu-alert1 {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+			cpu_crit: cpu-crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+				trip = <&cpu_alert0>;
+				cooling-device = <&fan0 THERMAL_NO_LIMIT 4>;
+ };
+ map1 {
+				trip = <&cpu_alert1>;
+				cooling-device = <&fan0 5 THERMAL_NO_LIMIT>;
+ };
+ map2 {
+				trip = <&cpu_alert1>;
+				cooling-device =
+					<&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+};
+
+In the example above, the ADC sensor (bandgap0) at address 0x0000ED00 is
+used to monitor the zone 'cpu-thermal' using its sole sensor. A fan
+device (fan0) is controlled via I2C bus 1, at address 0x48, and has ten
+different cooling states 0-9. It is used to remove the heat out of
+the thermal zone 'cpu-thermal' using its cooling states
+from its minimum to 4, when it reaches trip point 'cpu-alert0'
+at 90C, as an example of active cooling. The same cooling device is used at
+'cpu-alert1', but from 5 to its maximum state. The cpu@0 device is also
+linked to the same thermal zone, 'cpu-thermal', as a passive cooling device,
+using all its cooling states at trip point 'cpu-alert1',
+which is a trip point at 100C. On the thermal zone 'cpu-thermal', at the
+temperature of 125C, represented by the trip point 'cpu-crit', the silicon
+is not reliable anymore.
+
+(b) - IC with several internal sensors
+
+The example below describes how to deploy several thermal zones based off a
+single sensor IC, assuming it has several internal sensors. This is a common
+case on SoC designs with several internal IPs that may need different thermal
+requirements, and thus may have their own sensor to monitor or detect internal
+hotspots in their silicon.
+
+#include <dt-bindings/thermal/thermal.h>
+
+ocp {
+ ...
+ /*
+ * A simple IC with several bandgap temperature sensors.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <1>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 0>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+			cpu_alert: cpu-alert {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+			cpu_crit: cpu-crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+
+ gpu-thermal: gpu-thermal {
+ polling-delay-passive = <120>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 1>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+			gpu_alert: gpu-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+			gpu_crit: gpu-crit {
+ temperature = <105000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+
+ dsp-thermal: dsp-thermal {
+ polling-delay-passive = <50>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 2>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+			dsp_alert: dsp-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+			dsp_crit: dsp-crit {
+ temperature = <135000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+};
+
+In the example above, there is one bandgap IC which has the capability to
+monitor three sensors. The hardware has been designed so that the sensors
+are placed at different places on the die to monitor different temperature
+hotspots: one for the CPU thermal zone, one for the GPU thermal zone and the
+other to monitor a DSP thermal zone.
+
+Thus, there is a need to assign each sensor provided by the bandgap IC
+to different thermal zones. This is achieved by means of using the
+#thermal-sensor-cells property and using the first cell of the sensor
+specifier as sensor ID. In the example, then, <bandgap 0> is used to
+monitor CPU thermal zone, <bandgap 1> is used to monitor GPU thermal
+zone and <bandgap 2> is used to monitor DSP thermal zone. Each zone
+may be uncorrelated, having its own dT/dt requirements, trips
+and cooling maps.
+
+
+(c) - Several sensors within one single thermal zone
+
+The example below illustrates how to use more than one sensor within
+one thermal zone.
+
+#include <dt-bindings/thermal/thermal.h>
+
+&i2c1 {
+ ...
+ /*
+ * A simple IC with a single temperature sensor.
+ */
+ adc: sensor@0x49 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+ocp {
+ ...
+ /*
+ * A simple IC with a single bandgap temperature sensor.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ thermal-sensors = <&bandgap0>, /* cpu */
+ <&adc>; /* pcb north */
+
+ /* hotspot = 100 * bandgap - 120 * adc + 484 */
+ coefficients = <100 -120 484>;
+
+ trips {
+ ...
+ };
+
+ cooling-maps {
+ ...
+ };
+ };
+};
+
+In some cases, there is a need to use more than one sensor to extrapolate
+a thermal hotspot in the silicon. The above example illustrates this situation.
+For instance, it may be the case that a sensor external to the CPU IP is
+placed close to a CPU hotspot and, together with the internal CPU sensor,
+is used to determine the hotspot. Assuming this is the case for the above example,
+the hypothetical extrapolation rule would be:
+ hotspot = 100 * bandgap - 120 * adc + 484
+
+In other contexts, the same idea can be used to add a fixed offset. For instance,
+consider the hotspot extrapolation rule below:
+ hotspot = 1 * adc + 6000
+
+In the above equation, the hotspot is always 6C higher than what is read
+from the ADC sensor. The binding would be then:
+ thermal-sensors = <&adc>;
+
+ /* hotspot = 1 * adc + 6000 */
+ coefficients = <1 6000>;
+
+(d) - Board thermal
+
+The board thermal example below illustrates how to setup one thermal zone
+with many sensors and many cooling devices.
+
+#include <dt-bindings/thermal/thermal.h>
+
+&i2c1 {
+ ...
+ /*
+	 * An IC with several temperature sensors.
+ */
+	adc_dummy: sensor@0x50 {
+ ...
+ #thermal-sensor-cells = <1>; /* sensor internal ID */
+ };
+};
+
+thermal-zones {
+ batt-thermal {
+ polling-delay-passive = <500>; /* milliseconds */
+ polling-delay = <2500>; /* milliseconds */
+
+ /* sensor ID */
+		thermal-sensors = <&adc_dummy 4>;
+
+ trips {
+ ...
+ };
+
+ cooling-maps {
+ ...
+ };
+ };
+
+ board-thermal: board-thermal {
+ polling-delay-passive = <1000>; /* milliseconds */
+ polling-delay = <2500>; /* milliseconds */
+
+ /* sensor ID */
+		thermal-sensors = <&adc_dummy 0>, /* pcb top edge */
+				  <&adc_dummy 1>, /* lcd */
+				  <&adc_dummy 2>; /* back cover */
+ /*
+ * An array of coefficients describing the sensor
+ * linear relation. E.g.:
+ * z = c1*x1 + c2*x2 + c3*x3
+ */
+ coefficients = <1200 -345 890>;
+
+ trips {
+ /* Trips are based on resulting linear equation */
+			cpu_trip: cpu-trip {
+ temperature = <60000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+			gpu_trip: gpu-trip {
+ temperature = <55000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+			};
+			lcd_trip: lcd-trip {
+ temperature = <53000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+			crit_trip: crit-trip {
+ temperature = <68000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+				trip = <&cpu_trip>;
+ cooling-device = <&cpu0 0 2>;
+ contribution = <55>;
+ };
+ map1 {
+				trip = <&gpu_trip>;
+ cooling-device = <&gpu0 0 2>;
+ contribution = <20>;
+ };
+ map2 {
+				trip = <&lcd_trip>;
+ cooling-device = <&lcd0 5 10>;
+ contribution = <15>;
+ };
+ };
+ };
+};
+
+The above example is a mix of the previous examples: a sensor IP with several
+internal sensors is used to monitor different zones, one of which is composed
+of several sensors and uses different cooling devices.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1311a48a7367..15b24a2be6b1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1240,6 +1240,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
See comment before ip2_setup() in
drivers/char/ip2/ip2base.c.
+ irqaffinity= [SMP] Set the default irq affinity mask
+ Format:
+ <cpu number>,...,<cpu number>
+ or
+ <cpu number>-<cpu number>
+ (must be a positive range in ascending order)
+ or a mixture
+ <cpu number>,...,<cpu number>-<cpu number>
+
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken
@@ -3345,6 +3354,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
that this also can be controlled per-workqueue for
workqueues visible under /sys/bus/workqueue/.
+ workqueue.power_efficient
+ Per-cpu workqueues are generally preferred because
+ they show better performance thanks to cache
+ locality; unfortunately, per-cpu workqueues tend to
+ be more power hungry than unbound workqueues.
+
+ Enabling this makes the per-cpu workqueues which
+ were observed to contribute significantly to power
+ consumption unbound, leading to measurably lower
+			power usage at the cost of a small performance
+			overhead.
+
+ The default value of this parameter is determined by
+ the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
new file mode 100644
index 000000000000..60f43ff629aa
--- /dev/null
+++ b/Documentation/mailbox.txt
@@ -0,0 +1,122 @@
+ The Common Mailbox Framework
+ Jassi Brar <jaswinder.singh@linaro.org>
+
+ This document aims to help developers write client and controller
+drivers for the API. But before we start, let us note that the
+client (especially) and controller drivers are likely going to be
+very platform specific because the remote firmware is likely to be
+proprietary and implement a non-standard protocol. So even if two
+platforms employ, say, the PL320 controller, the client drivers can't
+be shared across them. Even the PL320 driver might need to accommodate
+some platform specific quirks. So the API is meant mainly to avoid
+similar copies of code written for each platform. Having said that,
+nothing prevents the remote f/w from also being Linux based and using
+the same API there. However, none of that helps us locally because we
+only ever deal at the client's protocol level.
+ Some of the choices made during implementation are the result of this
+peculiarity of this "common" framework.
+
+
+
+ Part 1 - Controller Driver (See include/linux/mailbox_controller.h)
+
+ Allocate mbox_controller and the array of mbox_chan.
+Populate mbox_chan_ops; all methods except peek_data() are mandatory.
+The controller driver might know that a message has been consumed
+by the remote by getting an IRQ or by polling some hardware flag,
+or it might never know (only the client knows, by way of the protocol).
+The methods in order of preference are IRQ -> Poll -> None, which
+the controller driver should indicate by setting 'txdone_irq' or
+'txdone_poll' or neither.
+
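+A minimal controller-side sketch under those rules (all names here are
+illustrative, not part of the API):
+
+static struct mbox_chan demo_chans[4];
+static struct mbox_controller demo_mbox;
+
+static struct mbox_chan_ops demo_chan_ops = {
+	.send_data = demo_send_data,	/* mandatory: push a message to remote */
+	.startup = demo_startup,	/* mandatory: a client took the channel */
+	.shutdown = demo_shutdown,	/* mandatory: the client freed the channel */
+	.last_tx_done = demo_last_tx_done, /* only needed with txdone_poll */
+};
+
+static int demo_probe(struct platform_device *pdev)
+{
+	demo_mbox.dev = &pdev->dev;
+	demo_mbox.ops = &demo_chan_ops;
+	demo_mbox.chans = demo_chans;
+	demo_mbox.num_chans = ARRAY_SIZE(demo_chans);
+	demo_mbox.txdone_irq = true;	/* remote raises an IRQ upon consuming */
+	return mbox_controller_register(&demo_mbox);
+}
+
+With 'txdone_irq' set, the driver reports completion from its IRQ handler
+by calling mbox_chan_txdone() on the channel.
+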
+
+ Part 2 - Client Driver (See include/linux/mailbox_client.h)
+
+ The client might want to operate in blocking mode (synchronously
+send a message through before returning) or non-blocking/async mode (submit
+a message and a callback function to the API and return immediately).
+
+
+struct demo_client {
+ struct mbox_client cl;
+ struct mbox_chan *mbox;
+ struct completion c;
+ bool async;
+ /* ... */
+};
+
+/*
+ * This is the handler for data received from remote. The behaviour is purely
+ * dependent upon the protocol. This is just an example.
+ */
+static void message_from_remote(struct mbox_client *cl, void *mssg)
+{
+	struct demo_client *dc = container_of(cl,
+						struct demo_client, cl);
+	if (dc->async) {
+ if (is_an_ack(mssg)) {
+ /* An ACK to our last sample sent */
+ return; /* Or do something else here */
+ } else { /* A new message from remote */
+ queue_req(mssg);
+ }
+ } else {
+ /* Remote f/w sends only ACK packets on this channel */
+ return;
+ }
+}
+
+static void sample_sent(struct mbox_client *cl, void *mssg, int r)
+{
+	struct demo_client *dc = container_of(cl,
+						struct demo_client, cl);
+ complete(&dc->c);
+}
+
+static void client_demo(struct platform_device *pdev)
+{
+ struct demo_client *dc_sync, *dc_async;
+ /* The controller already knows async_pkt and sync_pkt */
+ struct async_pkt ap;
+ struct sync_pkt sp;
+
+ dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
+ dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
+
+ /* Populate non-blocking mode client */
+ dc_async->cl.dev = &pdev->dev;
+ dc_async->cl.rx_callback = message_from_remote;
+ dc_async->cl.tx_done = sample_sent;
+ dc_async->cl.tx_block = false;
+ dc_async->cl.tx_tout = 0; /* doesn't matter here */
+ dc_async->cl.knows_txdone = false; /* depending upon protocol */
+ dc_async->async = true;
+ init_completion(&dc_async->c);
+
+ /* Populate blocking mode client */
+ dc_sync->cl.dev = &pdev->dev;
+ dc_sync->cl.rx_callback = message_from_remote;
+ dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
+ dc_sync->cl.tx_block = true;
+	dc_sync->cl.tx_tout = 500; /* wait up to half a second */
+ dc_sync->cl.knows_txdone = false; /* depending upon protocol */
+ dc_sync->async = false;
+
+ /* ASync mailbox is listed second in 'mboxes' property */
+ dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
+ /* Populate data packet */
+ /* ap.xxx = 123; etc */
+ /* Send async message to remote */
+ mbox_send_message(dc_async->mbox, &ap);
+
+ /* Sync mailbox is listed first in 'mboxes' property */
+ dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
+ /* Populate data packet */
+ /* sp.abc = 123; etc */
+ /* Send message to remote in blocking mode */
+ mbox_send_message(dc_sync->mbox, &sp);
+ /* At this point 'sp' has been sent */
+
+ /* Now wait for async chan to be done */
+ wait_for_completion(&dc_async->c);
+}
diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 447fd4cd54ec..c8763806c65e 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -203,15 +203,8 @@ using a certain resistor value - pull up and pull down - so that the pin has a
stable value when nothing is driving the rail it is connected to, or when it's
unconnected.
-Pin configuration can be programmed either using the explicit APIs described
-immediately below, or by adding configuration entries into the mapping table;
-see section "Board/machine configuration" below.
-
-For example, a platform may do the following to pull up a pin to VDD:
-
-#include <linux/pinctrl/consumer.h>
-
-ret = pin_config_set("foo-dev", "FOO_GPIO_PIN", PLATFORM_X_PULL_UP);
+Pin configuration can be programmed by adding configuration entries into the
+mapping table; see section "Board/machine configuration" below.
The format and meaning of the configuration parameter, PLATFORM_X_PULL_UP
above, is entirely defined by the pin controller driver.
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index a71bd5b90fe8..37c54863f611 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -142,6 +142,11 @@ temperature) and throttle appropriate devices.
This is an optional feature where some platforms can choose not to
provide this data.
.governor_name: Name of the thermal governor used for this zone
+    .no_hwmon: a boolean to indicate if the thermal to hwmon sysfs interface
+               is required. When no_hwmon == false, a hwmon sysfs interface
+               will be created. When no_hwmon == true, nothing will be done.
+               In case the thermal_zone_params is NULL, the hwmon interface
+               will be created (for backward compatibility).
.num_tbps: Number of thermal_bind_params entries for this zone
.tbp: thermal_bind_params entries
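+
+A minimal sketch of a driver opting out of the hwmon interface, assuming
+the current thermal_zone_device_register() prototype (the zone name, ops,
+trip counts and delays are illustrative):
+
+	static struct thermal_zone_params demo_tzp = {
+		.governor_name = "step_wise",
+		.no_hwmon = true,	/* skip the hwmon sysfs view */
+	};
+
+	tz = thermal_zone_device_register("demo-thermal", ntrips, mask,
+					  devdata, &demo_ops, &demo_tzp,
+					  passive_delay, polling_delay);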
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 5f91eda91647..257a1f1eecc7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl.
4.4 KVM_CHECK_EXTENSION
-Capability: basic
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
Architectures: all
-Type: system ioctl
+Type: system ioctl, vm ioctl
Parameters: extension identifier (KVM_CAP_*)
Returns: 0 if unsupported; 1 (or some other positive integer) if supported
@@ -160,6 +160,9 @@ receives an integer that describes the extension availability.
Generally 0 means no and 1 means yes, but some extensions may report
additional information in the integer return value.
+Based on their initialization different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd).
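+
+A minimal sketch of querying a capability on the VM file descriptor
+(error handling omitted):
+
+	int kvm_fd = open("/dev/kvm", O_RDWR);
+	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
+	/* ask this VM, not the system, whether the cap is available */
+	int has_cap = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);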
4.5 KVM_GET_VCPU_MMAP_SIZE
@@ -280,7 +283,7 @@ kvm_run' (see below).
4.11 KVM_GET_REGS
Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
Type: vcpu ioctl
Parameters: struct kvm_regs (out)
Returns: 0 on success, -1 on error
@@ -301,7 +304,7 @@ struct kvm_regs {
4.12 KVM_SET_REGS
Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
Type: vcpu ioctl
Parameters: struct kvm_regs (in)
Returns: 0 on success, -1 on error
@@ -587,7 +590,7 @@ struct kvm_fpu {
4.24 KVM_CREATE_IRQCHIP
Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, ARM
+Architectures: x86, ia64, ARM, arm64
Type: vm ioctl
Parameters: none
Returns: 0 on success, -1 on error
@@ -595,14 +598,14 @@ Returns: 0 on success, -1 on error
Creates an interrupt controller model in the kernel. On x86, creates a virtual
ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM, a GIC is
+only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
created.
4.25 KVM_IRQ_LINE
Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm
+Architectures: x86, ia64, arm, arm64
Type: vm ioctl
Parameters: struct kvm_irq_level
Returns: 0 on success, -1 on error
@@ -612,9 +615,10 @@ On some architectures it is required that an interrupt controller model has
been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered
interrupts require the level to be set to 1 and then back to 0.
-ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip
-(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
-specific cpus. The irq field is interpreted like this:
+ARM/arm64 can signal an interrupt either at the CPU level, or at the
+in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
+use PPIs designated for specific cpus. The irq field is interpreted
+like this:
 bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 |
field: | irq_type | vcpu_index | irq_id |
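+
+As an illustration, a sketch of raising SPI 32 through the in-kernel GIC,
+assuming irq_type 1 denotes a GIC SPI (vcpu_index is then ignored):
+
+	struct kvm_irq_level irq_level;
+
+	irq_level.irq = (1 << 24) | (0 << 16) | 32;	/* type|vcpu|irq_id */
+	irq_level.level = 1;
+	ioctl(vm_fd, KVM_IRQ_LINE, &irq_level);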
@@ -968,18 +972,20 @@ uniprocessor guests).
Possible values are:
- - KVM_MP_STATE_RUNNABLE: the vcpu is currently running
+ - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64]
- KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP)
- which has not yet received an INIT signal
+ which has not yet received an INIT signal [x86,
+ ia64]
- KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is
- now ready for a SIPI
+ now ready for a SIPI [x86, ia64]
- KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and
- is waiting for an interrupt
+ is waiting for an interrupt [x86, ia64]
- KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector
- accessible via KVM_GET_VCPU_EVENTS)
+ accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
-This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
4.39 KVM_SET_MP_STATE
@@ -993,8 +999,9 @@ Returns: 0 on success; -1 on error
Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
arguments.
-This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
4.40 KVM_SET_IDENTITY_MAP_ADDR
@@ -1121,9 +1128,9 @@ struct kvm_cpuid2 {
struct kvm_cpuid_entry2 entries[0];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
struct kvm_cpuid_entry2 {
__u32 function;
@@ -1831,6 +1838,22 @@ ARM 32-bit VFP control registers have the following id bit patterns:
ARM 64-bit FP registers have the following id bit patterns:
0x4030 0000 0012 0 <regno:12>
+
+arm64 registers are mapped using the lower 32 bits. The upper 16 bits of
+that are the register group type, or coprocessor number:
+
+arm64 core/FP-SIMD registers have the following id bit patterns. Note
+that the size of the access is variable, as the kvm_regs structure
+contains elements ranging from 32 to 128 bits. The index is a 32bit
+value in the kvm_regs structure seen as a 32bit array.
+ 0x60x0 0000 0010 <index into the kvm_regs struct:16>
+
+arm64 CCSIDR registers are demultiplexed by CSSELR value:
+ 0x6020 0000 0011 00 <csselr:8>
+
+arm64 system registers have the following id bit patterns:
+ 0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
+
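+A sketch of composing such a system register id in a VMM, following the
+pattern above (the helper and shift values are illustrative, derived from
+the field widths, not kernel macros):
+
+	static __u64 arm64_sys_reg_id(__u64 op0, __u64 op1, __u64 crn,
+				      __u64 crm, __u64 op2)
+	{
+		/* 0x6030...0013 selects a 64-bit arm64 system register */
+		return 0x6030000000130000ULL | (op0 << 14) | (op1 << 11) |
+		       (crn << 7) | (crm << 3) | op2;
+	}
+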
4.69 KVM_GET_ONE_REG
Capability: KVM_CAP_ONE_REG
@@ -2264,7 +2287,7 @@ current state. "addr" is ignored.
4.77 KVM_ARM_VCPU_INIT
Capability: basic
-Architectures: arm
+Architectures: arm, arm64
Type: vcpu ioctl
Parameters: struct struct kvm_vcpu_init (in)
Returns: 0 on success; -1 on error
@@ -2283,12 +2306,14 @@ should be created before this ioctl is invoked.
Possible features:
- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
Depends on KVM_CAP_ARM_PSCI.
+ - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
+ Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
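+
+For instance, a sketch starting a vcpu powered off ('target' is assumed
+to have been chosen to match the host CPU):
+
+	struct kvm_vcpu_init init;
+
+	memset(&init, 0, sizeof(init));
+	init.target = target;
+	init.features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
+	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);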
4.78 KVM_GET_REG_LIST
Capability: basic
-Architectures: arm
+Architectures: arm, arm64
Type: vcpu ioctl
Parameters: struct kvm_reg_list (in/out)
Returns: 0 on success; -1 on error
@@ -2305,10 +2330,10 @@ This ioctl returns the guest registers that are supported for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
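+
+A common calling pattern is to probe for the required size first; a sketch
+(error checks abbreviated):
+
+	struct kvm_reg_list probe, *list;
+
+	probe.n = 0;
+	ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe);	/* fails, fills probe.n */
+	list = malloc(sizeof(*list) + probe.n * sizeof(__u64));
+	list->n = probe.n;
+	ioctl(vcpu_fd, KVM_GET_REG_LIST, list);
+	/* list->reg[] now holds ids usable with KVM_GET/SET_ONE_REG */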
-4.80 KVM_ARM_SET_DEVICE_ADDR
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
-Architectures: arm
+Architectures: arm, arm64
Type: vm ioctl
Parameters: struct kvm_arm_device_address (in)
Returns: 0 on success, -1 on error
@@ -2329,20 +2354,25 @@ can access emulated or directly exposed devices, which the host kernel needs
to know about. The id field is an architecture specific identifier for a
specific device.
-ARM divides the id field into two parts, a device id and an address type id
-specific to the individual device.
+ARM/arm64 divides the id field into two parts, a device id and an
+address type id specific to the individual device.
 bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 |
field: | 0x00000000 | device id | addr type id |
-ARM currently only require this when using the in-kernel GIC support for the
-hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id. When
-setting the base address for the guest's mapping of the VGIC virtual CPU
-and distributor interface, the ioctl must be called after calling
-KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling
-this ioctl twice for any of the base addresses will return -EEXIST.
+ARM/arm64 currently only require this when using the in-kernel GIC
+support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
+as the device id. When setting the base address for the guest's
+mapping of the VGIC virtual CPU and distributor interface, the ioctl
+must be called after calling KVM_CREATE_IRQCHIP, but before calling
+KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
+base addresses will return -EEXIST.
+
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
-4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
Capability: KVM_CAP_PPC_RTAS
Architectures: ppc
@@ -2612,6 +2642,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
external interrupt has just been delivered into the guest. User space
should put the acknowledged interrupt vector into the 'epr' field.
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+ __u32 type;
+ __u64 flags;
+ } system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture specific mechanism (hypercall
+or some special instruction). In case of ARM/ARM64, this is triggered using
+HVC instruction based PSCI call from the vcpu. The 'type' field describes
+the system-level event type. The 'flags' field describes architecture
+specific flags for the system-level event.
+
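+A VMM's run loop might react to this exit like the following sketch, where
+vmm_reset() stands in for a hypothetical VMM helper:
+
+	case KVM_EXIT_SYSTEM_EVENT:
+		switch (run->system_event.type) {
+		case KVM_SYSTEM_EVENT_SHUTDOWN:
+			exit(0);	/* guest requested power-off */
+		case KVM_SYSTEM_EVENT_RESET:
+			vmm_reset();
+			break;
+		}
+		break;
+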
/* Fix the size of the union. */
char padding[256];
};
@@ -2641,6 +2686,77 @@ and usually define the validity of a groups of registers. (e.g. one bit
};
+4.81 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+ __u32 nent;
+ __u32 flags;
+ struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+
+struct kvm_cpuid_entry2 {
+ __u32 function;
+ __u32 index;
+ __u32 flags;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm. Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
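+A sketch of the sizing handshake described above (it assumes E2BIG is the
+only failure; real code should check errno):
+
+	struct kvm_cpuid2 *cpuid;
+	__u32 nent = 8;
+
+	for (;;) {
+		cpuid = malloc(sizeof(*cpuid) +
+			       nent * sizeof(struct kvm_cpuid_entry2));
+		cpuid->nent = nent;
+		if (ioctl(kvm_fd, KVM_GET_EMULATED_CPUID, cpuid) == 0)
+			break;		/* cpuid->nent is the valid count */
+		free(cpuid);		/* E2BIG: grow the array and retry */
+		nent *= 2;
+	}
+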
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus are not included here.
+
+The fields in each entry are defined as follows:
+
+ function: the eax value used to obtain the entry
+ index: the ecx value used to obtain the entry (for entries that are
+ affected by ecx)
+ flags: an OR of zero or more of the following:
+ KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+ if the index field is valid
+ KVM_CPUID_FLAG_STATEFUL_FUNC:
+ if cpuid for this function returns different values for successive
+ invocations; there will be several entries with the same function,
+ all with this flag set
+ KVM_CPUID_FLAG_STATE_READ_NEXT:
+ for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+ the first entry to be read by a cpu
+ eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+ this function/index combination
+
+
6. Capabilities that can be enabled
-----------------------------------
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000000..df8b0c7540b6
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -0,0 +1,83 @@
+ARM Virtual Generic Interrupt Controller (VGIC)
+===============================================
+
+Device types supported:
+ KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
+
+Groups:
+ KVM_DEV_ARM_VGIC_GRP_ADDR
+ Attributes:
+ KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+ Base address in the guest physical address space of the GIC distributor
+ register mappings.
+
+ KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+ Base address in the guest physical address space of the GIC virtual cpu
+ interface register mappings.
+
+ KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All distributor regs are (rw, 32-bit)
+
+ The offset is relative to the "Distributor base address" as defined in the
+ GICv2 specs. Getting or setting such a register has the same effect as
+ reading or writing the register on the actual hardware from the cpu
+ specified with cpu id field. Note that most distributor fields are not
+ banked, but return the same value regardless of the cpu id used to access
+ the register.
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
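+  For instance, a sketch of reading the register at distributor offset 0
+  (GICD_CTLR) as seen from cpu id 0, on the device fd of the VGIC:
+
+	__u32 val;
+	struct kvm_device_attr attr = {
+		.group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+		.attr  = ((__u64)0 << 32) | 0x0,	/* cpu id 0, offset 0 */
+		.addr  = (__u64)(unsigned long)&val,
+	};
+
+	ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
+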
+ KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All CPU interface regs are (rw, 32-bit)
+
+ The offset specifies the offset from the "CPU interface base address" as
+ defined in the GICv2 specs. Getting or setting such a register has the
+ same effect as reading or writing the register on the actual hardware.
+
+ The Active Priorities Registers APRn are implementation defined, so we set a
+ fixed format for our implementation that fits with the model of a "GICv2
+ implementation without the security extensions" which we present to the
+  guest. This interface always exposes four registers APR[0-3] describing the
+ maximum possible 128 preemption levels. The semantics of the register
+ indicate if any interrupts in a given preemption level are in the active
+ state by setting the corresponding bit.
+
+ Thus, preemption level X has one or more active interrupts if and only if:
+
+ APRn[X mod 32] == 0b1, where n = X / 32
+
+ Bits for undefined preemption levels are RAZ/WI.
+
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+ Attributes:
+ A value describing the number of interrupts (SGI, PPI and SPI) for
+ this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+ Errors:
+ -EINVAL: Value set is out of the expected range
+    -EBUSY: Value has already been set, or GIC has already been initialized
+ with default values.
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644
index 000000000000..ef51740c67ca
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -0,0 +1,22 @@
+VFIO virtual device
+===================
+
+Device types supported:
+ KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM. The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM. As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence. When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+ KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+ KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+ KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+
+For each, kvm_device_attr.addr points to an int32_t file descriptor
+for the VFIO group.
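+
+A sketch of wiring a group into the device (the group path is a
+hypothetical example):
+
+	int32_t group_fd = open("/dev/vfio/26", O_RDWR);
+	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
+	struct kvm_device_attr attr = {
+		.group = KVM_DEV_VFIO_GROUP,
+		.attr  = KVM_DEV_VFIO_GROUP_ADD,
+		.addr  = (__u64)(unsigned long)&group_fd,
+	};
+
+	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);	/* cd.fd is the device fd */
+	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);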
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 41b7ac9884b5..ba035c33d01c 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
------------
Name: kvm_lock
-Type: raw_spinlock
+Type: spinlock_t
Arch: any
Protects: - vm_list
- - hardware virtualization enable/disable
+
+Name: kvm_count_lock
+Type: raw_spinlock_t
+Arch: any
+Protects: - hardware virtualization enable/disable
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
migration.
diff --git a/MAINTAINERS b/MAINTAINERS
index 48c748080c96..7433b84439f3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4719,6 +4719,15 @@ F: arch/arm/include/uapi/asm/kvm*
F: arch/arm/include/asm/kvm*
F: arch/arm/kvm/
+KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
+M: Marc Zyngier <marc.zyngier@arm.com>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L: kvmarm@lists.cs.columbia.edu
+S: Maintained
+F: arch/arm64/include/uapi/asm/kvm*
+F: arch/arm64/include/asm/kvm*
+F: arch/arm64/kvm/
+
KEXEC
M: Eric Biederman <ebiederm@xmission.com>
W: http://kernel.org/pub/linux/utils/kernel/kexec/
@@ -5152,6 +5161,14 @@ S: Maintained
F: drivers/net/macvlan.c
F: include/linux/if_macvlan.h
+MAILBOX API
+M: Jassi Brar <jassisinghbrar@gmail.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: drivers/mailbox/
+F: include/linux/mailbox_client.h
+F: include/linux/mailbox_controller.h
+
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
M: Michael Kerrisk <mtk.manpages@gmail.com>
W: http://www.kernel.org/doc/man-pages
@@ -8073,6 +8090,7 @@ S: Supported
F: drivers/thermal/
F: include/linux/thermal.h
F: include/linux/cpu_cooling.h
+F: Documentation/devicetree/bindings/thermal/
THINGM BLINK(1) USB RGB LED DRIVER
M: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index bdee3a812052..afdd13cf881c 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -40,7 +40,7 @@ struct machine_desc * __init setup_machine_fdt(void *dt)
const char *model, *compat;
void *clk;
char manufacturer[16];
- unsigned long len;
+ int len;
/* check device tree validity */
if (be32_to_cpu(devtree->magic) != OF_DT_HEADER)
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 4a177365b2c4..7991e08d606b 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -157,9 +157,8 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
#endif
#ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
- pr_err("%s(%lx, %lx)\n", __func__, start, end);
+ pr_err("%s(%llx, %llx)\n", __func__, start, end);
}
#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d41951246cd6..c1d6aa708ccd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -475,6 +475,7 @@ config ARCH_IXP4XX
bool "IXP4xx-based"
depends on MMU
select ARCH_HAS_DMA_SET_COHERENT_MASK
+ select ARCH_SUPPORTS_BIG_ENDIAN
select ARCH_REQUIRE_GPIOLIB
select CLKSRC_MMIO
select CPU_XSCALE
@@ -1495,6 +1496,109 @@ config SCHED_SMT
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+ bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+ help
+ Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+ bool "(EXPERIMENTAL) Heterogeneous multiprocessor scheduling"
+ depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+ help
+ Experimental scheduler optimizations for heterogeneous platforms.
+ Attempts to introspectively select task affinity to optimize power
+ and performance. Basic support for multiple (>2) cpu types is in place,
+ but it has only been tested with two types of cpus.
+ There is currently no support for migration of task groups, hence
+ !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+ between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+ When turned on, this option adds the /sys/kernel/hmp directory, which
+ contains the following files:
+ up_threshold - the load average threshold used for up migration
+ (0 - 1023)
+ down_threshold - the load average threshold used for down migration
+ (0 - 1023)
+ hmp_domains - a list of cpumasks for the present HMP domains,
+ starting with the 'biggest' and ending with the
+ 'smallest'.
+ Note that both threshold files can be written to at runtime to
+ control scheduler behaviour.
+
+config SCHED_HMP_PRIO_FILTER
+ bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
+ depends on SCHED_HMP
+ help
+ Enables a task-priority-based HMP migration filter. Any task with
+ a nice value above the threshold will always be scheduled on the
+ low-power cpus with less compute capacity.
+
+config SCHED_HMP_PRIO_FILTER_VAL
+ int "NICE priority threshold"
+ default 5
+ depends on SCHED_HMP_PRIO_FILTER
+
+config HMP_FAST_CPU_MASK
+ string "HMP scheduler fast CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the fast CPUs in the system as a cpulist
+ string, e.g. cpus 0 and 1 are specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+ string "HMP scheduler slow CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the slow CPUs in the system as a cpulist
+ string, e.g. cpus 0 and 1 are specified as 0-1.
+
+config HMP_VARIABLE_SCALE
+ bool "Allow changing the load tracking scale through sysfs"
+ depends on SCHED_HMP
+ help
+ When turned on, this option exports the load average period value
+ for the load tracking patches through sysfs.
+ The values can be modified to change the rate of load accumulation
+ used for HMP migration. 'load_avg_period_ms' is the time in ms it
+ takes a task that starts from a load average ratio of 0 and then
+ becomes 100% busy to reach a load average ratio of 0.5.
+ For example, with load_avg_period_ms = 128 and up_threshold = 512,
+ a task that starts with a load of 0 and then runs continuously will
+ be migrated to a bigger CPU after 128ms, because by then its
+ load_avg_ratio has reached 0.5, matching the effective up_threshold
+ of 0.5 (512/1024).
+ This option has the same effect as changing the decay factor Y of
+ the load average computation to
+ (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
+ but avoids intermediate overflows in the computation.
+
+config HMP_FREQUENCY_INVARIANT_SCALE
+ bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
+ depends on SCHED_HMP && CPU_FREQ
+ help
+ Scales the current load contribution in line with the frequency
+ of the CPU that the task was executed on.
+ In this version, we use a simple linear scale derived from the
+ maximum frequency reported by CPUFreq.
+ Scaling the tracked load by the CPU's current frequency makes it
+ represent consumption of the possible compute capacity (rather
+ than consumption of the actual instantaneous capacity, as normal)
+ and allows the HMP migration's simple threshold strategy to
+ interact more predictably with CPUFreq's asynchronous changes in
+ compute capacity.
+
+config SCHED_HMP_LITTLE_PACKING
+ bool "Small task packing for HMP"
+ depends on SCHED_HMP
+ default n
+ help
+ Allows the HMP scheduler to pack small tasks onto the CPUs in the
+ smallest HMP domain.
+ Controlled by two sysfs files in /sys/kernel/hmp:
+ packing_enable: 1 to enable, 0 to disable packing. Default 1.
+ packing_limit: runqueue load ratio above which an RQ is considered
+ to be full. Default is NICE_0_LOAD * 9/8.
+
config HAVE_ARM_SCU
bool
help
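
The HMP tunables described in the help texts above live under
/sys/kernel/hmp. A hedged userspace sketch of adjusting them at
runtime follows (which files exist depends on SCHED_HMP,
HMP_VARIABLE_SCALE and SCHED_HMP_LITTLE_PACKING being selected):

    #include <stdio.h>

    /* Write one value to a /sys/kernel/hmp tunable. */
    static void hmp_set(const char *knob, const char *val)
    {
            char path[128];
            FILE *f;

            snprintf(path, sizeof(path), "/sys/kernel/hmp/%s", knob);
            f = fopen(path, "w");
            if (f) {
                    fputs(val, f);
                    fclose(f);
            }
    }

    int main(void)
    {
            hmp_set("up_threshold", "700");    /* range 0-1023 */
            hmp_set("down_threshold", "256");  /* range 0-1023 */
            hmp_set("packing_enable", "1");    /* SCHED_HMP_LITTLE_PACKING */
            return 0;
    }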
@@ -1522,6 +1626,31 @@ config MCPM
for (multi-)cluster based systems, such as big.LITTLE based
systems.
+config BIG_LITTLE
+ bool "big.LITTLE support (Experimental)"
+ depends on CPU_V7 && SMP
+ select MCPM
+ help
+ This option enables support for the big.LITTLE architecture.
+
+config BL_SWITCHER
+ bool "big.LITTLE switcher support"
+ depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
+ select CPU_PM
+ select ARM_CPU_SUSPEND
+ help
+ The big.LITTLE "switcher" provides the core functionality for
+ transparently handling transitions between a cluster of
+ Cortex-A15s and a cluster of Cortex-A7s in a big.LITTLE system.
+
+config BL_SWITCHER_DUMMY_IF
+ tristate "Simple big.LITTLE switcher user interface"
+ depends on BL_SWITCHER && DEBUG_KERNEL
+ help
+ This is a simple dummy character device interface for
+ controlling the big.LITTLE switcher core code. It is meant
+ for debugging purposes only.
+
choice
prompt "Memory split"
default VMSPLIT_3G
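
For BL_SWITCHER_DUMMY_IF, a hedged usage sketch; the device node name
and the single-character '0'/'1' protocol are assumptions drawn from
the bL_switcher_dummy_if.c driver added later in this series:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            /* "/dev/b.L_switcher" is an assumed node name. */
            int fd = open("/dev/b.L_switcher", O_WRONLY);

            if (fd < 0)
                    return 1;
            write(fd, "0", 1);  /* assumed: disable switching */
            write(fd, "1", 1);  /* assumed: re-enable switching */
            close(fd);
            return 0;
    }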
@@ -1732,6 +1861,14 @@ config HW_PERF_EVENTS
Enable hardware performance counter support for perf events. If
disabled, perf events will use software events only.
+config SYS_SUPPORTS_HUGETLBFS
+ def_bool y
+ depends on ARM_LPAE
+
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ def_bool y
+ depends on ARM_LPAE
+
source "mm/Kconfig"
config FORCE_MAX_ZONEORDER
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 1ba358ba16b8..70bc19e2274f 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -16,6 +16,7 @@ LDFLAGS :=
LDFLAGS_vmlinux :=-p --no-undefined -X
ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
LDFLAGS_vmlinux += --be8
+LDFLAGS_MODULE += --be8
endif
OBJCOPYFLAGS :=-O binary -R .comment -S
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 032a8d987148..f6e34be012ff 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -135,6 +135,7 @@ start:
.word _edata @ zImage end address
THUMB( .thumb )
1:
+ ARM_BE8( setend be ) @ go BE8 if compiled for BE8
mrs r9, cpsr
#ifdef CONFIG_ARM_VIRT_EXT
bl __hyp_stub_install @ get into SVC mode, reversibly
@@ -679,9 +680,7 @@ __armv4_mmu_cache_on:
mrc p15, 0, r0, c1, c0, 0 @ read control reg
orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement
orr r0, r0, #0x0030
-#ifdef CONFIG_CPU_ENDIAN_BE8
- orr r0, r0, #1 << 25 @ big-endian page tables
-#endif
+ ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables
bl __common_mmu_cache_on
mov r0, #0
mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
@@ -708,9 +707,7 @@ __armv7_mmu_cache_on:
orr r0, r0, #1 << 22 @ U (v6 unaligned access model)
@ (needed for ARM1176)
#ifdef CONFIG_MMU
-#ifdef CONFIG_CPU_ENDIAN_BE8
- orr r0, r0, #1 << 25 @ big-endian page tables
-#endif
+ ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables
mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg
orrne r0, r0, #1 @ MMU enabled
movne r1, #0xfffffffd @ domain 0 = client
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index f0895c581a89..00baf9f5766a 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -202,7 +202,14 @@ dtb-$(CONFIG_ARCH_VERSATILE) += versatile-ab.dtb \
dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2p-ca5s.dtb \
vexpress-v2p-ca9.dtb \
vexpress-v2p-ca15-tc1.dtb \
- vexpress-v2p-ca15_a7.dtb
+ vexpress-v2p-ca15_a7.dtb \
+ rtsm_ve-cortex_a9x2.dtb \
+ rtsm_ve-cortex_a9x4.dtb \
+ rtsm_ve-cortex_a15x1.dtb \
+ rtsm_ve-cortex_a15x2.dtb \
+ rtsm_ve-cortex_a15x4.dtb \
+ rtsm_ve-v2p-ca15x1-ca7x1.dtb \
+ rtsm_ve-v2p-ca15x4-ca7x4.dtb
dtb-$(CONFIG_ARCH_VIRT) += xenvm-4.2.dtb
dtb-$(CONFIG_ARCH_VT8500) += vt8500-bv07.dtb \
wm8505-ref.dtb \
diff --git a/arch/arm/boot/dts/clcd-panels.dtsi b/arch/arm/boot/dts/clcd-panels.dtsi
new file mode 100644
index 000000000000..0b0ff6ead4b2
--- /dev/null
+++ b/arch/arm/boot/dts/clcd-panels.dtsi
@@ -0,0 +1,52 @@
+/*
+ * ARM Ltd. Versatile Express
+ *
+ */
+
+/ {
+ panels {
+ panel@0 {
+ compatible = "panel";
+ mode = "VGA";
+ refresh = <60>;
+ xres = <640>;
+ yres = <480>;
+ pixclock = <39721>;
+ left_margin = <40>;
+ right_margin = <24>;
+ upper_margin = <32>;
+ lower_margin = <11>;
+ hsync_len = <96>;
+ vsync_len = <2>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+
+ panel@1 {
+ compatible = "panel";
+ mode = "XVGA";
+ refresh = <60>;
+ xres = <1024>;
+ yres = <768>;
+ pixclock = <15748>;
+ left_margin = <152>;
+ right_margin = <48>;
+ upper_margin = <23>;
+ lower_margin = <3>;
+ hsync_len = <104>;
+ vsync_len = <4>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts
new file mode 100644
index 000000000000..c9eee916aa7e
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts
@@ -0,0 +1,159 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x1CT
+ *
+ * RTSM_VE_Cortex_A15x1.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA15x1";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x1", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts
new file mode 100644
index 000000000000..853a166e3c32
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts
@@ -0,0 +1,165 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x2CT
+ *
+ * RTSM_VE_Cortex_A15x2.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA15x2";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x2", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts
new file mode 100644
index 000000000000..c1947a3a5c88
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts
@@ -0,0 +1,177 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ *
+ * RTSM_VE_Cortex_A15x4.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA15x4";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x4", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ };
+
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <2>;
+ };
+
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <3>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts
new file mode 100644
index 000000000000..fca6b2f79677
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts
@@ -0,0 +1,171 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA9MPx2CT
+ *
+ * RTSM_VE_Cortex_A9x2.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA9x2";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a9x2", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <1>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x80000000>;
+ };
+
+ scu@2c000000 {
+ compatible = "arm,cortex-a9-scu";
+ reg = <0x2c000000 0x58>;
+ };
+
+ timer@2c000600 {
+ compatible = "arm,cortex-a9-twd-timer";
+ reg = <0x2c000600 0x20>;
+ interrupts = <1 13 0xf04>;
+ };
+
+ watchdog@2c000620 {
+ compatible = "arm,cortex-a9-twd-wdt";
+ reg = <0x2c000620 0x20>;
+ interrupts = <1 14 0xf04>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x2c001000 0x1000>,
+ <0x2c000100 0x100>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x08000000 0x04000000>,
+ <1 0 0x14000000 0x04000000>,
+ <2 0 0x18000000 0x04000000>,
+ <3 0 0x1c000000 0x04000000>,
+ <4 0 0x0c000000 0x04000000>,
+ <5 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts
new file mode 100644
index 000000000000..fd8a6ed97a04
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts
@@ -0,0 +1,183 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA9MPx4CT
+ *
+ * RTSM_VE_Cortex_A9x4.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA9x4";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a9x4", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <1>;
+ };
+
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <2>;
+ };
+
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <3>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x80000000>;
+ };
+
+ scu@2c000000 {
+ compatible = "arm,cortex-a9-scu";
+ reg = <0x2c000000 0x58>;
+ };
+
+ timer@2c000600 {
+ compatible = "arm,cortex-a9-twd-timer";
+ reg = <0x2c000600 0x20>;
+ interrupts = <1 13 0xf04>;
+ };
+
+ watchdog@2c000620 {
+ compatible = "arm,cortex-a9-twd-wdt";
+ reg = <0x2c000620 0x20>;
+ interrupts = <1 14 0xf04>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x2c001000 0x1000>,
+ <0x2c000100 0x100>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x08000000 0x04000000>,
+ <1 0 0x14000000 0x04000000>,
+ <2 0 0x18000000 0x04000000>,
+ <3 0 0x1c000000 0x04000000>,
+ <4 0 0x0c000000 0x04000000>,
+ <5 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi
new file mode 100644
index 000000000000..a2d895ee5faa
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi
@@ -0,0 +1,231 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * Motherboard component
+ *
+ * VEMotherBoard.lisa
+ */
+
+ motherboard {
+ compatible = "arm,vexpress,v2m-p1", "simple-bus";
+ arm,hbi = <0x190>;
+ arm,vexpress,site = <0>;
+ arm,v2m-memory-map = "rs1";
+ #address-cells = <2>; /* SMB chipselect number and offset */
+ #size-cells = <1>;
+ #interrupt-cells = <1>;
+ ranges;
+
+ flash@0,00000000 {
+ compatible = "arm,vexpress-flash", "cfi-flash";
+ reg = <0 0x00000000 0x04000000>,
+ <4 0x00000000 0x04000000>;
+ bank-width = <4>;
+ };
+
+ vram@2,00000000 {
+ compatible = "arm,vexpress-vram";
+ reg = <2 0x00000000 0x00800000>;
+ };
+
+ ethernet@2,02000000 {
+ compatible = "smsc,lan91c111";
+ reg = <2 0x02000000 0x10000>;
+ interrupts = <15>;
+ };
+
+ iofpga@3,00000000 {
+ compatible = "arm,amba-bus", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 3 0 0x200000>;
+
+ v2m_sysreg: sysreg@010000 {
+ compatible = "arm,vexpress-sysreg";
+ reg = <0x010000 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ v2m_sysctl: sysctl@020000 {
+ compatible = "arm,sp810", "arm,primecell";
+ reg = <0x020000 0x1000>;
+ clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>;
+ clock-names = "refclk", "timclk", "apb_pclk";
+ #clock-cells = <1>;
+ clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
+ };
+
+ aaci@040000 {
+ compatible = "arm,pl041", "arm,primecell";
+ reg = <0x040000 0x1000>;
+ interrupts = <11>;
+ clocks = <&smbclk>;
+ clock-names = "apb_pclk";
+ };
+
+ mmci@050000 {
+ compatible = "arm,pl180", "arm,primecell";
+ reg = <0x050000 0x1000>;
+ interrupts = <9 10>;
+ cd-gpios = <&v2m_sysreg 0 0>;
+ wp-gpios = <&v2m_sysreg 1 0>;
+ max-frequency = <12000000>;
+ vmmc-supply = <&v2m_fixed_3v3>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "mclk", "apb_pclk";
+ };
+
+ kmi@060000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x060000 0x1000>;
+ interrupts = <12>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ kmi@070000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x070000 0x1000>;
+ interrupts = <13>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ v2m_serial0: uart@090000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x090000 0x1000>;
+ interrupts = <5>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "uartclk", "apb_pclk";
+ };
+
+ v2m_serial1: uart@0a0000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0a0000 0x1000>;
+ interrupts = <6>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "uartclk", "apb_pclk";
+ };
+
+ v2m_serial2: uart@0b0000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0b0000 0x1000>;
+ interrupts = <7>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "uartclk", "apb_pclk";
+ };
+
+ v2m_serial3: uart@0c0000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0c0000 0x1000>;
+ interrupts = <8>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "uartclk", "apb_pclk";
+ };
+
+ wdt@0f0000 {
+ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0f0000 0x1000>;
+ interrupts = <0>;
+ clocks = <&v2m_refclk32khz>, <&smbclk>;
+ clock-names = "wdogclk", "apb_pclk";
+ };
+
+ v2m_timer01: timer@110000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x110000 0x1000>;
+ interrupts = <2>;
+ clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&smbclk>;
+ clock-names = "timclken1", "timclken2", "apb_pclk";
+ };
+
+ v2m_timer23: timer@120000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x120000 0x1000>;
+ interrupts = <3>;
+ clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&smbclk>;
+ clock-names = "timclken1", "timclken2", "apb_pclk";
+ };
+
+ rtc@170000 {
+ compatible = "arm,pl031", "arm,primecell";
+ reg = <0x170000 0x1000>;
+ interrupts = <4>;
+ clocks = <&smbclk>;
+ clock-names = "apb_pclk";
+ };
+
+ clcd@1f0000 {
+ compatible = "arm,pl111", "arm,primecell";
+ reg = <0x1f0000 0x1000>;
+ interrupts = <14>;
+ clocks = <&v2m_oscclk1>, <&smbclk>;
+ clock-names = "v2m:oscclk1", "apb_pclk";
+ mode = "VGA";
+ use_dma = <0>;
+ framebuffer = <0x18000000 0x00180000>;
+ };
+
+ virtio_block@0130000 {
+ compatible = "virtio,mmio";
+ reg = <0x130000 0x200>;
+ interrupts = <42>;
+ };
+
+ };
+
+ v2m_fixed_3v3: fixedregulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ v2m_clk24mhz: clk24mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ clock-output-names = "v2m:clk24mhz";
+ };
+
+ v2m_refclk1mhz: refclk1mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000>;
+ clock-output-names = "v2m:refclk1mhz";
+ };
+
+ v2m_refclk32khz: refclk32khz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "v2m:refclk32khz";
+ };
+
+ mcc {
+ compatible = "simple-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ v2m_oscclk1: osc@1 {
+ /* CLCD clock */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <23750000 63500000>;
+ #clock-cells = <0>;
+ clock-output-names = "v2m:oscclk1";
+ };
+
+ muxfpga@0 {
+ compatible = "arm,vexpress-muxfpga";
+ arm,vexpress-sysreg,func = <7 0>;
+ };
+
+ shutdown@0 {
+ compatible = "arm,vexpress-shutdown";
+ arm,vexpress-sysreg,func = <8 0>;
+ };
+ };
+ };
diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts
new file mode 100644
index 000000000000..fe8cf5dc8570
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts
@@ -0,0 +1,244 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ * ARMCortexA7x4CT
+ * RTSM_VE_Cortex_A15x1_A7x1.lisa
+ */
+
+/dts-v1/;
+
+/memreserve/ 0xff000000 0x01000000;
+
+/ {
+ model = "RTSM_VE_CortexA15x1-A7x1";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x1_a7x1", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ clusters {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cluster0: cluster@0 {
+ reg = <0>;
+// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core0: core@0 {
+ reg = <0>;
+ };
+
+ };
+ };
+
+ cluster1: cluster@1 {
+ reg = <1>;
+// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core1: core@0 {
+ reg = <0>;
+ };
+
+ };
+ };
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ cluster = <&cluster0>;
+ core = <&core0>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ cluster = <&cluster1>;
+ core = <&core1>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400", "arm,cci";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+ dcscb@60000000 {
+ compatible = "arm,rtsm,dcscb";
+ reg = <0 0x60000000 0 0x1000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+
+ gic-cpuif@0 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <0>;
+ cpu = <&cpu0>;
+ };
+ gic-cpuif@1 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <1>;
+ cpu = <&cpu1>;
+ };
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts
new file mode 100644
index 000000000000..f715285131d8
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts
@@ -0,0 +1,358 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ * ARMCortexA7x4CT
+ * RTSM_VE_Cortex_A15x4_A7x4.lisa
+ */
+
+/dts-v1/;
+
+/memreserve/ 0xff000000 0x01000000;
+
+/ {
+ model = "RTSM_VE_CortexA15x4-A7x4";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x4_a7x4", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ clusters {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cluster0: cluster@0 {
+ reg = <0>;
+// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core0: core@0 {
+ reg = <0>;
+ };
+
+ core1: core@1 {
+ reg = <1>;
+ };
+
+ core2: core@2 {
+ reg = <2>;
+ };
+
+ core3: core@3 {
+ reg = <3>;
+ };
+
+ };
+ };
+
+ cluster1: cluster@1 {
+ reg = <1>;
+// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core4: core@0 {
+ reg = <0>;
+ };
+
+ core5: core@1 {
+ reg = <1>;
+ };
+
+ core6: core@2 {
+ reg = <2>;
+ };
+
+ core7: core@3 {
+ reg = <3>;
+ };
+
+ };
+ };
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ cluster = <&cluster0>;
+ core = <&core0>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ cluster = <&cluster0>;
+ core = <&core1>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <2>;
+ cluster = <&cluster0>;
+ core = <&core2>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <3>;
+ cluster = <&cluster0>;
+ core = <&core3>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu4: cpu@4 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ cluster = <&cluster1>;
+ core = <&core4>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+
+ cpu5: cpu@5 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x101>;
+ cluster = <&cluster1>;
+ core = <&core5>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+
+ cpu6: cpu@6 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x102>;
+ cluster = <&cluster1>;
+ core = <&core6>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+
+ cpu7: cpu@7 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x103>;
+ cluster = <&cluster1>;
+ core = <&core7>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400", "arm,cci";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+ dcscb@60000000 {
+ compatible = "arm,rtsm,dcscb";
+ reg = <0 0x60000000 0 0x1000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+
+ gic-cpuif@0 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <0>;
+ cpu = <&cpu0>;
+ };
+ gic-cpuif@1 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <1>;
+ cpu = <&cpu1>;
+ };
+ gic-cpuif@2 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <2>;
+ cpu = <&cpu2>;
+ };
+ gic-cpuif@3 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <3>;
+ cpu = <&cpu3>;
+ };
+ gic-cpuif@4 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <4>;
+ cpu = <&cpu4>;
+ };
+ gic-cpuif@5 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <5>;
+ cpu = <&cpu5>;
+ };
+ gic-cpuif@6 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <6>;
+ cpu = <&cpu6>;
+ };
+ gic-cpuif@7 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <7>;
+ cpu = <&cpu7>;
+ };
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
index ac870fb3fa0d..9584232ee6b6 100644
--- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
@@ -228,6 +228,7 @@
};
clcd@1f0000 {
+ status = "disabled";
compatible = "arm,pl111", "arm,primecell";
reg = <0x1f0000 0x1000>;
interrupts = <14>;
diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi
index f1420368355b..6593398c11ae 100644
--- a/arch/arm/boot/dts/vexpress-v2m.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m.dtsi
@@ -227,6 +227,7 @@
};
clcd@1f000 {
+ status = "disabled";
compatible = "arm,pl111", "arm,primecell";
reg = <0x1f000 0x1000>;
interrupts = <14>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
index 9420053acc14..cc6a8c0cfe33 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
@@ -9,6 +9,8 @@
/dts-v1/;
+/memreserve/ 0xbf000000 0x01000000;
+
/ {
model = "V2P-CA15";
arm,hbi = <0x237>;
@@ -57,6 +59,8 @@
interrupts = <0 85 4>;
clocks = <&oscclk5>;
clock-names = "pxlclk";
+ mode = "1024x768-16@60";
+ framebuffer = <0 0xff000000 0 0x01000000>;
};
memory-controller@2b0a0000 {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index d2803be4e1a8..f1dc620c5c45 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -9,11 +9,13 @@
/dts-v1/;
+/memreserve/ 0xff000000 0x01000000;
+
/ {
model = "V2P-CA15_CA7";
arm,hbi = <0x249>;
arm,vexpress,site = <0xf>;
- compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress";
+ compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress", "arm,generic";
interrupt-parent = <&gic>;
#address-cells = <2>;
#size-cells = <2>;
@@ -29,44 +31,106 @@
i2c1 = &v2m_i2c_pcie;
};
- cpus {
+ clusters {
#address-cells = <1>;
#size-cells = <0>;
- cpu0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
+ cluster0: cluster@0 {
reg = <0>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core0: core@0 {
+ reg = <0>;
+ };
+
+ core1: core@1 {
+ reg = <1>;
+ };
+
+ };
};
- cpu1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
+ cluster1: cluster@1 {
reg = <1>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core2: core@0 {
+ reg = <0>;
+ };
+
+ core3: core@1 {
+ reg = <1>;
+ };
+
+ core4: core@2 {
+ reg = <2>;
+ };
+ };
};
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
cpu2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a7";
reg = <0x100>;
+ cluster = <&cluster1>;
+ core = <&core2>;
+ clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
};
cpu3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a7";
reg = <0x101>;
+ cluster = <&cluster1>;
+ core = <&core3>;
+ clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
};
cpu4: cpu@4 {
device_type = "cpu";
compatible = "arm,cortex-a7";
reg = <0x102>;
+ cluster = <&cluster1>;
+ core = <&core4>;
+ clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ cluster = <&cluster0>;
+ core = <&core0>;
+ clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ cluster = <&cluster0>;
+ core = <&core1>;
+ clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
};
};
memory@80000000 {
device_type = "memory";
- reg = <0 0x80000000 0 0x40000000>;
+ reg = <0 0x80000000 0 0x80000000>;
};
wdt@2a490000 {
@@ -81,6 +145,8 @@
compatible = "arm,hdlcd";
reg = <0 0x2b000000 0 0x1000>;
interrupts = <0 85 4>;
+ mode = "1024x768-16@60";
+ framebuffer = <0 0xff000000 0 0x01000000>;
clocks = <&oscclk5>;
clock-names = "pxlclk";
};
@@ -102,6 +168,64 @@
<0 0x2c004000 0 0x2000>,
<0 0x2c006000 0 0x2000>;
interrupts = <1 9 0xf04>;
+
+ gic-cpuif@0 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <0>;
+ cpu = <&cpu0>;
+ };
+ gic-cpuif@1 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <1>;
+ cpu = <&cpu1>;
+ };
+ gic-cpuif@2 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <2>;
+ cpu = <&cpu2>;
+ };
+
+ gic-cpuif@3 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <3>;
+ cpu = <&cpu3>;
+ };
+
+ gic-cpuif@4 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <4>;
+ cpu = <&cpu4>;
+ };
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+ cci-pmu@2c099000 {
+ compatible = "arm,cci-400-pmu";
+ reg = <0 0x2c099000 0 0x6000>;
+ interrupts = <0 101 4>,
+ <0 102 4>,
+ <0 103 4>,
+ <0 104 4>,
+ <0 105 4>;
};
memory-controller@7ffd0000 {
@@ -125,6 +249,12 @@
clock-names = "apb_pclk";
};
+ spc@7fff0000 {
+ compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc";
+ reg = <0 0x7fff0000 0 0x1000>;
+ interrupts = <0 95 4>;
+ };
+
timer {
compatible = "arm,armv7-timer";
interrupts = <1 13 0xf08>,
@@ -133,12 +263,21 @@
<1 10 0xf08>;
};
- pmu {
+ pmu_a15 {
compatible = "arm,cortex-a15-pmu";
+ cluster = <&cluster0>;
interrupts = <0 68 4>,
<0 69 4>;
};
+ pmu_a7 {
+ compatible = "arm,cortex-a7-pmu";
+ cluster = <&cluster1>;
+ interrupts = <0 128 4>,
+ <0 129 4>,
+ <0 130 4>;
+ };
+
oscclk6a: oscclk6a {
/* Reference 24MHz clock */
compatible = "fixed-clock";
@@ -147,6 +286,15 @@
clock-output-names = "oscclk6a";
};
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0x80100001>;
+ cpu_off = <0x80100002>;
+ cpu_on = <0x80100003>;
+ migrate = <0x80100004>;
+ };
+
dcc {
compatible = "arm,vexpress,config-bus";
arm,vexpress,config-bridge = <&v2m_sysreg>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
index c544a5504591..cf633ed6a1b4 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
@@ -9,6 +9,8 @@
/dts-v1/;
+/memreserve/ 0xbf000000 0x01000000;
+
/ {
model = "V2P-CA5s";
arm,hbi = <0x225>;
@@ -59,6 +61,8 @@
interrupts = <0 85 4>;
clocks = <&oscclk3>;
clock-names = "pxlclk";
+ mode = "640x480-16@60";
+ framebuffer = <0xbf000000 0x01000000>;
};
memory-controller@2a150000 {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
index 62d9b225dcce..f83706bd3f9a 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
@@ -9,6 +9,8 @@
/dts-v1/;
+/include/ "clcd-panels.dtsi"
+
/ {
model = "V2P-CA9";
arm,hbi = <0x191>;
@@ -73,6 +75,8 @@
interrupts = <0 44 4>;
clocks = <&oscclk1>, <&oscclk2>;
clock-names = "clcdclk", "apb_pclk";
+ mode = "XVGA";
+ use_dma = <1>;
};
memory-controller@100e0000 {
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 48434cbe3e89..462cd580fc2d 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -14,5 +14,9 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o
obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o
obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o
obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
+obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o
+obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
+
AFLAGS_mcpm_head.o := -march=armv7-a
AFLAGS_vlock.o := -march=armv7-a
+CFLAGS_REMOVE_mcpm_entry.o = -pg
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
new file mode 100644
index 000000000000..8fee70dfb302
--- /dev/null
+++ b/arch/arm/common/bL_switcher.c
@@ -0,0 +1,864 @@
+/*
+ * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
+ *
+ * Created by: Nicolas Pitre, March 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/clockchips.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/moduleparam.h>
+
+#include <asm/smp_plat.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/suspend.h>
+#include <asm/mcpm.h>
+#include <asm/bL_switcher.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/power_cpu_migrate.h>
+
+
+/*
+ * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
+ * __attribute_const__ and we don't want the compiler to assume any
+ * constness here as the value _does_ change along some code paths.
+ */
+
+static int read_mpidr(void)
+{
+ unsigned int id;
+ asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
+ return id & MPIDR_HWID_BITMASK;
+}
+
+/*
+ * Get a global nanosecond time stamp for tracing.
+ */
+static s64 get_ns(void)
+{
+ struct timespec ts;
+ getnstimeofday(&ts);
+ return timespec_to_ns(&ts);
+}
+
+/*
+ * bL switcher core code.
+ */
+
+static void bL_do_switch(void *_arg)
+{
+ unsigned ib_mpidr, ib_cpu, ib_cluster;
+ long volatile handshake, **handshake_ptr = _arg;
+
+ pr_debug("%s\n", __func__);
+
+ ib_mpidr = cpu_logical_map(smp_processor_id());
+ ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+ ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+ /* Advertise our handshake location */
+ if (handshake_ptr) {
+ handshake = 0;
+ *handshake_ptr = &handshake;
+ } else
+ handshake = -1;
+
+ /*
+ * Our state has been saved at this point. Let's release our
+ * inbound CPU.
+ */
+ mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume);
+ sev();
+
+ /*
+ * From this point, we must assume that our counterpart CPU might
+ * have taken over in its parallel world already, as if execution
+ * just returned from cpu_suspend(). It is therefore important to
+	 * be very careful not to make any change that the other CPU is
+	 * not expecting.  This is why we need stack isolation.
+ *
+	 * Fancy undercover tasks could be performed here.  For now
+ * we have none.
+ */
+
+ /*
+ * Let's wait until our inbound is alive.
+ */
+ while (!handshake) {
+ wfe();
+ smp_mb();
+ }
+
+	/* Let's put ourselves down. */
+ mcpm_cpu_power_down();
+
+ /* should never get here */
+ BUG();
+}
+
+/*
+ * Stack isolation. To ensure 'current' remains valid, we just use another
+ * piece of our thread's stack space which should be fairly lightly used.
+ * The selected area starts just above the thread_info structure located
+ * at the very bottom of the stack, aligned to a cache line, and indexed
+ * with the cluster number.
+ */
+#define STACK_SIZE 512
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+static int bL_switchpoint(unsigned long _arg)
+{
+ unsigned int mpidr = read_mpidr();
+ unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ void *stack = current_thread_info() + 1;
+ stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
+ stack += clusterid * STACK_SIZE + STACK_SIZE;
+ call_with_stack(bL_do_switch, (void *)_arg, stack);
+ BUG();
+}
+
+/*
+ * Generic switcher interface
+ */
+
+static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+static int bL_switcher_cpu_pairing[NR_CPUS];
+
+/*
+ * bL_switch_to - Switch to a specific cluster for the current CPU
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function must be called on the CPU to be switched.
+ * Returns 0 on success, else a negative status code.
+ */
+static int bL_switch_to(unsigned int new_cluster_id)
+{
+ unsigned int mpidr, this_cpu, that_cpu;
+ unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
+ struct completion inbound_alive;
+ struct tick_device *tdev;
+ enum clock_event_mode tdev_mode;
+ long volatile *handshake_ptr;
+ int ipi_nr, ret;
+
+ this_cpu = smp_processor_id();
+ ob_mpidr = read_mpidr();
+ ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0);
+ ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1);
+ BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr);
+
+ if (new_cluster_id == ob_cluster)
+ return 0;
+
+ that_cpu = bL_switcher_cpu_pairing[this_cpu];
+ ib_mpidr = cpu_logical_map(that_cpu);
+ ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+ ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+ pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n",
+ this_cpu, ob_mpidr, ib_mpidr);
+
+ /* Close the gate for our entry vectors */
+ mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL);
+ mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL);
+
+ /* Install our "inbound alive" notifier. */
+ init_completion(&inbound_alive);
+ ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
+ ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
+
+ /*
+ * Let's wake up the inbound CPU now in case it requires some delay
+ * to come online, but leave it gated in our entry vector code.
+ */
+ ret = mcpm_cpu_power_up(ib_cpu, ib_cluster);
+ if (ret) {
+ pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
+ return ret;
+ }
+
+ /*
+ * Raise a SGI on the inbound CPU to make sure it doesn't stall
+ * in a possible WFI, such as in bL_power_down().
+ */
+ gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0);
+
+ /*
+ * Wait for the inbound to come up. This allows for other
+	 * tasks to be scheduled in the meantime.
+ */
+ wait_for_completion(&inbound_alive);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0);
+
+ /*
+ * From this point we are entering the switch critical zone
+ * and can't sleep/schedule anymore.
+ */
+ local_irq_disable();
+ local_fiq_disable();
+ trace_cpu_migrate_begin(get_ns(), ob_mpidr);
+
+ /* redirect GIC's SGIs to our counterpart */
+ gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
+
+ tdev = tick_get_device(this_cpu);
+ if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
+ tdev = NULL;
+ if (tdev) {
+ tdev_mode = tdev->evtdev->mode;
+ clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+ }
+
+ ret = cpu_pm_enter();
+
+	/* we cannot tolerate errors at this point */
+ if (ret)
+ panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
+
+ /*
+ * Swap the physical CPUs in the logical map for this logical CPU.
+ * This must be flushed to RAM as the resume code
+ * needs to access it while the caches are still disabled.
+ */
+ cpu_logical_map(this_cpu) = ib_mpidr;
+ cpu_logical_map(that_cpu) = ob_mpidr;
+ sync_cache_w(&cpu_logical_map(this_cpu));
+
+ /* Let's do the actual CPU switch. */
+ ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint);
+ if (ret > 0)
+ panic("%s: cpu_suspend() returned %d\n", __func__, ret);
+
+ /* We are executing on the inbound CPU at this point */
+ mpidr = read_mpidr();
+ pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr);
+ BUG_ON(mpidr != ib_mpidr);
+
+ mcpm_cpu_powered_up();
+
+ ret = cpu_pm_exit();
+
+ if (tdev) {
+ clockevents_set_mode(tdev->evtdev, tdev_mode);
+ clockevents_program_event(tdev->evtdev,
+ tdev->evtdev->next_event, 1);
+ }
+
+ trace_cpu_migrate_finish(get_ns(), ib_mpidr);
+ local_fiq_enable();
+ local_irq_enable();
+
+ *handshake_ptr = 1;
+ dsb_sev();
+
+ if (ret)
+ pr_err("%s exiting with error %d\n", __func__, ret);
+ return ret;
+}
+
+struct bL_thread {
+ spinlock_t lock;
+ struct task_struct *task;
+ wait_queue_head_t wq;
+ int wanted_cluster;
+ struct completion started;
+ bL_switch_completion_handler completer;
+ void *completer_cookie;
+};
+
+static struct bL_thread bL_threads[NR_CPUS];
+
+static int bL_switcher_thread(void *arg)
+{
+ struct bL_thread *t = arg;
+ struct sched_param param = { .sched_priority = 1 };
+ int cluster;
+ bL_switch_completion_handler completer;
+ void *completer_cookie;
+
+ sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+ complete(&t->started);
+
+ do {
+ if (signal_pending(current))
+ flush_signals(current);
+ wait_event_interruptible(t->wq,
+ t->wanted_cluster != -1 ||
+ kthread_should_stop());
+
+ spin_lock(&t->lock);
+ cluster = t->wanted_cluster;
+ completer = t->completer;
+ completer_cookie = t->completer_cookie;
+ t->wanted_cluster = -1;
+ t->completer = NULL;
+ spin_unlock(&t->lock);
+
+ if (cluster != -1) {
+ bL_switch_to(cluster);
+
+ if (completer)
+ completer(completer_cookie);
+ }
+ } while (!kthread_should_stop());
+
+ return 0;
+}
+
+static struct task_struct *bL_switcher_thread_create(int cpu, void *arg)
+{
+ struct task_struct *task;
+
+ task = kthread_create_on_node(bL_switcher_thread, arg,
+ cpu_to_node(cpu), "kswitcher_%d", cpu);
+ if (!IS_ERR(task)) {
+ kthread_bind(task, cpu);
+ wake_up_process(task);
+ } else
+ pr_err("%s failed for CPU %d\n", __func__, cpu);
+ return task;
+}
+
+/*
+ * bL_switch_request_cb - Switch to a specific cluster for the given CPU,
+ * with completion notification via a callback
+ *
+ * @cpu: the CPU to switch
+ * @new_cluster_id: the ID of the cluster to switch to.
+ * @completer: switch completion callback. If non-NULL,
+ * @completer(@completer_cookie) will be called on completion of
+ * the switch, in non-atomic context.
+ * @completer_cookie: opaque context argument for @completer.
+ *
+ * This function causes a cluster switch on the given CPU by waking up
+ * the appropriate switcher thread. This function may or may not return
+ * before the switch has occurred.
+ *
+ * If a @completer callback function is supplied, it will be called when
+ * the switch is complete. This can be used to determine asynchronously
+ * when the switch is complete, regardless of when bL_switch_request()
+ * returns. When @completer is supplied, no new switch request is permitted
+ * for the affected CPU until after the switch is complete, and @completer
+ * has returned.
+ */
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+ bL_switch_completion_handler completer,
+ void *completer_cookie)
+{
+ struct bL_thread *t;
+
+ if (cpu >= ARRAY_SIZE(bL_threads)) {
+ pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+ return -EINVAL;
+ }
+
+ t = &bL_threads[cpu];
+
+ if (IS_ERR(t->task))
+ return PTR_ERR(t->task);
+ if (!t->task)
+ return -ESRCH;
+
+ spin_lock(&t->lock);
+ if (t->completer) {
+ spin_unlock(&t->lock);
+ return -EBUSY;
+ }
+ t->completer = completer;
+ t->completer_cookie = completer_cookie;
+ t->wanted_cluster = new_cluster_id;
+ spin_unlock(&t->lock);
+ wake_up(&t->wq);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bL_switch_request_cb);
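
As a hedged illustration of this API (not part of the patch; all my_* names are hypothetical), a client module could request a switch and block until the completer fires:

    #include <linux/completion.h>
    #include <asm/bL_switcher.h>

    /* Completer runs in non-atomic context once the switch has finished. */
    static void my_switch_done(void *cookie)
    {
            complete((struct completion *)cookie);
    }

    static int my_switch_and_wait(unsigned int cpu, unsigned int cluster)
    {
            DECLARE_COMPLETION_ONSTACK(done);
            int ret;

            ret = bL_switch_request_cb(cpu, cluster, my_switch_done, &done);
            if (ret)        /* e.g. -EBUSY if a completer is already pending */
                    return ret;
            wait_for_completion(&done);
            return 0;
    }
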
+
+/*
+ * Detach an outstanding switch request.
+ *
+ * The switcher will continue with the switch request in the background,
+ * but the completer function will not be called.
+ *
+ * This may be necessary if the completer is in a kernel module which is
+ * about to be unloaded.
+ */
+void bL_switch_request_detach(unsigned int cpu,
+ bL_switch_completion_handler completer)
+{
+ struct bL_thread *t;
+
+ if (cpu >= ARRAY_SIZE(bL_threads)) {
+ pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+ return;
+ }
+
+ t = &bL_threads[cpu];
+
+ if (IS_ERR(t->task) || !t->task)
+ return;
+
+ spin_lock(&t->lock);
+ if (t->completer == completer)
+ t->completer = NULL;
+ spin_unlock(&t->lock);
+}
+EXPORT_SYMBOL_GPL(bL_switch_request_detach);
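
Continuing the hypothetical sketch above: a module that registered my_switch_done would detach it on unload, so the switcher never calls into code that is about to vanish:

    static void __exit my_module_exit(void)
    {
            /* Any in-flight switch proceeds; my_switch_done is just dropped. */
            bL_switch_request_detach(0, my_switch_done);
    }
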
+
+/*
+ * Activation and configuration code.
+ */
+
+static DEFINE_MUTEX(bL_switcher_activation_lock);
+static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier);
+static unsigned int bL_switcher_active;
+static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
+static cpumask_t bL_switcher_removed_logical_cpus;
+
+int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_register_notifier);
+
+int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier);
+
+static int bL_activation_notify(unsigned long val)
+{
+ int ret;
+
+ ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL);
+ if (ret & NOTIFY_STOP_MASK)
+ pr_err("%s: notifier chain failed with status 0x%x\n",
+ __func__, ret);
+ return notifier_to_errno(ret);
+}
+
+static void bL_switcher_restore_cpus(void)
+{
+ int i;
+
+ for_each_cpu(i, &bL_switcher_removed_logical_cpus)
+ cpu_up(i);
+}
+
+static int bL_switcher_halve_cpus(void)
+{
+ int i, j, cluster_0, gic_id, ret;
+ unsigned int cpu, cluster, mask;
+ cpumask_t available_cpus;
+
+ /* First pass to validate what we have */
+ mask = 0;
+ for_each_online_cpu(i) {
+ cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+ if (cluster >= 2) {
+ pr_err("%s: only dual cluster systems are supported\n", __func__);
+ return -EINVAL;
+ }
+ if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
+ return -EINVAL;
+ mask |= (1 << cluster);
+ }
+ if (mask != 3) {
+ pr_err("%s: no CPU pairing possible\n", __func__);
+ return -EINVAL;
+ }
+
+ /*
+ * Now let's do the pairing. We match each CPU with another CPU
+ * from a different cluster. To get a uniform scheduling behavior
+ * without fiddling with CPU topology and compute capacity data,
+ * we'll use logical CPUs initially belonging to the same cluster.
+ */
+ memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
+ cpumask_copy(&available_cpus, cpu_online_mask);
+ cluster_0 = -1;
+ for_each_cpu(i, &available_cpus) {
+ int match = -1;
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+ if (cluster_0 == -1)
+ cluster_0 = cluster;
+ if (cluster != cluster_0)
+ continue;
+ cpumask_clear_cpu(i, &available_cpus);
+ for_each_cpu(j, &available_cpus) {
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
+ /*
+ * Let's remember the last match to create "odd"
+			 * pairing on purpose, so that other code cannot
+			 * assume any relation between physical and
+ * logical CPU numbers.
+ */
+ if (cluster != cluster_0)
+ match = j;
+ }
+ if (match != -1) {
+ bL_switcher_cpu_pairing[i] = match;
+ cpumask_clear_cpu(match, &available_cpus);
+ pr_info("CPU%d paired with CPU%d\n", i, match);
+ }
+ }
+
+ /*
+ * Now we disable the unwanted CPUs i.e. everything that has no
+ * pairing information (that includes the pairing counterparts).
+ */
+ cpumask_clear(&bL_switcher_removed_logical_cpus);
+ for_each_online_cpu(i) {
+ cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+
+ /* Let's take note of the GIC ID for this CPU */
+ gic_id = gic_get_cpu_id(i);
+ if (gic_id < 0) {
+ pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+ bL_switcher_restore_cpus();
+ return -EINVAL;
+ }
+ bL_gic_id[cpu][cluster] = gic_id;
+ pr_info("GIC ID for CPU %u cluster %u is %u\n",
+ cpu, cluster, gic_id);
+
+ if (bL_switcher_cpu_pairing[i] != -1) {
+ bL_switcher_cpu_original_cluster[i] = cluster;
+ continue;
+ }
+
+ ret = cpu_down(i);
+ if (ret) {
+ bL_switcher_restore_cpus();
+ return ret;
+ }
+ cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus);
+ }
+
+ return 0;
+}
+
+/* Determine the logical CPU a given physical CPU is grouped on. */
+int bL_switcher_get_logical_index(u32 mpidr)
+{
+ int cpu;
+
+ if (!bL_switcher_active)
+ return -EUNATCH;
+
+ mpidr &= MPIDR_HWID_BITMASK;
+ for_each_online_cpu(cpu) {
+ int pairing = bL_switcher_cpu_pairing[cpu];
+ if (pairing == -1)
+ continue;
+ if ((mpidr == cpu_logical_map(cpu)) ||
+ (mpidr == cpu_logical_map(pairing)))
+ return cpu;
+ }
+ return -EINVAL;
+}
+
+static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
+{
+ trace_cpu_migrate_current(get_ns(), read_mpidr());
+}
+
+int bL_switcher_trace_trigger(void)
+{
+ int ret;
+
+ preempt_disable();
+
+ bL_switcher_trace_trigger_cpu(NULL);
+ ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+
+ preempt_enable();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
+
+static int bL_switcher_enable(void)
+{
+ int cpu, ret;
+
+ mutex_lock(&bL_switcher_activation_lock);
+ cpu_hotplug_driver_lock();
+ if (bL_switcher_active) {
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+ return 0;
+ }
+
+ pr_info("big.LITTLE switcher initializing\n");
+
+ ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE);
+ if (ret)
+ goto error;
+
+ ret = bL_switcher_halve_cpus();
+ if (ret)
+ goto error;
+
+ bL_switcher_trace_trigger();
+
+ for_each_online_cpu(cpu) {
+ struct bL_thread *t = &bL_threads[cpu];
+ spin_lock_init(&t->lock);
+ init_waitqueue_head(&t->wq);
+ init_completion(&t->started);
+ t->wanted_cluster = -1;
+ t->task = bL_switcher_thread_create(cpu, t);
+ }
+
+ bL_switcher_active = 1;
+ bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+ pr_info("big.LITTLE switcher initialized\n");
+ goto out;
+
+error:
+ pr_warning("big.LITTLE switcher initialization failed\n");
+ bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+ return ret;
+}
+
+#ifdef CONFIG_SYSFS
+
+static void bL_switcher_disable(void)
+{
+ unsigned int cpu, cluster;
+ struct bL_thread *t;
+ struct task_struct *task;
+
+ mutex_lock(&bL_switcher_activation_lock);
+ cpu_hotplug_driver_lock();
+
+ if (!bL_switcher_active)
+ goto out;
+
+ if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) {
+ bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+ goto out;
+ }
+
+ bL_switcher_active = 0;
+
+ /*
+ * To deactivate the switcher, we must shut down the switcher
+ * threads to prevent any other requests from being accepted.
+	 * Then, if the final cluster for a given logical CPU is not the
+ * same as the original one, we'll recreate a switcher thread
+ * just for the purpose of switching the CPU back without any
+ * possibility for interference from external requests.
+ */
+ for_each_online_cpu(cpu) {
+ t = &bL_threads[cpu];
+ task = t->task;
+ t->task = NULL;
+ if (!task || IS_ERR(task))
+ continue;
+ kthread_stop(task);
+ /* no more switch may happen on this CPU at this point */
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+ if (cluster == bL_switcher_cpu_original_cluster[cpu])
+ continue;
+ init_completion(&t->started);
+ t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu];
+ task = bL_switcher_thread_create(cpu, t);
+ if (!IS_ERR(task)) {
+ wait_for_completion(&t->started);
+ kthread_stop(task);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+ if (cluster == bL_switcher_cpu_original_cluster[cpu])
+ continue;
+ }
+ /* If execution gets here, we're in trouble. */
+ pr_crit("%s: unable to restore original cluster for CPU %d\n",
+ __func__, cpu);
+ pr_crit("%s: CPU %d can't be restored\n",
+ __func__, bL_switcher_cpu_pairing[cpu]);
+ cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
+ &bL_switcher_removed_logical_cpus);
+ }
+
+ bL_switcher_restore_cpus();
+ bL_switcher_trace_trigger();
+
+ bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+}
+
+static ssize_t bL_switcher_active_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", bL_switcher_active);
+}
+
+static ssize_t bL_switcher_active_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+
+ switch (buf[0]) {
+ case '0':
+ bL_switcher_disable();
+ ret = 0;
+ break;
+ case '1':
+ ret = bL_switcher_enable();
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return (ret >= 0) ? count : ret;
+}
+
+static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret = bL_switcher_trace_trigger();
+
+ return ret ? ret : count;
+}
+
+static struct kobj_attribute bL_switcher_active_attr =
+ __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store);
+
+static struct kobj_attribute bL_switcher_trace_trigger_attr =
+ __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store);
+
+static struct attribute *bL_switcher_attrs[] = {
+ &bL_switcher_active_attr.attr,
+ &bL_switcher_trace_trigger_attr.attr,
+ NULL,
+};
+
+static struct attribute_group bL_switcher_attr_group = {
+ .attrs = bL_switcher_attrs,
+};
+
+static struct kobject *bL_switcher_kobj;
+
+static int __init bL_switcher_sysfs_init(void)
+{
+ int ret;
+
+ bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj);
+ if (!bL_switcher_kobj)
+ return -ENOMEM;
+ ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group);
+ if (ret)
+ kobject_put(bL_switcher_kobj);
+ return ret;
+}
+
+#endif /* CONFIG_SYSFS */
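
Since the kobject is created on kernel_kobj, the attributes land in /sys/kernel/bL_switcher. A hedged user-space sketch of enabling the switcher through the "active" attribute:

    /* Build as an ordinary user-space program; error handling kept minimal. */
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/kernel/bL_switcher/active", O_WRONLY);

            if (fd < 0)
                    return 1;
            write(fd, "1", 1);      /* '1' enables, '0' disables */
            close(fd);
            return 0;
    }
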
+
+bool bL_switcher_get_enabled(void)
+{
+ mutex_lock(&bL_switcher_activation_lock);
+
+ return bL_switcher_active;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_get_enabled);
+
+void bL_switcher_put_enabled(void)
+{
+ mutex_unlock(&bL_switcher_activation_lock);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
+
+/*
+ * Veto any CPU hotplug operation while the switcher is active.
+ * We're just not ready to deal with that given the trickery involved.
+ */
+static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_DOWN_PREPARE:
+ if (bL_switcher_active)
+ return NOTIFY_BAD;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block bL_switcher_hotplug_notifier =
+ { &bL_switcher_hotplug_callback, NULL, 0 };
+
+#ifdef CONFIG_SCHED_HMP
+static bool no_bL_switcher = true;
+#else
+static bool no_bL_switcher;
+#endif
+core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
+
+static int __init bL_switcher_init(void)
+{
+ int ret;
+
+ if (MAX_NR_CLUSTERS != 2) {
+ pr_err("%s: only dual cluster systems are supported\n", __func__);
+ return -EINVAL;
+ }
+
+ register_cpu_notifier(&bL_switcher_hotplug_notifier);
+
+ if (!no_bL_switcher) {
+ ret = bL_switcher_enable();
+ if (ret)
+ return ret;
+ }
+
+#ifdef CONFIG_SYSFS
+ ret = bL_switcher_sysfs_init();
+ if (ret)
+ pr_err("%s: unable to create sysfs entry\n", __func__);
+#endif
+
+ return 0;
+}
+
+late_initcall(bL_switcher_init);
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
new file mode 100644
index 000000000000..5e2dd197e728
--- /dev/null
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface
+ *
+ * Created by: Nicolas Pitre, November 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * Dummy interface to user space for debugging purposes only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <asm/uaccess.h>
+#include <asm/bL_switcher.h>
+
+static ssize_t bL_switcher_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ unsigned char val[3];
+ unsigned int cpu, cluster;
+ int ret;
+
+ pr_debug("%s\n", __func__);
+
+ if (len < 3)
+ return -EINVAL;
+
+ if (copy_from_user(val, buf, 3))
+ return -EFAULT;
+
+ /* format: <cpu#>,<cluster#> */
+ if (val[0] < '0' || val[0] > '4' ||
+ val[1] != ',' ||
+ val[2] < '0' || val[2] > '1')
+ return -EINVAL;
+
+ cpu = val[0] - '0';
+ cluster = val[2] - '0';
+ ret = bL_switch_request(cpu, cluster);
+
+ return ret ? : len;
+}
+
+static const struct file_operations bL_switcher_fops = {
+ .write = bL_switcher_write,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice bL_switcher_device = {
+ MISC_DYNAMIC_MINOR,
+ "b.L_switcher",
+ &bL_switcher_fops
+};
+
+static int __init bL_switcher_dummy_if_init(void)
+{
+ return misc_register(&bL_switcher_device);
+}
+
+static void __exit bL_switcher_dummy_if_exit(void)
+{
+ misc_deregister(&bL_switcher_device);
+}
+
+module_init(bL_switcher_dummy_if_init);
+module_exit(bL_switcher_dummy_if_exit);
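
For completeness, a hedged user-space sketch of the write format parsed above; the /dev node name follows the miscdevice name, assuming devtmpfs/udev creates the node:

    /* Ask logical CPU 0 to switch to cluster 1 ("<cpu#>,<cluster#>"). */
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/dev/b.L_switcher", O_WRONLY);

            if (fd < 0)
                    return 1;
            if (write(fd, "0,1", 3) != 3) {
                    close(fd);
                    return 1;
            }
            close(fd);
            return 0;
    }
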
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 370236dd1a03..4a2b32fd53a1 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
}
+extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2];
+
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+ unsigned long poke_phys_addr, unsigned long poke_val)
+{
+ unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0];
+ poke[0] = poke_phys_addr;
+ poke[1] = poke_val;
+ __cpuc_flush_dcache_area((void *)poke, 8);
+ outer_clean_range(__pa(poke), __pa(poke + 2));
+}
+
static const struct mcpm_platform_ops *platform_ops;
int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 8178705c4b24..0decb3c07165 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -15,6 +15,7 @@
#include <linux/linkage.h>
#include <asm/mcpm.h>
+#include <asm/assembler.h>
#include "vlock.h"
@@ -47,6 +48,7 @@
ENTRY(mcpm_entry_point)
+ ARM_BE8(setend be)
THUMB( adr r12, BSYM(1f) )
THUMB( bx r12 )
THUMB( .thumb )
@@ -71,12 +73,19 @@ ENTRY(mcpm_entry_point)
* position independent way.
*/
adr r5, 3f
- ldmia r5, {r6, r7, r8, r11}
+ ldmia r5, {r0, r6, r7, r8, r11}
+ add r0, r5, r0 @ r0 = mcpm_entry_early_pokes
add r6, r5, r6 @ r6 = mcpm_entry_vectors
ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys
add r8, r5, r8 @ r8 = mcpm_sync
add r11, r5, r11 @ r11 = first_man_locks
+ @ Perform an early poke, if any
+ add r0, r0, r4, lsl #3
+ ldmia r0, {r0, r1}
+ teq r0, #0
+ strne r1, [r0]
+
mov r0, #MCPM_SYNC_CLUSTER_SIZE
mla r8, r0, r10, r8 @ r8 = sync cluster base
@@ -195,7 +204,8 @@ mcpm_entry_gated:
.align 2
-3: .word mcpm_entry_vectors - .
+3: .word mcpm_entry_early_pokes - .
+ .word mcpm_entry_vectors - 3b
.word mcpm_power_up_setup_phys - 3b
.word mcpm_sync - 3b
.word first_man_locks - 3b
@@ -214,6 +224,10 @@ first_man_locks:
ENTRY(mcpm_entry_vectors)
.space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+ .type mcpm_entry_early_pokes, #object
+ENTRY(mcpm_entry_early_pokes)
+ .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+
.type mcpm_power_up_setup_phys, #object
ENTRY(mcpm_power_up_setup_phys)
.space 4 @ set by mcpm_sync_init()
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index accefe099182..4b26d14e41b3 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -17,7 +17,8 @@ int arch_timer_arch_init(void);
* nicely work out which register we want, and chuck away the rest of
* the code. At least it does so with a recent GCC (4.6.3).
*/
-static inline void arch_timer_reg_write(const int access, const int reg, u32 val)
+static __always_inline
+void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
@@ -28,9 +29,7 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val
asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val));
break;
}
- }
-
- if (access == ARCH_TIMER_VIRT_ACCESS) {
+ } else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val));
@@ -44,7 +43,8 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val
isb();
}
-static inline u32 arch_timer_reg_read(const int access, const int reg)
+static __always_inline
+u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
{
u32 val = 0;
@@ -57,9 +57,7 @@ static inline u32 arch_timer_reg_read(const int access, const int reg)
asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val));
break;
}
- }
-
- if (access == ARCH_TIMER_VIRT_ACCESS) {
+ } else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val));
@@ -89,17 +87,43 @@ static inline u64 arch_counter_get_cntvct(void)
return cval;
}
-static inline void __cpuinit arch_counter_set_user_access(void)
+static inline u32 arch_timer_get_cntkctl(void)
{
u32 cntkctl;
-
asm volatile("mrc p15, 0, %0, c14, c1, 0" : "=r" (cntkctl));
+ return cntkctl;
+}
- /* disable user access to everything */
- cntkctl &= ~((3 << 8) | (7 << 0));
-
+static inline void arch_timer_set_cntkctl(u32 cntkctl)
+{
asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl));
}
+
+static inline void __cpuinit arch_counter_set_user_access(void)
+{
+ u32 cntkctl = arch_timer_get_cntkctl();
+
+	/*
+	 * Disable user access to both physical/virtual counters/timers,
+	 * and disable the virtual event stream.
+	 */
+ cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN
+ | ARCH_TIMER_USR_VT_ACCESS_EN
+ | ARCH_TIMER_VIRT_EVT_EN
+ | ARCH_TIMER_USR_VCT_ACCESS_EN
+ | ARCH_TIMER_USR_PCT_ACCESS_EN);
+ arch_timer_set_cntkctl(cntkctl);
+}
+
+static inline void arch_timer_evtstrm_enable(int divider)
+{
+ u32 cntkctl = arch_timer_get_cntkctl();
+ cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
+ /* Set the divider and enable virtual event stream */
+ cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
+ | ARCH_TIMER_VIRT_EVT_EN;
+ arch_timer_set_cntkctl(cntkctl);
+ elf_hwcap |= HWCAP_EVTSTRM;
+}
+
#endif
#endif
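
A hedged sketch of how a timer driver might use arch_timer_evtstrm_enable(); the ~10kHz target and the my_* name are assumptions, not taken from this patch:

    #include <linux/log2.h>
    #include <linux/kernel.h>
    #include <asm/arch_timer.h>

    static void my_evtstrm_setup(u32 timer_rate)
    {
            /* One wake-up event roughly every 2^pos counter ticks. */
            int pos = ilog2(timer_rate / 10000);

            /* The divider field is 4 bits wide, hence the clamp to 15. */
            arch_timer_evtstrm_enable(clamp(pos, 0, 15));
    }
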
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 05ee9eebad6b..f0963bb79935 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -30,8 +30,8 @@
* Endian independent macros for shifting bytes within registers.
*/
#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define lspull lsr
+#define lspush lsl
#define get_byte_0 lsl #0
#define get_byte_1 lsr #8
#define get_byte_2 lsr #16
@@ -41,8 +41,8 @@
#define put_byte_2 lsl #16
#define put_byte_3 lsl #24
#else
-#define pull lsl
-#define push lsr
+#define lspull lsl
+#define lspush lsr
#define get_byte_0 lsr #24
#define get_byte_1 lsr #16
#define get_byte_2 lsr #8
@@ -53,6 +53,13 @@
#define put_byte_3 lsl #0
#endif
+/* Select code for any configuration running in BE8 mode */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define ARM_BE8(code...) code
+#else
+#define ARM_BE8(code...)
+#endif
+
/*
* Data preload for architectures that support it
*/
@@ -212,9 +219,9 @@
#ifdef CONFIG_SMP
#if __LINUX_ARM_ARCH__ >= 7
.ifeqs "\mode","arm"
- ALT_SMP(dmb)
+ ALT_SMP(dmb ish)
.else
- ALT_SMP(W(dmb))
+ ALT_SMP(W(dmb) ish)
.endif
#elif __LINUX_ARM_ARCH__ == 6
ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index da1c77d39327..6447a0b7b127 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -301,8 +301,8 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
__asm__ __volatile__("@ atomic64_add\n"
"1: ldrexd %0, %H0, [%3]\n"
-" adds %0, %0, %4\n"
-" adc %H0, %H0, %H4\n"
+" adds %Q0, %Q0, %Q4\n"
+" adc %R0, %R0, %R4\n"
" strexd %1, %0, %H0, [%3]\n"
" teq %1, #0\n"
" bne 1b"
@@ -320,8 +320,8 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
__asm__ __volatile__("@ atomic64_add_return\n"
"1: ldrexd %0, %H0, [%3]\n"
-" adds %0, %0, %4\n"
-" adc %H0, %H0, %H4\n"
+" adds %Q0, %Q0, %Q4\n"
+" adc %R0, %R0, %R4\n"
" strexd %1, %0, %H0, [%3]\n"
" teq %1, #0\n"
" bne 1b"
@@ -341,8 +341,8 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
__asm__ __volatile__("@ atomic64_sub\n"
"1: ldrexd %0, %H0, [%3]\n"
-" subs %0, %0, %4\n"
-" sbc %H0, %H0, %H4\n"
+" subs %Q0, %Q0, %Q4\n"
+" sbc %R0, %R0, %R4\n"
" strexd %1, %0, %H0, [%3]\n"
" teq %1, #0\n"
" bne 1b"
@@ -360,8 +360,8 @@ static inline u64 atomic64_sub_return(u64 i, atomic64_t *v)
__asm__ __volatile__("@ atomic64_sub_return\n"
"1: ldrexd %0, %H0, [%3]\n"
-" subs %0, %0, %4\n"
-" sbc %H0, %H0, %H4\n"
+" subs %Q0, %Q0, %Q4\n"
+" sbc %R0, %R0, %R4\n"
" strexd %1, %0, %H0, [%3]\n"
" teq %1, #0\n"
" bne 1b"
@@ -428,9 +428,9 @@ static inline u64 atomic64_dec_if_positive(atomic64_t *v)
__asm__ __volatile__("@ atomic64_dec_if_positive\n"
"1: ldrexd %0, %H0, [%3]\n"
-" subs %0, %0, #1\n"
-" sbc %H0, %H0, #0\n"
-" teq %H0, #0\n"
+" subs %Q0, %Q0, #1\n"
+" sbc %R0, %R0, #0\n"
+" teq %R0, #0\n"
" bmi 2f\n"
" strexd %1, %0, %H0, [%3]\n"
" teq %1, #0\n"
@@ -459,8 +459,8 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
" teqeq %H0, %H5\n"
" moveq %1, #0\n"
" beq 2f\n"
-" adds %0, %0, %6\n"
-" adc %H0, %H0, %H6\n"
+" adds %Q0, %Q0, %Q6\n"
+" adc %R0, %R0, %R6\n"
" strexd %2, %0, %H0, [%4]\n"
" teq %2, #0\n"
" bne 1b\n"
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
new file mode 100644
index 000000000000..482383b45c91
--- /dev/null
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -0,0 +1,83 @@
+/*
+ * arch/arm/include/asm/bL_switcher.h
+ *
+ * Created by: Nicolas Pitre, April 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+typedef void (*bL_switch_completion_handler)(void *cookie);
+
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+ bL_switch_completion_handler completer,
+ void *completer_cookie);
+static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+{
+ return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL);
+}
+
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE 0
+#define BL_NOTIFY_POST_ENABLE 1
+#define BL_NOTIFY_PRE_DISABLE 2
+#define BL_NOTIFY_POST_DISABLE 3
+
+#ifdef CONFIG_BL_SWITCHER
+
+void bL_switch_request_detach(unsigned int cpu,
+ bL_switch_completion_handler completer);
+
+int bL_switcher_register_notifier(struct notifier_block *nb);
+int bL_switcher_unregister_notifier(struct notifier_block *nb);
+
+/*
+ * Use these functions to temporarily prevent enabling/disabling of
+ * the switcher.
+ * bL_switcher_get_enabled() returns true if the switcher is currently
+ * enabled. Each call to bL_switcher_get_enabled() must be followed
+ * by a call to bL_switcher_put_enabled(). These functions are not
+ * recursive.
+ */
+bool bL_switcher_get_enabled(void);
+void bL_switcher_put_enabled(void);
+
+int bL_switcher_trace_trigger(void);
+int bL_switcher_get_logical_index(u32 mpidr);
+
+#else
+static inline void bL_switch_request_detach(unsigned int cpu,
+ bL_switch_completion_handler completer) { }
+
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
+#endif /* CONFIG_BL_SWITCHER */
+
+#endif
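
To make the notifier and get/put contract above concrete, here is a hedged client sketch (my_* names are hypothetical; note the callback itself must not take the enabled reference):

    #include <linux/notifier.h>
    #include <asm/bL_switcher.h>

    static int my_bL_notify(struct notifier_block *nb,
                            unsigned long action, void *unused)
    {
            switch (action) {
            case BL_NOTIFY_PRE_DISABLE:
                    /* Stop issuing switch requests; CPUs will be re-shuffled. */
                    break;
            case BL_NOTIFY_POST_ENABLE:
                    /* Pairing may have changed: re-query it, e.g. with
                     * bL_switcher_get_logical_index(). */
                    break;
            }
            return NOTIFY_OK;
    }

    static struct notifier_block my_bL_nb = {
            .notifier_call = my_bL_notify,
    };

    static int my_client_init(void)
    {
            int ret = bL_switcher_register_notifier(&my_bL_nb);

            if (!ret && bL_switcher_get_enabled()) {
                    /* The switcher cannot be disabled until we put it back. */
                    bL_switcher_put_enabled();
            }
            return ret;
    }
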
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 8dcd9c702d90..60f15e274e6d 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -14,27 +14,27 @@
#endif
#if __LINUX_ARM_ARCH__ >= 7
-#define isb() __asm__ __volatile__ ("isb" : : : "memory")
-#define dsb() __asm__ __volatile__ ("dsb" : : : "memory")
-#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
+#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory")
+#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory")
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
#elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
: : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
: : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
+#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
: : "r" (0) : "memory")
#elif defined(CONFIG_CPU_FA526)
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
: : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
: : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
#else
-#define isb() __asm__ __volatile__ ("" : : : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define isb(x) __asm__ __volatile__ ("" : : : "memory")
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
: : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
#endif
#ifdef CONFIG_ARCH_HAS_BARRIERS
@@ -42,7 +42,7 @@
#elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
#define mb() do { dsb(); outer_sync(); } while (0)
#define rmb() dsb()
-#define wmb() mb()
+#define wmb() do { dsb(st); outer_sync(); } while (0)
#else
#define mb() barrier()
#define rmb() barrier()
@@ -54,9 +54,9 @@
#define smp_rmb() barrier()
#define smp_wmb() barrier()
#else
-#define smp_mb() dmb()
-#define smp_rmb() dmb()
-#define smp_wmb() dmb()
+#define smp_mb() dmb(ish)
+#define smp_rmb() smp_mb()
+#define smp_wmb() dmb(ishst)
#endif
#define read_barrier_depends() do { } while(0)
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index 7af5c6c3653a..b274bde24905 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -2,6 +2,8 @@
#define _ASMARM_BUG_H
#include <linux/linkage.h>
+#include <linux/types.h>
+#include <asm/opcodes.h>
#ifdef CONFIG_BUG
@@ -12,10 +14,10 @@
*/
#ifdef CONFIG_THUMB2_KERNEL
#define BUG_INSTR_VALUE 0xde02
-#define BUG_INSTR_TYPE ".hword "
+#define BUG_INSTR(__value) __inst_thumb16(__value)
#else
#define BUG_INSTR_VALUE 0xe7f001f2
-#define BUG_INSTR_TYPE ".word "
+#define BUG_INSTR(__value) __inst_arm(__value)
#endif
@@ -33,7 +35,7 @@
#define __BUG(__file, __line, __value) \
do { \
- asm volatile("1:\t" BUG_INSTR_TYPE #__value "\n" \
+ asm volatile("1:\t" BUG_INSTR(__value) "\n" \
".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \
"2:\t.asciz " #__file "\n" \
".popsection\n" \
@@ -48,7 +50,7 @@ do { \
#define __BUG(__file, __line, __value) \
do { \
- asm volatile(BUG_INSTR_TYPE #__value); \
+ asm volatile(BUG_INSTR(__value) "\n"); \
unreachable(); \
} while (0)
#endif /* CONFIG_DEBUG_BUGVERBOSE */
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index a25e62d2de6e..2059f019bef4 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -437,4 +437,50 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
#define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr))
#define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr))
+/*
+ * Disabling cache access for one CPU in an ARMv7 SMP system is tricky.
+ * To do so we must:
+ *
+ * - Clear the SCTLR.C bit to prevent further cache allocations
+ * - Flush the desired level of cache
+ * - Clear the ACTLR "SMP" bit to disable local coherency
+ *
+ * ... and we must do so without any intervening memory access between
+ * those steps, not even to the stack.
+ *
+ * WARNING -- After this has been called:
+ *
+ * - No ldrex/strex (and similar) instructions must be used.
+ * - The CPU is obviously no longer coherent with the other CPUs.
+ * - This is unlikely to work as expected if Linux is running non-secure.
+ *
+ * Note:
+ *
+ * - This is known to apply to several ARMv7 processor implementations,
+ * however some exceptions may exist. Caveat emptor.
+ *
+ * - The clobber list is dictated by the call to v7_flush_dcache_*.
+ *   fp is preserved to the stack explicitly prior to disabling the cache,
+ *   since adding it to the clobber list is incompatible with having
+ *   CONFIG_FRAME_POINTER=y.  ip is saved as well in case r12-clobbering
+ *   trampolines are inserted by the linker, and to keep sp 64-bit aligned.
+ */
+#define v7_exit_coherency_flush(level) \
+ asm volatile( \
+ "stmfd sp!, {fp, ip} \n\t" \
+ "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \
+ "bic r0, r0, #"__stringify(CR_C)" \n\t" \
+ "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \
+ "isb \n\t" \
+ "bl v7_flush_dcache_"__stringify(level)" \n\t" \
+ "clrex \n\t" \
+ "mrc p15, 0, r0, c1, c0, 1 @ get ACTLR \n\t" \
+ "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \
+ "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \
+ "isb \n\t" \
+ "dsb \n\t" \
+ "ldmfd sp!, {fp, ip}" \
+ : : : "r0","r1","r2","r3","r4","r5","r6","r7", \
+ "r9","r10","lr","memory" )
+
#endif
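
As a hedged usage sketch, mirroring how MCPM backends typically call this macro (the my_* name and the GIC call are assumptions):

    static void my_mcpm_cpu_power_down(void)
    {
            gic_cpu_if_down();      /* assumed: quiesce the CPU interface */

            /*
             * Flush only to the Level Of Unification Inner Shareable
             * (i.e. L1); a "last man" taking the whole cluster down
             * would use v7_exit_coherency_flush(all) instead.
             */
            v7_exit_coherency_flush(louis);

            /* ... drop into WFI via the platform power controller ... */
    }
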
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 1f3262e99d81..cedd3721318b 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -61,6 +61,20 @@ static inline void set_cr(unsigned int val)
isb();
}
+static inline unsigned int get_auxcr(void)
+{
+ unsigned int val;
+ asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val));
+ return val;
+}
+
+static inline void set_auxcr(unsigned int val)
+{
+ asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR"
+ : : "r" (val));
+ isb();
+}
+
#ifndef CONFIG_SMP
extern void adjust_cr(unsigned long mask, unsigned long set);
#endif
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index dba62cb1ad08..3392fe2d3174 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -43,15 +43,18 @@
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_INTEL 0x69
-#define ARM_CPU_PART_ARM1136 0xB360
-#define ARM_CPU_PART_ARM1156 0xB560
-#define ARM_CPU_PART_ARM1176 0xB760
-#define ARM_CPU_PART_ARM11MPCORE 0xB020
-#define ARM_CPU_PART_CORTEX_A8 0xC080
-#define ARM_CPU_PART_CORTEX_A9 0xC090
-#define ARM_CPU_PART_CORTEX_A5 0xC050
-#define ARM_CPU_PART_CORTEX_A15 0xC0F0
-#define ARM_CPU_PART_CORTEX_A7 0xC070
+/* ARM implemented processors */
+#define ARM_CPU_PART_ARM1136 0x4100b360
+#define ARM_CPU_PART_ARM1156 0x4100b560
+#define ARM_CPU_PART_ARM1176 0x4100b760
+#define ARM_CPU_PART_ARM11MPCORE 0x4100b020
+#define ARM_CPU_PART_CORTEX_A8 0x4100c080
+#define ARM_CPU_PART_CORTEX_A9 0x4100c090
+#define ARM_CPU_PART_CORTEX_A5 0x4100c050
+#define ARM_CPU_PART_CORTEX_A7 0x4100c070
+#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0
+#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0
+#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0
#define ARM_CPU_XSCALE_ARCH_MASK 0xe000
#define ARM_CPU_XSCALE_ARCH_V1 0x2000
@@ -122,14 +125,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
return (read_cpuid_id() & 0xFF000000) >> 24;
}
-static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+/*
+ * The CPU part number is meaningless without referring to the CPU
+ * implementer: implementers are free to define their own part numbers
+ * which are permitted to clash with other implementers' part numbers.
+ */
+static inline unsigned int __attribute_const__ read_cpuid_part(void)
+{
+ return read_cpuid_id() & 0xff00fff0;
+}
+
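
A hedged example of the intended idiom, matching on the combined implementer+part value (the my_* name is hypothetical):

    #include <linux/types.h>
    #include <asm/cputype.h>

    static bool my_cpu_is_cortex_a15(void)
    {
            return read_cpuid_part() == ARM_CPU_PART_CORTEX_A15; /* 0x4100c0f0 */
    }
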
+static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
{
return read_cpuid_id() & 0xFFF0;
}
static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
{
- return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
+ return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
}
static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
index 3ed37b4d93da..4f8e9e5514b1 100644
--- a/arch/arm/include/asm/dma-contiguous.h
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -2,10 +2,9 @@
#define ASMARM_DMA_CONTIGUOUS_H
#ifdef __KERNEL__
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
#include <linux/types.h>
-#include <asm-generic/dma-contiguous.h>
void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size);
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index 56211f2084ef..f4b46d39b9cf 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -19,8 +19,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG];
typedef struct user_fp elf_fpregset_t;
-#define EM_ARM 40
-
#define EF_ARM_EABI_MASK 0xff000000
#define EF_ARM_EABI_UNKNOWN 0x00000000
#define EF_ARM_EABI_VER1 0x01000000
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index f89515adac60..39eb16b0066f 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -52,15 +52,7 @@ extern inline void *return_address(unsigned int level)
#endif
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_address(n) return_address(n)
#endif /* ifndef __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 2740c2a2df63..3d7351c844aa 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -5,7 +5,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 6
+#define NR_IPI 7
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h
index 0cf7a6b842ff..ad774f37c47c 100644
--- a/arch/arm/include/asm/hardware/coresight.h
+++ b/arch/arm/include/asm/hardware/coresight.h
@@ -24,8 +24,8 @@
#define TRACER_TIMEOUT 10000
#define etm_writel(t, v, x) \
- (__raw_writel((v), (t)->etm_regs + (x)))
-#define etm_readl(t, x) (__raw_readl((t)->etm_regs + (x)))
+ (writel_relaxed((v), (t)->etm_regs + (x)))
+#define etm_readl(t, x) (readl_relaxed((t)->etm_regs + (x)))
/* CoreSight Management Registers */
#define CSMR_LOCKACCESS 0xfb0
@@ -142,8 +142,8 @@
#define ETBFF_TRIGFL BIT(10)
#define etb_writel(t, v, x) \
- (__raw_writel((v), (t)->etb_regs + (x)))
-#define etb_readl(t, x) (__raw_readl((t)->etb_regs + (x)))
+ (writel_relaxed((v), (t)->etb_regs + (x)))
+#define etb_readl(t, x) (readl_relaxed((t)->etb_regs + (x)))
#define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0)
#define etm_unlock(t) \
diff --git a/arch/arm/include/asm/hardware/debug-pl01x.S b/arch/arm/include/asm/hardware/debug-pl01x.S
index f9fd083eff63..6489d1ffe3c8 100644
--- a/arch/arm/include/asm/hardware/debug-pl01x.S
+++ b/arch/arm/include/asm/hardware/debug-pl01x.S
@@ -18,12 +18,14 @@
.macro waituart,rd,rx
1001: ldr \rd, [\rx, #UART01x_FR]
+ ARM_BE8( rev \rd, \rd )
tst \rd, #UART01x_FR_TXFF
bne 1001b
.endm
.macro busyuart,rd,rx
1001: ldr \rd, [\rx, #UART01x_FR]
+ ARM_BE8( rev \rd, \rd )
tst \rd, #UART01x_FR_BUSY
bne 1001b
.endm
diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h
new file mode 100644
index 000000000000..d4014fbe5ea3
--- /dev/null
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/include/asm/hugetlb-3level.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARM_HUGETLB_3LEVEL_H
+#define _ASM_ARM_HUGETLB_3LEVEL_H
+
+/*
+ * If our huge pte is non-zero then mark the valid bit.
+ * This allows pte_present(huge_ptep_get(ptep)) to return true for non-zero
+ * ptes.
+ * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes).
+ */
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+ pte_t retval = *ptep;
+ if (pte_val(retval))
+ pte_val(retval) |= L_PTE_VALID;
+ return retval;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+ return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+#endif /* _ASM_ARM_HUGETLB_3LEVEL_H */
diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h
new file mode 100644
index 000000000000..1f1b1cd112f3
--- /dev/null
+++ b/arch/arm/include/asm/hugetlb.h
@@ -0,0 +1,84 @@
+/*
+ * arch/arm/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARM_HUGETLB_H
+#define _ASM_ARM_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
+
+#include <asm/hugetlb-3level.h>
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor,
+ unsigned long ceiling)
+{
+ free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+ unsigned long addr, unsigned long len)
+{
+ return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+ unsigned long addr, unsigned long len)
+{
+ struct hstate *h = hstate_file(file);
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (addr & ~huge_page_mask(h))
+ return -EINVAL;
+ return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+ return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+ return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+ return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+ clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* _ASM_ARM_HUGETLB_H */
diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h
index 48066ce9ea34..0a9d5dd93294 100644
--- a/arch/arm/include/asm/kgdb.h
+++ b/arch/arm/include/asm/kgdb.h
@@ -11,6 +11,7 @@
#define __ARM_KGDB_H__
#include <linux/ptrace.h>
+#include <asm/opcodes.h>
/*
* GDB assumes that we're a user process being debugged, so
@@ -41,7 +42,7 @@
static inline void arch_kgdb_breakpoint(void)
{
- asm(".word 0xe7ffdeff");
+ asm(__inst_arm(0xe7ffdeff));
}
extern void kgdb_handle_bus_error(void);
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 124623e5ef14..816db0bf2dd8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -55,8 +55,10 @@
* The bits we set in HCR:
* TAC: Trap ACTLR
* TSC: Trap SMC
+ * TVM: Trap VM ops (until MMU and caches are on)
* TSW: Trap cache operations by set/way
* TWI: Trap WFI
+ * TWE: Trap WFE
* TIDCP: Trap L2CTLR/L2ECTLR
* BSU_IS: Upgrade barriers to the inner shareable domain
 * FB: Force broadcast of all maintenance operations
@@ -67,8 +69,7 @@
*/
#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
- HCR_SWIO | HCR_TIDCP)
-#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+ HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP)
/* System Control Register (SCTLR) bits */
#define SCTLR_TE (1 << 30)
@@ -95,12 +96,12 @@
#define TTBCR_IRGN1 (3 << 24)
#define TTBCR_EPD1 (1 << 23)
#define TTBCR_A1 (1 << 22)
-#define TTBCR_T1SZ (3 << 16)
+#define TTBCR_T1SZ (7 << 16)
#define TTBCR_SH0 (3 << 12)
#define TTBCR_ORGN0 (3 << 10)
#define TTBCR_IRGN0 (3 << 8)
#define TTBCR_EPD0 (1 << 7)
-#define TTBCR_T0SZ 3
+#define TTBCR_T0SZ (7 << 0)
#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
/* Hyp System Trap Register */
@@ -135,7 +136,6 @@
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL)
#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30))
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-#define S2_PGD_SIZE (1 << S2_PGD_ORDER)
/* Virtualization Translation Control Register (VTCR) bits */
#define VTCR_SH0 (3 << 12)
@@ -209,6 +209,8 @@
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)
+#define HSR_WFI_IS_WFE (1U << 0)
+
#define HSR_HVC_IMM_MASK ((1UL << 16) - 1)
#define HSR_DABT_S1PTW (1U << 7)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 4bb08e3e52bc..3a67bec72d0c 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -39,7 +39,7 @@
#define c6_IFAR 17 /* Instruction Fault Address Register */
#define c7_PAR 18 /* Physical Address Register */
#define c7_PAR_high 19 /* PAR top 32 bits */
-#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */
+#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */
#define c10_PRRR 21 /* Primary Region Remap Register */
#define c10_NMRR 22 /* Normal Memory Remap Register */
#define c12_VBAR 23 /* Vector Base Address Register */
@@ -48,7 +48,9 @@
#define c13_TID_URO 26 /* Thread ID, User R/O */
#define c13_TID_PRIV 27 /* Thread ID, Privileged */
#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */
-#define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */
+#define c10_AMAIR0	29	/* Auxiliary Memory Attribute Indirection Reg0 */
+#define c10_AMAIR1	30	/* Auxiliary Memory Attribute Indirection Reg1 */
+#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */
#define ARM_EXCEPTION_RESET 0
#define ARM_EXCEPTION_UNDEFINED 1
@@ -59,6 +61,24 @@
#define ARM_EXCEPTION_FIQ 6
#define ARM_EXCEPTION_HVC 7
+/*
+ * The rr_lo_hi macro swaps a pair of registers depending on
+ * current endianness. It is used in conjunction with ldrd and strd
+ * instructions that load/store a 64-bit value from/to memory to/from
+ * a pair of registers which are used with the mrrc and mcrr instructions.
+ * If used with the ldrd/strd instructions, the a1 parameter is the first
+ * source/destination register and the a2 parameter is the second
+ * source/destination register. Note that the ldrd/strd instructions
+ * already swap the bytes within the words correctly according to the
+ * endianness setting, but the order of the registers needs to be effectively
+ * swapped when used with the mrrc/mcrr instructions.
+ */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define rr_lo_hi(a1, a2) a2, a1
+#else
+#define rr_lo_hi(a1, a2) a1, a2
+#endif
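
A hedged illustration in the document's own assembly dialect (the VTTBR write mirrors typical hyp code; the register choice is an assumption):

    @ r2/r3 hold the low/high words of a 64-bit value loaded with ldrd,
    @ which already honours the data endianness setting:
    ldrd    r2, r3, [r0]
    @ mcrr takes {lo, hi} as distinct operands, so swap them on BE8:
    mcrr    p15, 6, rr_lo_hi(r2, r3), c2    @ write VTTBR
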
+
#ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
@@ -74,8 +94,6 @@ extern char __kvm_hyp_vector[];
extern char __kvm_hyp_code_start[];
extern char __kvm_hyp_code_end[];
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 82b4babead2c..b9db269c6e61 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
	return cpsr_mode > USR_MODE;
}
-static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
-{
- return reg == 15;
-}
-
static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.hsr;
@@ -154,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
{
+ return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
+{
return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
}
@@ -162,4 +162,69 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
}
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.cp15[c0_MPIDR];
+}
+
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+ *vcpu_cpsr(vcpu) |= PSR_E_BIT;
+}
+
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+ return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return be16_to_cpu(data & 0xffff);
+ default:
+ return be32_to_cpu(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return le16_to_cpu(data & 0xffff);
+ default:
+ return le32_to_cpu(data);
+ }
+ }
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_be16(data & 0xffff);
+ default:
+ return cpu_to_be32(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_le16(data & 0xffff);
+ default:
+ return cpu_to_le32(data);
+ }
+ }
+}
+
#endif /* __ARM_KVM_EMULATE_H__ */
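A hedged userspace model of the two conversion helpers above: guest_is_be stands in for kvm_vcpu_is_be(), and the glibc be16toh()/le16toh() family stands in for the kernel's byte-order helpers.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Model of vcpu_data_guest_to_host(): fix up a value the guest stored
 * to an emulated device, according to the guest's data endianness. */
static unsigned long data_guest_to_host(int guest_is_be,
					unsigned long data, unsigned int len)
{
	if (guest_is_be) {
		switch (len) {
		case 1:  return data & 0xff;
		case 2:  return be16toh(data & 0xffff);
		default: return be32toh((uint32_t)data);
		}
	}
	switch (len) {
	case 1:  return data & 0xff;
	case 2:  return le16toh(data & 0xffff);
	default: return le32toh((uint32_t)data);
	}
}

int main(void)
{
	/* A big-endian guest stored the 16-bit value 0x1234. */
	printf("%#lx\n", data_guest_to_host(1, 0x1234, 2));
	return 0;
}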
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 57cb786a6203..46e5d4da1989 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,30 +19,31 @@
#ifndef __ARM_KVM_HOST_H__
#define __ARM_KVM_HOST_H__
+#include <linux/types.h>
+#include <linux/kvm_types.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
#include <asm/fpstate.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_arch_timer.h>
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
#define KVM_USER_MEM_SLOTS 32
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HAVE_ONE_REG
-#define KVM_VCPU_MAX_FEATURES 1
-
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES 1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
+#define KVM_VCPU_MAX_FEATURES 2
-#include <asm/kvm_vgic.h>
+#include <kvm/arm_vgic.h>
-struct kvm_vcpu;
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
@@ -101,6 +102,12 @@ struct kvm_vcpu_arch {
/* The CPU type we expose to the VM */
u32 midr;
+ /* HYP trapping configuration */
+ u32 hcr;
+
+ /* Interrupt related fields */
+ u32 irq_lines; /* IRQ and FIQ levels */
+
/* Exception Information */
struct kvm_vcpu_fault_info fault;
@@ -128,9 +135,6 @@ struct kvm_vcpu_arch {
/* IO related fields */
struct kvm_decode mmio_decode;
- /* Interrupt related fields */
- u32 irq_lines; /* IRQ and FIQ levels */
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -146,19 +150,17 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct kvm_vcpu_init;
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
u64 kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);
#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
@@ -183,15 +185,14 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-struct kvm_one_reg;
int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
-static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
- unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+ phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
@@ -221,7 +222,18 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
return 0;
}
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+ BUG_ON(vgic->type != VGIC_V2);
+}
+
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 472ac7091003..3f688b458143 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -62,9 +62,15 @@ phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
+static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
+{
+ *pmd = new_pmd;
+ flush_pmd_entry(pmd);
+}
+
static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
{
- pte_val(*pte) = new_pte;
+ *pte = new_pte;
/*
* flush_pmd_entry just takes a void pointer and cleans the necessary
* cache entries, so we can reuse the function for ptes.
@@ -72,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
flush_pmd_entry(pte);
}
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
- unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
- if (hsr_ec == HSR_EC_IABT)
- return false;
- else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
- return false;
- else
- return true;
-}
-
static inline void kvm_clean_pgd(pgd_t *pgd)
{
clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
@@ -103,10 +98,51 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
pte_val(*pte) |= L_PTE_S2_RDWR;
}
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+ pmd_val(*pmd) |= L_PMD_S2_RDWR;
+}
+
+/* Open-coded p*d_addr_end that can deal with 64-bit addresses */
+#define kvm_pgd_addr_end(addr, end) \
+({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+#define kvm_pud_addr_end(addr,end) (end)
+
+#define kvm_pmd_addr_end(addr, end) \
+({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
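A quick userspace check of why the boundary comparison above subtracts 1 on both sides (the PGDIR geometry below is arbitrary, chosen only for the demo): when addr + PGDIR_SIZE wraps to 0, __boundary - 1 becomes the maximal value and end is returned, which is the intended behaviour.

#include <stdint.h>
#include <stdio.h>

#define DEMO_PGDIR_SHIFT 30			/* illustrative: 1GiB entries */
#define DEMO_PGDIR_SIZE  (1ULL << DEMO_PGDIR_SHIFT)
#define DEMO_PGDIR_MASK  (~(DEMO_PGDIR_SIZE - 1))

static uint64_t demo_pgd_addr_end(uint64_t addr, uint64_t end)
{
	uint64_t boundary = (addr + DEMO_PGDIR_SIZE) & DEMO_PGDIR_MASK;
	/* A naive "boundary < end" would fail once boundary wraps to 0. */
	return (boundary - 1 < end - 1) ? boundary : end;
}

int main(void)
{
	/* Near the top of the address space: the boundary wraps, end wins. */
	printf("%#llx\n",
	       (unsigned long long)demo_pgd_addr_end(0xffffffffc0000000ULL,
						     0xffffffffffffffffULL));
	return 0;
}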
+static inline bool kvm_page_empty(void *ptr)
+{
+ struct page *ptr_page = virt_to_page(ptr);
+ return page_count(ptr_page) == 1;
+}
+
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(pudp) (0)
+
+
struct kvm;
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+ unsigned long size)
{
+ if (!vcpu_has_cache_enabled(vcpu))
+ kvm_flush_dcache_to_poc((void *)hva, size);
+
/*
* If we are going to insert an instruction page and the icache is
* either VIPT or PIPT, there is a potential problem where the host
@@ -120,15 +156,16 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
* need any kind of flushing (DDI 0406C.b - Page B3-1392).
*/
if (icache_is_pipt()) {
- unsigned long hva = gfn_to_hva(kvm, gfn);
- __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+ __cpuc_coherent_user_range(hva, hva + size);
} else if (!icache_is_vivt_asid_tagged()) {
/* any kind of VIPT cache */
__flush_icache_all();
}
}
-#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x))
+
+void stage2_flush_vm(struct kvm *kvm);
#endif /* !__ASSEMBLY__ */
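On the 0b101 mask in vcpu_has_cache_enabled() above: bit 0 of SCTLR is M (MMU enable) and bit 2 is C (data cache enable), so the test reads "MMU on and caches on". A small restatement with named bits; the constants are standard SCTLR bit positions, re-declared here purely for illustration.

#include <stdint.h>
#include <stdio.h>

#define SCTLR_M (1u << 0)	/* MMU enable */
#define SCTLR_C (1u << 2)	/* data cache enable */

/* Equivalent to "(sctlr & 0b101) == 0b101": the caches only count as
 * on when the MMU is also on. */
static int cache_enabled(uint32_t sctlr)
{
	return (sctlr & (SCTLR_M | SCTLR_C)) == (SCTLR_M | SCTLR_C);
}

int main(void)
{
	printf("%d %d\n", cache_enabled(SCTLR_M | SCTLR_C),
	       cache_enabled(SCTLR_C));	/* 1 0 */
	return 0;
}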
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 9a83d98bf170..6bda945d31fa 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
#ifndef __ARM_KVM_PSCI_H__
#define __ARM_KVM_PSCI_H__
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#define KVM_ARM_PSCI_0_1 1
+#define KVM_ARM_PSCI_0_2 2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
#endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
deleted file mode 100644
index 343744e4809c..000000000000
--- a/arch/arm/include/asm/kvm_vgic.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __ASM_ARM_KVM_VGIC_H
-#define __ASM_ARM_KVM_VGIC_H
-
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include <linux/irqreturn.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/irqchip/arm-gic.h>
-
-#define VGIC_NR_IRQS 128
-#define VGIC_NR_SGIS 16
-#define VGIC_NR_PPIS 16
-#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
-#define VGIC_MAX_CPUS KVM_MAX_VCPUS
-#define VGIC_MAX_LRS (1 << 6)
-
-/* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
-#error Invalid number of CPU interfaces
-#endif
-
-#if (VGIC_NR_IRQS & 31)
-#error "VGIC_NR_IRQS must be a multiple of 32"
-#endif
-
-#if (VGIC_NR_IRQS > 1024)
-#error "VGIC_NR_IRQS must be <= 1024"
-#endif
-
-/*
- * The GIC distributor registers describing interrupts have two parts:
- * - 32 per-CPU interrupts (SGI + PPI)
- * - a bunch of shared interrupts (SPI)
- */
-struct vgic_bitmap {
- union {
- u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
- DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
- } percpu[VGIC_MAX_CPUS];
- union {
- u32 reg[VGIC_NR_SHARED_IRQS / 32];
- DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
- } shared;
-};
-
-struct vgic_bytemap {
- u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
- u32 shared[VGIC_NR_SHARED_IRQS / 4];
-};
-
-struct vgic_dist {
-#ifdef CONFIG_KVM_ARM_VGIC
- spinlock_t lock;
- bool ready;
-
- /* Virtual control interface mapping */
- void __iomem *vctrl_base;
-
- /* Distributor and vcpu interface mapping in the guest */
- phys_addr_t vgic_dist_base;
- phys_addr_t vgic_cpu_base;
-
- /* Distributor enabled */
- u32 enabled;
-
- /* Interrupt enabled (one bit per IRQ) */
- struct vgic_bitmap irq_enabled;
-
- /* Interrupt 'pin' level */
- struct vgic_bitmap irq_state;
-
- /* Level-triggered interrupt in progress */
- struct vgic_bitmap irq_active;
-
- /* Interrupt priority. Not used yet. */
- struct vgic_bytemap irq_priority;
-
- /* Level/edge triggered */
- struct vgic_bitmap irq_cfg;
-
- /* Source CPU per SGI and target CPU */
- u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
-
- /* Target CPU for each IRQ */
- u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS];
- struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS];
-
- /* Bitmap indicating which CPU has something pending */
- unsigned long irq_pending_on_cpu;
-#endif
-};
-
-struct vgic_cpu {
-#ifdef CONFIG_KVM_ARM_VGIC
- /* per IRQ to LR mapping */
- u8 vgic_irq_lr_map[VGIC_NR_IRQS];
-
- /* Pending interrupts on this VCPU */
- DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
- DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
-
- /* Bitmap of used/free list registers */
- DECLARE_BITMAP( lr_used, VGIC_MAX_LRS);
-
- /* Number of list registers on this CPU */
- int nr_lr;
-
- /* CPU vif control registers for world switch */
- u32 vgic_hcr;
- u32 vgic_vmcr;
- u32 vgic_misr; /* Saved only */
- u32 vgic_eisr[2]; /* Saved only */
- u32 vgic_elrsr[2]; /* Saved only */
- u32 vgic_apr;
- u32 vgic_lr[VGIC_MAX_LRS];
-#endif
-};
-
-#define LR_EMPTY 0xff
-
-struct kvm;
-struct kvm_vcpu;
-struct kvm_run;
-struct kvm_exit_mmio;
-
-#ifdef CONFIG_KVM_ARM_VGIC
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
-int kvm_vgic_hyp_init(void);
-int kvm_vgic_init(struct kvm *kvm);
-int kvm_vgic_create(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
- bool level);
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio);
-
-#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base))
-#define vgic_initialized(k) ((k)->arch.vgic.ready)
-
-#else
-static inline int kvm_vgic_hyp_init(void)
-{
- return 0;
-}
-
-static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
- return 0;
-}
-
-static inline int kvm_vgic_init(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline int kvm_vgic_create(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
-static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
- unsigned int irq_num, bool level)
-{
- return 0;
-}
-
-static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
-static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- return false;
-}
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline bool vgic_initialized(struct kvm *kvm)
-{
- return true;
-}
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 308ad7d6f98b..75bf07910b81 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -8,6 +8,8 @@
* published by the Free Software Foundation.
*/
+#include <linux/types.h>
+
#ifndef __ASSEMBLY__
struct tag;
@@ -16,8 +18,10 @@ struct pt_regs;
struct smp_operations;
#ifdef CONFIG_SMP
#define smp_ops(ops) (&(ops))
+#define smp_init_ops(ops) (&(ops))
#else
#define smp_ops(ops) (struct smp_operations *)NULL
+#define smp_init_ops(ops) (bool (*)(void))NULL
#endif
struct machine_desc {
@@ -41,6 +45,7 @@ struct machine_desc {
unsigned char reserve_lp2 :1; /* never has lp2 */
char restart_mode; /* default restart mode */
struct smp_operations *smp; /* SMP operations */
+ bool (*smp_init)(void);
void (*fixup)(struct tag *, char **,
struct meminfo *);
void (*reserve)(void);/* reserve mem blocks */
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 0f7b7620e9a5..7626a7fd4938 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -42,6 +42,14 @@ extern void mcpm_entry_point(void);
void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr);
/*
+ * This sets an early poke, i.e. a value to be poked into some address
+ * from very early assembly code before the CPU is ungated. The
+ * address must be physical; if it is 0, nothing will happen.
+ */
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+ unsigned long poke_phys_addr, unsigned long poke_val);
+
+/*
* CPU/cluster power operations API for higher subsystems to use.
*/
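A hypothetical use of mcpm_set_early_poke(), sketched with an invented mbox_phys mailbox address (not from this patch):

/* Have CPU 1 of cluster 0 store the token 1 into a wake-up mailbox as
 * soon as the early assembly runs, before the CPU is ungated... */
mcpm_set_early_poke(1, 0, mbox_phys, 1);
/* ...and later cancel the poke again by passing a zero address. */
mcpm_set_early_poke(1, 0, 0, 0);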
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 57870ab313c5..21b458e6b0b8 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -157,6 +157,7 @@
*/
#define __PV_BITS_31_24 0x81000000
+extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
extern unsigned long __pv_phys_offset;
#define PHYS_OFFSET __pv_phys_offset
@@ -233,6 +234,21 @@ static inline void *phys_to_virt(phys_addr_t x)
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
/*
+ * These are for systems that have a hardware-interconnect-supported alias of
+ * physical memory for idmap purposes. Most cases should leave these
+ * untouched.
+ */
+static inline phys_addr_t __virt_to_idmap(unsigned long x)
+{
+ if (arch_virt_to_idmap)
+ return arch_virt_to_idmap(x);
+ else
+ return __virt_to_phys(x);
+}
+
+#define virt_to_idmap(x) __virt_to_idmap((unsigned long)(x))
+
+/*
* Virtual <-> DMA view memory address translations
* Again, these are *only* valid on the kernel direct mapped RAM
* memory. Use of these is *deprecated* (and that doesn't mean
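A hypothetical platform override for the arch_virt_to_idmap hook above; all my_soc_* names and the offset are invented for illustration. A SoC whose interconnect sees DRAM through a fixed alias would rebase idmap addresses like so:

/* Invented example offset at which the interconnect aliases DRAM. */
#define MY_SOC_IDMAP_OFFSET 0x80000000UL

static phys_addr_t my_soc_virt_to_idmap(unsigned long x)
{
	return __virt_to_phys(x) + MY_SOC_IDMAP_OFFSET;
}

static void __init my_soc_init_early(void)
{
	arch_virt_to_idmap = my_soc_virt_to_idmap;	/* leave NULL otherwise */
}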
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 6f18da09668b..64fd15159b7d 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -16,7 +16,7 @@ typedef struct {
#ifdef CONFIG_CPU_HAS_ASID
#define ASID_BITS 8
#define ASID_MASK ((~0ULL) << ASID_BITS)
-#define ASID(mm) ((mm)->context.id.counter & ~ASID_MASK)
+#define ASID(mm) ((unsigned int)((mm)->context.id.counter & ~ASID_MASK))
#else
#define ASID(mm) (0)
#endif
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 18f5cef82ad5..f088c864c992 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -30,6 +30,7 @@
#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
#define PMD_BIT4 (_AT(pmdval_t, 0))
#define PMD_DOMAIN(x) (_AT(pmdval_t, 0))
#define PMD_APTABLE_SHIFT (61)
@@ -41,6 +42,8 @@
*/
#define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2)
#define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3)
+#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
+#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_nG (_AT(pmdval_t, 1) << 11)
@@ -66,6 +69,7 @@
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */
#define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */
#define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 86b8fe398b95..c5d94b31bde4 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -62,6 +62,14 @@
#define USER_PTRS_PER_PGD (PAGE_OFFSET / PGDIR_SIZE)
/*
+ * Hugetlb definitions.
+ */
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+/*
* "Linux" PTE definitions for LPAE.
*
* These bits overlap with the hardware bits but the naming is preserved for
@@ -79,6 +87,11 @@
#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */
#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */
+#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
+#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
+#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
+
/*
* To be used in assembly code with the upper page attributes.
*/
@@ -113,6 +126,8 @@
#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */
#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
+#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
+
/*
* Hyp-mode PL2 PTE definitions for LPAE.
*/
@@ -166,8 +181,83 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
clean_pmd_entry(pmdp); \
} while (0)
+/*
+ * For 3 levels of paging the PTE_EXT_NG bit will be set for user address ptes
+ * that are written to a page table but not for ptes created with mk_pte.
+ *
+ * In hugetlb_no_page, a new huge pte (new_pte) is generated and passed to
+ * hugetlb_cow, where it is compared with an entry in a page table.
+ * This comparison test fails erroneously, ultimately leading to a memory leak.
+ *
+ * To correct this behaviour, we mask off PTE_EXT_NG for any pte that is
+ * present before running the comparison.
+ */
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(pte_a,pte_b) ((pte_present(pte_a) ? pte_val(pte_a) & ~PTE_EXT_NG \
+ : pte_val(pte_a)) \
+ == (pte_present(pte_b) ? pte_val(pte_b) & ~PTE_EXT_NG \
+ : pte_val(pte_b)))
+
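A compressed userspace illustration of the failure mode the mask avoids; the DEMO_* bit positions are invented for the demo and do not match the real layout.

#include <stdint.h>
#include <stdio.h>

#define DEMO_PTE_EXT_NG (1ULL << 11)	/* stand-in for PTE_EXT_NG */
#define DEMO_PTE_VALID  (1ULL << 0)	/* stand-in for a present bit */

/* Without the mask, a freshly made pte (no nG bit) never compares equal
 * to the same mapping read back from the page table (nG set on write). */
static int demo_pte_same(uint64_t a, uint64_t b)
{
	if (a & DEMO_PTE_VALID)
		a &= ~DEMO_PTE_EXT_NG;
	if (b & DEMO_PTE_VALID)
		b &= ~DEMO_PTE_EXT_NG;
	return a == b;
}

int main(void)
{
	uint64_t fresh = DEMO_PTE_VALID;			/* as from mk_pte */
	uint64_t in_table = DEMO_PTE_VALID | DEMO_PTE_EXT_NG;	/* after set_pte */
	printf("%d\n", demo_pte_same(fresh, in_table));		/* 1: same mapping */
	return 0;
}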
#define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext)))
+#define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
+#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#endif
+
+#define PMD_BIT_FUNC(fn,op) \
+static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+
+PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
+PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
+
+#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
+
+/* represent a not-present pmd by zero; this is used by pmdp_invalidate */
+#define pmd_mknotpresent(pmd) (__pmd(0))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
+ PMD_SECT_VALID | PMD_SECT_NONE;
+ pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
+ return pmd;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ BUG_ON(addr >= TASK_SIZE);
+
+ /* create a faulting entry if PROT_NONE protected */
+ if (pmd_val(pmd) & PMD_SECT_NONE)
+ pmd_val(pmd) &= ~PMD_SECT_VALID;
+
+ *pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
+ flush_pmd_entry(pmdp);
+}
+
+static inline int has_transparent_hugepage(void)
+{
+ return 1;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 5aac06fcc97e..b1b7a4907489 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -24,6 +24,9 @@
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
+
+#include <asm/tlbflush.h>
+
#ifdef CONFIG_ARM_LPAE
#include <asm/pgtable-3level.h>
#else
@@ -97,7 +100,7 @@ extern pgprot_t pgprot_s2_device;
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP)
#define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f24edad26c70..0cd7824ca762 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -62,9 +62,19 @@ struct pmu_hw_events {
raw_spinlock_t pmu_lock;
};
+struct cpupmu_regs {
+ u32 pmc;
+ u32 pmcntenset;
+ u32 pmuseren;
+ u32 pmintenset;
+ u32 pmxevttype[8];
+ u32 pmxevtcnt[8];
+};
+
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
+ cpumask_t valid_cpus;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct perf_event *event);
@@ -81,6 +91,8 @@ struct arm_pmu {
int (*request_irq)(struct arm_pmu *, irq_handler_t handler);
void (*free_irq)(struct arm_pmu *);
int (*map_event)(struct perf_event *event);
+ void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
+ void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;
diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h
index ce0dbe7c1625..f0a8627c9f1c 100644
--- a/arch/arm/include/asm/psci.h
+++ b/arch/arm/include/asm/psci.h
@@ -16,6 +16,10 @@
#define PSCI_POWER_STATE_TYPE_STANDBY 0
#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
+#define PSCI_POWER_STATE_AFFINITY_LEVEL0 0
+#define PSCI_POWER_STATE_AFFINITY_LEVEL1 1
+#define PSCI_POWER_STATE_AFFINITY_LEVEL2 2
+#define PSCI_POWER_STATE_AFFINITY_LEVEL3 3
struct psci_power_state {
u16 id;
@@ -32,5 +36,22 @@ struct psci_operations {
};
extern struct psci_operations psci_ops;
+extern struct smp_operations psci_smp_ops;
+#ifdef CONFIG_ARM_PSCI
+void psci_init(void);
+bool psci_smp_available(void);
+#else
+static inline void psci_init(void) { }
+static inline bool psci_smp_available(void) { return false; }
+#endif
+
+#ifdef CONFIG_ARM_PSCI
+extern int __init psci_probe(void);
+#else
+static inline int psci_probe(void)
+{
+ return -ENODEV;
+}
+#endif
#endif /* __ASM_ARM_PSCI_H */
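The intended boot-time pattern, paraphrased rather than quoted from the setup code: probe PSCI from the device tree first, and fall back to the machine's own SMP ops only when firmware CPU boot is unavailable.

/* Sketch of the expected call sequence during platform setup: */
psci_init();
if (psci_smp_available())
	smp_set_ops(&psci_smp_ops);	/* boot secondaries via firmware */
else if (mdesc->smp)
	smp_set_ops(mdesc->smp);	/* fall back to platform SMP ops */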
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index d3a22bebe6ce..610ccf33f5e7 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -81,6 +81,8 @@ extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+extern int register_ipi_completion(struct completion *completion, int cpu);
+
struct smp_operations {
#ifdef CONFIG_SMP
/*
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 18d169373612..1a292d8be988 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -11,7 +11,7 @@
static inline bool scu_a9_has_base(void)
{
- return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+ return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
}
static inline unsigned long scu_a9_get_base(void)
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index aa9b4ac3fdf6..0baf7f0d9394 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -207,6 +207,12 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
#endif
}
+static inline void
+tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
+{
+ tlb_add_flush(tlb, addr);
+}
+
#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index a3625d141c1d..c37459299fc9 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -535,6 +535,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
}
#endif
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
#endif
#endif /* CONFIG_MMU */
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 58b8b84adcd2..983fa7c153a2 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -26,11 +26,45 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
+
+#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 0, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
+ \
+ .flags = 0*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_BALANCE_WAKE \
+ | 1*SD_WAKE_AFFINE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+}
+#endif
+#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
#else
static inline void init_cpu_topology(void) { }
static inline void store_cpu_topology(unsigned int cpuid) { }
+static inline int cluster_to_logical_mask(unsigned int socket_id,
+ cpumask_t *cluster_mask) { return -EINVAL; }
#endif
diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h
index 3688fd15a32d..7dcc10d67253 100644
--- a/arch/arm/include/uapi/asm/hwcap.h
+++ b/arch/arm/include/uapi/asm/hwcap.h
@@ -25,6 +25,7 @@
#define HWCAP_IDIVT (1 << 18)
#define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */
#define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT)
-
+#define HWCAP_LPAE (1 << 20)
+#define HWCAP_EVTSTRM (1 << 21)
#endif /* _UAPI__ASMARM_HWCAP_H */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007523d7..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -20,10 +20,12 @@
#define __ARM_KVM_H__
#include <linux/types.h>
+#include <linux/psci.h>
#include <asm/ptrace.h>
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -63,7 +65,8 @@ struct kvm_regs {
/* Supported Processor Types */
#define KVM_ARM_TARGET_CORTEX_A15 0
-#define KVM_ARM_NUM_TARGETS 1
+#define KVM_ARM_TARGET_CORTEX_A7 1
+#define KVM_ARM_NUM_TARGETS 2
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
@@ -82,6 +85,7 @@ struct kvm_regs {
#define KVM_VGIC_V2_CPU_SIZE 0x2000
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
struct kvm_vcpu_init {
__u32 target;
@@ -118,6 +122,26 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
#define KVM_REG_ARM_32_CRN_SHIFT 11
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+ (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+ (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+ ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+ ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+ ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+ ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+ (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
+#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
+
/* Normal registers are mapped as coprocessor 16. */
#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
@@ -142,6 +166,15 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_VFP_FPINST 0x1009
#define KVM_REG_ARM_VFP_FPINST2 0x100A
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
+#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
+#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
+#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
@@ -172,9 +205,9 @@ struct kvm_arch_memory_slot {
#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
-#define KVM_PSCI_RET_SUCCESS 0
-#define KVM_PSCI_RET_NI ((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL ((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED ((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
#endif /* __ARM_KVM_H__ */
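From userspace, the ARM_CP15_REG*/KVM_REG_ARM_TIMER_* encodings above feed directly into the ONE_REG ioctls. A hedged sketch, assuming vcpu_fd was previously obtained via KVM_CREATE_VCPU:

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Read the guest's virtual counter through KVM_GET_ONE_REG. */
static int read_timer_cnt(int vcpu_fd, uint64_t *cnt)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_TIMER_CNT,
		.addr = (uint64_t)(unsigned long)cnt,
	};
	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}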
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338eacad2..aa775438388c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -17,7 +17,8 @@ CFLAGS_REMOVE_return_address.o = -pg
obj-y := elf.o entry-armv.o entry-common.o irq.o opcodes.o \
process.o ptrace.o return_address.o sched_clock.o \
- setup.o signal.o stacktrace.o sys_arm.o time.o traps.o
+ setup.o signal.o sigreturn_codes.o \
+ stacktrace.o sys_arm.o time.o traps.o
obj-$(CONFIG_ATAGS) += atags_parse.o
obj-$(CONFIG_ATAGS_PROC) += atags_proc.o
@@ -82,6 +83,9 @@ obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o
-obj-$(CONFIG_ARM_PSCI) += psci.o
+ifeq ($(CONFIG_ARM_PSCI),y)
+obj-y += psci.o
+obj-$(CONFIG_SMP) += psci_smp.o
+endif
extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ee68cce6b48e..776d9186e9c1 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -168,6 +168,7 @@ int main(void)
DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
+ DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr));
DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr));
DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar));
@@ -175,13 +176,13 @@ int main(void)
DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
#ifdef CONFIG_KVM_ARM_VGIC
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
- DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr));
- DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr));
- DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr));
- DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr));
- DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr));
- DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
- DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr));
+ DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+ DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+ DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+ DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+ DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+ DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+ DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
#ifdef CONFIG_KVM_ARM_TIMER
DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 5859c8bc727c..a44e7d11ab02 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -212,7 +212,7 @@ struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
}
if (!mdesc_best) {
const char *prop;
- long size;
+ int size;
early_print("\nError: unrecognized/unsupported "
"device tree compatible list:\n[ ");
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 32640ae7750f..45a68d6bb2a3 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -416,9 +416,8 @@ __und_usr:
bne __und_usr_thumb
sub r4, r2, #4 @ ARM instr at LR - 4
1: ldrt r0, [r4]
-#ifdef CONFIG_CPU_ENDIAN_BE8
- rev r0, r0 @ little endian instruction
-#endif
+ ARM_BE8(rev r0, r0) @ little endian instruction
+
@ r0 = 32-bit ARM instruction which caused the exception
@ r2 = PC value for the following instruction (:= regs->ARM_pc)
@ r4 = PC value for the faulting instruction
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 4bc816a74a2e..11d68917d3b1 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -389,9 +389,7 @@ ENTRY(vector_swi)
#else
USER( ldr r10, [lr, #-4] ) @ get SWI instruction
#endif
-#ifdef CONFIG_CPU_ENDIAN_BE8
- rev r10, r10 @ little endian instruction
-#endif
+ ARM_BE8(rev r10, r10) @ little endian instruction
#elif defined(CONFIG_AEABI)
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 8bac553fe213..11284e744c80 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -77,6 +77,7 @@
__HEAD
ENTRY(stext)
+ ARM_BE8(setend be ) @ ensure we are in BE8 mode
THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM.
THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
@@ -342,7 +343,6 @@ __turn_mmu_on_loc:
.long __turn_mmu_on_end
#if defined(CONFIG_SMP)
- __CPUINIT
ENTRY(secondary_startup)
/*
* Common entry point for secondary CPUs.
@@ -351,6 +351,9 @@ ENTRY(secondary_startup)
* the processor type - there is no need to check the machine type
* as it has already been validated by the primary processor.
*/
+
+ ARM_BE8(setend be) @ ensure we are in BE8 mode
+
#ifdef CONFIG_ARM_VIRT_EXT
bl __hyp_stub_install_secondary
#endif
@@ -584,8 +587,10 @@ __fixup_a_pv_table:
b 2f
1: add r7, r3
ldrh ip, [r7, #2]
+ARM_BE8(rev16 ip, ip)
and ip, 0x8f00
orr ip, r6 @ mask in offset bits 31-24
+ARM_BE8(rev16 ip, ip)
strh ip, [r7, #2]
2: cmp r4, r5
ldrcc r7, [r4], #4 @ use branch for delay slot
@@ -594,8 +599,14 @@ __fixup_a_pv_table:
#else
b 2f
1: ldr ip, [r7, r3]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ @ in BE8, we load data in BE, but instructions still in LE
+ bic ip, ip, #0xff000000
+ orr ip, ip, r6, lsl#24
+#else
bic ip, ip, #0x000000ff
orr ip, ip, r6 @ mask in offset bits 31-24
+#endif
str ip, [r7, r3]
2: cmp r4, r5
ldrcc r7, [r4], #4 @ use branch for delay slot
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 1fd749ee4a1b..1b803117ed91 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = {
static void __init pm_init(void)
{
- cpu_pm_register_notifier(&dbg_cpu_pm_nb);
+ if (has_ossr)
+ cpu_pm_register_notifier(&dbg_cpu_pm_nb);
}
#else
static inline void pm_init(void)
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 1e9be5d25e56..7e137873083d 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -24,6 +24,7 @@
#include <asm/sections.h>
#include <asm/smp_plat.h>
#include <asm/unwind.h>
+#include <asm/opcodes.h>
#ifdef CONFIG_XIP_KERNEL
/*
@@ -60,6 +61,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
Elf32_Sym *sym;
const char *symname;
s32 offset;
+ u32 tmp;
#ifdef CONFIG_THUMB2_KERNEL
u32 upper, lower, sign, j1, j2;
#endif
@@ -95,7 +97,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
case R_ARM_PC24:
case R_ARM_CALL:
case R_ARM_JUMP24:
- offset = (*(u32 *)loc & 0x00ffffff) << 2;
+ offset = __mem_to_opcode_arm(*(u32 *)loc);
+ offset = (offset & 0x00ffffff) << 2;
if (offset & 0x02000000)
offset -= 0x04000000;
@@ -111,9 +114,10 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
}
offset >>= 2;
+ offset &= 0x00ffffff;
- *(u32 *)loc &= 0xff000000;
- *(u32 *)loc |= offset & 0x00ffffff;
+ *(u32 *)loc &= __opcode_to_mem_arm(0xff000000);
+ *(u32 *)loc |= __opcode_to_mem_arm(offset);
break;
case R_ARM_V4BX:
@@ -121,8 +125,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
* other bits to re-code instruction as
* MOV PC,Rm.
*/
- *(u32 *)loc &= 0xf000000f;
- *(u32 *)loc |= 0x01a0f000;
+ *(u32 *)loc &= __opcode_to_mem_arm(0xf000000f);
+ *(u32 *)loc |= __opcode_to_mem_arm(0x01a0f000);
break;
case R_ARM_PREL31:
@@ -132,7 +136,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
case R_ARM_MOVW_ABS_NC:
case R_ARM_MOVT_ABS:
- offset = *(u32 *)loc;
+ offset = tmp = __mem_to_opcode_arm(*(u32 *)loc);
offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
offset = (offset ^ 0x8000) - 0x8000;
@@ -140,16 +144,18 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
offset >>= 16;
- *(u32 *)loc &= 0xfff0f000;
- *(u32 *)loc |= ((offset & 0xf000) << 4) |
- (offset & 0x0fff);
+ tmp &= 0xfff0f000;
+ tmp |= ((offset & 0xf000) << 4) |
+ (offset & 0x0fff);
+
+ *(u32 *)loc = __opcode_to_mem_arm(tmp);
break;
#ifdef CONFIG_THUMB2_KERNEL
case R_ARM_THM_CALL:
case R_ARM_THM_JUMP24:
- upper = *(u16 *)loc;
- lower = *(u16 *)(loc + 2);
+ upper = __mem_to_opcode_thumb16(*(u16 *)loc);
+ lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
/*
* 25 bit signed address range (Thumb-2 BL and B.W
@@ -198,17 +204,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
sign = (offset >> 24) & 1;
j1 = sign ^ (~(offset >> 23) & 1);
j2 = sign ^ (~(offset >> 22) & 1);
- *(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) |
+ upper = (u16)((upper & 0xf800) | (sign << 10) |
((offset >> 12) & 0x03ff));
- *(u16 *)(loc + 2) = (u16)((lower & 0xd000) |
- (j1 << 13) | (j2 << 11) |
- ((offset >> 1) & 0x07ff));
+ lower = (u16)((lower & 0xd000) |
+ (j1 << 13) | (j2 << 11) |
+ ((offset >> 1) & 0x07ff));
+
+ *(u16 *)loc = __opcode_to_mem_thumb16(upper);
+ *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower);
break;
case R_ARM_THM_MOVW_ABS_NC:
case R_ARM_THM_MOVT_ABS:
- upper = *(u16 *)loc;
- lower = *(u16 *)(loc + 2);
+ upper = __mem_to_opcode_thumb16(*(u16 *)loc);
+ lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
/*
* MOVT/MOVW instructions encoding in Thumb-2:
@@ -229,12 +238,14 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
offset >>= 16;
- *(u16 *)loc = (u16)((upper & 0xfbf0) |
- ((offset & 0xf000) >> 12) |
- ((offset & 0x0800) >> 1));
- *(u16 *)(loc + 2) = (u16)((lower & 0x8f00) |
- ((offset & 0x0700) << 4) |
- (offset & 0x00ff));
+ upper = (u16)((upper & 0xfbf0) |
+ ((offset & 0xf000) >> 12) |
+ ((offset & 0x0800) >> 1));
+ lower = (u16)((lower & 0x8f00) |
+ ((offset & 0x0700) << 4) |
+ (offset & 0x00ff));
+ *(u16 *)loc = __opcode_to_mem_thumb16(upper);
+ *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower);
break;
#endif
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ace0ce8f6641..b41749fe56dc 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,6 +12,7 @@
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
+#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@@ -86,6 +87,9 @@ armpmu_map_event(struct perf_event *event,
return armpmu_map_cache_event(cache_map, config);
case PERF_TYPE_RAW:
return armpmu_map_raw_event(raw_event_mask, config);
+ default:
+ if (event->attr.type >= PERF_TYPE_MAX)
+ return armpmu_map_raw_event(raw_event_mask, config);
}
return -ENOENT;
@@ -163,6 +167,8 @@ armpmu_stop(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
@@ -179,6 +185,8 @@ static void armpmu_start(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
@@ -206,6 +214,9 @@ armpmu_del(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
+
armpmu_stop(event, PERF_EF_UPDATE);
hw_events->events[idx] = NULL;
clear_bit(idx, hw_events->used_mask);
@@ -222,6 +233,10 @@ armpmu_add(struct perf_event *event, int flags)
int idx;
int err = 0;
+ /* An event following a process won't be stopped earlier */
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return 0;
+
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
@@ -431,6 +446,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
+ if (event->cpu != -1 &&
+ !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
+ return -ENOENT;
+
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1f2740e3dbc0..e0665b871f5b 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -19,6 +19,7 @@
#define pr_fmt(fmt) "CPU PMU: " fmt
#include <linux/bitmap.h>
+#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/of.h>
@@ -31,33 +32,36 @@
#include <asm/pmu.h>
/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+
/*
* Despite the names, these two functions are CPU-specific and are used
* by the OProfile/perf code.
*/
const char *perf_pmu_name(void)
{
- if (!cpu_pmu)
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ if (!pmu)
return NULL;
- return cpu_pmu->name;
+ return pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
- int max_events = 0;
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
- if (cpu_pmu != NULL)
- max_events = cpu_pmu->num_events;
+ if (!pmu)
+ return 0;
- return max_events;
+ return pmu->num_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);
@@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
{
int i, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
+ int cpu = -1;
irqs = min(pmu_device->num_resources, num_possible_cpus());
for (i = 0; i < irqs; ++i) {
- if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+ cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
+ if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
continue;
irq = platform_get_irq(pmu_device, i);
if (irq >= 0)
@@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
{
int i, err, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
+ int cpu = -1;
if (!pmu_device)
return -ENODEV;
@@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
for (i = 0; i < irqs; ++i) {
err = 0;
+ cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
irq = platform_get_irq(pmu_device, i);
if (irq < 0)
continue;
@@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
* assume that we're running on a uniprocessor machine and
* continue. Otherwise, continue without this interrupt.
*/
- if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+ if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
irq, i);
continue;
@@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
return err;
}
- cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+ cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
}
return 0;
@@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
int cpu;
- for_each_possible_cpu(cpu) {
+ for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
events->events = per_cpu(hw_events, cpu);
events->used_mask = per_cpu(used_mask, cpu);
@@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
/* Ensure the PMU has sane values out of reset. */
if (cpu_pmu->reset)
- on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+ on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
}
/*
@@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
return NOTIFY_DONE;
- if (cpu_pmu && cpu_pmu->reset)
- cpu_pmu->reset(cpu_pmu);
+ if (pmu && pmu->reset)
+ pmu->reset(pmu);
else
return NOTIFY_DONE;
return NOTIFY_OK;
}
+static int cpu_pmu_pm_notify(struct notifier_block *b,
+ unsigned long action, void *hcpu)
+{
+ int cpu = smp_processor_id();
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
+ struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+
+ if (!pmu)
+ return NOTIFY_DONE;
+
+ if (action == CPU_PM_ENTER && pmu->save_regs) {
+ pmu->save_regs(pmu, pmuregs);
+ } else if (action == CPU_PM_EXIT && pmu->restore_regs) {
+ pmu->restore_regs(pmu, pmuregs);
+ }
+
+ return NOTIFY_OK;
+}
+
static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
.notifier_call = cpu_pmu_notify,
};
+static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = {
+ .notifier_call = cpu_pmu_pm_notify,
+};
+
/*
* PMU platform driver and devicetree bindings.
*/
@@ -201,51 +234,44 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
static int probe_current_pmu(struct arm_pmu *pmu)
{
int cpu = get_cpu();
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
int ret = -ENODEV;
pr_info("probing PMU on CPU %d\n", cpu);
+ switch (read_cpuid_part()) {
/* ARM Ltd CPUs. */
- if (implementor == ARM_CPU_IMP_ARM) {
- switch (part_number) {
- case ARM_CPU_PART_ARM1136:
- case ARM_CPU_PART_ARM1156:
- case ARM_CPU_PART_ARM1176:
- ret = armv6pmu_init(pmu);
- break;
- case ARM_CPU_PART_ARM11MPCORE:
- ret = armv6mpcore_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A8:
- ret = armv7_a8_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A9:
- ret = armv7_a9_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A5:
- ret = armv7_a5_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A15:
- ret = armv7_a15_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A7:
- ret = armv7_a7_pmu_init(pmu);
- break;
- }
- /* Intel CPUs [xscale]. */
- } else if (implementor == ARM_CPU_IMP_INTEL) {
- switch (xscale_cpu_arch_version()) {
- case ARM_CPU_XSCALE_ARCH_V1:
- ret = xscale1pmu_init(pmu);
- break;
- case ARM_CPU_XSCALE_ARCH_V2:
- ret = xscale2pmu_init(pmu);
- break;
+ case ARM_CPU_PART_ARM1136:
+ case ARM_CPU_PART_ARM1156:
+ case ARM_CPU_PART_ARM1176:
+ ret = armv6pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_ARM11MPCORE:
+ ret = armv6mpcore_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_CORTEX_A8:
+ ret = armv7_a8_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_CORTEX_A9:
+ ret = armv7_a9_pmu_init(pmu);
+ break;
+
+ default:
+ if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
+ switch (xscale_cpu_arch_version()) {
+ case ARM_CPU_XSCALE_ARCH_V1:
+ ret = xscale1pmu_init(pmu);
+ break;
+ case ARM_CPU_XSCALE_ARCH_V2:
+ ret = xscale2pmu_init(pmu);
+ break;
+ }
}
+ break;
}
+ /* assume the PMU supports all the CPUs in this case */
+ cpumask_setall(&pmu->valid_cpus);
+
put_cpu();
return ret;
}
@@ -253,15 +279,10 @@ static int probe_current_pmu(struct arm_pmu *pmu)
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
- int (*init_fn)(struct arm_pmu *);
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
- int ret = -ENODEV;
-
- if (cpu_pmu) {
- pr_info("attempt to register multiple PMU devices!");
- return -ENOSPC;
- }
+ int ret = 0;
+ int cpu;
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
if (!pmu) {
@@ -270,8 +291,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
}
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
- init_fn = of_id->data;
- ret = init_fn(pmu);
+ smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
+ struct device_node *ncluster;
+ int cluster = -1;
+ cpumask_t sibling_mask;
+
+ ncluster = of_parse_phandle(node, "cluster", 0);
+ if (ncluster) {
+ int len;
+ const u32 *hwid;
+ hwid = of_get_property(ncluster, "reg", &len);
+ if (hwid && len == 4)
+ cluster = be32_to_cpup(hwid);
+ }
+ /* set the sibling mask to all CPUs if no socket is specified */
+ if (cluster == -1 ||
+ cluster_to_logical_mask(cluster, &sibling_mask))
+ cpumask_setall(&sibling_mask);
+
+ smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+
+ /* now set the valid_cpus after init */
+ cpumask_copy(&pmu->valid_cpus, &sibling_mask);
} else {
ret = probe_current_pmu(pmu);
}
@@ -281,10 +322,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
goto out_free;
}
- cpu_pmu = pmu;
- cpu_pmu->plat_device = pdev;
- cpu_pmu_init(cpu_pmu);
- ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
+ for_each_cpu_mask(cpu, pmu->valid_cpus)
+ per_cpu(cpu_pmu, cpu) = pmu;
+
+ pmu->plat_device = pdev;
+ cpu_pmu_init(pmu);
+ ret = armpmu_register(pmu, -1);
if (!ret)
return 0;
@@ -313,9 +356,17 @@ static int __init register_pmu_driver(void)
if (err)
return err;
+ err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier);
+ if (err) {
+ unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+ return err;
+ }
+
err = platform_driver_register(&cpu_pmu_driver);
- if (err)
+ if (err) {
+ cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier);
unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+ }
return err;
}
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 039cffb053a7..654db5030c31 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
}
#endif
+static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
+ struct cpupmu_regs *regs)
+{
+ unsigned int cnt;
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
+ if (!(regs->pmc & ARMV7_PMNC_E))
+ return;
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
+ asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
+ asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
+ for (cnt = ARMV7_IDX_COUNTER0;
+ cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mrc p15, 0, %0, c9, c13, 1"
+ : "=r"(regs->pmxevttype[cnt]));
+ asm volatile("mrc p15, 0, %0, c9, c13, 2"
+ : "=r"(regs->pmxevtcnt[cnt]));
+ }
+ return;
+}
+
+static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
+ struct cpupmu_regs *regs)
+{
+ unsigned int cnt;
+ if (!(regs->pmc & ARMV7_PMNC_E))
+ return;
+
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
+ asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
+ asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
+ for (cnt = ARMV7_IDX_COUNTER0;
+ cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mcr p15, 0, %0, c9, c13, 1"
+ : : "r"(regs->pmxevttype[cnt]));
+ asm volatile("mcr p15, 0, %0, c9, c13, 2"
+ : : "r"(regs->pmxevtcnt[cnt]));
+ }
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
+}
+
static void armv7pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
@@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->start = armv7pmu_start;
cpu_pmu->stop = armv7pmu_stop;
cpu_pmu->reset = armv7pmu_reset;
+ cpu_pmu->save_regs = armv7pmu_save_regs;
+ cpu_pmu->restore_regs = armv7pmu_restore_regs;
cpu_pmu->max_period = (1LLU << 32) - 1;
};
@@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void)
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A8";
+ cpu_pmu->name = "ARMv7_Cortex_A8";
cpu_pmu->map_event = armv7_a8_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A9";
+ cpu_pmu->name = "ARMv7_Cortex_A9";
cpu_pmu->map_event = armv7_a9_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A5";
+ cpu_pmu->name = "ARMv7_Cortex_A5";
cpu_pmu->map_event = armv7_a5_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A15";
+ cpu_pmu->name = "ARMv7_Cortex_A15";
cpu_pmu->map_event = armv7_a15_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A7";
+ cpu_pmu->name = "ARMv7_Cortex_A7";
cpu_pmu->map_event = armv7_a7_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
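The save_regs/restore_regs hooks above pair with the cpu_pmu_pm_notifier registered in perf_event_cpu.c. The notifier body is not part of this hunk, so the following is only a sketch of its presumed shape (the per-CPU cpu_pmu_regs storage is an assumption):

	/* assumed: DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs); */
	static int cpu_pmu_pm_notify(struct notifier_block *b,
				     unsigned long action, void *v)
	{
		struct arm_pmu *pmu = per_cpu(cpu_pmu, smp_processor_id());
		struct cpupmu_regs *regs = this_cpu_ptr(&cpu_pmu_regs);

		if (!pmu)
			return NOTIFY_DONE;

		if (action == CPU_PM_ENTER && pmu->save_regs)
			pmu->save_regs(pmu, regs);	/* counters lost in powerdown */
		else if (action == CPU_PM_EXIT && pmu->restore_regs)
			pmu->restore_regs(pmu, regs);	/* reprogram after wakeup */

		return NOTIFY_OK;
	}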
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
index 36531643cc2c..0daf4f252284 100644
--- a/arch/arm/kernel/psci.c
+++ b/arch/arm/kernel/psci.c
@@ -17,6 +17,7 @@
#include <linux/init.h>
#include <linux/of.h>
+#include <linux/string.h>
#include <asm/compiler.h>
#include <asm/errno.h>
@@ -26,6 +27,11 @@
struct psci_operations psci_ops;
+/* Type of PSCI support. Currently it can only be enabled or disabled. */
+#define PSCI_SUP_DISABLED 0
+#define PSCI_SUP_ENABLED 1
+
+static unsigned int psci;
static int (*invoke_psci_fn)(u32, u32, u32, u32);
enum psci_function {
@@ -42,6 +48,7 @@ static u32 psci_function_id[PSCI_FN_MAX];
#define PSCI_RET_EOPNOTSUPP -1
#define PSCI_RET_EINVAL -2
#define PSCI_RET_EPERM -3
+#define PSCI_RET_EALREADYON -4
static int psci_to_linux_errno(int errno)
{
@@ -54,6 +61,8 @@ static int psci_to_linux_errno(int errno)
return -EINVAL;
case PSCI_RET_EPERM:
return -EPERM;
+ case PSCI_RET_EALREADYON:
+ return -EAGAIN;
};
return -EINVAL;
@@ -158,15 +167,18 @@ static const struct of_device_id psci_of_match[] __initconst = {
{},
};
-static int __init psci_init(void)
+void __init psci_init(void)
{
struct device_node *np;
const char *method;
u32 id;
+ if (psci == PSCI_SUP_DISABLED)
+ return;
+
np = of_find_matching_node(NULL, psci_of_match);
if (!np)
- return 0;
+ return;
pr_info("probing function IDs from device-tree\n");
@@ -206,6 +218,35 @@ static int __init psci_init(void)
out_put_node:
of_node_put(np);
- return 0;
+ return;
+}
+
+int __init psci_probe(void)
+{
+	struct device_node *np = NULL;
+ int ret = -ENODEV;
+
+ if (psci == PSCI_SUP_ENABLED) {
+ np = of_find_matching_node(NULL, psci_of_match);
+ if (np)
+ ret = 0;
+ }
+
+ of_node_put(np);
+ return ret;
+}
+
+static int __init early_psci(char *val)
+{
+ int ret = 0;
+
+ if (strcmp(val, "enable") == 0)
+ psci = PSCI_SUP_ENABLED;
+ else if (strcmp(val, "disable") == 0)
+ psci = PSCI_SUP_DISABLED;
+ else
+ ret = -EINVAL;
+
+ return ret;
}
-early_initcall(psci_init);
+early_param("psci", early_psci);
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
new file mode 100644
index 000000000000..23a11424c568
--- /dev/null
+++ b/arch/arm/kernel/psci_smp.c
@@ -0,0 +1,84 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/smp.h>
+#include <linux/of.h>
+
+#include <asm/psci.h>
+#include <asm/smp_plat.h>
+
+/*
+ * psci_smp assumes that the following is true about PSCI:
+ *
+ * cpu_suspend Suspend the execution on a CPU
+ * @state we don't currently describe affinity levels, so just pass 0.
+ * @entry_point the first instruction to be executed on return
+ * returns 0 success, < 0 on failure
+ *
+ * cpu_off Power down a CPU
+ * @state we don't currently describe affinity levels, so just pass 0.
+ * no return on successful call
+ *
+ * cpu_on Power up a CPU
+ * @cpuid cpuid of target CPU, as from MPIDR
+ * @entry_point the first instruction to be executed on return
+ * returns 0 success, < 0 on failure
+ *
+ * migrate Migrate the context to a different CPU
+ * @cpuid cpuid of target CPU, as from MPIDR
+ * returns 0 success, < 0 on failure
+ *
+ */
+
+extern void secondary_startup(void);
+
+static int __cpuinit psci_boot_secondary(unsigned int cpu,
+ struct task_struct *idle)
+{
+ if (psci_ops.cpu_on)
+ return psci_ops.cpu_on(cpu_logical_map(cpu),
+ __pa(secondary_startup));
+ return -ENODEV;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __ref psci_cpu_die(unsigned int cpu)
+{
+ const struct psci_power_state ps = {
+ .type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
+ };
+
+ if (psci_ops.cpu_off)
+ psci_ops.cpu_off(ps);
+
+ /* We should never return */
+ panic("psci: cpu %d failed to shutdown\n", cpu);
+}
+#else
+#define psci_cpu_die NULL
+#endif
+
+bool __init psci_smp_available(void)
+{
+ /* is cpu_on available at least? */
+ return (psci_ops.cpu_on != NULL);
+}
+
+struct smp_operations __initdata psci_smp_ops = {
+ .smp_boot_secondary = psci_boot_secondary,
+ .cpu_die = psci_cpu_die,
+};
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index eb83bcc70ec8..29beb8c76560 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -37,6 +37,7 @@
#include <asm/cputype.h>
#include <asm/elf.h>
#include <asm/procinfo.h>
+#include <asm/psci.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -261,6 +262,19 @@ static int cpu_has_aliasing_icache(unsigned int arch)
int aliasing_icache;
unsigned int id_reg, num_sets, line_size;
+#ifdef CONFIG_BIG_LITTLE
+ /*
+ * We expect a combination of Cortex-A15 and Cortex-A7 cores.
+ * A7 = VIPT aliasing I-cache
+ * A15 = PIPT (non-aliasing) I-cache
+ * To cater for this discrepancy, let's assume aliasing I-cache
+ * all the time. This means unneeded extra work on the A15 but
+ * only ptrace is affected which is not performance critical.
+ */
+ if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc0f0)
+ return 1;
+#endif
+
/* PIPT caches never alias. */
if (icache_is_pipt())
return 0;
@@ -818,9 +832,15 @@ void __init setup_arch(char **cmdline_p)
unflatten_device_tree();
arm_dt_init_cpu_maps();
+ psci_init();
#ifdef CONFIG_SMP
if (is_smp()) {
- smp_set_ops(mdesc->smp);
+ if (!mdesc->smp_init || !mdesc->smp_init()) {
+ if (psci_smp_available())
+ smp_set_ops(&psci_smp_ops);
+ else if (mdesc->smp)
+ smp_set_ops(mdesc->smp);
+ }
smp_init_cpus();
}
#endif
@@ -894,6 +914,9 @@ static const char *hwcap_str[] = {
"vfpv4",
"idiva",
"idivt",
+ "vfpd32",
+ "lpae",
+ "evtstrm",
NULL
};
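The new smp_init machine-descriptor hook gives a platform the first say in selecting SMP operations: returning true means the platform installed its own ops and setup_arch() skips the PSCI/default selection. A hypothetical hook illustrating the contract (all names beyond smp_set_ops() are illustrative; the wiring into the machine descriptor is not shown in this hunk):

	static bool __init my_smp_init(void)
	{
		if (!my_platform_wants_custom_ops())
			return false;	/* let setup_arch() pick psci_smp_ops or mdesc->smp */

		smp_set_ops(&my_custom_smp_ops);
		return true;		/* selection handled, default path skipped */
	}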
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 5a42c12767af..3c23086dc8e2 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -21,29 +21,7 @@
#include <asm/unistd.h>
#include <asm/vfp.h>
-/*
- * For ARM syscalls, we encode the syscall number into the instruction.
- */
-#define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE))
-#define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE))
-
-/*
- * With EABI, the syscall number has to be loaded into r7.
- */
-#define MOV_R7_NR_SIGRETURN (0xe3a07000 | (__NR_sigreturn - __NR_SYSCALL_BASE))
-#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE))
-
-/*
- * For Thumb syscalls, we pass the syscall number via r7. We therefore
- * need two 16-bit instructions.
- */
-#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE))
-#define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE))
-
-static const unsigned long sigreturn_codes[7] = {
- MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN,
- MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN,
-};
+extern const unsigned long sigreturn_codes[7];
static unsigned long signal_return_offset;
diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S
new file mode 100644
index 000000000000..3c5d0f2170fd
--- /dev/null
+++ b/arch/arm/kernel/sigreturn_codes.S
@@ -0,0 +1,80 @@
+/*
+ * sigreturn_codes.S - code snippets for sigreturn syscalls
+ *
+ * Created by: Victor Kamensky, 2013-08-13
+ * Copyright: (C) 2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/unistd.h>
+
+/*
+ * For ARM syscalls, we encode the syscall number into the instruction.
+ * With EABI, the syscall number has to be loaded into r7, so the ARM
+ * sequence is a move followed by an svc, both in .arm encoding.
+ *
+ * For Thumb syscalls, we pass the syscall number via r7. We therefore
+ * need two 16-bit instructions in .thumb encoding.
+ *
+ * Please note that the sigreturn_codes snippets are not executed in
+ * place. Instead, the kernel copies them to the appropriate locations.
+ * The code in arch/arm/kernel/signal.c is very sensitive to the layout
+ * of these snippets.
+ */
+
+#if __LINUX_ARM_ARCH__ <= 4
+ /*
+	 * Note that we manually set the minimal arch version that
+	 * supports the required Thumb opcodes when building for early
+	 * arch versions. This is safe even in combination with lower
+	 * arch variants, since these code snippets are only used as
+	 * input data.
+ */
+ .arch armv4t
+#endif
+
+ .section .rodata
+ .global sigreturn_codes
+ .type sigreturn_codes, #object
+
+ .arm
+
+sigreturn_codes:
+
+ /* ARM sigreturn syscall code snippet */
+ mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE)
+ swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)
+
+ /* Thumb sigreturn syscall code snippet */
+ .thumb
+ movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE)
+ swi #0
+
+ /* ARM sigreturn_rt syscall code snippet */
+ .arm
+ mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE)
+ swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)
+
+ /* Thumb sigreturn_rt syscall code snippet */
+ .thumb
+ movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE)
+ swi #0
+
+ /*
+	 * Note on the additional space: the setup_return() algorithm in
+	 * signal.c copies two words regardless of whether this is the
+	 * Thumb case or not, so we need an additional word after the
+	 * real last entry.
+ */
+ .arm
+ .space 4
+
+ .size sigreturn_codes, . - sigreturn_codes
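For reference, the layout above gives signal.c a simple indexing rule: each syscall contributes two ARM words followed by one Thumb word (two 16-bit opcodes packed into a u32). A sketch of the consumer side, assuming the historical setup_return() scheme of copying two words per snippet (thumb, rt and retcode are illustrative locals):

	unsigned int idx = thumb << 1;	/* 0 = ARM words, 2 = Thumb word */

	if (rt)
		idx += 3;		/* skip to the rt_sigreturn snippets */

	/* two words are copied regardless of ARM/Thumb; for the last Thumb
	 * snippet this reads the trailing .space 4 pad, which is why it exists */
	retcode[0] = sigreturn_codes[idx];
	retcode[1] = sigreturn_codes[idx + 1];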
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 987dcf33415c..baf4d28213a5 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -4,6 +4,7 @@
#include <asm/assembler.h>
#include <asm/glue-cache.h>
#include <asm/glue-proc.h>
+#include "entry-header.S"
.text
/*
@@ -30,9 +31,8 @@ ENTRY(__cpu_suspend)
mov r2, r5 @ virtual SP
ldr r3, =sleep_save_sp
#ifdef CONFIG_SMP
- ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
- ALT_UP(mov lr, #0)
- and lr, lr, #15
+ get_thread_info r5
+ ldr lr, [r5, #TI_CPU] @ cpu logical index
add r3, r3, lr, lsl #2
#endif
bl __cpu_suspend_save
@@ -81,11 +81,15 @@ ENDPROC(cpu_resume_after_mmu)
.data
.align
ENTRY(cpu_resume)
+ARM_BE8(setend be) @ ensure we are in BE mode
#ifdef CONFIG_SMP
+ mov r1, #0 @ fall-back logical index for UP
+ ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
+ ALT_UP_B(1f)
+ bic r0, #0xff000000
+ bl cpu_logical_index @ return logical index in r1
+1:
adr r0, sleep_save_sp
- ALT_SMP(mrc p15, 0, r1, c0, c0, 5)
- ALT_UP(mov r1, #0)
- and r1, r1, #15
ldr r0, [r0, r1, lsl #2] @ stack phys addr
#else
ldr r0, sleep_save_sp @ stack phys addr
@@ -102,3 +106,20 @@ sleep_save_sp:
.rept CONFIG_NR_CPUS
.long 0 @ preserve stack phys ptr here
.endr
+
+#ifdef CONFIG_SMP
+cpu_logical_index:
+ adr r3, cpu_map_ptr
+ ldr r2, [r3]
+ add r3, r3, r2 @ virt_to_phys(__cpu_logical_map)
+ mov r1, #0
+1:
+ ldr r2, [r3, r1, lsl #2]
+ cmp r2, r0
+ moveq pc, lr
+ add r1, r1, #1
+ b 1b
+
+cpu_map_ptr:
+ .long __cpu_logical_map - .
+#endif
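The cpu_logical_index helper runs with the MMU off, which is why it reaches __cpu_logical_map through the PC-relative cpu_map_ptr offset rather than its virtual address. The search it performs is, in C terms, simply the following (a sketch; like the assembly, it assumes the masked MPIDR is present in the map and would spin forever otherwise):

	static int cpu_logical_index(u32 mpidr)
	{
		int i;

		for (i = 0; ; i++)
			if (__cpu_logical_map[i] == mpidr)
				return i;	/* logical index for this MPIDR */
	}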
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb451bb9..d6e3d1ca4ddf 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -46,6 +46,9 @@
#include <asm/virt.h>
#include <asm/mach/arch.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/arm-ipi.h>
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -57,7 +60,7 @@ struct secondary_data secondary_data;
* control for which core is the next to come out of the secondary
* boot "holding pen"
*/
-volatile int __cpuinitdata pen_release = -1;
+volatile int pen_release = -1;
enum ipi_msg_type {
IPI_WAKEUP,
@@ -66,6 +69,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC,
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
+ IPI_COMPLETION,
};
static DECLARE_COMPLETION(cpu_running);
@@ -87,8 +91,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
* its stack and the page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
- secondary_data.pgdir = virt_to_phys(idmap_pgd);
- secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+ secondary_data.pgdir = virt_to_idmap(idmap_pgd);
+ secondary_data.swapper_pg_dir = virt_to_idmap(swapper_pg_dir);
__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
@@ -463,6 +467,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC, "Function call interrupts"),
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
+ S(IPI_COMPLETION, "completion interrupts"),
};
void show_ipi_list(struct seq_file *p, int prec)
@@ -588,6 +593,19 @@ static void ipi_cpu_stop(unsigned int cpu)
cpu_relax();
}
+static DEFINE_PER_CPU(struct completion *, cpu_completion);
+
+int register_ipi_completion(struct completion *completion, int cpu)
+{
+ per_cpu(cpu_completion, cpu) = completion;
+ return IPI_COMPLETION;
+}
+
+static void ipi_complete(unsigned int cpu)
+{
+ complete(per_cpu(cpu_completion, cpu));
+}
+
/*
* Main handler for inter-processor interrupts
*/
@@ -604,6 +622,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
if (ipinr < NR_IPI)
__inc_irq_stat(cpu, ipi_irqs[ipinr]);
+ trace_arm_ipi_entry(ipinr);
switch (ipinr) {
case IPI_WAKEUP:
break;
@@ -638,11 +657,18 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
+ case IPI_COMPLETION:
+ irq_enter();
+ ipi_complete(cpu);
+ irq_exit();
+ break;
+
default:
printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
cpu, ipinr);
break;
}
+ trace_arm_ipi_exit(ipinr);
set_irq_regs(old_regs);
}
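A sketch of the intended use of the new completion IPI, modelled on the big.LITTLE switcher; the mechanism that raises the returned IPI number on the waiting CPU is an assumption here (any SGI-sending helper works):

	struct completion inbound_alive;
	int ipi_nr;

	init_completion(&inbound_alive);
	ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);

	/* ... arrange for the other CPU to raise ipi_nr at this_cpu ... */

	wait_for_completion(&inbound_alive);	/* released by ipi_complete() */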
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index 5bc1a63284e3..1aafa0d785eb 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -28,7 +28,7 @@
*/
unsigned int __init scu_get_core_count(void __iomem *scu_base)
{
- unsigned int ncores = __raw_readl(scu_base + SCU_CONFIG);
+ unsigned int ncores = readl_relaxed(scu_base + SCU_CONFIG);
return (ncores & 0x03) + 1;
}
@@ -42,19 +42,19 @@ void scu_enable(void __iomem *scu_base)
#ifdef CONFIG_ARM_ERRATA_764369
/* Cortex-A9 only */
if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) {
- scu_ctrl = __raw_readl(scu_base + 0x30);
+ scu_ctrl = readl_relaxed(scu_base + 0x30);
if (!(scu_ctrl & 1))
- __raw_writel(scu_ctrl | 0x1, scu_base + 0x30);
+ writel_relaxed(scu_ctrl | 0x1, scu_base + 0x30);
}
#endif
- scu_ctrl = __raw_readl(scu_base + SCU_CTRL);
+ scu_ctrl = readl_relaxed(scu_base + SCU_CTRL);
/* already enabled? */
if (scu_ctrl & 1)
return;
scu_ctrl |= 1;
- __raw_writel(scu_ctrl, scu_base + SCU_CTRL);
+ writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);
/*
* Ensure that the data accessed by CPU0 before the SCU was
@@ -80,9 +80,9 @@ int scu_power_mode(void __iomem *scu_base, unsigned int mode)
if (mode > 3 || mode == 1 || cpu > 3)
return -EINVAL;
- val = __raw_readb(scu_base + SCU_CPU_STATUS + cpu) & ~0x03;
+ val = readb_relaxed(scu_base + SCU_CPU_STATUS + cpu) & ~0x03;
val |= mode;
- __raw_writeb(val, scu_base + SCU_CPU_STATUS + cpu);
+ writeb_relaxed(val, scu_base + SCU_CPU_STATUS + cpu);
return 0;
}
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index f6fd1d4398c6..4971ccf012ca 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -45,7 +45,7 @@ static void twd_set_mode(enum clock_event_mode mode,
case CLOCK_EVT_MODE_PERIODIC:
ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE
| TWD_TIMER_CONTROL_PERIODIC;
- __raw_writel(DIV_ROUND_CLOSEST(twd_timer_rate, HZ),
+ writel_relaxed(DIV_ROUND_CLOSEST(twd_timer_rate, HZ),
twd_base + TWD_TIMER_LOAD);
break;
case CLOCK_EVT_MODE_ONESHOT:
@@ -58,18 +58,18 @@ static void twd_set_mode(enum clock_event_mode mode,
ctrl = 0;
}
- __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
+ writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL);
}
static int twd_set_next_event(unsigned long evt,
struct clock_event_device *unused)
{
- unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);
+ unsigned long ctrl = readl_relaxed(twd_base + TWD_TIMER_CONTROL);
ctrl |= TWD_TIMER_CONTROL_ENABLE;
- __raw_writel(evt, twd_base + TWD_TIMER_COUNTER);
- __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
+ writel_relaxed(evt, twd_base + TWD_TIMER_COUNTER);
+ writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL);
return 0;
}
@@ -82,8 +82,8 @@ static int twd_set_next_event(unsigned long evt,
*/
static int twd_timer_ack(void)
{
- if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) {
- __raw_writel(1, twd_base + TWD_TIMER_INTSTAT);
+ if (readl_relaxed(twd_base + TWD_TIMER_INTSTAT)) {
+ writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT);
return 1;
}
@@ -209,15 +209,15 @@ static void __cpuinit twd_calibrate_rate(void)
waitjiffies += 5;
/* enable, no interrupt or reload */
- __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL);
+ writel_relaxed(0x1, twd_base + TWD_TIMER_CONTROL);
/* maximum value */
- __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
+ writel_relaxed(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
while (get_jiffies_64() < waitjiffies)
udelay(10);
- count = __raw_readl(twd_base + TWD_TIMER_COUNTER);
+ count = readl_relaxed(twd_base + TWD_TIMER_COUNTER);
twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
@@ -275,7 +275,7 @@ static int __cpuinit twd_timer_setup(struct clock_event_device *clk)
* bother with the below.
*/
if (per_cpu(percpu_setup_called, cpu)) {
- __raw_writel(0, twd_base + TWD_TIMER_CONTROL);
+ writel_relaxed(0, twd_base + TWD_TIMER_CONTROL);
clockevents_register_device(*__this_cpu_ptr(twd_evt));
enable_percpu_irq(clk->irq, 0);
return 0;
@@ -288,7 +288,7 @@ static int __cpuinit twd_timer_setup(struct clock_event_device *clk)
* The following is done once per CPU the first time .setup() is
* called.
*/
- __raw_writel(0, twd_base + TWD_TIMER_CONTROL);
+ writel_relaxed(0, twd_base + TWD_TIMER_CONTROL);
clk->name = "local_timer";
clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index c5a59546a256..677da58d9e88 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <asm/cputype.h>
+#include <asm/smp_plat.h>
#include <asm/topology.h>
/*
@@ -289,6 +290,140 @@ void store_cpu_topology(unsigned int cpuid)
cpu_topology[cpuid].socket_id, mpidr);
}
+
+#ifdef CONFIG_SCHED_HMP
+
+static const char * const little_cores[] = {
+ "arm,cortex-a7",
+ NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+ const char * const *lc;
+ for (lc = little_cores; *lc; lc++)
+ if (of_device_is_compatible(cn, *lc))
+ return true;
+ return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+ struct cpumask *slow)
+{
+ struct device_node *cn = NULL;
+ int cpu;
+
+ cpumask_clear(fast);
+ cpumask_clear(slow);
+
+ /*
+ * Use the config options if they are given. This helps testing
+ * HMP scheduling on systems without a big.LITTLE architecture.
+ */
+ if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+ if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+ WARN(1, "Failed to parse HMP fast cpu mask!\n");
+ if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+ WARN(1, "Failed to parse HMP slow cpu mask!\n");
+ return;
+ }
+
+ /*
+ * Else, parse device tree for little cores.
+ */
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+ const u32 *mpidr;
+ int len;
+
+ mpidr = of_get_property(cn, "reg", &len);
+ if (!mpidr || len != 4) {
+ pr_err("* %s missing reg property\n", cn->full_name);
+ continue;
+ }
+
+ cpu = get_logical_index(be32_to_cpup(mpidr));
+ if (cpu == -EINVAL) {
+ pr_err("couldn't get logical index for mpidr %x\n",
+ be32_to_cpup(mpidr));
+ break;
+ }
+
+ if (is_little_cpu(cn))
+ cpumask_set_cpu(cpu, slow);
+ else
+ cpumask_set_cpu(cpu, fast);
+ }
+
+ if (!cpumask_empty(fast) && !cpumask_empty(slow))
+ return;
+
+ /*
+	 * We didn't find both big and little cores, so call all cores
+	 * fast; this keeps the system running, with all cores treated
+	 * equally.
+ */
+ cpumask_setall(fast);
+ cpumask_clear(slow);
+}
+
+struct cpumask hmp_slow_cpu_mask;
+
+void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
+{
+ struct cpumask hmp_fast_cpu_mask;
+ struct hmp_domain *domain;
+
+ arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
+
+ /*
+ * Initialize hmp_domains
+ * Must be ordered with respect to compute capacity.
+ * Fastest domain at head of list.
+ */
+	if (!cpumask_empty(&hmp_slow_cpu_mask)) {
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+ }
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+}
+#endif /* CONFIG_SCHED_HMP */
+
+
+/*
+ * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
+ * @socket_id: cluster HW identifier
+ * @cluster_mask: the cpumask location to be initialized, modified by the
+ * function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+ int cpu;
+
+ if (!cluster_mask)
+ return -EINVAL;
+
+ for_each_online_cpu(cpu)
+ if (socket_id == topology_physical_package_id(cpu)) {
+ cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
/*
* init_cpu_topology is called at boot when only one cpu is running
* which prevent simultaneous write access to cpu_topology array
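Taken together with the kernel-doc above, a typical caller of cluster_to_logical_mask() (such as the PMU probe path earlier in this series) looks like this sketch:

	cpumask_t sibling_mask;

	if (cluster_to_logical_mask(cluster_id, &sibling_mask))
		cpumask_setall(&sibling_mask);	/* no match: fall back to all CPUs */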
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index d6a0fdb6c2ee..b4fd850c34b2 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -34,6 +34,7 @@
#include <asm/unwind.h>
#include <asm/tls.h>
#include <asm/system_misc.h>
+#include <asm/opcodes.h>
static const char *handler[]= {
"prefetch abort",
@@ -347,15 +348,17 @@ void arm_notify_die(const char *str, struct pt_regs *regs,
int is_valid_bugaddr(unsigned long pc)
{
#ifdef CONFIG_THUMB2_KERNEL
- unsigned short bkpt;
+ u16 bkpt;
+ u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE);
#else
- unsigned long bkpt;
+ u32 bkpt;
+ u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE);
#endif
if (probe_kernel_address((unsigned *)pc, bkpt))
return 0;
- return bkpt == BUG_INSTR_VALUE;
+ return bkpt == insn;
}
#endif
@@ -408,25 +411,28 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
if (processor_mode(regs) == SVC_MODE) {
#ifdef CONFIG_THUMB2_KERNEL
if (thumb_mode(regs)) {
- instr = ((u16 *)pc)[0];
+ instr = __mem_to_opcode_thumb16(((u16 *)pc)[0]);
if (is_wide_instruction(instr)) {
- instr <<= 16;
- instr |= ((u16 *)pc)[1];
+ u16 inst2;
+ inst2 = __mem_to_opcode_thumb16(((u16 *)pc)[1]);
+ instr = __opcode_thumb32_compose(instr, inst2);
}
} else
#endif
- instr = *(u32 *) pc;
+ instr = __mem_to_opcode_arm(*(u32 *) pc);
} else if (thumb_mode(regs)) {
if (get_user(instr, (u16 __user *)pc))
goto die_sig;
+ instr = __mem_to_opcode_thumb16(instr);
if (is_wide_instruction(instr)) {
unsigned int instr2;
if (get_user(instr2, (u16 __user *)pc+1))
goto die_sig;
- instr <<= 16;
- instr |= instr2;
+ instr2 = __mem_to_opcode_thumb16(instr2);
+ instr = __opcode_thumb32_compose(instr, instr2);
}
-	} else if (get_user(instr, (u32 __user *)pc)) {
-		goto die_sig;
+	} else {
+		if (get_user(instr, (u32 __user *)pc))
+			goto die_sig;
+		instr = __mem_to_opcode_arm(instr);
}
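The __mem_to_opcode_*() helpers exist because a BE8 kernel keeps instructions little-endian in memory while data accesses are big-endian, so a raw load of an opcode must be byte-swapped before comparing it against canonical values. Roughly, as a sketch of the asm/opcodes.h behaviour relied on here:

	/* on CONFIG_CPU_ENDIAN_BE8 these are byte swaps; otherwise identities */
	u32 insn = __mem_to_opcode_arm(*(u32 *)pc);	/* ~ swab32() on BE8 */
	u16 half = __mem_to_opcode_thumb16(*(u16 *)pc);	/* ~ swab16() on BE8 */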
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 370e1a8af6ac..466bd299b1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
bool "Kernel-based Virtual Machine (KVM) support"
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_ARM_HOST
depends on ARM_VIRT_EXT && ARM_LPAE
@@ -41,9 +42,9 @@ config KVM_ARM_HOST
Provides host support for ARM processors.
config KVM_ARM_MAX_VCPUS
- int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
- default 4 if KVM_ARM_HOST
- default 0
+ int "Number maximum supported virtual CPUs per VM"
+ depends on KVM_ARM_HOST
+ default 4
help
Static number of max supported virtual CPUs per VM.
@@ -67,6 +68,4 @@ config KVM_ARM_TIMER
---help---
Adds support for the Architected Timers in virtual machines
-source drivers/virtio/Kconfig
-
endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 53c5ed83d16f..f7057ed045b6 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -14,10 +14,12 @@ CFLAGS_mmu.o := -I.
AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
-kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+KVM := ../../../virt/kvm
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
-obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ef1703b9587b..d0c8ee654bbf 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -17,6 +17,7 @@
*/
#include <linux/cpu.h>
+#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
@@ -81,12 +82,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
/**
* kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
*/
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
{
return &kvm_arm_running_vcpu;
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
return 0;
}
@@ -96,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
void kvm_arch_check_processor_compat(void *rtn)
{
*(int *)rtn = 0;
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
/**
* kvm_arch_init_vm - initializes a VM data structure
@@ -137,6 +127,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
+ kvm_timer_init(kvm);
+
/* Mark the initial VMID generation invalid */
kvm->arch.vmid_gen = 0;
@@ -152,15 +144,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
-{
- return 0;
-}
/**
* kvm_arch_destroy_vm - destroy the VM data structure
@@ -178,20 +161,25 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
}
}
+
+ kvm_vgic_destroy(kvm);
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
break;
+ case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_READONLY_MEM:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -219,29 +207,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
- enum kvm_mr_change change)
-{
- return 0;
-}
-
-void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
- enum kvm_mr_change change)
-{
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
@@ -280,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
+ kvm_vgic_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -295,26 +261,15 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
- int ret;
-
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
- /* Set up VGIC */
- ret = kvm_vgic_vcpu_init(vcpu);
- if (ret)
- return ret;
-
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = cpu;
@@ -334,6 +289,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ /*
+ * The arch-generic KVM code expects the cpu field of a vcpu to be -1
+ * if the vcpu is no longer assigned to a cpu. This is used for the
+ * optimized make_all_cpus_request path.
+ */
+ vcpu->cpu = -1;
+
kvm_arm_set_running_vcpu(NULL);
}
@@ -448,15 +410,17 @@ static void update_vttbr(struct kvm *kvm)
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);
+ BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
- kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
- kvm->arch.vttbr |= vmid;
+ kvm->arch.vttbr = pgd_phys | vmid;
spin_unlock(&kvm_vmid_lock);
}
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
+ int ret;
+
if (likely(vcpu->arch.has_run_once))
return 0;
@@ -466,22 +430,12 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
* Initialize the VGIC before running a vcpu the first time on
* this VM.
*/
- if (irqchip_in_kernel(vcpu->kvm) &&
- unlikely(!vgic_initialized(vcpu->kvm))) {
- int ret = kvm_vgic_init(vcpu->kvm);
+ if (unlikely(!vgic_initialized(vcpu->kvm))) {
+ ret = kvm_vgic_init(vcpu->kvm);
if (ret)
return ret;
}
- /*
- * Handle the "start in power-off" case by calling into the
- * PSCI code.
- */
- if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
- *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
- kvm_psci_call(vcpu);
- }
-
return 0;
}
@@ -695,6 +649,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
return -EINVAL;
}
+static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_init *init)
+{
+ int ret;
+
+ ret = kvm_vcpu_set_target(vcpu, init);
+ if (ret)
+ return ret;
+
+ /*
+ * Handle the "start in power-off" case by marking the VCPU as paused.
+ */
+ if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+ vcpu->arch.pause = true;
+
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -708,8 +680,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&init, argp, sizeof(init)))
return -EFAULT;
- return kvm_vcpu_set_target(vcpu, &init);
-
+ return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
}
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
@@ -767,7 +738,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
case KVM_ARM_DEVICE_VGIC_V2:
if (!vgic_present)
return -ENXIO;
- return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+ return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
default:
return -ENODEV;
}
@@ -793,6 +764,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
return -EFAULT;
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
}
+ case KVM_ARM_PREFERRED_TARGET: {
+ int err;
+ struct kvm_vcpu_init init;
+
+ err = kvm_vcpu_preferred_target(&init);
+ if (err)
+ return err;
+
+ if (copy_to_user(argp, &init, sizeof(init)))
+ return -EFAULT;
+
+ return 0;
+ }
default:
return -EINVAL;
}
@@ -800,8 +784,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
static void cpu_init_hyp_mode(void *dummy)
{
- unsigned long long boot_pgd_ptr;
- unsigned long long pgd_ptr;
+ phys_addr_t boot_pgd_ptr;
+ phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
unsigned long stack_page;
unsigned long vector_ptr;
@@ -809,8 +793,8 @@ static void cpu_init_hyp_mode(void *dummy)
/* Switch from the HYP stub to our own HYP init vector */
__hyp_set_vectors(kvm_get_idmap_vector());
- boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
- pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
+ boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+ pgd_ptr = kvm_mmu_get_httbr();
stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
vector_ptr = (unsigned long)__kvm_hyp_vector;
@@ -824,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_STARTING:
case CPU_STARTING_FROZEN:
- cpu_init_hyp_mode(NULL);
+ if (__hyp_get_vectors() == hyp_default_vectors)
+ cpu_init_hyp_mode(NULL);
break;
}
@@ -835,6 +820,34 @@ static struct notifier_block hyp_init_cpu_nb = {
.notifier_call = hyp_init_cpu_notify,
};
+#ifdef CONFIG_CPU_PM
+static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
+ unsigned long cmd,
+ void *v)
+{
+ if (cmd == CPU_PM_EXIT &&
+ __hyp_get_vectors() == hyp_default_vectors) {
+ cpu_init_hyp_mode(NULL);
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block hyp_init_cpu_pm_nb = {
+ .notifier_call = hyp_init_cpu_pm_notifier,
+};
+
+static void __init hyp_cpu_pm_init(void)
+{
+ cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
+}
+#else
+static inline void hyp_cpu_pm_init(void)
+{
+}
+#endif
+
/**
* Inits Hyp-mode on all online CPUs
*/
@@ -995,6 +1008,8 @@ int kvm_arch_init(void *opaque)
goto out_err;
}
+ hyp_cpu_pm_init();
+
kvm_coproc_table_init();
return 0;
out_err:
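The new KVM_ARM_PREFERRED_TARGET VM ioctl lets userspace ask the kernel which vcpu target best matches the host instead of hardcoding one. A minimal userspace sketch (error handling elided; vm_fd and vcpu_fd are illustrative):

	struct kvm_vcpu_init init;

	ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init);	/* fills .target */
	/* features may be requested here, e.g. KVM_ARM_VCPU_POWER_OFF,
	 * which the vcpu init path above now maps to vcpu->arch.pause */
	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);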
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index db9cf692d4dd..7928dbdf2102 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -23,6 +23,7 @@
#include <asm/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <trace/events/kvm.h>
@@ -43,6 +44,31 @@ static u32 cache_levels;
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
#define CSSELR_MAX 12
+/*
+ * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some
+ * cp15 registers can be viewed either as a pair of u32 registers or as
+ * one u64 register. The current u64 encoding places the least
+ * significant u32 word first, followed by the most significant one.
+ */
+static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
+ const struct coproc_reg *r,
+ u64 val)
+{
+ vcpu->arch.cp15[r->reg] = val & 0xffffffff;
+ vcpu->arch.cp15[r->reg + 1] = val >> 32;
+}
+
+static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
+ const struct coproc_reg *r)
+{
+ u64 val;
+
+ val = vcpu->arch.cp15[r->reg + 1];
+ val = val << 32;
+ val = val | vcpu->arch.cp15[r->reg];
+ return val;
+}
+
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
kvm_inject_undefined(vcpu);
@@ -71,6 +97,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ /*
+ * Compute guest MPIDR. We build a virtual cluster out of the
+ * vcpu_id, but we read the 'U' bit from the underlying
+ * hardware directly.
+ */
+ vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
+ ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
+ (vcpu->vcpu_id & 3));
+}
+
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+ return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p);
+ return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+ return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ u32 l2ctlr, ncores;
+
+ asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+ l2ctlr &= ~(3 << 24);
+ ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+ /* How many cores in the current cluster and the next ones */
+ ncores -= (vcpu->vcpu_id & ~3);
+ /* Cap it to the maximum number of cores in a single cluster */
+ ncores = min(ncores, 3U);
+ l2ctlr |= (ncores & 3) << 24;
+
+ vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ u32 actlr;
+
+ /* ACTLR contains SMP bit: make sure you create all cpus first! */
+ asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+ /* Make the SMP bit consistent with the guest configuration */
+ if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+ actlr |= 1U << 6;
+ else
+ actlr &= ~(1U << 6);
+
+ vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = 0;
+ return true;
+}
+
/* See note at ARM ARM B1.14.4 */
static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
@@ -113,6 +231,44 @@ done:
}
/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ BUG_ON(!p->is_write);
+
+ vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
+ if (p->is_64bit)
+ vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
+
+ return true;
+}
+
+/*
+ * SCTLR accessor. Only called as long as HCR_TVM is set. If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
+ */
+bool access_sctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ access_vm_reg(vcpu, p, r);
+
+ if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
+ vcpu->arch.hcr &= ~HCR_TVM;
+ stage2_flush_vm(vcpu->kvm);
+ }
+
+ return true;
+}
+
+/*
* We could trap ID_DFR0 and tell the guest we don't support performance
* monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
* NAKed, so it will read the PMCR anyway.
@@ -153,37 +309,52 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
* registers preceding 32-bit ones.
*/
static const struct coproc_reg cp15_regs[] = {
+ /* MPIDR: we use VMPIDR for guest access. */
+ { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+ NULL, reset_mpidr, c0_MPIDR },
+
/* CSSELR: swapped by interrupt.S. */
{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
NULL, reset_unknown, c0_CSSELR },
- /* TTBR0/TTBR1: swapped by interrupt.S. */
- { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
- { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
+ /* ACTLR: trapped by HCR.TAC bit. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+ access_actlr, reset_actlr, c1_ACTLR },
- /* TTBCR: swapped by interrupt.S. */
+ /* CPACR: swapped by interrupt.S. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+ NULL, reset_val, c1_CPACR, 0x00000000 },
+
+ /* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */
+ { CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 },
+ { CRn(2), CRm( 0), Op1( 0), Op2( 0), is32,
+ access_vm_reg, reset_unknown, c2_TTBR0 },
+ { CRn(2), CRm( 0), Op1( 0), Op2( 1), is32,
+ access_vm_reg, reset_unknown, c2_TTBR1 },
{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_val, c2_TTBCR, 0x00000000 },
+ access_vm_reg, reset_val, c2_TTBCR, 0x00000000 },
+ { CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 },
+
/* DACR: swapped by interrupt.S. */
{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c3_DACR },
+ access_vm_reg, reset_unknown, c3_DACR },
/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c5_DFSR },
+ access_vm_reg, reset_unknown, c5_DFSR },
{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
- NULL, reset_unknown, c5_IFSR },
+ access_vm_reg, reset_unknown, c5_IFSR },
{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c5_ADFSR },
+ access_vm_reg, reset_unknown, c5_ADFSR },
{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
- NULL, reset_unknown, c5_AIFSR },
+ access_vm_reg, reset_unknown, c5_AIFSR },
/* DFAR/IFAR: swapped by interrupt.S. */
{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c6_DFAR },
+ access_vm_reg, reset_unknown, c6_DFAR },
{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_unknown, c6_IFAR },
+ access_vm_reg, reset_unknown, c6_IFAR },
/* PAR swapped by interrupt.S */
{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
@@ -195,6 +366,13 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
/*
+ * L2CTLR access (guest wants to know #CPUs).
+ */
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+ access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
+ /*
* Dummy performance monitor implementation.
*/
{ CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
@@ -213,9 +391,15 @@ static const struct coproc_reg cp15_regs[] = {
/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c10_PRRR},
+ access_vm_reg, reset_unknown, c10_PRRR},
{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
- NULL, reset_unknown, c10_NMRR},
+ access_vm_reg, reset_unknown, c10_NMRR},
+
+ /* AMAIR0/AMAIR1: swapped by interrupt.S. */
+ { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
+ access_vm_reg, reset_unknown, c10_AMAIR0},
+ { CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
+ access_vm_reg, reset_unknown, c10_AMAIR1},
/* VBAR: swapped by interrupt.S. */
{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
@@ -223,7 +407,7 @@ static const struct coproc_reg cp15_regs[] = {
/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
- NULL, reset_val, c13_CID, 0x00000000 },
+ access_vm_reg, reset_val, c13_CID, 0x00000000 },
{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
NULL, reset_unknown, c13_TID_URW },
{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
@@ -234,6 +418,9 @@ static const struct coproc_reg cp15_regs[] = {
/* CNTKCTL: swapped by interrupt.S. */
{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+ /* The Configuration Base Address Register. */
+ { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
/* Target specific emulation tables */
@@ -241,6 +428,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
{
+ unsigned int i;
+
+ for (i = 1; i < table->num; i++)
+ BUG_ON(cmp_reg(&table->table[i-1],
+ &table->table[i]) >= 0);
+
target_tables[table->target] = table;
}
@@ -323,7 +516,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct coproc_params params;
- params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+ params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
params.is_64bit = true;
@@ -331,7 +524,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
params.Op2 = 0;
params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
- params.CRn = 0;
+ params.CRm = 0;
return emulate_cp15(vcpu, &params);
}
@@ -514,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = {
{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
};
+/*
+ * Reads a register value from a userspace address into a kernel
+ * variable. The register size must match the destination variable's size.
+ */
static int reg_from_user(void *val, const void __user *uaddr, u64 id)
{
- /* This Just Works because we are little endian. */
if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
return -EFAULT;
return 0;
}
+/*
+ * Writes a register value to a userspace address from a kernel variable.
+ * The register size must match the source variable's size.
+ */
static int reg_to_user(void __user *uaddr, const void *val, u64 id)
{
- /* This Just Works because we are little endian. */
if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
return -EFAULT;
return 0;
@@ -534,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
{
struct coproc_params params;
const struct coproc_reg *r;
+ int ret;
if (!index_to_params(id, &params))
return -ENOENT;
@@ -542,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
if (!r)
return -ENOENT;
- return reg_to_user(uaddr, &r->val, id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(id) == 4) {
+ u32 val = r->val;
+
+ ret = reg_to_user(uaddr, &val, id);
+ } else if (KVM_REG_SIZE(id) == 8) {
+ ret = reg_to_user(uaddr, &r->val, id);
+ }
+ return ret;
}
static int set_invariant_cp15(u64 id, void __user *uaddr)
@@ -550,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
struct coproc_params params;
const struct coproc_reg *r;
int err;
- u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+ u64 val;
if (!index_to_params(id, &params))
return -ENOENT;
@@ -558,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
if (!r)
return -ENOENT;
- err = reg_from_user(&val, uaddr, id);
+ err = -ENOENT;
+ if (KVM_REG_SIZE(id) == 4) {
+ u32 val32;
+
+ err = reg_from_user(&val32, uaddr, id);
+ if (!err)
+ val = val32;
+ } else if (KVM_REG_SIZE(id) == 8) {
+ err = reg_from_user(&val, uaddr, id);
+ }
if (err)
return err;
@@ -574,7 +791,7 @@ static bool is_valid_cache(u32 val)
u32 level, ctype;
if (val >= CSSELR_MAX)
- return -ENOENT;
+ return false;
/* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
level = (val >> 1);
@@ -836,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
const struct coproc_reg *r;
void __user *uaddr = (void __user *)(long)reg->addr;
+ int ret;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_get(reg->id, uaddr);
@@ -847,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!r)
return get_invariant_cp15(reg->id, uaddr);
- /* Note: copies two regs if size is 64 bit. */
- return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(reg->id) == 8) {
+ u64 val;
+
+ val = vcpu_cp15_reg64_get(vcpu, r);
+ ret = reg_to_user(uaddr, &val, reg->id);
+ } else if (KVM_REG_SIZE(reg->id) == 4) {
+ ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+ }
+
+ return ret;
}
int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
const struct coproc_reg *r;
void __user *uaddr = (void __user *)(long)reg->addr;
+ int ret;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_set(reg->id, uaddr);
@@ -866,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!r)
return set_invariant_cp15(reg->id, uaddr);
- /* Note: copies two regs if size is 64 bit */
- return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(reg->id) == 8) {
+ u64 val;
+
+ ret = reg_from_user(&val, uaddr, reg->id);
+ if (!ret)
+ vcpu_cp15_reg64_set(vcpu, r, val);
+ } else if (KVM_REG_SIZE(reg->id) == 4) {
+ ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+ }
+
+ return ret;
}
static unsigned int num_demux_regs(void)
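The size-checked staging in the accessors above is what makes this code endian-clean: on a big-endian kernel the architecturally low 32 bits of a u64 live at byte offset 4, so the old "copy KVM_REG_SIZE(id) bytes straight from &r->val" approach would hand a 32-bit register's high half to userspace. In sketch form:

	u64 val = 0x00000000deadbeefULL;
	u32 out;

	/* little-endian: bytes 0..3 of val hold the low word  -> 0xdeadbeef
	 * big-endian:    bytes 0..3 of val hold the high word -> 0x00000000
	 * hence the explicit u32 staging copy for 4-byte registers: */
	out = val;			/* always the architecturally low word */
	ret = reg_to_user(uaddr, &out, id);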
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 0461d5c8d3de..1a44bbe39643 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p)
{
/* Look, we even formatted it for you to paste into the table! */
if (p->is_64bit) {
- kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
- p->CRm, p->Op1, p->is_write ? "write" : "read");
+ kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n",
+ p->CRn, p->Op1, p->is_write ? "write" : "read");
} else {
kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
" func_%s },\n",
@@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1,
return -1;
if (i1->CRn != i2->CRn)
return i1->CRn - i2->CRn;
- if (i1->is_64 != i2->is_64)
- return i2->is_64 - i1->is_64;
if (i1->CRm != i2->CRm)
return i1->CRm - i2->CRm;
if (i1->Op1 != i2->Op1)
return i1->Op1 - i2->Op1;
- return i1->Op2 - i2->Op2;
+ if (i1->Op2 != i2->Op2)
+ return i1->Op2 - i2->Op2;
+ return i2->is_64 - i1->is_64;
}
@@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
#define is64 .is_64 = true
#define is32 .is_64 = false
+bool access_sctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r);
+
#endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index cf93472b9dd6..e6f4ae48bda9 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -17,101 +17,12 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
#include <linux/init.h>
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- /*
- * Compute guest MPIDR:
- * (Even if we present only one VCPU to the guest on an SMP
- * host we don't set the U bit in the MPIDR, or vice versa, as
- * revealing the underlying hardware properties is likely to
- * be the best choice).
- */
- vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
- | (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
-}
-
#include "coproc.h"
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
- return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return write_to_read_only(vcpu, p);
- return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
- return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 l2ctlr, ncores;
-
- asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
- l2ctlr &= ~(3 << 24);
- ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
- l2ctlr |= (ncores & 3) << 24;
-
- vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 actlr;
-
- /* ACTLR contains SMP bit: make sure you create all cpus first! */
- asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
- /* Make the SMP bit consistent with the guest configuration */
- if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
- actlr |= 1U << 6;
- else
- actlr &= ~(1U << 6);
-
- vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = 0;
- return true;
-}
-
/*
* A15-specific CP15 registers.
* CRn denotes the primary register number, but is copied to the CRm in the
@@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
* registers preceding 32-bit ones.
*/
static const struct coproc_reg a15_regs[] = {
- /* MPIDR: we use VMPIDR for guest access. */
- { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
- NULL, reset_mpidr, c0_MPIDR },
-
/* SCTLR: swapped by interrupt.S. */
{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_val, c1_SCTLR, 0x00C50078 },
- /* ACTLR: trapped by HCR.TAC bit. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
- access_actlr, reset_actlr, c1_ACTLR },
- /* CPACR: swapped by interrupt.S. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_val, c1_CPACR, 0x00000000 },
-
- /*
- * L2CTLR access (guest wants to know #CPUs).
- */
- { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
- access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
- { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
- /* The Configuration Base Address Register. */
- { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
+ access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
};
static struct kvm_coproc_target_table a15_target_table = {
@@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
static int __init coproc_a15_init(void)
{
- unsigned int i;
-
- for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
- BUG_ON(cmp_reg(&a15_regs[i-1],
- &a15_regs[i]) >= 0);
-
kvm_register_target_coproc_table(&a15_target_table);
return 0;
}
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644
index 000000000000..17fc7cd479d3
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ * Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRm, Op1, Op2 and with 64-bit
+ * registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+ /* SCTLR: swapped by interrupt.S. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+ access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+ .target = KVM_ARM_TARGET_CORTEX_A7,
+ .table = a7_regs,
+ .num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+ kvm_register_target_coproc_table(&a7_target_table);
+ return 0;
+}
+late_initcall(coproc_a7_init);
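
The "must be sorted" requirement in the comment above is what the BUG_ON(cmp_reg(...)) loop deleted from coproc_a15_init() used to enforce. A sketch of such an init-time check, assuming the cmp_reg() comparator from coproc.h that orders entries by CRn, CRm, Op1, Op2 (the helper name here is illustrative):

/* Sketch: verify a coproc table honours the sort order documented
 * above, in the spirit of the loop removed from coproc_a15_init(). */
static void __init check_reg_table_sorted(const struct coproc_reg *tbl,
					  size_t num)
{
	size_t i;

	for (i = 1; i < num; i++)
		BUG_ON(cmp_reg(&tbl[i - 1], &tbl[i]) >= 0);
}
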
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index bdede9e7da51..d6c005283678 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
if (is_pabt) {
- /* Set DFAR and DFSR */
+ /* Set IFAR and IFSR */
vcpu->arch.cp15[c6_IFAR] = addr;
is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
/* Always give debug fault for now - should give guest a clue */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d03612181..cc0b78769bd8 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.hcr = HCR_GUEST_MASK;
return 0;
}
@@ -109,6 +110,73 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return -EINVAL;
}
+#ifndef CONFIG_KVM_ARM_TIMER
+
+#define NUM_TIMER_REGS 0
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ return 0;
+}
+
+static bool is_timer_reg(u64 index)
+{
+ return false;
+}
+
+#else
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+ switch (index) {
+ case KVM_REG_ARM_TIMER_CTL:
+ case KVM_REG_ARM_TIMER_CNT:
+ case KVM_REG_ARM_TIMER_CVAL:
+ return true;
+ }
+ return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+ return -EFAULT;
+
+ return 0;
+}
+
+#endif
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int ret;
+
+ ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+ if (ret != 0)
+ return -EFAULT;
+
+ return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ val = kvm_arm_timer_get_reg(vcpu, reg->id);
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
static unsigned long num_core_regs(void)
{
return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +189,8 @@ static unsigned long num_core_regs(void)
*/
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
{
- return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+ return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+ + NUM_TIMER_REGS;
}
/**
@@ -133,6 +202,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
unsigned int i;
const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+ int ret;
for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
if (put_user(core_reg | i, uindices))
@@ -140,6 +210,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
uindices++;
}
+ ret = copy_timer_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += NUM_TIMER_REGS;
+
return kvm_arm_copy_coproc_indices(vcpu, uindices);
}
@@ -153,6 +228,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return get_core_reg(vcpu, reg);
+ if (is_timer_reg(reg->id))
+ return get_timer_reg(vcpu, reg);
+
return kvm_arm_coproc_get_reg(vcpu, reg);
}
@@ -166,6 +244,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return set_core_reg(vcpu, reg);
+ if (is_timer_reg(reg->id))
+ return set_timer_reg(vcpu, reg);
+
return kvm_arm_coproc_set_reg(vcpu, reg);
}
@@ -183,13 +264,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int __attribute_const__ kvm_target_cpu(void)
{
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
-
- if (implementor != ARM_CPU_IMP_ARM)
- return -EINVAL;
-
- switch (part_number) {
+ switch (read_cpuid_part()) {
+ case ARM_CPU_PART_CORTEX_A7:
+ return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:
return KVM_ARM_TARGET_CORTEX_A15;
default:
@@ -202,7 +279,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
{
unsigned int i;
- /* We can only do a cortex A15 for now. */
+ /* We can only cope with guest==host and only on A15/A7 (for now). */
if (init->target != kvm_target_cpu())
return -EINVAL;
@@ -222,6 +299,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
return kvm_reset_vcpu(vcpu);
}
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+ int target = kvm_target_cpu();
+
+ if (target < 0)
+ return -ENODEV;
+
+ memset(init, 0, sizeof(*init));
+
+ /*
+ * For now, we don't return any features.
+ * In future, we might use features to return target
+ * specific features available for the preferred
+ * target type.
+ */
+ init->target = (__u32)target;
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -EINVAL;
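
With the timer registers now enumerated by copy_timer_indices() and reachable through the one-reg interface, user space can read them like any other register. A minimal user-space sketch, assuming vcpu_fd is an open KVM vcpu file descriptor; the wrapper name is illustrative and error handling is mostly elided:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: read the guest's virtual counter through the timer
 * registers exported above. Returns 0 if the ioctl fails. */
static uint64_t read_guest_timer_cnt(int vcpu_fd)
{
	uint64_t val = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_TIMER_CNT,
		.addr = (uint64_t)(uintptr_t)&val,
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		return 0;

	return val;
}
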
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 3d74a0be47db..4c979d466cc1 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -26,8 +26,6 @@
#include "trace.h"
-#include "trace.h"
-
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
@@ -40,21 +38,22 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
+ int ret;
+
trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
- if (kvm_psci_call(vcpu))
+ ret = kvm_psci_call(vcpu);
+ if (ret < 0) {
+ kvm_inject_undefined(vcpu);
return 1;
+ }
- kvm_inject_undefined(vcpu);
- return 1;
+ return ret;
}
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- if (kvm_psci_call(vcpu))
- return 1;
-
kvm_inject_undefined(vcpu);
return 1;
}
@@ -76,23 +75,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
/**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a trapped WFI or WFE instruction from a guest
* @vcpu: the vcpu pointer
* @run: the kvm_run structure pointer
*
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
*/
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
trace_kvm_wfi(*vcpu_pc(vcpu));
- kvm_vcpu_block(vcpu);
+ if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+ kvm_vcpu_on_spin(vcpu);
+ else
+ kvm_vcpu_block(vcpu);
+
return 1;
}
static exit_handle_fn arm_exit_handlers[] = {
- [HSR_EC_WFI] = kvm_handle_wfi,
+ [HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
[HSR_EC_CP14_MR] = kvm_handle_cp14_access,
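
For context, arm_exit_handlers[] is indexed by the HSR exception class, bits [31:26] of the syndrome register. A sketch of the dispatch step, assuming the kvm_vcpu_get_hsr() accessor used elsewhere in this series; the function name is illustrative:

/* Sketch: select an exit handler from the table above using the
 * HSR exception class field (HSR[31:26]). */
static exit_handle_fn kvm_get_exit_handler_sketch(struct kvm_vcpu *vcpu)
{
	u8 hsr_ec = kvm_vcpu_get_hsr(vcpu) >> 26;	/* HSR_EC field */

	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
	    !arm_exit_handlers[hsr_ec])
		return NULL;	/* no handler: unknown exception class */

	return arm_exit_handlers[hsr_ec];
}
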
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index f048338135f7..2cc14dfad049 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -71,7 +71,7 @@ __do_hyp_init:
bne phase2 @ Yes, second stage init
@ Set the HTTBR to point to the hypervisor PGD pointer passed
- mcrr p15, 4, r2, r3, c2
+ mcrr p15, 4, rr_lo_hi(r2, r3), c2
@ Set the HTCR and VTCR to the same shareability and cacheability
@ settings as the non-secure TTBCR and with T0SZ == 0.
@@ -137,12 +137,12 @@ phase2:
mov pc, r0
target: @ We're now in the trampoline code, switch page tables
- mcrr p15, 4, r2, r3, c2
+ mcrr p15, 4, rr_lo_hi(r2, r3), c2
isb
@ Invalidate the old TLBs
mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
- dsb
+ dsb ish
eret
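
The rr_lo_hi() macro introduced into these mcrr instructions exists because a BE8 kernel sees the two halves of a 64-bit value loaded with ldrd in the opposite registers from a little-endian one. Its definition in asm/assembler.h is along these lines:

/* Pass a 64-bit register pair to mcrr/mrrc in the right order for the
 * kernel's endianness; on BE8 the low/high halves are swapped. */
#ifdef CONFIG_CPU_ENDIAN_BE8
#define rr_lo_hi(a1, a2) a2, a1
#else
#define rr_lo_hi(a1, a2) a1, a2
#endif
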
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 16cd4ba5d7fd..01dcb0e752d9 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -52,10 +52,10 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
dsb ishst
add r0, r0, #KVM_VTTBR
ldrd r2, r3, [r0]
- mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
isb
mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
- dsb
+ dsb ish
isb
mov r2, #0
mov r3, #0
@@ -79,7 +79,7 @@ ENTRY(__kvm_flush_vm_context)
mcr p15, 4, r0, c8, c3, 4
/* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
mcr p15, 0, r0, c7, c1, 0
- dsb
+ dsb ish
isb @ Not necessary if followed by eret
bx lr
@@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run)
ldr r1, [vcpu, #VCPU_KVM]
add r1, r1, #KVM_VTTBR
ldrd r2, r3, [r1]
- mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
@ We're all done, just restore the GPRs and go to the guest
restore_guest_regs
@@ -199,8 +199,13 @@ after_vfp_restore:
restore_host_regs
clrex @ Clear exclusive monitor
+#ifndef CONFIG_CPU_ENDIAN_BE8
mov r0, r1 @ Return the return code
mov r1, #0 @ Clear upper bits in return value
+#else
+ @ r1 already has return code
+ mov r0, #0 @ Clear upper bits in return value
+#endif /* CONFIG_CPU_ENDIAN_BE8 */
bx lr @ return to IOCTL
/********************************************************************
@@ -220,6 +225,10 @@ after_vfp_restore:
* in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
* passed in r0 and r1.
*
+ * A function pointer with a value of 0xffffffff has a special meaning,
+ * and is used to implement __hyp_get_vectors in the same way as in
+ * arch/arm/kernel/hyp_stub.S.
+ *
* The calling convention follows the standard AAPCS:
* r0 - r3: caller save
* r12: caller save
@@ -363,6 +372,11 @@ hyp_hvc:
host_switch_to_hyp:
pop {r0, r1, r2}
+ /* Check for __hyp_get_vectors */
+ cmp r0, #-1
+ mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
+ beq 1f
+
push {lr}
mrs lr, SPSR
push {lr}
@@ -378,7 +392,7 @@ THUMB( orr lr, #1)
pop {lr}
msr SPSR_csxf, lr
pop {lr}
- eret
+1: eret
guest_trap:
load_vcpu @ Load VCPU pointer to r0
@@ -492,10 +506,10 @@ __kvm_hyp_code_end:
.section ".rodata"
und_die_str:
- .ascii "unexpected undefined exception in Hyp mode at: %#08x"
+ .ascii "unexpected undefined exception in Hyp mode at: %#08x\n"
pabt_die_str:
- .ascii "unexpected prefetch abort in Hyp mode at: %#08x"
+ .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n"
dabt_die_str:
- .ascii "unexpected data abort in Hyp mode at: %#08x"
+ .ascii "unexpected data abort in Hyp mode at: %#08x\n"
svc_die_str:
- .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x"
+ .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n"
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 6f18695a09cb..98c8c5b9a87f 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -1,4 +1,5 @@
#include <linux/irqchip/arm-gic.h>
+#include <asm/assembler.h>
#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
#define VCPU_USR_SP (VCPU_USR_REG(13))
@@ -303,13 +304,17 @@ vcpu .req r0 @ vcpu pointer always in r0
mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL
mrrc p15, 0, r4, r5, c7 @ PAR
+ mrc p15, 0, r6, c10, c3, 0 @ AMAIR0
+ mrc p15, 0, r7, c10, c3, 1 @ AMAIR1
.if \store_to_vcpu == 0
- push {r2,r4-r5}
+ push {r2,r4-r7}
.else
str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
add r12, vcpu, #CP15_OFFSET(c7_PAR)
strd r4, r5, [r12]
+ str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+ str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
.endif
.endm
@@ -322,15 +327,19 @@ vcpu .req r0 @ vcpu pointer always in r0
*/
.macro write_cp15_state read_from_vcpu
.if \read_from_vcpu == 0
- pop {r2,r4-r5}
+ pop {r2,r4-r7}
.else
ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
add r12, vcpu, #CP15_OFFSET(c7_PAR)
ldrd r4, r5, [r12]
+ ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+ ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
.endif
mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL
mcrr p15, 0, r4, r5, c7 @ PAR
+ mcr p15, 0, r6, c10, c3, 0 @ AMAIR0
+ mcr p15, 0, r7, c10, c3, 1 @ AMAIR1
.if \read_from_vcpu == 0
pop {r2-r12}
@@ -412,15 +421,23 @@ vcpu .req r0 @ vcpu pointer always in r0
ldr r8, [r2, #GICH_ELRSR0]
ldr r9, [r2, #GICH_ELRSR1]
ldr r10, [r2, #GICH_APR]
-
- str r3, [r11, #VGIC_CPU_HCR]
- str r4, [r11, #VGIC_CPU_VMCR]
- str r5, [r11, #VGIC_CPU_MISR]
- str r6, [r11, #VGIC_CPU_EISR]
- str r7, [r11, #(VGIC_CPU_EISR + 4)]
- str r8, [r11, #VGIC_CPU_ELRSR]
- str r9, [r11, #(VGIC_CPU_ELRSR + 4)]
- str r10, [r11, #VGIC_CPU_APR]
+ARM_BE8(rev r3, r3 )
+ARM_BE8(rev r4, r4 )
+ARM_BE8(rev r5, r5 )
+ARM_BE8(rev r6, r6 )
+ARM_BE8(rev r7, r7 )
+ARM_BE8(rev r8, r8 )
+ARM_BE8(rev r9, r9 )
+ARM_BE8(rev r10, r10 )
+
+ str r3, [r11, #VGIC_V2_CPU_HCR]
+ str r4, [r11, #VGIC_V2_CPU_VMCR]
+ str r5, [r11, #VGIC_V2_CPU_MISR]
+ str r6, [r11, #VGIC_V2_CPU_EISR]
+ str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
+ str r8, [r11, #VGIC_V2_CPU_ELRSR]
+ str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+ str r10, [r11, #VGIC_V2_CPU_APR]
/* Clear GICH_HCR */
mov r5, #0
@@ -428,9 +445,10 @@ vcpu .req r0 @ vcpu pointer always in r0
/* Save list registers */
add r2, r2, #GICH_LR0
- add r3, r11, #VGIC_CPU_LR
+ add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
1: ldr r6, [r2], #4
+ARM_BE8(rev r6, r6 )
str r6, [r3], #4
subs r4, r4, #1
bne 1b
@@ -455,9 +473,12 @@ vcpu .req r0 @ vcpu pointer always in r0
add r11, vcpu, #VCPU_VGIC_CPU
/* We only restore a minimal set of registers */
- ldr r3, [r11, #VGIC_CPU_HCR]
- ldr r4, [r11, #VGIC_CPU_VMCR]
- ldr r8, [r11, #VGIC_CPU_APR]
+ ldr r3, [r11, #VGIC_V2_CPU_HCR]
+ ldr r4, [r11, #VGIC_V2_CPU_VMCR]
+ ldr r8, [r11, #VGIC_V2_CPU_APR]
+ARM_BE8(rev r3, r3 )
+ARM_BE8(rev r4, r4 )
+ARM_BE8(rev r8, r8 )
str r3, [r2, #GICH_HCR]
str r4, [r2, #GICH_VMCR]
@@ -465,9 +486,10 @@ vcpu .req r0 @ vcpu pointer always in r0
/* Restore list registers */
add r2, r2, #GICH_LR0
- add r3, r11, #VGIC_CPU_LR
+ add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
1: ldr r6, [r3], #4
+ARM_BE8(rev r6, r6 )
str r6, [r2], #4
subs r4, r4, #1
bne 1b
@@ -498,7 +520,7 @@ vcpu .req r0 @ vcpu pointer always in r0
mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
isb
- mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL
+ mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
ldr r4, =VCPU_TIMER_CNTV_CVAL
add r5, vcpu, r4
strd r2, r3, [r5]
@@ -538,12 +560,12 @@ vcpu .req r0 @ vcpu pointer always in r0
ldr r2, [r4, #KVM_TIMER_CNTVOFF]
ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
- mcrr p15, 4, r2, r3, c14 @ CNTVOFF
+ mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF
ldr r4, =VCPU_TIMER_CNTV_CVAL
add r5, vcpu, r4
ldrd r2, r3, [r5]
- mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL
+ mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
isb
ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
@@ -597,17 +619,14 @@ vcpu .req r0 @ vcpu pointer always in r0
/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
.macro configure_hyp_role operation
- mrc p15, 4, r2, c1, c1, 0 @ HCR
- bic r2, r2, #HCR_VIRT_EXCP_MASK
- ldr r3, =HCR_GUEST_MASK
.if \operation == vmentry
- orr r2, r2, r3
+ ldr r2, [vcpu, #VCPU_HCR]
ldr r3, [vcpu, #VCPU_IRQ_LINES]
orr r2, r2, r3
.else
- bic r2, r2, r3
+ mov r2, #0
.endif
- mcr p15, 4, r2, c1, c1, 0
+ mcr p15, 4, r2, c1, c1, 0 @ HCR
.endm
.macro load_vcpu
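
The ARM_BE8(rev ...) lines added above compensate for the GIC control interface being little-endian MMIO: a BE8 kernel that moves raw words with ldr/str must byte-reverse them itself. From C, the endian-aware MMIO accessors hide the same swap; a sketch (the function name is illustrative):

/* Sketch: C-side equivalent of the ARM_BE8(rev ...) fixups above.
 * readl_relaxed() already performs the LE-to-CPU swap on BE8 kernels,
 * so no explicit rev is needed when accessing GICH_* registers from C. */
static inline u32 gich_read_sketch(void __iomem *gich_base, u32 offset)
{
	return readl_relaxed(gich_base + offset);
}
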
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 72a12f2171b2..4cb5a93182e9 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,6 +23,68 @@
#include "trace.h"
+static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
+{
+ void *datap = NULL;
+ union {
+ u8 byte;
+ u16 hword;
+ u32 word;
+ u64 dword;
+ } tmp;
+
+ switch (len) {
+ case 1:
+ tmp.byte = data;
+ datap = &tmp.byte;
+ break;
+ case 2:
+ tmp.hword = data;
+ datap = &tmp.hword;
+ break;
+ case 4:
+ tmp.word = data;
+ datap = &tmp.word;
+ break;
+ case 8:
+ tmp.dword = data;
+ datap = &tmp.dword;
+ break;
+ }
+
+ memcpy(buf, datap, len);
+}
+
+static unsigned long mmio_read_buf(char *buf, unsigned int len)
+{
+ unsigned long data = 0;
+ union {
+ u16 hword;
+ u32 word;
+ u64 dword;
+ } tmp;
+
+ switch (len) {
+ case 1:
+ data = buf[0];
+ break;
+ case 2:
+ memcpy(&tmp.hword, buf, len);
+ data = tmp.hword;
+ break;
+ case 4:
+ memcpy(&tmp.word, buf, len);
+ data = tmp.word;
+ break;
+ case 8:
+ memcpy(&tmp.dword, buf, len);
+ data = tmp.dword;
+ break;
+ }
+
+ return data;
+}
+
/**
* kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
* @vcpu: The VCPU pointer
@@ -33,28 +95,27 @@
*/
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- unsigned long *dest;
+ unsigned long data;
unsigned int len;
int mask;
if (!run->mmio.is_write) {
- dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
- *dest = 0;
-
len = run->mmio.len;
if (len > sizeof(unsigned long))
return -EINVAL;
- memcpy(dest, run->mmio.data, len);
-
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
- *((u64 *)run->mmio.data));
+ data = mmio_read_buf(run->mmio.data, len);
if (vcpu->arch.mmio_decode.sign_extend &&
len < sizeof(unsigned long)) {
mask = 1U << ((len * 8) - 1);
- *dest = (*dest ^ mask) - mask;
+ data = (data ^ mask) - mask;
}
+
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+ data);
+ data = vcpu_data_host_to_guest(vcpu, data, len);
+ *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
}
return 0;
@@ -63,7 +124,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_exit_mmio *mmio)
{
- unsigned long rt, len;
+ unsigned long rt;
+ int len;
bool is_write, sign_extend;
if (kvm_vcpu_dabt_isextabt(vcpu)) {
@@ -86,12 +148,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
sign_extend = kvm_vcpu_dabt_issext(vcpu);
rt = kvm_vcpu_dabt_get_rd(vcpu);
- if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
- /* IO memory trying to read/write pc */
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
-
mmio->is_write = is_write;
mmio->phys_addr = fault_ipa;
mmio->len = len;
@@ -110,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
{
struct kvm_exit_mmio mmio;
+ unsigned long data;
unsigned long rt;
int ret;
@@ -130,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
rt = vcpu->arch.mmio_decode.rt;
+ data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
+
trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
KVM_TRACE_MMIO_READ_UNSATISFIED,
mmio.len, fault_ipa,
- (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0);
+ (mmio.is_write) ? data : 0);
if (mmio.is_write)
- memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len);
+ mmio_write_buf(mmio.data, mmio.len, data);
if (vgic_handle_mmio(vcpu, run, &mmio))
return 1;
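
One detail of the rework above worth unpacking is the retained sign extension: with mask set to the sign bit of the narrow load, (data ^ mask) - mask propagates that bit through the upper bits. A standalone illustration (the function name is illustrative):

/* Standalone illustration of the sign extension kept in
 * kvm_handle_mmio_return(): (data ^ mask) - mask, where mask is the
 * sign bit of the narrow load. */
static unsigned long sign_extend_sketch(unsigned long data, unsigned int len)
{
	unsigned long mask = 1UL << (len * 8 - 1);

	return (data ^ mask) - mask;
}

/* Example: a 1-byte load of 0x80 (len = 1) gives
 * (0x80 ^ 0x80) - 0x80 = -0x80, i.e. 0xffffff80 on a 32-bit kernel,
 * while 0x7f stays 0x7f. */
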
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index e04613906f1b..eea03069161b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
+#include <linux/hugetlb.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
@@ -41,6 +42,10 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
+#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+
+#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
+
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
/*
@@ -85,9 +90,19 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
return p;
}
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+{
+ pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+ pgd_clear(pgd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pud_free(NULL, pud_table);
+ put_page(virt_to_page(pgd));
+}
+
static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
pmd_t *pmd_table = pmd_offset(pud, 0);
+ VM_BUG_ON(pud_huge(*pud));
pud_clear(pud);
kvm_tlb_flush_vmid_ipa(kvm, addr);
pmd_free(NULL, pmd_table);
@@ -97,73 +112,186 @@ static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
pte_t *pte_table = pte_offset_kernel(pmd, 0);
+ VM_BUG_ON(kvm_pmd_huge(*pmd));
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
pte_free_kernel(NULL, pte_table);
put_page(virt_to_page(pmd));
}
-static bool pmd_empty(pmd_t *pmd)
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+ phys_addr_t addr, phys_addr_t end)
{
- struct page *pmd_page = virt_to_page(pmd);
- return page_count(pmd_page) == 1;
+ phys_addr_t start_addr = addr;
+ pte_t *pte, *start_pte;
+
+ start_pte = pte = pte_offset_kernel(pmd, addr);
+ do {
+ if (!pte_none(*pte)) {
+ kvm_set_pte(pte, __pte(0));
+ put_page(virt_to_page(pte));
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ }
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+
+ if (kvm_pte_table_empty(start_pte))
+ clear_pmd_entry(kvm, pmd, start_addr);
}
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+ phys_addr_t addr, phys_addr_t end)
{
- if (pte_present(*pte)) {
- kvm_set_pte(pte, __pte(0));
- put_page(virt_to_page(pte));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- }
+ phys_addr_t next, start_addr = addr;
+ pmd_t *pmd, *start_pmd;
+
+ start_pmd = pmd = pmd_offset(pud, addr);
+ do {
+ next = kvm_pmd_addr_end(addr, end);
+ if (!pmd_none(*pmd)) {
+ if (kvm_pmd_huge(*pmd)) {
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ put_page(virt_to_page(pmd));
+ } else {
+ unmap_ptes(kvm, pmd, addr, next);
+ }
+ }
+ } while (pmd++, addr = next, addr != end);
+
+ if (kvm_pmd_table_empty(start_pmd))
+ clear_pud_entry(kvm, pud, start_addr);
}
-static bool pte_empty(pte_t *pte)
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+ phys_addr_t addr, phys_addr_t end)
{
- struct page *pte_page = virt_to_page(pte);
- return page_count(pte_page) == 1;
+ phys_addr_t next, start_addr = addr;
+ pud_t *pud, *start_pud;
+
+ start_pud = pud = pud_offset(pgd, addr);
+ do {
+ next = kvm_pud_addr_end(addr, end);
+ if (!pud_none(*pud)) {
+ if (pud_huge(*pud)) {
+ pud_clear(pud);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ put_page(virt_to_page(pud));
+ } else {
+ unmap_pmds(kvm, pud, addr, next);
+ }
+ }
+ } while (pud++, addr = next, addr != end);
+
+ if (kvm_pud_table_empty(start_pud))
+ clear_pgd_entry(kvm, pgd, start_addr);
}
+
static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
- unsigned long long start, u64 size)
+ phys_addr_t start, u64 size)
{
pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ phys_addr_t addr = start, end = start + size;
+ phys_addr_t next;
+
+ pgd = pgdp + pgd_index(addr);
+ do {
+ next = kvm_pgd_addr_end(addr, end);
+ unmap_puds(kvm, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+ phys_addr_t addr, phys_addr_t end)
+{
pte_t *pte;
- unsigned long long addr = start, end = start + size;
- u64 range;
- while (addr < end) {
- pgd = pgdp + pgd_index(addr);
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud)) {
- addr += PUD_SIZE;
- continue;
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ if (!pte_none(*pte)) {
+ hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+ kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
}
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+}
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- addr += PMD_SIZE;
- continue;
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+ phys_addr_t addr, phys_addr_t end)
+{
+ pmd_t *pmd;
+ phys_addr_t next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = kvm_pmd_addr_end(addr, end);
+ if (!pmd_none(*pmd)) {
+ if (kvm_pmd_huge(*pmd)) {
+ hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+ kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+ } else {
+ stage2_flush_ptes(kvm, pmd, addr, next);
+ }
}
+ } while (pmd++, addr = next, addr != end);
+}
- pte = pte_offset_kernel(pmd, addr);
- clear_pte_entry(kvm, pte, addr);
- range = PAGE_SIZE;
-
- /* If we emptied the pte, walk back up the ladder */
- if (pte_empty(pte)) {
- clear_pmd_entry(kvm, pmd, addr);
- range = PMD_SIZE;
- if (pmd_empty(pmd)) {
- clear_pud_entry(kvm, pud, addr);
- range = PUD_SIZE;
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+ phys_addr_t addr, phys_addr_t end)
+{
+ pud_t *pud;
+ phys_addr_t next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = kvm_pud_addr_end(addr, end);
+ if (!pud_none(*pud)) {
+ if (pud_huge(*pud)) {
+ hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+ kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+ } else {
+ stage2_flush_pmds(kvm, pud, addr, next);
}
}
+ } while (pud++, addr = next, addr != end);
+}
- addr += range;
- }
+static void stage2_flush_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+ phys_addr_t next;
+ pgd_t *pgd;
+
+ pgd = kvm->arch.pgd + pgd_index(addr);
+ do {
+ next = kvm_pgd_addr_end(addr, end);
+ stage2_flush_puds(kvm, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+void stage2_flush_vm(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots)
+ stage2_flush_memslot(kvm, memslot);
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
}
/**
@@ -178,14 +306,14 @@ void free_boot_hyp_pgd(void)
if (boot_hyp_pgd) {
unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
- kfree(boot_hyp_pgd);
+ free_pages((unsigned long)boot_hyp_pgd, pgd_order);
boot_hyp_pgd = NULL;
}
if (hyp_pgd)
unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
- kfree(init_bounce_page);
+ free_page((unsigned long)init_bounce_page);
init_bounce_page = NULL;
mutex_unlock(&kvm_hyp_pgd_mutex);
@@ -215,7 +343,7 @@ void free_hyp_pgds(void)
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
- kfree(hyp_pgd);
+ free_pages((unsigned long)hyp_pgd, pgd_order);
hyp_pgd = NULL;
}
@@ -404,9 +532,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
if (!pgd)
return -ENOMEM;
- /* stage-2 pgd must be aligned to its size */
- VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
-
memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
@@ -451,29 +576,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
kvm->arch.pgd = NULL;
}
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
- phys_addr_t addr, const pte_t *new_pte, bool iomap)
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte, old_pte;
- /* Create 2nd stage page table mapping - Level 1 */
pgd = kvm->arch.pgd + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
if (!cache)
- return 0; /* ignore calls from kvm_set_spte_hva */
+ return NULL;
pmd = mmu_memory_cache_alloc(cache);
pud_populate(NULL, pud, pmd);
get_page(virt_to_page(pud));
}
- pmd = pmd_offset(pud, addr);
+ return pmd_offset(pud, addr);
+}
+
+static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+ *cache, phys_addr_t addr, const pmd_t *new_pmd)
+{
+ pmd_t *pmd, old_pmd;
- /* Create 2nd stage page table mapping - Level 2 */
+ pmd = stage2_get_pmd(kvm, cache, addr);
+ VM_BUG_ON(!pmd);
+
+ /*
+ * Mapping in huge pages should only happen through a fault. If a
+ * page is merged into a transparent huge page, the individual
+ * subpages of that huge page should be unmapped through MMU
+ * notifiers before we get here.
+ *
+	 * Merging of CompoundPages is not supported; they should be
+	 * split first, unmapped, merged, and mapped back in on-demand.
+ */
+ VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
+
+ old_pmd = *pmd;
+ kvm_set_pmd(pmd, *new_pmd);
+ if (pmd_present(old_pmd))
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ else
+ get_page(virt_to_page(pmd));
+ return 0;
+}
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr, const pte_t *new_pte, bool iomap)
+{
+ pmd_t *pmd;
+ pte_t *pte, old_pte;
+
+ /* Create stage-2 page table mapping - Level 1 */
+ pmd = stage2_get_pmd(kvm, cache, addr);
+ if (!pmd) {
+ /*
+ * Ignore calls from kvm_set_spte_hva for unallocated
+ * address ranges.
+ */
+ return 0;
+ }
+
+ /* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
@@ -520,7 +687,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
- kvm_set_s2pte_writable(&pte);
ret = mmu_topup_memory_cache(&cache, 2, 2);
if (ret)
@@ -539,23 +705,97 @@ out:
return ret;
}
+static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
+{
+ pfn_t pfn = *pfnp;
+ gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+ if (PageTransCompound(pfn_to_page(pfn))) {
+ unsigned long mask;
+ /*
+ * The address we faulted on is backed by a transparent huge
+ * page. However, because we map the compound huge page and
+ * not the individual tail page, we need to transfer the
+ * refcount to the head page. We have to be careful that the
+ * THP doesn't start to split while we are adjusting the
+ * refcounts.
+ *
+ * We are sure this doesn't happen, because mmu_notifier_retry
+ * was successful and we are holding the mmu_lock, so if this
+ * THP is trying to split, it will be blocked in the mmu
+ * notifier before touching any of the pages, specifically
+ * before being able to call __split_huge_page_refcount().
+ *
+ * We can therefore safely transfer the refcount from PG_tail
+ * to PG_head and switch the pfn from a tail page to the head
+ * page accordingly.
+ */
+ mask = PTRS_PER_PMD - 1;
+ VM_BUG_ON((gfn & mask) != (pfn & mask));
+ if (pfn & mask) {
+ *ipap &= PMD_MASK;
+ kvm_release_pfn_clean(pfn);
+ pfn &= ~mask;
+ kvm_get_pfn(pfn);
+ *pfnp = pfn;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ return false;
+
+ return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- gfn_t gfn, struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
{
- pte_t new_pte;
- pfn_t pfn;
int ret;
- bool write_fault, writable;
+ bool write_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
+ gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+ struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+ struct vm_area_struct *vma;
+ pfn_t pfn;
+ pgprot_t mem_type = PAGE_S2;
- write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+ write_fault = kvm_is_write_fault(vcpu);
if (fault_status == FSC_PERM && !write_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
+ /* Let's check if we will get back a huge page backed by hugetlbfs */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma_intersection(current->mm, hva, hva + 1);
+ if (is_vm_hugetlb_page(vma)) {
+ hugetlb = true;
+ gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+ } else {
+ /*
+ * Pages belonging to memslots that don't have the same
+ * alignment for userspace and IPA cannot be mapped using
+ * block descriptors even if the pages belong to a THP for
+ * the process, because the stage-2 block descriptor will
+	 * cover more than a single THP and we lose atomicity for
+ * unmapping, updates, and splits of the THP or other pages
+ * in the stage-2 block range.
+ */
+ if ((memslot->userspace_addr & ~PMD_MASK) !=
+ ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
+ force_pte = true;
+ }
+ up_read(&current->mm->mmap_sem);
+
/* We need minimum second+third level pages */
ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
if (ret)
@@ -573,26 +813,44 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
*/
smp_rmb();
- pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+ pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
if (is_error_pfn(pfn))
return -EFAULT;
- new_pte = pfn_pte(pfn, PAGE_S2);
- coherent_icache_guest_page(vcpu->kvm, gfn);
+ if (kvm_is_mmio_pfn(pfn))
+ mem_type = PAGE_S2_DEVICE;
- spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ spin_lock(&kvm->mmu_lock);
+ if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
- if (writable) {
- kvm_set_s2pte_writable(&new_pte);
- kvm_set_pfn_dirty(pfn);
+ if (!hugetlb && !force_pte)
+ hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+
+ if (hugetlb) {
+ pmd_t new_pmd = pfn_pmd(pfn, mem_type);
+ new_pmd = pmd_mkhuge(new_pmd);
+ if (writable) {
+ kvm_set_s2pmd_writable(&new_pmd);
+ kvm_set_pfn_dirty(pfn);
+ }
+ coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+ ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+ } else {
+ pte_t new_pte = pfn_pte(pfn, mem_type);
+ if (writable) {
+ kvm_set_s2pte_writable(&new_pte);
+ kvm_set_pfn_dirty(pfn);
+ }
+ coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+ ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
+ mem_type == PAGE_S2_DEVICE);
}
- stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+
out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
+ spin_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return 0;
+ return ret;
}
/**
@@ -612,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
unsigned long fault_status;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
- bool is_iabt;
+ unsigned long hva;
+ bool is_iabt, write_fault, writable;
gfn_t gfn;
int ret, idx;
@@ -623,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- fault_status = kvm_vcpu_trap_get_fault(vcpu);
+ fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
- kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
- kvm_vcpu_trap_get_class(vcpu), fault_status);
+ kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+ kvm_vcpu_trap_get_class(vcpu),
+ (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+ (unsigned long)kvm_vcpu_get_hsr(vcpu));
return -EFAULT;
}
idx = srcu_read_lock(&vcpu->kvm->srcu);
gfn = fault_ipa >> PAGE_SHIFT;
- if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+ write_fault = kvm_is_write_fault(vcpu);
+ if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -641,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- if (fault_status != FSC_FAULT) {
- kvm_err("Unsupported fault status on io memory: %#lx\n",
- fault_status);
- ret = -EFAULT;
- goto out_unlock;
- }
-
/*
* The IPA is reported as [MAX:12], so we need to
* complement it with the bottom 12 bits from the
@@ -659,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
- ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
out_unlock:
@@ -779,9 +1034,9 @@ int kvm_mmu_init(void)
{
int err;
- hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
- hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
- hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+ hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+ hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+ hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
/*
@@ -791,7 +1046,7 @@ int kvm_mmu_init(void)
size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
phys_addr_t phys_base;
- init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ init_bounce_page = (void *)__get_free_page(GFP_KERNEL);
if (!init_bounce_page) {
kvm_err("Couldn't allocate HYP init bounce page\n");
err = -ENOMEM;
@@ -808,7 +1063,7 @@ int kvm_mmu_init(void)
*/
kvm_flush_dcache_to_poc(init_bounce_page, len);
- phys_base = virt_to_phys(init_bounce_page);
+ phys_base = kvm_virt_to_phys(init_bounce_page);
hyp_idmap_vector += phys_base - hyp_idmap_start;
hyp_idmap_start = phys_base;
hyp_idmap_end = phys_base + len;
@@ -817,8 +1072,9 @@ int kvm_mmu_init(void)
(unsigned long)phys_base);
}
- hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
- boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+ hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+ boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+
if (!hyp_pgd || !boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
err = -ENOMEM;
@@ -864,3 +1120,49 @@ out:
free_hyp_pgds();
return err;
}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
+{
+ gpa_t gpa = old->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = old->npages << PAGE_SHIFT;
+ if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+ spin_lock(&kvm->mmu_lock);
+ unmap_stage2_range(kvm, gpa, size);
+ spin_unlock(&kvm->mmu_lock);
+ }
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
+{
+ return 0;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
+{
+}
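
The force_pte logic added to user_mem_abort() deserves a worked example: a stage-2 PMD block mapping is only usable when the userspace VA and the IPA share the same offset within a PMD_SIZE region (2MiB with 4K pages); otherwise one block descriptor would span mappings that user space can change independently. A self-contained sketch of the check, with EX_PMD_MASK standing in for PMD_MASK and both names illustrative:

/* Sketch of the alignment test behind force_pte in user_mem_abort().
 * EX_PMD_MASK models PMD_MASK for 2MiB sections. */
#define EX_PMD_MASK (~((1UL << 21) - 1))

static int can_use_block_mapping(unsigned long hva, unsigned long ipa)
{
	return (hva & ~EX_PMD_MASK) == (ipa & ~EX_PMD_MASK);
}

/* can_use_block_mapping(0x40200000, 0x80200000) -> 1: same 2MiB offset.
 * can_use_block_mapping(0x40201000, 0x80200000) -> 0: offsets differ,
 * so the fault path falls back to 4K PTE mappings. */
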
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 7ee5bb7a3667..09cf37737ee2 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -18,6 +18,7 @@
#include <linux/kvm_host.h>
#include <linux/wait.h>
+#include <asm/cputype.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_psci.h>
@@ -26,6 +27,36 @@
* as described in ARM document number ARM DEN 0022A.
*/
+#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+
+static unsigned long psci_affinity_mask(unsigned long affinity_level)
+{
+ if (affinity_level <= 3)
+ return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
+
+ return 0;
+}
+
+static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ /*
+	 * NOTE: For simplicity, we treat VCPU suspend emulation the
+	 * same as WFI (Wait-for-interrupt) emulation.
+	 *
+	 * This means that for KVM the wakeup events are interrupts and
+	 * this is consistent with the intended use of StateID as
+	 * described in section 5.4.1 of the PSCI v0.2 specification
+	 * (ARM DEN 0022A).
+	 *
+	 * Further, we also treat a power-down request the same as a
+	 * stand-by request, as per section 5.4.2 clause 3 of the PSCI
+	 * v0.2 specification (ARM DEN 0022A). This means all suspend
+	 * states for KVM will preserve the register state.
+ */
+ kvm_vcpu_block(vcpu);
+
+ return PSCI_RET_SUCCESS;
+}
+
static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
{
vcpu->arch.pause = true;
@@ -34,25 +65,41 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
{
struct kvm *kvm = source_vcpu->kvm;
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu = NULL, *tmp;
wait_queue_head_t *wq;
unsigned long cpu_id;
+ unsigned long context_id;
+ unsigned long mpidr;
phys_addr_t target_pc;
+ int i;
cpu_id = *vcpu_reg(source_vcpu, 1);
if (vcpu_mode_is_32bit(source_vcpu))
cpu_id &= ~((u32) 0);
- if (cpu_id >= atomic_read(&kvm->online_vcpus))
- return KVM_PSCI_RET_INVAL;
-
- target_pc = *vcpu_reg(source_vcpu, 2);
+ kvm_for_each_vcpu(i, tmp, kvm) {
+ mpidr = kvm_vcpu_get_mpidr(tmp);
+ if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
+ vcpu = tmp;
+ break;
+ }
+ }
- vcpu = kvm_get_vcpu(kvm, cpu_id);
+ /*
+ * Make sure the caller requested a valid CPU and that the CPU is
+ * turned off.
+ */
+ if (!vcpu)
+ return PSCI_RET_INVALID_PARAMS;
+ if (!vcpu->arch.pause) {
+ if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+ return PSCI_RET_ALREADY_ON;
+ else
+ return PSCI_RET_INVALID_PARAMS;
+ }
- wq = kvm_arch_vcpu_wq(vcpu);
- if (!waitqueue_active(wq))
- return KVM_PSCI_RET_INVAL;
+ target_pc = *vcpu_reg(source_vcpu, 2);
+ context_id = *vcpu_reg(source_vcpu, 3);
kvm_reset_vcpu(vcpu);
@@ -62,26 +109,165 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
vcpu_set_thumb(vcpu);
}
+ /* Propagate caller endianness */
+ if (kvm_vcpu_is_be(source_vcpu))
+ kvm_vcpu_set_be(vcpu);
+
*vcpu_pc(vcpu) = target_pc;
+ /*
+ * NOTE: We always update r0 (or x0) because for PSCI v0.1
+	 * the general purpose registers are undefined upon CPU_ON.
+ */
+ *vcpu_reg(vcpu, 0) = context_id;
vcpu->arch.pause = false;
smp_mb(); /* Make sure the above is visible */
+ wq = kvm_arch_vcpu_wq(vcpu);
wake_up_interruptible(wq);
- return KVM_PSCI_RET_SUCCESS;
+ return PSCI_RET_SUCCESS;
}
-/**
- * kvm_psci_call - handle PSCI call if r0 value is in range
- * @vcpu: Pointer to the VCPU struct
- *
- * Handle PSCI calls from guests through traps from HVC or SMC instructions.
- * The calling convention is similar to SMC calls to the secure world where
- * the function number is placed in r0 and this function returns true if the
- * function number specified in r0 is withing the PSCI range, and false
- * otherwise.
- */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+ int i;
+ unsigned long mpidr;
+ unsigned long target_affinity;
+ unsigned long target_affinity_mask;
+ unsigned long lowest_affinity_level;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tmp;
+
+ target_affinity = *vcpu_reg(vcpu, 1);
+ lowest_affinity_level = *vcpu_reg(vcpu, 2);
+
+ /* Determine target affinity mask */
+ target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+ if (!target_affinity_mask)
+ return PSCI_RET_INVALID_PARAMS;
+
+ /* Ignore other bits of target affinity */
+ target_affinity &= target_affinity_mask;
+
+ /*
+	 * If one or more VCPUs matching the target affinity are
+	 * running, report ON; otherwise report OFF.
+ */
+ kvm_for_each_vcpu(i, tmp, kvm) {
+ mpidr = kvm_vcpu_get_mpidr(tmp);
+ if (((mpidr & target_affinity_mask) == target_affinity) &&
+ !tmp->arch.pause) {
+ return PSCI_0_2_AFFINITY_LEVEL_ON;
+ }
+ }
+
+ return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+ memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+ vcpu->run->system_event.type = type;
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+ if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+ return KVM_ARM_PSCI_0_2;
+
+ return KVM_ARM_PSCI_0_1;
+}
+
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+ int ret = 1;
+ unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+ unsigned long val;
+
+ switch (psci_fn) {
+ case PSCI_0_2_FN_PSCI_VERSION:
+ /*
+ * Bits[31:16] = Major Version = 0
+ * Bits[15:0] = Minor Version = 2
+ */
+ val = 2;
+ break;
+ case PSCI_0_2_FN_CPU_SUSPEND:
+ case PSCI_0_2_FN64_CPU_SUSPEND:
+ val = kvm_psci_vcpu_suspend(vcpu);
+ break;
+ case PSCI_0_2_FN_CPU_OFF:
+ kvm_psci_vcpu_off(vcpu);
+ val = PSCI_RET_SUCCESS;
+ break;
+ case PSCI_0_2_FN_CPU_ON:
+ case PSCI_0_2_FN64_CPU_ON:
+ val = kvm_psci_vcpu_on(vcpu);
+ break;
+ case PSCI_0_2_FN_AFFINITY_INFO:
+ case PSCI_0_2_FN64_AFFINITY_INFO:
+ val = kvm_psci_vcpu_affinity_info(vcpu);
+ break;
+ case PSCI_0_2_FN_MIGRATE:
+ case PSCI_0_2_FN64_MIGRATE:
+ val = PSCI_RET_NOT_SUPPORTED;
+ break;
+ case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+ /*
+		 * Either the Trusted OS is MP and hence does not require
+		 * migration, or there is no Trusted OS present.
+ */
+ val = PSCI_0_2_TOS_MP;
+ break;
+ case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+ case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+ val = PSCI_RET_NOT_SUPPORTED;
+ break;
+ case PSCI_0_2_FN_SYSTEM_OFF:
+ kvm_psci_system_off(vcpu);
+ /*
+		 * We shouldn't be going back to the guest VCPU after
+		 * receiving a SYSTEM_OFF request.
+		 *
+		 * If user space accidentally or deliberately resumes the
+		 * guest VCPU after a SYSTEM_OFF request, the guest VCPU
+		 * should see an internal failure from the PSCI return
+		 * value. To achieve this, we preload r0 (or x0) with the
+		 * PSCI return value INTERNAL_FAILURE.
+ */
+ val = PSCI_RET_INTERNAL_FAILURE;
+ ret = 0;
+ break;
+ case PSCI_0_2_FN_SYSTEM_RESET:
+ kvm_psci_system_reset(vcpu);
+ /*
+ * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+ * with PSCI return value INTERNAL_FAILURE.
+ */
+ val = PSCI_RET_INTERNAL_FAILURE;
+ ret = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *vcpu_reg(vcpu, 0) = val;
+ return ret;
+}
+
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
{
unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
unsigned long val;
@@ -89,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
switch (psci_fn) {
case KVM_PSCI_FN_CPU_OFF:
kvm_psci_vcpu_off(vcpu);
- val = KVM_PSCI_RET_SUCCESS;
+ val = PSCI_RET_SUCCESS;
break;
case KVM_PSCI_FN_CPU_ON:
val = kvm_psci_vcpu_on(vcpu);
break;
case KVM_PSCI_FN_CPU_SUSPEND:
case KVM_PSCI_FN_MIGRATE:
- val = KVM_PSCI_RET_NI;
+ val = PSCI_RET_NOT_SUPPORTED;
break;
-
default:
- return false;
+ return -EINVAL;
}
*vcpu_reg(vcpu, 0) = val;
- return true;
+ return 1;
+}
+
+/**
+ * kvm_psci_call - handle PSCI call if r0 value is in range
+ * @vcpu: Pointer to the VCPU struct
+ *
+ * Handle PSCI calls from guests through traps from HVC instructions.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
+ */
+int kvm_psci_call(struct kvm_vcpu *vcpu)
+{
+ switch (kvm_psci_version(vcpu)) {
+ case KVM_ARM_PSCI_0_2:
+ return kvm_psci_0_2_call(vcpu);
+ case KVM_ARM_PSCI_0_1:
+ return kvm_psci_0_1_call(vcpu);
+ default:
+ return -EINVAL;
+	}
}
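
PSCI 0.2 handling is opt-in per vcpu: kvm_psci_version() above reports 0.2 only when user space set the KVM_ARM_VCPU_PSCI_0_2 feature bit at vcpu init time. A minimal user-space sketch follows; target would typically come from the new KVM_ARM_PREFERRED_TARGET path added in this series, the wrapper name is illustrative, and error handling is elided:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: create the vcpu with the PSCI 0.2 feature bit set so that
 * kvm_psci_call() routes to kvm_psci_0_2_call(). */
static void vcpu_init_with_psci_0_2(int vcpu_fd, unsigned int target)
{
	struct kvm_vcpu_init init = { .target = target };

	init.features[KVM_ARM_VCPU_PSCI_0_2 / 32] |=
		1U << (KVM_ARM_VCPU_PSCI_0_2 % 32);

	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
}
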
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b80256b554cd..f558c073c023 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -27,16 +27,21 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_coproc.h>
+#include <kvm/arm_arch_timer.h>
+
/******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
*/
-static const int a15_max_cpu_idx = 3;
-
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
};
+static const struct kvm_irq_level cortexa_vtimer_irq = {
+ { .irq = 27 },
+ .level = 1,
+};
+
/*******************************************************************************
* Exported reset function
@@ -51,24 +56,28 @@ static struct kvm_regs a15_regs_reset = {
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
- struct kvm_regs *cpu_reset;
+ struct kvm_regs *reset_regs;
+ const struct kvm_irq_level *cpu_vtimer_irq;
switch (vcpu->arch.target) {
+ case KVM_ARM_TARGET_CORTEX_A7:
case KVM_ARM_TARGET_CORTEX_A15:
- if (vcpu->vcpu_id > a15_max_cpu_idx)
- return -EINVAL;
- cpu_reset = &a15_regs_reset;
+ reset_regs = &cortexa_regs_reset;
vcpu->arch.midr = read_cpuid_id();
+ cpu_vtimer_irq = &cortexa_vtimer_irq;
break;
default:
return -ENODEV;
}
/* Reset core registers */
- memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+ memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
/* Reset CP15 registers */
kvm_reset_coprocs(vcpu);
+ /* Reset arch_timer context */
+ kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
return 0;
}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index a8e73ed5ad5b..b1d640f78623 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault,
__entry->ipa = ipa;
),
- TP_printk("guest fault at PC %#08lx (hxfar %#08lx, "
- "ipa %#16llx, hsr %#08lx",
- __entry->vcpu_pc, __entry->hxfar,
- __entry->ipa, __entry->hsr)
+ TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+ __entry->ipa, __entry->hsr,
+ __entry->hxfar, __entry->vcpu_pc)
);
TRACE_EVENT(kvm_irq_line,
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
deleted file mode 100644
index 17c5ac7d10ed..000000000000
--- a/arch/arm/kvm/vgic.c
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/cpu.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#include <linux/irqchip/arm-gic.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-
-/*
- * How the whole thing works (courtesy of Christoffer Dall):
- *
- * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- * something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
- * bitmap (this bitmap is updated by both user land ioctls and guest
- * mmio ops, and other in-kernel peripherals such as the
- * arch. timers) and indicate the 'wire' state.
- * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
- * recalculated
- * - To calculate the oracle, we need info for each cpu from
- * compute_pending_for_cpu, which considers:
- * - PPI: dist->irq_state & dist->irq_enable
- * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
- * - irq_spi_target is a 'formatted' version of the GICD_ICFGR
- * registers, stored on each vcpu. We only keep one bit of
- * information per interrupt, making sure that only one vcpu can
- * accept the interrupt.
- * - The same is true when injecting an interrupt, except that we only
- * consider a single interrupt at a time. The irq_spi_cpu array
- * contains the target CPU for each SPI.
- *
- * The handling of level interrupts adds some extra complexity. We
- * need to track when the interrupt has been EOIed, so we can sample
- * the 'line' again. This is achieved as such:
- *
- * - When a level interrupt is moved onto a vcpu, the corresponding
- * bit in irq_active is set. As long as this bit is set, the line
- * will be ignored for further interrupts. The interrupt is injected
- * into the vcpu with the GICH_LR_EOI bit set (generate a
- * maintenance interrupt on EOI).
- * - When the interrupt is EOIed, the maintenance interrupt fires,
- * and clears the corresponding bit in irq_active. This allow the
- * interrupt line to be sampled again.
- */
-
-#define VGIC_ADDR_UNDEF (-1)
-#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
-
-/* Physical address of vgic virtual cpu interface */
-static phys_addr_t vgic_vcpu_base;
-
-/* Virtual control interface base address */
-static void __iomem *vgic_vctrl_base;
-
-static struct device_node *vgic_node;
-
-#define ACCESS_READ_VALUE (1 << 0)
-#define ACCESS_READ_RAZ (0 << 0)
-#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
-#define ACCESS_WRITE_IGNORED (0 << 1)
-#define ACCESS_WRITE_SETBIT (1 << 1)
-#define ACCESS_WRITE_CLEARBIT (2 << 1)
-#define ACCESS_WRITE_VALUE (3 << 1)
-#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
-
-static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_update_state(struct kvm *kvm);
-static void vgic_kick_vcpus(struct kvm *kvm);
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
-static u32 vgic_nr_lr;
-
-static unsigned int vgic_maint_irq;
-
-static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
- int cpuid, u32 offset)
-{
- offset >>= 2;
- if (!offset)
- return x->percpu[cpuid].reg;
- else
- return x->shared.reg + offset - 1;
-}
-
-static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
- int cpuid, int irq)
-{
- if (irq < VGIC_NR_PRIVATE_IRQS)
- return test_bit(irq, x->percpu[cpuid].reg_ul);
-
- return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
-}
-
-static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
- int irq, int val)
-{
- unsigned long *reg;
-
- if (irq < VGIC_NR_PRIVATE_IRQS) {
- reg = x->percpu[cpuid].reg_ul;
- } else {
- reg = x->shared.reg_ul;
- irq -= VGIC_NR_PRIVATE_IRQS;
- }
-
- if (val)
- set_bit(irq, reg);
- else
- clear_bit(irq, reg);
-}
-
-static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
-{
- if (unlikely(cpuid >= VGIC_MAX_CPUS))
- return NULL;
- return x->percpu[cpuid].reg_ul;
-}
-
-static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
-{
- return x->shared.reg_ul;
-}
-
-static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
-{
- offset >>= 2;
- BUG_ON(offset > (VGIC_NR_IRQS / 4));
- if (offset < 4)
- return x->percpu[cpuid] + offset;
- else
- return x->shared + offset - 8;
-}
-
-#define VGIC_CFG_LEVEL 0
-#define VGIC_CFG_EDGE 1
-
-static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int irq_val;
-
- irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
- return irq_val == VGIC_CFG_EDGE;
-}
-
-static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
-}
-
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
-}
-
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
-}
-
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
-}
-
-static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
-}
-
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
-}
-
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
-}
-
-static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
-{
- if (irq < VGIC_NR_PRIVATE_IRQS)
- set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
- else
- set_bit(irq - VGIC_NR_PRIVATE_IRQS,
- vcpu->arch.vgic_cpu.pending_shared);
-}
-
-static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
-{
- if (irq < VGIC_NR_PRIVATE_IRQS)
- clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
- else
- clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
- vcpu->arch.vgic_cpu.pending_shared);
-}
-
-static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
-{
- return *((u32 *)mmio->data) & mask;
-}
-
-static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
-{
- *((u32 *)mmio->data) = value & mask;
-}
-
-/**
- * vgic_reg_access - access vgic register
- * @mmio: pointer to the data describing the mmio access
- * @reg: pointer to the virtual backing of vgic distributor data
- * @offset: least significant 2 bits used for word offset
- * @mode: ACCESS_ mode (see defines above)
- *
- * Helper to make vgic register access easier using one of the access
- * modes defined for vgic register access
- * (read,raz,write-ignored,setbit,clearbit,write)
- */
-static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
- phys_addr_t offset, int mode)
-{
- int word_offset = (offset & 3) * 8;
- u32 mask = (1UL << (mmio->len * 8)) - 1;
- u32 regval;
-
- /*
- * Any alignment fault should have been delivered to the guest
- * directly (ARM ARM B3.12.7 "Prioritization of aborts").
- */
-
- if (reg) {
- regval = *reg;
- } else {
- BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
- regval = 0;
- }
-
- if (mmio->is_write) {
- u32 data = mmio_data_read(mmio, mask) << word_offset;
- switch (ACCESS_WRITE_MASK(mode)) {
- case ACCESS_WRITE_IGNORED:
- return;
-
- case ACCESS_WRITE_SETBIT:
- regval |= data;
- break;
-
- case ACCESS_WRITE_CLEARBIT:
- regval &= ~data;
- break;
-
- case ACCESS_WRITE_VALUE:
- regval = (regval & ~(mask << word_offset)) | data;
- break;
- }
- *reg = regval;
- } else {
- switch (ACCESS_READ_MASK(mode)) {
- case ACCESS_READ_RAZ:
- regval = 0;
- /* fall through */
-
- case ACCESS_READ_VALUE:
- mmio_data_write(mmio, mask, regval >> word_offset);
- }
- }
-}
-
-static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
- u32 reg;
- u32 word_offset = offset & 3;
-
- switch (offset & ~3) {
- case 0: /* CTLR */
- reg = vcpu->kvm->arch.vgic.enabled;
- vgic_reg_access(mmio, &reg, word_offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
- if (mmio->is_write) {
- vcpu->kvm->arch.vgic.enabled = reg & 1;
- vgic_update_state(vcpu->kvm);
- return true;
- }
- break;
-
- case 4: /* TYPER */
- reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
- reg |= (VGIC_NR_IRQS >> 5) - 1;
- vgic_reg_access(mmio, &reg, word_offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
- break;
-
- case 8: /* IIDR */
- reg = 0x4B00043B;
- vgic_reg_access(mmio, &reg, word_offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
- break;
- }
-
- return false;
-}
-
-static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
- vgic_reg_access(mmio, NULL, offset,
- ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
- return false;
-}
-
-static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
- if (mmio->is_write) {
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
- if (mmio->is_write) {
- if (offset < 4) /* Force SGI enabled */
- *reg |= 0xffff;
- vgic_retire_disabled_irqs(vcpu);
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
- if (mmio->is_write) {
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
- if (mmio->is_write) {
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
- return false;
-}
-
-#define GICD_ITARGETSR_SIZE 32
-#define GICD_CPUTARGETS_BITS 8
-#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
-static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int i, c;
- unsigned long *bmap;
- u32 val = 0;
-
- irq -= VGIC_NR_PRIVATE_IRQS;
-
- kvm_for_each_vcpu(c, vcpu, kvm) {
- bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
- for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
- if (test_bit(irq + i, bmap))
- val |= 1 << (c + i * 8);
- }
-
- return val;
-}
-
-static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int i, c;
- unsigned long *bmap;
- u32 target;
-
- irq -= VGIC_NR_PRIVATE_IRQS;
-
- /*
- * Pick the LSB in each byte. This ensures we target exactly
- * one vcpu per IRQ. If the byte is null, assume we target
- * CPU0.
- */
- for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
- int shift = i * GICD_CPUTARGETS_BITS;
- target = ffs((val >> shift) & 0xffU);
- target = target ? (target - 1) : 0;
- dist->irq_spi_cpu[irq + i] = target;
- kvm_for_each_vcpu(c, vcpu, kvm) {
- bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
- if (c == target)
- set_bit(irq + i, bmap);
- else
- clear_bit(irq + i, bmap);
- }
- }
-}
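
The loop above implements the 'LSB in each byte' rule from the comment: an ITARGETSR byte may advertise several candidate CPUs, but only the lowest set bit is honoured, and an empty byte falls back to CPU0. The same decode, modelled standalone with an invented register value:

    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    int main(void)
    {
        unsigned val = 0x00060301;      /* one ITARGETSR word, 4 IRQs */

        for (int i = 0; i < 4; i++) {
            int byte = (val >> (i * 8)) & 0xff;
            int target = ffs(byte);
            target = target ? target - 1 : 0;   /* empty byte -> CPU0 */
            printf("IRQ byte %d: 0x%02x -> vcpu %d\n", i, byte, target);
        }
        return 0;   /* prints vcpu 0, 0, 1, 0 */
    }
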
-
-static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 reg;
-
- /* We treat the banked interrupts targets as read-only */
- if (offset < 32) {
- u32 roreg = 1 << vcpu->vcpu_id;
- roreg |= roreg << 8;
- roreg |= roreg << 16;
-
- vgic_reg_access(mmio, &roreg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
- return false;
- }
-
- reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
- vgic_reg_access(mmio, &reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
- if (mmio->is_write) {
- vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static u32 vgic_cfg_expand(u16 val)
-{
- u32 res = 0;
- int i;
-
- /*
- * Turn a 16bit value like abcd...mnop into a 32bit word
- * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
- */
- for (i = 0; i < 16; i++)
- res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
-
- return res;
-}
-
-static u16 vgic_cfg_compress(u32 val)
-{
- u16 res = 0;
- int i;
-
- /*
- * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
- * abcd...mnop which is what we really care about.
- */
- for (i = 0; i < 16; i++)
- res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
-
- return res;
-}
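
A round trip through the two helpers makes the bit interleaving concrete. The sketch below restates the same logic under local names; it is not the kernel code itself:

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t cfg_expand(uint16_t val)
    {
        uint32_t res = 0;
        for (int i = 0; i < 16; i++)
            res |= ((val >> i) & 1u) << (2 * i + 1);  /* bit i -> bit 2i+1 */
        return res;
    }

    static uint16_t cfg_compress(uint32_t val)
    {
        uint16_t res = 0;
        for (int i = 0; i < 16; i++)
            res |= ((val >> (2 * i + 1)) & 1u) << i;  /* bit 2i+1 -> bit i */
        return res;
    }

    int main(void)
    {
        uint16_t cfg = 0x0003;            /* IRQs 0 and 1 edge-triggered */
        uint32_t hw  = cfg_expand(cfg);   /* 0x0000000a: bits 1 and 3 set */

        printf("expand(0x%04x) = 0x%08x\n", cfg, hw);
        printf("compress(back) = 0x%04x\n", cfg_compress(hw));
        return 0;
    }
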
-
-/*
- * The distributor uses 2 bits per IRQ for the CFG register, but the
- * LSB is always 0. As such, we only keep the upper bit, and use the
- * two above functions to compress/expand the bits
- */
-static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
- u32 val;
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
- vcpu->vcpu_id, offset >> 1);
- if (offset & 2)
- val = *reg >> 16;
- else
- val = *reg & 0xffff;
-
- val = vgic_cfg_expand(val);
- vgic_reg_access(mmio, &val, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
- if (mmio->is_write) {
- if (offset < 4) {
- *reg = ~0U; /* Force PPIs/SGIs to 1 */
- return false;
- }
-
- val = vgic_cfg_compress(val);
- if (offset & 2) {
- *reg &= 0xffff;
- *reg |= val << 16;
- } else {
- *reg &= 0xffff << 16;
- *reg |= val;
- }
- }
-
- return false;
-}
-
-static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
- u32 reg;
- vgic_reg_access(mmio, &reg, offset,
- ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
- if (mmio->is_write) {
- vgic_dispatch_sgi(vcpu, reg);
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-/*
- * I would have liked to use the kvm_bus_io_*() API instead, but it
- * cannot cope with banked registers (only the VM pointer is passed
- * around, and we need the vcpu). One of these days, someone please
- * fix it!
- */
-struct mmio_range {
- phys_addr_t base;
- unsigned long len;
- bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
- phys_addr_t offset);
-};
-
-static const struct mmio_range vgic_ranges[] = {
- {
- .base = GIC_DIST_CTRL,
- .len = 12,
- .handle_mmio = handle_mmio_misc,
- },
- {
- .base = GIC_DIST_IGROUP,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_raz_wi,
- },
- {
- .base = GIC_DIST_ENABLE_SET,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_set_enable_reg,
- },
- {
- .base = GIC_DIST_ENABLE_CLEAR,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_clear_enable_reg,
- },
- {
- .base = GIC_DIST_PENDING_SET,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_set_pending_reg,
- },
- {
- .base = GIC_DIST_PENDING_CLEAR,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_clear_pending_reg,
- },
- {
- .base = GIC_DIST_ACTIVE_SET,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_raz_wi,
- },
- {
- .base = GIC_DIST_ACTIVE_CLEAR,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_raz_wi,
- },
- {
- .base = GIC_DIST_PRI,
- .len = VGIC_NR_IRQS,
- .handle_mmio = handle_mmio_priority_reg,
- },
- {
- .base = GIC_DIST_TARGET,
- .len = VGIC_NR_IRQS,
- .handle_mmio = handle_mmio_target_reg,
- },
- {
- .base = GIC_DIST_CONFIG,
- .len = VGIC_NR_IRQS / 4,
- .handle_mmio = handle_mmio_cfg_reg,
- },
- {
- .base = GIC_DIST_SOFTINT,
- .len = 4,
- .handle_mmio = handle_mmio_sgi_reg,
- },
- {}
-};
-
-static const
-struct mmio_range *find_matching_range(const struct mmio_range *ranges,
- struct kvm_exit_mmio *mmio,
- phys_addr_t base)
-{
- const struct mmio_range *r = ranges;
- phys_addr_t addr = mmio->phys_addr - base;
-
- while (r->len) {
- if (addr >= r->base &&
- (addr + mmio->len) <= (r->base + r->len))
- return r;
- r++;
- }
-
- return NULL;
-}
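
Note that the access must fall entirely inside a single range, since both its start and its end are checked, so an access straddling two windows is rejected rather than split. A minimal standalone model of the lookup, with an invented table:

    #include <stdio.h>
    #include <stddef.h>

    struct range { unsigned base, len; const char *name; };

    static const struct range ranges[] = {
        { 0x000, 12, "misc"   },
        { 0x100, 8,  "enable" },
        { 0 }                       /* len == 0 terminates the table */
    };

    static const struct range *find(unsigned addr, unsigned len)
    {
        for (const struct range *r = ranges; r->len; r++)
            if (addr >= r->base && addr + len <= r->base + r->len)
                return r;
        return NULL;
    }

    int main(void)
    {
        const struct range *r = find(0x104, 4);
        printf("%s\n", r ? r->name : "unhandled");  /* enable */
        return 0;
    }
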
-
-/**
- * vgic_handle_mmio - handle an in-kernel MMIO access
- * @vcpu: pointer to the vcpu performing the access
- * @run: pointer to the kvm_run structure
- * @mmio: pointer to the data describing the access
- *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
- */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- const struct mmio_range *range;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- unsigned long base = dist->vgic_dist_base;
- bool updated_state;
- unsigned long offset;
-
- if (!irqchip_in_kernel(vcpu->kvm) ||
- mmio->phys_addr < base ||
- (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
- return false;
-
- /* We don't support ldrd / strd or ldm / stm to the emulated vgic */
- if (mmio->len > 4) {
- kvm_inject_dabt(vcpu, mmio->phys_addr);
- return true;
- }
-
- range = find_matching_range(vgic_ranges, mmio, base);
- if (unlikely(!range || !range->handle_mmio)) {
- pr_warn("Unhandled access %d %08llx %d\n",
- mmio->is_write, mmio->phys_addr, mmio->len);
- return false;
- }
-
- spin_lock(&vcpu->kvm->arch.vgic.lock);
- offset = mmio->phys_addr - range->base - base;
- updated_state = range->handle_mmio(vcpu, mmio, offset);
- spin_unlock(&vcpu->kvm->arch.vgic.lock);
- kvm_prepare_mmio(run, mmio);
- kvm_handle_mmio_return(vcpu, run);
-
- if (updated_state)
- vgic_kick_vcpus(vcpu->kvm);
-
- return true;
-}
-
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
-{
- struct kvm *kvm = vcpu->kvm;
- struct vgic_dist *dist = &kvm->arch.vgic;
- int nrcpus = atomic_read(&kvm->online_vcpus);
- u8 target_cpus;
- int sgi, mode, c, vcpu_id;
-
- vcpu_id = vcpu->vcpu_id;
-
- sgi = reg & 0xf;
- target_cpus = (reg >> 16) & 0xff;
- mode = (reg >> 24) & 3;
-
- switch (mode) {
- case 0:
- if (!target_cpus)
-			return;
-		break;
-
- case 1:
- target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
- break;
-
- case 2:
- target_cpus = 1 << vcpu_id;
- break;
- }
-
- kvm_for_each_vcpu(c, vcpu, kvm) {
- if (target_cpus & 1) {
- /* Flag the SGI as pending */
- vgic_dist_irq_set(vcpu, sgi);
- dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
- kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
- }
-
- target_cpus >>= 1;
- }
-}
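
The three GICD_SGIR filter modes are: 0, use the supplied target list; 1, every CPU except the requester; 2, the requester only. A standalone sketch of the decode, with all values invented:

    #include <stdio.h>

    int main(void)
    {
        int nrcpus = 4, self = 1;
        unsigned reg = (1u << 24) | (0u << 16) | 5u;  /* mode 1, SGI5 */
        unsigned target = (reg >> 16) & 0xff;

        switch ((reg >> 24) & 3) {
        case 0:                         /* use the target list as-is */
            break;
        case 1:                         /* all but self */
            target = ((1u << nrcpus) - 1) & ~(1u << self) & 0xff;
            break;
        case 2:                         /* self only */
            target = 1u << self;
            break;
        }
        printf("target mask: 0x%02x\n", target);    /* 0x0d */
        return 0;
    }
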
-
-static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
- unsigned long pending_private, pending_shared;
- int vcpu_id;
-
- vcpu_id = vcpu->vcpu_id;
- pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
- pend_shared = vcpu->arch.vgic_cpu.pending_shared;
-
- pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
- enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
- bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
-
- pending = vgic_bitmap_get_shared_map(&dist->irq_state);
- enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
- bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
- bitmap_and(pend_shared, pend_shared,
- vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
- VGIC_NR_SHARED_IRQS);
-
- pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
- pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
- return (pending_private < VGIC_NR_PRIVATE_IRQS ||
- pending_shared < VGIC_NR_SHARED_IRQS);
-}
-
-/*
- * Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
- */
-static void vgic_update_state(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int c;
-
- if (!dist->enabled) {
- set_bit(0, &dist->irq_pending_on_cpu);
- return;
- }
-
- kvm_for_each_vcpu(c, vcpu, kvm) {
- if (compute_pending_for_cpu(vcpu)) {
- pr_debug("CPU%d has pending interrupts\n", c);
- set_bit(c, &dist->irq_pending_on_cpu);
- }
- }
-}
-
-#define LR_CPUID(lr) \
- (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
-#define MK_LR_PEND(src, irq) \
- (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
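
MK_LR_PEND and LR_CPUID pack and unpack a GICv2 list register: virtual interrupt ID in bits 9:0, the SGI source CPU in bits 12:10, and the state bits at 29:28. The constants below are restated locally for illustration; the kernel takes them from the GIC header:

    #include <stdio.h>

    #define LR_PHYSID_SHIFT 10
    #define LR_PHYSID_MASK  (7u << LR_PHYSID_SHIFT)
    #define LR_PENDING      (1u << 28)
    #define LR_VIRTUALID    0x3ffu

    int main(void)
    {
        /* SGI1, pending, sent by vcpu 2: */
        unsigned lr = LR_PENDING | (2u << LR_PHYSID_SHIFT) | 1u;

        printf("lr      = 0x%08x\n", lr);
        printf("irq     = %u\n", lr & LR_VIRTUALID);
        printf("src cpu = %u\n", (lr & LR_PHYSID_MASK) >> LR_PHYSID_SHIFT);
        return 0;
    }
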
-
-/*
- * An interrupt may have been disabled after being made pending on the
- * CPU interface (the classic case is a timer running while we're
- * rebooting the guest - the interrupt would kick as soon as the CPU
- * interface gets enabled, with deadly consequences).
- *
- * The solution is to examine already active LRs, and check the
- * interrupt is still enabled. If not, just retire it.
- */
-static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- int lr;
-
- for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
- int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
- if (!vgic_irq_is_enabled(vcpu, irq)) {
- vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
- clear_bit(lr, vgic_cpu->lr_used);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
- if (vgic_irq_is_active(vcpu, irq))
- vgic_irq_clear_active(vcpu, irq);
- }
- }
-}
-
-/*
- * Queue an interrupt to a CPU virtual interface. Return true on success,
- * or false if it wasn't possible to queue it.
- */
-static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- int lr;
-
- /* Sanitize the input... */
- BUG_ON(sgi_source_id & ~7);
- BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
- BUG_ON(irq >= VGIC_NR_IRQS);
-
- kvm_debug("Queue IRQ%d\n", irq);
-
- lr = vgic_cpu->vgic_irq_lr_map[irq];
-
- /* Do we have an active interrupt for the same CPUID? */
- if (lr != LR_EMPTY &&
- (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
- kvm_debug("LR%d piggyback for IRQ%d %x\n",
- lr, irq, vgic_cpu->vgic_lr[lr]);
- BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
- vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
- return true;
- }
-
- /* Try to use another LR for this interrupt */
- lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
- vgic_cpu->nr_lr);
- if (lr >= vgic_cpu->nr_lr)
- return false;
-
- kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
- vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
- vgic_cpu->vgic_irq_lr_map[irq] = lr;
- set_bit(lr, vgic_cpu->lr_used);
-
- if (!vgic_irq_is_edge(vcpu, irq))
- vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
-
- return true;
-}
-
-static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- unsigned long sources;
- int vcpu_id = vcpu->vcpu_id;
- int c;
-
- sources = dist->irq_sgi_sources[vcpu_id][irq];
-
- for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
- if (vgic_queue_irq(vcpu, c, irq))
- clear_bit(c, &sources);
- }
-
- dist->irq_sgi_sources[vcpu_id][irq] = sources;
-
- /*
- * If the sources bitmap has been cleared it means that we
- * could queue all the SGIs onto link registers (see the
- * clear_bit above), and therefore we are done with them in
- * our emulated gic and can get rid of them.
- */
- if (!sources) {
- vgic_dist_irq_clear(vcpu, irq);
- vgic_cpu_irq_clear(vcpu, irq);
- return true;
- }
-
- return false;
-}
-
-static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
-{
- if (vgic_irq_is_active(vcpu, irq))
- return true; /* level interrupt, already queued */
-
- if (vgic_queue_irq(vcpu, 0, irq)) {
- if (vgic_irq_is_edge(vcpu, irq)) {
- vgic_dist_irq_clear(vcpu, irq);
- vgic_cpu_irq_clear(vcpu, irq);
- } else {
- vgic_irq_set_active(vcpu, irq);
- }
-
- return true;
- }
-
- return false;
-}
-
-/*
- * Fill the list registers with pending interrupts before running the
- * guest.
- */
-static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int i, vcpu_id;
- int overflow = 0;
-
- vcpu_id = vcpu->vcpu_id;
-
- /*
- * We may not have any pending interrupt, or the interrupts
- * may have been serviced from another vcpu. In all cases,
- * move along.
- */
- if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
- pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
- goto epilog;
- }
-
- /* SGIs */
- for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
- if (!vgic_queue_sgi(vcpu, i))
- overflow = 1;
- }
-
- /* PPIs */
- for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
- if (!vgic_queue_hwirq(vcpu, i))
- overflow = 1;
- }
-
- /* SPIs */
- for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
- if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
- overflow = 1;
- }
-
-epilog:
- if (overflow) {
- vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
- } else {
- vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
- /*
- * We're about to run this VCPU, and we've consumed
- * everything the distributor had in store for
- * us. Claim we don't have anything pending. We'll
- * adjust that if needed while exiting.
- */
- clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
- }
-}
-
-static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- bool level_pending = false;
-
- kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
-
- if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
- /*
- * Some level interrupts have been EOIed. Clear their
- * active bit.
- */
- int lr, irq;
-
- for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
- vgic_cpu->nr_lr) {
- irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
- vgic_irq_clear_active(vcpu, irq);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
-
- /* Any additional pending interrupt? */
- if (vgic_dist_irq_is_pending(vcpu, irq)) {
- vgic_cpu_irq_set(vcpu, irq);
- level_pending = true;
- } else {
- vgic_cpu_irq_clear(vcpu, irq);
- }
-
- /*
- * Despite being EOIed, the LR may not have
- * been marked as empty.
- */
- set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
- }
- }
-
- if (vgic_cpu->vgic_misr & GICH_MISR_U)
- vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
-
- return level_pending;
-}
-
-/*
- * Sync back the VGIC state after a guest run. The distributor lock is
- * needed so we don't get preempted in the middle of the state processing.
- */
-static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int lr, pending;
- bool level_pending;
-
- level_pending = vgic_process_maintenance(vcpu);
-
- /* Clear mappings for empty LRs */
- for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
- vgic_cpu->nr_lr) {
- int irq;
-
- if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
- continue;
-
- irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
- BUG_ON(irq >= VGIC_NR_IRQS);
- vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
- }
-
- /* Check if we still have something up our sleeve... */
- pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
- vgic_cpu->nr_lr);
- if (level_pending || pending < vgic_cpu->nr_lr)
- set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-}
-
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return;
-
- spin_lock(&dist->lock);
- __kvm_vgic_flush_hwstate(vcpu);
- spin_unlock(&dist->lock);
-}
-
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return;
-
- spin_lock(&dist->lock);
- __kvm_vgic_sync_hwstate(vcpu);
- spin_unlock(&dist->lock);
-}
-
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return 0;
-
- return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-}
-
-static void vgic_kick_vcpus(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu;
- int c;
-
- /*
- * We've injected an interrupt, time to find out who deserves
- * a good kick...
- */
- kvm_for_each_vcpu(c, vcpu, kvm) {
- if (kvm_vgic_vcpu_pending_irq(vcpu))
- kvm_vcpu_kick(vcpu);
- }
-}
-
-static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
-{
- int is_edge = vgic_irq_is_edge(vcpu, irq);
- int state = vgic_dist_irq_is_pending(vcpu, irq);
-
- /*
- * Only inject an interrupt if:
- * - edge triggered and we have a rising edge
- * - level triggered and we change level
- */
- if (is_edge)
- return level > state;
- else
- return level != state;
-}
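
In other words, an edge interrupt is only injected on a 0-to-1 transition of the pending state, while a level interrupt is injected on any change of the line. The filter as a quick standalone truth table:

    #include <stdio.h>

    static int validate(int is_edge, int level, int state)
    {
        return is_edge ? (level > state) : (level != state);
    }

    int main(void)
    {
        printf("edge,  0->1: %d\n", validate(1, 1, 0)); /* 1: rising edge */
        printf("edge,  1->1: %d\n", validate(1, 1, 1)); /* 0: no new edge */
        printf("level, 1->0: %d\n", validate(0, 0, 1)); /* 1: level change */
        printf("level, 1->1: %d\n", validate(0, 1, 1)); /* 0: no change   */
        return 0;
    }
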
-
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
- unsigned int irq_num, bool level)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int is_edge, is_level;
- int enabled;
- bool ret = true;
-
- spin_lock(&dist->lock);
-
- vcpu = kvm_get_vcpu(kvm, cpuid);
- is_edge = vgic_irq_is_edge(vcpu, irq_num);
- is_level = !is_edge;
-
- if (!vgic_validate_injection(vcpu, irq_num, level)) {
- ret = false;
- goto out;
- }
-
- if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
- cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
- vcpu = kvm_get_vcpu(kvm, cpuid);
- }
-
- kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
-
- if (level)
- vgic_dist_irq_set(vcpu, irq_num);
- else
- vgic_dist_irq_clear(vcpu, irq_num);
-
- enabled = vgic_irq_is_enabled(vcpu, irq_num);
-
- if (!enabled) {
- ret = false;
- goto out;
- }
-
- if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
- /*
- * Level interrupt in progress, will be picked up
- * when EOId.
- */
- ret = false;
- goto out;
- }
-
- if (level) {
- vgic_cpu_irq_set(vcpu, irq_num);
- set_bit(cpuid, &dist->irq_pending_on_cpu);
- }
-
-out:
- spin_unlock(&dist->lock);
-
- return ret;
-}
-
-/**
- * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
- * @kvm: The VM structure pointer
- * @cpuid: The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
- * @level: Edge-triggered: true: to trigger the interrupt
- * false: to ignore the call
- * Level-sensitive true: activates an interrupt
- * false: deactivates an interrupt
- *
- * The GIC is not concerned with devices being active-LOW or active-HIGH for
- * level-sensitive interrupts. You can think of the level parameter as 1
- * being HIGH and 0 being LOW and all devices being active-HIGH.
- */
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
- bool level)
-{
- if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
- vgic_kick_vcpus(kvm);
-
- return 0;
-}
-
-static irqreturn_t vgic_maintenance_handler(int irq, void *data)
-{
- /*
- * We cannot rely on the vgic maintenance interrupt to be
- * delivered synchronously. This means we can only use it to
- * exit the VM, and we perform the handling of EOIed
- * interrupts on the exit path (see vgic_process_maintenance).
- */
- return IRQ_HANDLED;
-}
-
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int i;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return 0;
-
- if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
- return -EBUSY;
-
- for (i = 0; i < VGIC_NR_IRQS; i++) {
- if (i < VGIC_NR_PPIS)
- vgic_bitmap_set_irq_val(&dist->irq_enabled,
- vcpu->vcpu_id, i, 1);
- if (i < VGIC_NR_PRIVATE_IRQS)
- vgic_bitmap_set_irq_val(&dist->irq_cfg,
- vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
- vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
- }
-
- /*
- * By forcing VMCR to zero, the GIC will restore the binary
- * points to their reset values. Anything else resets to zero
- * anyway.
- */
- vgic_cpu->vgic_vmcr = 0;
-
- vgic_cpu->nr_lr = vgic_nr_lr;
- vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
-
- return 0;
-}
-
-static void vgic_init_maintenance_interrupt(void *info)
-{
- enable_percpu_irq(vgic_maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
- unsigned long action, void *cpu)
-{
- switch (action) {
- case CPU_STARTING:
- case CPU_STARTING_FROZEN:
- vgic_init_maintenance_interrupt(NULL);
- break;
- case CPU_DYING:
- case CPU_DYING_FROZEN:
- disable_percpu_irq(vgic_maint_irq);
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
- .notifier_call = vgic_cpu_notify,
-};
-
-int kvm_vgic_hyp_init(void)
-{
- int ret;
- struct resource vctrl_res;
- struct resource vcpu_res;
-
- vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
- if (!vgic_node) {
- kvm_err("error: no compatible vgic node in DT\n");
- return -ENODEV;
- }
-
- vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic_maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
- ret = -ENXIO;
- goto out;
- }
-
- ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
- "vgic", kvm_get_running_vcpus());
- if (ret) {
- kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
- goto out;
- }
-
- ret = register_cpu_notifier(&vgic_cpu_nb);
- if (ret) {
- kvm_err("Cannot register vgic CPU notifier\n");
- goto out_free_irq;
- }
-
- ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
- if (ret) {
- kvm_err("Cannot obtain VCTRL resource\n");
- goto out_free_irq;
- }
-
- vgic_vctrl_base = of_iomap(vgic_node, 2);
- if (!vgic_vctrl_base) {
- kvm_err("Cannot ioremap VCTRL\n");
- ret = -ENOMEM;
- goto out_free_irq;
- }
-
- vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
- vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
-
- ret = create_hyp_io_mappings(vgic_vctrl_base,
- vgic_vctrl_base + resource_size(&vctrl_res),
- vctrl_res.start);
- if (ret) {
- kvm_err("Cannot map VCTRL into hyp\n");
- goto out_unmap;
- }
-
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vctrl_res.start, vgic_maint_irq);
- on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
- if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
- kvm_err("Cannot obtain VCPU resource\n");
- ret = -ENXIO;
- goto out_unmap;
- }
- vgic_vcpu_base = vcpu_res.start;
-
- goto out;
-
-out_unmap:
- iounmap(vgic_vctrl_base);
-out_free_irq:
- free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
-out:
- of_node_put(vgic_node);
- return ret;
-}
-
-int kvm_vgic_init(struct kvm *kvm)
-{
- int ret = 0, i;
-
- mutex_lock(&kvm->lock);
-
- if (vgic_initialized(kvm))
- goto out;
-
- if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
- kvm_err("Need to set vgic cpu and dist addresses first\n");
- ret = -ENXIO;
- goto out;
- }
-
- ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
- vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
- if (ret) {
- kvm_err("Unable to remap VGIC CPU to VCPU\n");
- goto out;
- }
-
- for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
- vgic_set_target_reg(kvm, 0, i);
-
- kvm_timer_init(kvm);
- kvm->arch.vgic.ready = true;
-out:
- mutex_unlock(&kvm->lock);
- return ret;
-}
-
-int kvm_vgic_create(struct kvm *kvm)
-{
- int ret = 0;
-
- mutex_lock(&kvm->lock);
-
- if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
- ret = -EEXIST;
- goto out;
- }
-
- spin_lock_init(&kvm->arch.vgic.lock);
- kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
- kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
- kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
-
-out:
- mutex_unlock(&kvm->lock);
- return ret;
-}
-
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
-{
- phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
- phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
-
- if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
- return 0;
- if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
- (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
- return -EBUSY;
- return 0;
-}
-
-static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
- phys_addr_t addr, phys_addr_t size)
-{
- int ret;
-
- if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
- return -EEXIST;
- if (addr + size < addr)
- return -EINVAL;
-
- ret = vgic_ioaddr_overlap(kvm);
- if (ret)
- return ret;
- *ioaddr = addr;
- return ret;
-}
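
The 'addr + size < addr' test is the idiomatic unsigned wraparound check: if the sum wrapped, the region runs past the end of the address space. Demonstrated below with 32-bit addresses for brevity (phys_addr_t may be wider):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t addr = 0xfffff000, size = 0x2000;  /* 8K at the top of 4G */

        if (addr + size < addr)     /* unsigned addition wrapped around */
            printf("rejected: region wraps past the address space\n");
        return 0;
    }
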
-
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
- int r = 0;
- struct vgic_dist *vgic = &kvm->arch.vgic;
-
- if (addr & ~KVM_PHYS_MASK)
- return -E2BIG;
-
- if (addr & (SZ_4K - 1))
- return -EINVAL;
-
- mutex_lock(&kvm->lock);
- switch (type) {
- case KVM_VGIC_V2_ADDR_TYPE_DIST:
- r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
- addr, KVM_VGIC_V2_DIST_SIZE);
- break;
- case KVM_VGIC_V2_ADDR_TYPE_CPU:
- r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
- addr, KVM_VGIC_V2_CPU_SIZE);
- break;
- default:
- r = -ENODEV;
- }
-
- mutex_unlock(&kvm->lock);
- return r;
-}
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..3bc8eb811a73 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -197,24 +197,24 @@
12: PLD( pld [r1, #124] )
13: ldr4w r1, r4, r5, r6, r7, abort=19f
- mov r3, lr, pull #\pull
+ mov r3, lr, lspull #\pull
subs r2, r2, #32
ldr4w r1, r8, r9, ip, lr, abort=19f
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
+ orr r3, r3, r4, lspush #\push
+ mov r4, r4, lspull #\pull
+ orr r4, r4, r5, lspush #\push
+ mov r5, r5, lspull #\pull
+ orr r5, r5, r6, lspush #\push
+ mov r6, r6, lspull #\pull
+ orr r6, r6, r7, lspush #\push
+ mov r7, r7, lspull #\pull
+ orr r7, r7, r8, lspush #\push
+ mov r8, r8, lspull #\pull
+ orr r8, r8, r9, lspush #\push
+ mov r9, r9, lspull #\pull
+ orr r9, r9, ip, lspush #\push
+ mov ip, ip, lspull #\pull
+ orr ip, ip, lr, lspush #\push
str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
bge 12b
PLD( cmn r2, #96 )
@@ -225,10 +225,10 @@
14: ands ip, r2, #28
beq 16f
-15: mov r3, lr, pull #\pull
+15: mov r3, lr, lspull #\pull
ldr1w r1, lr, abort=21f
subs ip, ip, #4
- orr r3, r3, lr, push #\push
+ orr r3, r3, lr, lspush #\push
str1w r0, r3, abort=21f
bgt 15b
CALGN( cmp r2, #0 )
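
The rename in this and the following hunks is mechanical: the old 'pull' and 'push' shift-macro names clash with the PUSH/POP mnemonics of the unified assembler syntax, so they become lspull and lspush; the generated code does not change. What each macro pair computes is the usual byte-lane stitch for unaligned copies: on little-endian, lspull expands to lsr and lspush to lsl, and the definitions swap for BE8. A C model of one step, with invented input words:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Memory bytes 00 11 22 33 44 55 66 77, loaded as LE words: */
        uint32_t w0 = 0x33221100, w1 = 0x77665544;

        /* One output word of the stream starting at byte offset 1: */
        uint32_t out = (w0 >> 8) | (w1 << 24);  /* lspull #8 | lspush #24 */

        printf("0x%08x\n", out);    /* 0x44332211: bytes 11 22 33 44 */
        return 0;
    }
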
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..d6e742d24007 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -141,7 +141,7 @@ FN_ENTRY
tst len, #2
mov r5, r4, get_byte_0
beq .Lexit
- adcs sum, sum, r4, push #16
+ adcs sum, sum, r4, lspush #16
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
@@ -171,23 +171,23 @@ FN_ENTRY
cmp ip, #2
beq .Lsrc2_aligned
bhi .Lsrc3_aligned
- mov r4, r5, pull #8 @ C = 0
+ mov r4, r5, lspull #8 @ C = 0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
- mov r7, r7, pull #8
- orr r7, r7, r8, push #24
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
+ mov r7, r7, lspull #8
+ orr r7, r7, r8, lspush #24
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #8
+ mov r4, r8, lspull #8
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -196,50 +196,50 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #8
+ mov r4, r6, lspull #8
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #24
+ orr r4, r4, r5, lspush #24
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #8
+ mov r4, r5, lspull #8
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
tst len, #2
beq .Lexit
- adcs sum, sum, r4, push #16
+ adcs sum, sum, r4, lspush #16
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
mov r5, r4, get_byte_2
b .Lexit
-.Lsrc2_aligned: mov r4, r5, pull #16
+.Lsrc2_aligned: mov r4, r5, lspull #16
adds sum, sum, #0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
- mov r7, r7, pull #16
- orr r7, r7, r8, push #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
+ mov r7, r7, lspull #16
+ orr r7, r7, r8, lspush #16
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #16
+ mov r4, r8, lspull #16
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -248,20 +248,20 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #16
+ mov r4, r6, lspull #16
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #16
+ orr r4, r4, r5, lspush #16
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #16
+ mov r4, r5, lspull #16
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
@@ -276,24 +276,24 @@ FN_ENTRY
load1b r5
b .Lexit
-.Lsrc3_aligned: mov r4, r5, pull #24
+.Lsrc3_aligned: mov r4, r5, lspull #24
adds sum, sum, #0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
- mov r7, r7, pull #24
- orr r7, r7, r8, push #8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
+ mov r7, r7, lspull #24
+ orr r7, r7, r8, lspush #8
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #24
+ mov r4, r8, lspull #24
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -302,20 +302,20 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #24
+ mov r4, r6, lspull #24
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #8
+ orr r4, r4, r5, lspush #8
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #24
+ mov r4, r5, lspull #24
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
@@ -326,7 +326,7 @@ FN_ENTRY
load1l r4
mov r5, r4, get_byte_0
strb r5, [dst], #1
- adcs sum, sum, r4, push #24
+ adcs sum, sum, r4, lspush #24
mov r5, r4, get_byte_1
b .Lexit
FN_EXIT
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..7a7430950c79 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@ ENTRY(__raw_readsl)
strb ip, [r1], #1
4: subs r2, r2, #1
- mov ip, r3, pull #24
+ mov ip, r3, lspull #24
ldrne r3, [r0]
- orrne ip, ip, r3, push #8
+ orrne ip, ip, r3, lspush #8
strne ip, [r1], #4
bne 4b
b 8f
5: subs r2, r2, #1
- mov ip, r3, pull #16
+ mov ip, r3, lspull #16
ldrne r3, [r0]
- orrne ip, ip, r3, push #16
+ orrne ip, ip, r3, lspush #16
strne ip, [r1], #4
bne 5b
b 7f
6: subs r2, r2, #1
- mov ip, r3, pull #8
+ mov ip, r3, lspull #8
ldrne r3, [r0]
- orrne ip, ip, r3, push #24
+ orrne ip, ip, r3, lspush #24
strne ip, [r1], #4
bne 6b
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..d0d104a0dd11 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,26 @@ ENTRY(__raw_writesl)
blt 5f
bgt 6f
-4: mov ip, r3, pull #16
+4: mov ip, r3, lspull #16
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #16
+ orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
mov pc, lr
-5: mov ip, r3, pull #8
+5: mov ip, r3, lspull #8
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #24
+ orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
mov pc, lr
-6: mov ip, r3, pull #24
+6: mov ip, r3, lspull #24
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #8
+ orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
mov pc, lr
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14f962d..d1fc0c0c342c 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -147,24 +147,24 @@ ENTRY(memmove)
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
- mov lr, r3, push #\push
+ mov lr, r3, lspush #\push
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
- orr lr, lr, ip, pull #\pull
- mov ip, ip, push #\push
- orr ip, ip, r9, pull #\pull
- mov r9, r9, push #\push
- orr r9, r9, r8, pull #\pull
- mov r8, r8, push #\push
- orr r8, r8, r7, pull #\pull
- mov r7, r7, push #\push
- orr r7, r7, r6, pull #\pull
- mov r6, r6, push #\push
- orr r6, r6, r5, pull #\pull
- mov r5, r5, push #\push
- orr r5, r5, r4, pull #\pull
- mov r4, r4, push #\push
- orr r4, r4, r3, pull #\pull
+ orr lr, lr, ip, lspull #\pull
+ mov ip, ip, lspush #\push
+ orr ip, ip, r9, lspull #\pull
+ mov r9, r9, lspush #\push
+ orr r9, r9, r8, lspull #\pull
+ mov r8, r8, lspush #\push
+ orr r8, r8, r7, lspull #\pull
+ mov r7, r7, lspush #\push
+ orr r7, r7, r6, lspull #\pull
+ mov r6, r6, lspush #\push
+ orr r6, r6, r5, lspull #\pull
+ mov r5, r5, lspush #\push
+ orr r5, r5, r4, lspull #\pull
+ mov r4, r4, lspush #\push
+ orr r4, r4, r3, lspull #\pull
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
@@ -175,10 +175,10 @@ ENTRY(memmove)
14: ands ip, r2, #28
beq 16f
-15: mov lr, r3, push #\push
+15: mov lr, r3, lspush #\push
ldr r3, [r1, #-4]!
subs ip, ip, #4
- orr lr, lr, r3, pull #\pull
+ orr lr, lr, r3, lspull #\pull
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index 5c908b1cb8ed..e50520904b76 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -117,9 +117,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault
.Lc2u_1fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_1nowords
- mov r3, r7, pull #8
+ mov r3, r7, lspull #8
ldr r7, [r1], #4
- orr r3, r3, r7, push #24
+ orr r3, r3, r7, lspush #24
USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -131,30 +131,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_1rem8lp
-.Lc2u_1cpy8lp: mov r3, r7, pull #8
+.Lc2u_1cpy8lp: mov r3, r7, lspull #8
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #24
- mov r4, r4, pull #8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
+ orr r3, r3, r4, lspush #24
+ mov r4, r4, lspull #8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_1cpy8lp
.Lc2u_1rem8lp: tst ip, #8
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #24
- movne r4, r4, pull #8
- orrne r4, r4, r7, push #24
+ orrne r3, r3, r4, lspush #24
+ movne r4, r4, lspull #8
+ orrne r4, r4, r7, lspush #24
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #24
+ orrne r3, r3, r7, lspush #24
TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_1fupi
@@ -172,9 +172,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault
.Lc2u_2fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_2nowords
- mov r3, r7, pull #16
+ mov r3, r7, lspull #16
ldr r7, [r1], #4
- orr r3, r3, r7, push #16
+ orr r3, r3, r7, lspush #16
USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -186,30 +186,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_2rem8lp
-.Lc2u_2cpy8lp: mov r3, r7, pull #16
+.Lc2u_2cpy8lp: mov r3, r7, lspull #16
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #16
- mov r4, r4, pull #16
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
+ orr r3, r3, r4, lspush #16
+ mov r4, r4, lspull #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_2cpy8lp
.Lc2u_2rem8lp: tst ip, #8
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #16
- movne r4, r4, pull #16
- orrne r4, r4, r7, push #16
+ orrne r3, r3, r4, lspush #16
+ movne r4, r4, lspull #16
+ orrne r4, r4, r7, lspush #16
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #16
+ orrne r3, r3, r7, lspush #16
TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_2fupi
@@ -227,9 +227,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault
.Lc2u_3fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_3nowords
- mov r3, r7, pull #24
+ mov r3, r7, lspull #24
ldr r7, [r1], #4
- orr r3, r3, r7, push #8
+ orr r3, r3, r7, lspush #8
USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -241,30 +241,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_3rem8lp
-.Lc2u_3cpy8lp: mov r3, r7, pull #24
+.Lc2u_3cpy8lp: mov r3, r7, lspull #24
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #8
- mov r4, r4, pull #24
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
+ orr r3, r3, r4, lspush #8
+ mov r4, r4, lspull #24
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_3cpy8lp
.Lc2u_3rem8lp: tst ip, #8
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #8
- movne r4, r4, pull #24
- orrne r4, r4, r7, push #8
+ orrne r3, r3, r4, lspush #8
+ movne r4, r4, lspull #24
+ orrne r4, r4, r7, lspush #8
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #8
+ orrne r3, r3, r7, lspush #8
TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_3fupi
@@ -382,9 +382,9 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault
.Lcfu_1fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_1nowords
- mov r3, r7, pull #8
+ mov r3, r7, lspull #8
USER( TUSER( ldr) r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #24
+ orr r3, r3, r7, lspush #24
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -396,30 +396,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault
subs ip, ip, #16
blt .Lcfu_1rem8lp
-.Lcfu_1cpy8lp: mov r3, r7, pull #8
+.Lcfu_1cpy8lp: mov r3, r7, lspull #8
ldmia r1!, {r4 - r7} @ Shouldnt fault
subs ip, ip, #16
- orr r3, r3, r4, push #24
- mov r4, r4, pull #8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
+ orr r3, r3, r4, lspush #24
+ mov r4, r4, lspull #8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
stmia r0!, {r3 - r6}
bpl .Lcfu_1cpy8lp
.Lcfu_1rem8lp: tst ip, #8
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #24
- movne r4, r4, pull #8
- orrne r4, r4, r7, push #24
+ orrne r3, r3, r4, lspush #24
+ movne r4, r4, lspull #8
+ orrne r4, r4, r7, lspush #24
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
USER( TUSER( ldrne) r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #24
+ orrne r3, r3, r7, lspush #24
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_1fupi
@@ -437,9 +437,9 @@ USER( TUSER( ldrne) r7, [r1], #4) @ May fault
.Lcfu_2fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_2nowords
- mov r3, r7, pull #16
+ mov r3, r7, lspull #16
USER( TUSER( ldr) r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #16
+ orr r3, r3, r7, lspush #16
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -452,30 +452,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault
blt .Lcfu_2rem8lp
-.Lcfu_2cpy8lp: mov r3, r7, pull #16
+.Lcfu_2cpy8lp: mov r3, r7, lspull #16
ldmia r1!, {r4 - r7} @ Shouldnt fault
subs ip, ip, #16
- orr r3, r3, r4, push #16
- mov r4, r4, pull #16
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
+ orr r3, r3, r4, lspush #16
+ mov r4, r4, lspull #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
stmia r0!, {r3 - r6}
bpl .Lcfu_2cpy8lp
.Lcfu_2rem8lp: tst ip, #8
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #16
- movne r4, r4, pull #16
- orrne r4, r4, r7, push #16
+ orrne r3, r3, r4, lspush #16
+ movne r4, r4, lspull #16
+ orrne r4, r4, r7, lspush #16
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
USER( TUSER( ldrne) r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #16
+ orrne r3, r3, r7, lspush #16
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_2fupi
@@ -493,9 +493,9 @@ USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault
.Lcfu_3fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_3nowords
- mov r3, r7, pull #24
+ mov r3, r7, lspull #24
USER( TUSER( ldr) r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #8
+ orr r3, r3, r7, lspush #8
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -507,30 +507,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault
subs ip, ip, #16
blt .Lcfu_3rem8lp
-.Lcfu_3cpy8lp: mov r3, r7, pull #24
+.Lcfu_3cpy8lp: mov r3, r7, lspull #24
ldmia r1!, {r4 - r7} @ Shouldnt fault
- orr r3, r3, r4, push #8
- mov r4, r4, pull #24
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
+ orr r3, r3, r4, lspush #8
+ mov r4, r4, lspull #24
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
stmia r0!, {r3 - r6}
subs ip, ip, #16
bpl .Lcfu_3cpy8lp
.Lcfu_3rem8lp: tst ip, #8
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #8
- movne r4, r4, pull #24
- orrne r4, r4, r7, push #8
+ orrne r3, r3, r4, lspush #8
+ movne r4, r4, lspull #24
+ orrne r4, r4, r7, lspush #8
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
USER( TUSER( ldrne) r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #8
+ orrne r3, r3, r7, lspush #8
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_3fupi
diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c
index 753b94f3fca7..d88234e14f96 100644
--- a/arch/arm/mach-exynos/mach-exynos5-dt.c
+++ b/arch/arm/mach-exynos/mach-exynos5-dt.c
@@ -14,6 +14,7 @@
#include <linux/memblock.h>
#include <linux/io.h>
#include <linux/clocksource.h>
+#include <linux/dma-mapping.h>
#include <asm/mach/arch.h>
#include <mach/regs-pmu.h>
@@ -23,11 +24,31 @@
#include "common.h"
+static u64 dma_mask64 = DMA_BIT_MASK(64);
+
static void __init exynos5_dt_map_io(void)
{
exynos_init_io(NULL, 0);
}
+static int exynos5250_platform_notifier(struct notifier_block *nb,
+ unsigned long event, void *__dev)
+{
+ struct device *dev = __dev;
+
+ if (event != BUS_NOTIFY_ADD_DEVICE)
+ return NOTIFY_DONE;
+
+ dev->dma_mask = &dma_mask64;
+ dev->coherent_dma_mask = DMA_BIT_MASK(64);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block exynos5250_platform_nb = {
+ .notifier_call = exynos5250_platform_notifier,
+};
+
static void __init exynos5_dt_machine_init(void)
{
struct device_node *i2c_np;
@@ -52,6 +73,11 @@ static void __init exynos5_dt_machine_init(void)
}
}
+ if (config_enabled(CONFIG_ARM_LPAE) &&
+ of_machine_is_compatible("samsung,exynos5250"))
+ bus_register_notifier(&platform_bus_type,
+ &exynos5250_platform_nb);
+
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
}
diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig
index cd9fcb1cd7ab..b8466fb00f55 100644
--- a/arch/arm/mach-highbank/Kconfig
+++ b/arch/arm/mach-highbank/Kconfig
@@ -2,6 +2,7 @@ config ARCH_HIGHBANK
bool "Calxeda ECX-1000/2000 (Highbank/Midway)" if ARCH_MULTI_V7
select ARCH_HAS_CPUFREQ
select ARCH_HAS_OPP
+ select ARCH_SUPPORTS_BIG_ENDIAN
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARM_AMBA
select ARM_GIC
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index 35d1029d7c9d..c37d31e15a06 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -21,6 +21,7 @@
#include <linux/irq.h>
#include <linux/irqchip.h>
#include <linux/irqdomain.h>
+#include <linux/pl320-ipc.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig
index 73a2d905af8a..72de05f09cb8 100644
--- a/arch/arm/mach-ixp4xx/Kconfig
+++ b/arch/arm/mach-ixp4xx/Kconfig
@@ -1,9 +1,5 @@
if ARCH_IXP4XX
-config ARCH_SUPPORTS_BIG_ENDIAN
- bool
- default y
-
menu "Intel IXP4xx Implementation Options"
comment "IXP4xx Platforms"
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 80a8bcacd9d5..317cdb800099 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -1,5 +1,6 @@
config ARCH_MVEBU
bool "Marvell SOCs with Device Tree support" if ARCH_MULTI_V7
+ select ARCH_SUPPORTS_BIG_ENDIAN
select CLKSRC_MMIO
select COMMON_CLK
select GENERIC_CLOCKEVENTS
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S
index 5476669ba905..ee7598fe75db 100644
--- a/arch/arm/mach-mvebu/coherency_ll.S
+++ b/arch/arm/mach-mvebu/coherency_ll.S
@@ -20,6 +20,8 @@
#define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0
#define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4
+#include <asm/assembler.h>
+
.text
/*
* r0: Coherency fabric base register address
@@ -29,6 +31,7 @@ ENTRY(ll_set_cpu_coherent)
/* Create bit by cpu index */
mov r3, #(1 << 24)
lsl r1, r3, r1
+ARM_BE8(rev r1, r1)
/* Add CPU to SMP group - Atomic */
add r3, r0, #ARMADA_XP_CFB_CTL_REG_OFFSET
diff --git a/arch/arm/mach-mvebu/headsmp.S b/arch/arm/mach-mvebu/headsmp.S
index a06e0ede8c08..458ed3fb2626 100644
--- a/arch/arm/mach-mvebu/headsmp.S
+++ b/arch/arm/mach-mvebu/headsmp.S
@@ -21,6 +21,8 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
+
/*
 * At this stage the secondary CPUs don't have access yet to the MMU, so
* we have to provide physical addresses
@@ -35,6 +37,7 @@
* startup
*/
ENTRY(armada_xp_secondary_startup)
+ ARM_BE8(setend be ) @ go BE8 if entered LE
/* Read CPU id */
mrc p15, 0, r1, c0, c0, 5
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 5907e10c37fd..39858ba03084 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -1,6 +1,9 @@
config ARCH_VEXPRESS
bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7
+ select ARCH_HAS_CPUFREQ
+ select ARCH_HAS_OPP
select ARCH_REQUIRE_GPIOLIB
+ select ARCH_SUPPORTS_BIG_ENDIAN
select ARM_AMBA
select ARM_GIC
select ARM_TIMER_SP804
@@ -56,5 +59,23 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA
config ARCH_VEXPRESS_CA9X4
bool "Versatile Express Cortex-A9x4 tile"
+ select ARM_ERRATA_643719
+
+config ARCH_VEXPRESS_DCSCB
+ bool "Dual Cluster System Control Block (DCSCB) support"
+ depends on MCPM
+ select ARM_CCI
+ help
+ Support for the Dual Cluster System Configuration Block (DCSCB).
+ This is needed to provide CPU and cluster power management
+ on RTSM implementing big.LITTLE.
+
+config ARCH_VEXPRESS_TC2
+ bool "TC2 cluster management"
+ depends on MCPM
+ select VEXPRESS_SPC
+ select ARM_CCI
+ help
+ Support for CPU and cluster power management on TC2.
endmenu
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 42703e8b4d3b..14193dc7e6e8 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -6,5 +6,13 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
obj-y := v2m.o
obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o
+obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o
+CFLAGS_REMOVE_dcscb.o = -pg
+obj-$(CONFIG_ARCH_VEXPRESS_TC2) += tc2_pm.o tc2_pm_setup.o
+CFLAGS_REMOVE_tc2_pm.o = -pg
+ifeq ($(CONFIG_ARCH_VEXPRESS_TC2),y)
+obj-$(CONFIG_ARM_PSCI) += tc2_pm_psci.o
+CFLAGS_REMOVE_tc2_pm_psci.o = -pg
+endif
obj-$(CONFIG_SMP) += platsmp.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o
diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h
index f134cd4a85f1..bde4374ab6d5 100644
--- a/arch/arm/mach-vexpress/core.h
+++ b/arch/arm/mach-vexpress/core.h
@@ -6,6 +6,8 @@
void vexpress_dt_smp_map_io(void);
+bool vexpress_smp_init_ops(void);
+
extern struct smp_operations vexpress_smp_ops;
extern void vexpress_cpu_die(unsigned int cpu);
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
new file mode 100644
index 000000000000..b35700f8e01f
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -0,0 +1,236 @@
+/*
+ * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block
+ *
+ * Created by: Nicolas Pitre, May 2012
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/of_address.h>
+#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/psci.h>
+
+
+#define RST_HOLD0 0x0
+#define RST_HOLD1 0x4
+#define SYS_SWRESET 0x8
+#define RST_STAT0 0xc
+#define RST_STAT1 0x10
+#define EAG_CFG_R 0x20
+#define EAG_CFG_W 0x24
+#define KFC_CFG_R 0x28
+#define KFC_CFG_W 0x2c
+#define DCS_CFG_R 0x30
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() while its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static void __iomem *dcscb_base;
+static int dcscb_use_count[4][2];
+static int dcscb_allcpus_mask[2];
+
+static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
+{
+ unsigned int rst_hold, cpumask = (1 << cpu);
+ unsigned int all_mask = dcscb_allcpus_mask[cluster];
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ if (cpu >= 4 || cluster >= 2)
+ return -EINVAL;
+
+ /*
+ * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+ * variant exists, we need to disable IRQs manually here.
+ */
+ local_irq_disable();
+ arch_spin_lock(&dcscb_lock);
+
+ dcscb_use_count[cpu][cluster]++;
+ if (dcscb_use_count[cpu][cluster] == 1) {
+ rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+ if (rst_hold & (1 << 8)) {
+ /* remove cluster reset and add individual CPU resets */
+ rst_hold &= ~(1 << 8);
+ rst_hold |= all_mask;
+ }
+ rst_hold &= ~(cpumask | (cpumask << 4));
+ writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+ } else if (dcscb_use_count[cpu][cluster] != 2) {
+ /*
+ * The only possible values are:
+ * 0 = CPU down
+ * 1 = CPU (still) up
+ * 2 = CPU requested to be up before it had a chance
+ * to actually power itself down.
+ * Any other value is a bug.
+ */
+ BUG();
+ }
+
+ arch_spin_unlock(&dcscb_lock);
+ local_irq_enable();
+
+ return 0;
+}
+
+static void dcscb_power_down(void)
+{
+ unsigned int mpidr, cpu, cluster, rst_hold, cpumask, all_mask;
+ bool last_man = false, skip_wfi = false;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ cpumask = (1 << cpu);
+ all_mask = dcscb_allcpus_mask[cluster];
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cpu >= 4 || cluster >= 2);
+
+ __mcpm_cpu_going_down(cpu, cluster);
+
+ arch_spin_lock(&dcscb_lock);
+ BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+ dcscb_use_count[cpu][cluster]--;
+ if (dcscb_use_count[cpu][cluster] == 0) {
+ rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+ rst_hold |= cpumask;
+ if (((rst_hold | (rst_hold >> 4)) & all_mask) == all_mask) {
+ rst_hold |= (1 << 8);
+ last_man = true;
+ }
+ writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+ } else if (dcscb_use_count[cpu][cluster] == 1) {
+ /*
+ * A power_up request went ahead of us.
+ * Even if we do not want to shut this CPU down,
+ * the caller expects a certain state as if the WFI
+ * was aborted. So let's continue with cache cleaning.
+ */
+ skip_wfi = true;
+ } else
+ BUG();
+
+ if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+ arch_spin_unlock(&dcscb_lock);
+
+ /* Flush all cache levels for this cluster. */
+ v7_exit_coherency_flush(all);
+
+ /*
+ * This is a harmless no-op. On platforms with a real
+ * outer cache this might either be needed or not,
+ * depending on where the outer cache sits.
+ */
+ outer_flush_all();
+
+ /*
+ * Disable cluster-level coherency by masking
+ * incoming snoops and DVM messages:
+ */
+ cci_disable_port_by_cpu(mpidr);
+
+ __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+ } else {
+ arch_spin_unlock(&dcscb_lock);
+
+ /* Disable and flush the local CPU cache. */
+ v7_exit_coherency_flush(louis);
+ }
+
+ __mcpm_cpu_down(cpu, cluster);
+
+ /* Now we are prepared for power-down, do it: */
+ dsb();
+ if (!skip_wfi)
+ wfi();
+
+ /* Not dead at this point? Let our caller cope. */
+}
+
+static const struct mcpm_platform_ops dcscb_power_ops = {
+ .power_up = dcscb_power_up,
+ .power_down = dcscb_power_down,
+};
+
+static void __init dcscb_usage_count_init(void)
+{
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cpu >= 4 || cluster >= 2);
+ dcscb_use_count[cpu][cluster] = 1;
+}
+
+extern void dcscb_power_up_setup(unsigned int affinity_level);
+
+static int __init dcscb_init(void)
+{
+ struct device_node *node;
+ unsigned int cfg;
+ int ret;
+
+ ret = psci_probe();
+ if (!ret) {
+ pr_debug("psci found. Aborting native init\n");
+ return -ENODEV;
+ }
+
+ if (!cci_probed())
+ return -ENODEV;
+
+ node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb");
+ if (!node)
+ return -ENODEV;
+ dcscb_base = of_iomap(node, 0);
+ if (!dcscb_base)
+ return -EADDRNOTAVAIL;
+ cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
+ dcscb_allcpus_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1;
+ dcscb_allcpus_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1;
+ dcscb_usage_count_init();
+
+ ret = mcpm_platform_register(&dcscb_power_ops);
+ if (!ret)
+ ret = mcpm_sync_init(dcscb_power_up_setup);
+ if (ret) {
+ iounmap(dcscb_base);
+ return ret;
+ }
+
+ pr_info("VExpress DCSCB support installed\n");
+
+ /*
+ * Future entries into the kernel can now go
+ * through the cluster entry vectors.
+ */
+ vexpress_flags_set(virt_to_phys(mcpm_entry_point));
+
+ return 0;
+}
+
+early_initcall(dcscb_init);
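
The dcscb_use_count state machine above is easiest to see in isolation. A minimal user-space sketch, assuming the same three legal counter values (0 = down, 1 = up, 2 = up request racing a pending down); the DCS_CFG_R decode is also shown, on the assumption that bits [19:16] and [23:20] hold the CPU counts of clusters 0 and 1:

	#include <assert.h>
	#include <stdio.h>

	static int use_count[4][2];

	/* power_up(): 0 -> 1 powers the CPU on; 1 -> 2 records an up request
	 * that overtook a not-yet-complete power_down(). */
	static void power_up(int cpu, int cluster)
	{
		use_count[cpu][cluster]++;
		assert(use_count[cpu][cluster] == 1 || use_count[cpu][cluster] == 2);
	}

	/* power_down(): returns 1 when the CPU should really enter wfi,
	 * 0 when a racing power_up() means wfi must be skipped. */
	static int power_down(int cpu, int cluster)
	{
		use_count[cpu][cluster]--;
		if (use_count[cpu][cluster] == 0)
			return 1;
		assert(use_count[cpu][cluster] == 1);
		return 0;
	}

	int main(void)
	{
		unsigned int cfg = 0x330000;	/* pretend DCS_CFG_R: 3 CPUs per cluster */
		for (int cl = 0; cl < 2; cl++)
			printf("cluster %d mask %#x\n", cl,
			       (1 << ((cfg >> (16 + (cl << 2))) & 0xf)) - 1);

		use_count[0][0] = 1;		/* boot CPU comes up "used" */
		power_up(0, 0);			/* racing request: count 2 */
		printf("wfi? %d\n", power_down(0, 0));	/* 0: skip wfi */
		printf("wfi? %d\n", power_down(0, 0));	/* 1: real power down */
		return 0;
	}
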
diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S
new file mode 100644
index 000000000000..4bb7fbe0f621
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb_setup.S
@@ -0,0 +1,38 @@
+/*
+ * arch/arm/mach-vexpress/dcscb_setup.S
+ *
+ * Created by: Dave Martin, 2012-06-22
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+
+ENTRY(dcscb_power_up_setup)
+
+ cmp r0, #0 @ check affinity level
+ beq 2f
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ *
+ * A15/A7 may not require explicit L2 invalidation on reset, depending
+ * on hardware integration decisions.
+ * For now, this code assumes that L2 is either already invalidated,
+ * or invalidation is not required.
+ */
+
+ b cci_enable_port_for_self
+
+2: @ Implementation-specific local CPU setup operations should go here,
+ @ if any. In this case, there is nothing to do.
+
+ bx lr
+
+ENDPROC(dcscb_power_up_setup)
diff --git a/arch/arm/mach-vexpress/include/mach/tc2.h b/arch/arm/mach-vexpress/include/mach/tc2.h
new file mode 100644
index 000000000000..d3b5a2225a0e
--- /dev/null
+++ b/arch/arm/mach-vexpress/include/mach/tc2.h
@@ -0,0 +1,10 @@
+#ifndef __MACH_TC2_H
+#define __MACH_TC2_H
+
+/*
+ * cpu and cluster limits
+ */
+#define TC2_MAX_CPUS 3
+#define TC2_MAX_CLUSTERS 2
+
+#endif
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index dc1ace55d557..b4a5f0d8390d 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -12,9 +12,11 @@
#include <linux/errno.h>
#include <linux/smp.h>
#include <linux/io.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/vexpress.h>
+#include <asm/mcpm.h>
#include <asm/smp_scu.h>
#include <asm/mach/map.h>
@@ -51,7 +53,7 @@ static int __init vexpress_dt_find_scu(unsigned long node,
{
if (of_flat_dt_match(node, vexpress_dt_cortex_a9_match)) {
phys_addr_t phys_addr;
- __be32 *reg = of_get_flat_dt_prop(node, "reg", NULL);
+ const __be32 *reg = of_get_flat_dt_prop(node, "reg", NULL);
if (WARN_ON(!reg))
return -EINVAL;
@@ -203,3 +205,21 @@ struct smp_operations __initdata vexpress_smp_ops = {
.cpu_die = vexpress_cpu_die,
#endif
};
+
+bool __init vexpress_smp_init_ops(void)
+{
+#ifdef CONFIG_MCPM
+ /*
+ * The best way to detect a multi-cluster configuration at the moment
+ * is to look for the presence of a CCI in the system.
+ * Override the default vexpress_smp_ops if so.
+ */
+ struct device_node *node;
+ node = of_find_compatible_node(NULL, NULL, "arm,cci-400");
+ if (node && of_device_is_available(node)) {
+ mcpm_smp_set_ops();
+ return true;
+ }
+#endif
+ return false;
+}
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
new file mode 100644
index 000000000000..9fc264a3bade
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -0,0 +1,277 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm.c - TC2 power management support
+ *
+ * Created by: Nicolas Pitre, October 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * Some portions of this file were originally written by Achin Gupta
+ * Copyright: (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/psci.h>
+
+#include <mach/motherboard.h>
+#include <mach/tc2.h>
+
+#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() after its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t tc2_pm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static int tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];
+
+static int tc2_pm_power_up(unsigned int cpu, unsigned int cluster)
+{
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ if (cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster))
+ return -EINVAL;
+
+ /*
+ * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+ * variant exists, we need to disable IRQs manually here.
+ */
+ local_irq_disable();
+ arch_spin_lock(&tc2_pm_lock);
+
+ if (!tc2_pm_use_count[0][cluster] &&
+ !tc2_pm_use_count[1][cluster] &&
+ !tc2_pm_use_count[2][cluster])
+ vexpress_spc_powerdown_enable(cluster, 0);
+
+ tc2_pm_use_count[cpu][cluster]++;
+ if (tc2_pm_use_count[cpu][cluster] == 1) {
+ vexpress_spc_write_resume_reg(cluster, cpu,
+ virt_to_phys(mcpm_entry_point));
+ vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
+ } else if (tc2_pm_use_count[cpu][cluster] != 2) {
+ /*
+ * The only possible values are:
+ * 0 = CPU down
+ * 1 = CPU (still) up
+ * 2 = CPU requested to be up before it had a chance
+ * to actually power itself down.
+ * Any other value is a bug.
+ */
+ BUG();
+ }
+
+ arch_spin_unlock(&tc2_pm_lock);
+ local_irq_enable();
+
+ return 0;
+}
+
+static void tc2_pm_down(u64 residency)
+{
+ unsigned int mpidr, cpu, cluster;
+ bool last_man = false, skip_wfi = false;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+ __mcpm_cpu_going_down(cpu, cluster);
+
+ arch_spin_lock(&tc2_pm_lock);
+ BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+ tc2_pm_use_count[cpu][cluster]--;
+ if (tc2_pm_use_count[cpu][cluster] == 0) {
+ vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
+ if (!tc2_pm_use_count[0][cluster] &&
+ !tc2_pm_use_count[1][cluster] &&
+ !tc2_pm_use_count[2][cluster] &&
+ (!residency || residency > 5000)) {
+ vexpress_spc_powerdown_enable(cluster, 1);
+ vexpress_spc_set_global_wakeup_intr(1);
+ last_man = true;
+ }
+ } else if (tc2_pm_use_count[cpu][cluster] == 1) {
+ /*
+ * A power_up request went ahead of us.
+ * Even if we do not want to shut this CPU down,
+ * the caller expects a certain state as if the WFI
+ * was aborted. So let's continue with cache cleaning.
+ */
+ skip_wfi = true;
+ } else
+ BUG();
+
+ /*
+ * If the CPU is committed to power down, make sure
+ * the power controller will be in charge of waking it
+ * up upon IRQ, i.e. IRQ lines are cut from the GIC CPU IF
+ * to the CPU by disabling the GIC CPU IF, to prevent wfi
+ * from completing execution behind the power controller's back.
+ */
+ if (!skip_wfi)
+ gic_cpu_if_down();
+
+ if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+ arch_spin_unlock(&tc2_pm_lock);
+
+ if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+ /*
+ * On the Cortex-A15 we need to disable
+ * L2 prefetching before flushing the cache.
+ */
+ asm volatile(
+ "mcr p15, 1, %0, c15, c0, 3 \n\t"
+ "isb \n\t"
+ "dsb "
+ : : "r" (0x400) );
+ }
+
+ v7_exit_coherency_flush(all);
+
+ cci_disable_port_by_cpu(mpidr);
+
+ __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+ } else {
+ /*
+ * If last man then undo any setup done previously.
+ */
+ if (last_man) {
+ vexpress_spc_powerdown_enable(cluster, 0);
+ vexpress_spc_set_global_wakeup_intr(0);
+ }
+
+ arch_spin_unlock(&tc2_pm_lock);
+
+ v7_exit_coherency_flush(louis);
+ }
+
+ __mcpm_cpu_down(cpu, cluster);
+
+ /* Now we are prepared for power-down, do it: */
+ if (!skip_wfi)
+ wfi();
+
+ /* Not dead at this point? Let our caller cope. */
+}
+
+static void tc2_pm_power_down(void)
+{
+ tc2_pm_down(0);
+}
+
+static void tc2_pm_suspend(u64 residency)
+{
+ extern void tc2_resume(void);
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ vexpress_spc_write_resume_reg(cluster, cpu,
+ virt_to_phys(tc2_resume));
+
+ tc2_pm_down(residency);
+}
+
+static void tc2_pm_powered_up(void)
+{
+ unsigned int mpidr, cpu, cluster;
+ unsigned long flags;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+ local_irq_save(flags);
+ arch_spin_lock(&tc2_pm_lock);
+
+ if (!tc2_pm_use_count[0][cluster] &&
+ !tc2_pm_use_count[1][cluster] &&
+ !tc2_pm_use_count[2][cluster]) {
+ vexpress_spc_powerdown_enable(cluster, 0);
+ vexpress_spc_set_global_wakeup_intr(0);
+ }
+
+ if (!tc2_pm_use_count[cpu][cluster])
+ tc2_pm_use_count[cpu][cluster] = 1;
+
+ vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 0);
+ vexpress_spc_write_resume_reg(cluster, cpu, 0);
+
+ arch_spin_unlock(&tc2_pm_lock);
+ local_irq_restore(flags);
+}
+
+static const struct mcpm_platform_ops tc2_pm_power_ops = {
+ .power_up = tc2_pm_power_up,
+ .power_down = tc2_pm_power_down,
+ .suspend = tc2_pm_suspend,
+ .powered_up = tc2_pm_powered_up,
+};
+
+static void __init tc2_pm_usage_count_init(void)
+{
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+ tc2_pm_use_count[cpu][cluster] = 1;
+}
+
+extern void tc2_pm_power_up_setup(unsigned int affinity_level);
+
+static int __init tc2_pm_init(void)
+{
+ int ret;
+
+ ret = psci_probe();
+ if (!ret) {
+ pr_debug("psci found. Aborting native init\n");
+ return -ENODEV;
+ }
+
+ if (!vexpress_spc_check_loaded())
+ return -ENODEV;
+
+ tc2_pm_usage_count_init();
+
+ ret = mcpm_platform_register(&tc2_pm_power_ops);
+ if (!ret)
+ ret = mcpm_sync_init(tc2_pm_power_up_setup);
+ if (!ret)
+ pr_info("TC2 power management initialized\n");
+ return ret;
+}
+
+early_initcall(tc2_pm_init);
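
The cluster-level decision in tc2_pm_down() combines the per-CPU use counts with the residency hint: the last CPU down may only take the cluster with it when every counter is zero and the expected residency (0 meaning "no hint") exceeds the break-even threshold. A stand-alone sketch of just that predicate, with the 5000 threshold taken verbatim from the code above (its unit is not spelled out here):

	#include <stdbool.h>
	#include <stdio.h>

	#define TC2_MAX_CPUS     3
	#define TC2_MAX_CLUSTERS 2

	static int use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];

	/* Mirrors the "last man" test in tc2_pm_down(): cluster power-down is
	 * allowed only when every CPU in the cluster is down and the residency
	 * hint justifies it. */
	static bool cluster_may_power_down(int cluster, unsigned long long residency)
	{
		for (int cpu = 0; cpu < TC2_MAX_CPUS; cpu++)
			if (use_count[cpu][cluster])
				return false;
		return !residency || residency > 5000;
	}

	int main(void)
	{
		use_count[0][0] = 1;
		printf("%d\n", cluster_may_power_down(0, 10000)); /* 0: CPU0 still up */
		use_count[0][0] = 0;
		printf("%d\n", cluster_may_power_down(0, 1000));  /* 0: residency too short */
		printf("%d\n", cluster_may_power_down(0, 0));     /* 1: no hint, allow */
		return 0;
	}
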
diff --git a/arch/arm/mach-vexpress/tc2_pm_psci.c b/arch/arm/mach-vexpress/tc2_pm_psci.c
new file mode 100644
index 000000000000..c2fdc22e4c06
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm_psci.c
@@ -0,0 +1,173 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm_psci.c - TC2 PSCI support
+ *
+ * Created by: Achin Gupta, December 2012
+ * Copyright: (C) 2012 ARM Limited
+ *
+ * Some portions of this file were originally written by Nicolas Pitre
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/psci.h>
+#include <asm/atomic.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+
+#include <mach/motherboard.h>
+#include <mach/tc2.h>
+
+#include <linux/vexpress.h>
+
+/*
+ * Platform specific state id understood by the firmware and used to
+ * program the power controller
+ */
+#define PSCI_POWER_STATE_ID 0
+
+static atomic_t tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];
+
+static int tc2_pm_psci_power_up(unsigned int cpu, unsigned int cluster)
+{
+ unsigned int mpidr = (cluster << 8) | cpu;
+ int ret = 0;
+
+ BUG_ON(!psci_ops.cpu_on);
+
+ switch (atomic_inc_return(&tc2_pm_use_count[cpu][cluster])) {
+ case 1:
+ /*
+ * This is a request to power up a CPU that Linux thinks has
+ * been powered down. Retries are needed while the firmware
+ * has not yet seen the earlier power-down request.
+ */
+ do
+ ret = psci_ops.cpu_on(mpidr,
+ virt_to_phys(mcpm_entry_point));
+ while (ret == -EAGAIN);
+
+ return ret;
+ case 2:
+ /* This power up request has overtaken a power down request */
+ return ret;
+ default:
+ /* Any other value is a bug */
+ BUG();
+ }
+}
+
+static void tc2_pm_psci_power_down(void)
+{
+ struct psci_power_state power_state;
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ BUG_ON(!psci_ops.cpu_off);
+
+ switch (atomic_dec_return(&tc2_pm_use_count[cpu][cluster])) {
+ case 1:
+ /*
+ * Overtaken by a power up. Flush caches, exit coherency,
+ * return & fake a reset
+ */
+ set_cr(get_cr() & ~CR_C);
+
+ flush_cache_louis();
+
+ asm volatile ("clrex");
+ set_auxcr(get_auxcr() & ~(1 << 6));
+
+ return;
+ case 0:
+ /* A normal request to possibly power down the cluster */
+ power_state.id = PSCI_POWER_STATE_ID;
+ power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN;
+ power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1;
+
+ psci_ops.cpu_off(power_state);
+
+ /* On success this function never returns */
+ default:
+ /* Any other value is a bug */
+ BUG();
+ }
+}
+
+static void tc2_pm_psci_suspend(u64 unused)
+{
+ struct psci_power_state power_state;
+
+ BUG_ON(!psci_ops.cpu_suspend);
+
+ /* On TC2 always attempt to power down the cluster */
+ power_state.id = PSCI_POWER_STATE_ID;
+ power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN;
+ power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1;
+
+ psci_ops.cpu_suspend(power_state, virt_to_phys(mcpm_entry_point));
+
+ /* On success this function never returns */
+ BUG();
+}
+
+static const struct mcpm_platform_ops tc2_pm_power_ops = {
+ .power_up = tc2_pm_psci_power_up,
+ .power_down = tc2_pm_psci_power_down,
+ .suspend = tc2_pm_psci_suspend,
+};
+
+static void __init tc2_pm_usage_count_init(void)
+{
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+ atomic_set(&tc2_pm_use_count[cpu][cluster], 1);
+}
+
+static int __init tc2_pm_psci_init(void)
+{
+ int ret;
+
+ ret = psci_probe();
+ if (ret) {
+ pr_debug("psci not found. Aborting psci init\n");
+ return -ENODEV;
+ }
+
+ if (!vexpress_spc_check_loaded()) {
+ pr_debug("spc not found. Aborting psci init\n");
+ return -ENODEV;
+ }
+
+ tc2_pm_usage_count_init();
+
+ ret = mcpm_platform_register(&tc2_pm_power_ops);
+ if (!ret)
+ ret = mcpm_sync_init(NULL);
+ if (!ret)
+ pr_info("TC2 power management initialized\n");
+ return ret;
+}
+
+early_initcall(tc2_pm_psci_init);
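
The retry loop in tc2_pm_psci_power_up() exists because PSCI CPU_ON can race a CPU_OFF that the firmware has not finished processing; the call is simply repeated while it returns -EAGAIN. A user-space sketch of the pattern, with a hypothetical stand-in for psci_ops.cpu_on():

	#include <errno.h>
	#include <stdio.h>

	/* Stand-in for psci_ops.cpu_on(): returns -EAGAIN until the firmware
	 * has retired a racing CPU_OFF for the same core (hypothetical model). */
	static int fake_cpu_on(unsigned long mpidr, int *pending_off)
	{
		if (*pending_off) {
			(*pending_off)--;
			return -EAGAIN;
		}
		printf("cpu %lx on\n", mpidr);
		return 0;
	}

	int main(void)
	{
		int pending_off = 2, ret;

		do
			ret = fake_cpu_on(0x100, &pending_off);
		while (ret == -EAGAIN);

		return ret;
	}
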
diff --git a/arch/arm/mach-vexpress/tc2_pm_setup.S b/arch/arm/mach-vexpress/tc2_pm_setup.S
new file mode 100644
index 000000000000..a18dafeeb0ee
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm_setup.S
@@ -0,0 +1,68 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm_setup.S
+ *
+ * Created by: Nicolas Pitre, October 2012
+ * (based on dcscb_setup.S by Dave Martin)
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/linkage.h>
+#include <asm/mcpm.h>
+
+
+#define SPC_PHYS_BASE 0x7FFF0000
+#define SPC_WAKE_INT_STAT 0xb2c
+
+#define SNOOP_CTL_A15 0x404
+#define SNOOP_CTL_A7 0x504
+
+#define A15_SNOOP_MASK (0x3 << 7)
+#define A7_SNOOP_MASK (0x1 << 13)
+
+#define A15_BX_ADDR0 0xB68
+
+
+ENTRY(tc2_resume)
+ mrc p15, 0, r0, c0, c0, 5
+ ubfx r1, r0, #0, #4 @ r1 = cpu
+ ubfx r2, r0, #8, #4 @ r2 = cluster
+ add r1, r1, r2, lsl #2 @ r1 = index of CPU in WAKE_INT_STAT
+ ldr r3, =SPC_PHYS_BASE + SPC_WAKE_INT_STAT
+ ldr r3, [r3]
+ lsr r3, r1
+ tst r3, #1
+ wfieq @ if no pending IRQ, re-enter wfi
+ b mcpm_entry_point
+ENDPROC(tc2_resume)
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ */
+
+ENTRY(tc2_pm_power_up_setup)
+
+ cmp r0, #0
+ beq 2f
+
+ b cci_enable_port_for_self
+
+2: @ Clear the BX addr register
+ ldr r3, =SPC_PHYS_BASE + A15_BX_ADDR0
+ mrc p15, 0, r0, c0, c0, 5 @ MPIDR
+ ubfx r1, r0, #8, #4 @ cluster
+ ubfx r0, r0, #0, #4 @ cpu
+ add r3, r3, r1, lsl #4
+ mov r1, #0
+ str r1, [r3, r0, lsl #2]
+ dsb
+
+ bx lr
+
+ENDPROC(tc2_pm_power_up_setup)
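
The index computed at the top of tc2_resume is cpu + (cluster << 2), i.e. one pending-wake bit per CPU with clusters spaced four bits apart in SPC_WAKE_INT_STAT; that layout is implied by the assembly rather than documented here. The same test in C:

	#include <stdbool.h>
	#include <stdio.h>

	static bool wake_irq_pending(unsigned int wake_int_stat,
				     unsigned int cpu, unsigned int cluster)
	{
		unsigned int idx = cpu + (cluster << 2);	/* add r1, r1, r2, lsl #2 */
		return (wake_int_stat >> idx) & 1;		/* lsr + tst pair */
	}

	int main(void)
	{
		/* bit 5 set: cpu 1 of cluster 1 has a pending wake IRQ */
		printf("%d\n", wake_irq_pending(1 << 5, 1, 1));
		printf("%d\n", wake_irq_pending(1 << 5, 0, 0));
		return 0;
	}
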
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 8802030df98d..057f99b62eaf 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -10,6 +10,7 @@
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/irqchip.h>
+#include <linux/memblock.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
#include <linux/of_irq.h>
@@ -373,6 +374,31 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express")
.init_machine = v2m_init,
MACHINE_END
+static void __init v2m_dt_hdlcd_init(void)
+{
+ struct device_node *node;
+ int len, na, ns;
+ const __be32 *prop;
+ phys_addr_t fb_base, fb_size;
+
+ node = of_find_compatible_node(NULL, NULL, "arm,hdlcd");
+ if (!node)
+ return;
+
+ na = of_n_addr_cells(node);
+ ns = of_n_size_cells(node);
+
+ prop = of_get_property(node, "framebuffer", &len);
+ if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop)))
+ return;
+
+ fb_base = of_read_number(prop, na);
+ fb_size = of_read_number(prop + na, ns);
+
+ if (WARN_ON(memblock_remove(fb_base, fb_size)))
+ return;
+}
+
static struct map_desc v2m_rs1_io_desc __initdata = {
.virtual = V2M_PERIPH,
.pfn = __phys_to_pfn(0x1c000000),
@@ -423,6 +449,8 @@ void __init v2m_dt_init_early(void)
pr_warning("vexpress: DT HBI (%x) is not matching "
"hardware (%x)!\n", dt_hbi, hbi);
}
+
+ v2m_dt_hdlcd_init();
}
static void __init v2m_dt_timer_init(void)
@@ -456,6 +484,7 @@ static const char * const v2m_dt_match[] __initconst = {
DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
.dt_compat = v2m_dt_match,
.smp = smp_ops(vexpress_smp_ops),
+ .smp_init = smp_init_ops(vexpress_smp_init_ops),
.map_io = v2m_dt_map_io,
.init_early = v2m_dt_init_early,
.init_irq = irqchip_init,
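
v2m_dt_hdlcd_init() above reads a two-number property whose field widths come from #address-cells and #size-cells; each cell is a 32-bit big-endian word, and of_read_number() folds na (or ns) of them into one value. A user-space model of that decode (demo values pre-swapped, so the example assumes a little-endian host):

	#include <stdint.h>
	#include <stdio.h>

	/* minimal model of of_read_number(): cells are stored big-endian */
	static uint64_t read_number(const uint32_t *cells_be, int ncells)
	{
		uint64_t r = 0;
		while (ncells--)
			r = (r << 32) | __builtin_bswap32(*cells_be++);
		return r;
	}

	int main(void)
	{
		/* e.g. <0x0 0x18000000 0x0 0x01000000>: base 0x18000000, 16 MiB */
		const uint32_t prop[] = {
			__builtin_bswap32(0x0), __builtin_bswap32(0x18000000),
			__builtin_bswap32(0x0), __builtin_bswap32(0x01000000),
		};
		int na = 2, ns = 2;
		printf("base=%#llx size=%#llx\n",
		       (unsigned long long)read_number(prop, na),
		       (unsigned long long)read_number(prop + na, ns));
		return 0;
	}
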
diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile
index 042afc1f8c44..7ddbfa60227f 100644
--- a/arch/arm/mach-virt/Makefile
+++ b/arch/arm/mach-virt/Makefile
@@ -3,4 +3,3 @@
#
obj-y := virt.o
-obj-$(CONFIG_SMP) += platsmp.o
diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c
deleted file mode 100644
index f4143f5bfa5b..000000000000
--- a/arch/arm/mach-virt/platsmp.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Dummy Virtual Machine - does what it says on the tin.
- *
- * Copyright (C) 2012 ARM Ltd
- * Author: Will Deacon <will.deacon@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/of.h>
-
-#include <asm/psci.h>
-#include <asm/smp_plat.h>
-
-extern void secondary_startup(void);
-
-static void __init virt_smp_init_cpus(void)
-{
-}
-
-static void __init virt_smp_prepare_cpus(unsigned int max_cpus)
-{
-}
-
-static int __cpuinit virt_boot_secondary(unsigned int cpu,
- struct task_struct *idle)
-{
- if (psci_ops.cpu_on)
- return psci_ops.cpu_on(cpu_logical_map(cpu),
- __pa(secondary_startup));
- return -ENODEV;
-}
-
-struct smp_operations __initdata virt_smp_ops = {
- .smp_init_cpus = virt_smp_init_cpus,
- .smp_prepare_cpus = virt_smp_prepare_cpus,
- .smp_boot_secondary = virt_boot_secondary,
-};
diff --git a/arch/arm/mach-virt/virt.c b/arch/arm/mach-virt/virt.c
index 061f283f579e..a67d2dd5bb60 100644
--- a/arch/arm/mach-virt/virt.c
+++ b/arch/arm/mach-virt/virt.c
@@ -36,11 +36,8 @@ static const char *virt_dt_match[] = {
NULL
};
-extern struct smp_operations virt_smp_ops;
-
DT_MACHINE_START(VIRT, "Dummy Virtual Machine")
.init_irq = irqchip_init,
.init_machine = virt_init,
- .smp = smp_ops(virt_smp_ops),
.dt_compat = virt_dt_match,
MACHINE_END
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c21082d664ed..36e9f24e03b0 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -931,3 +931,9 @@ config ARCH_HAS_BARRIERS
help
This option allows the use of custom mandatory barriers
included via the mach/barriers.h file.
+
+config ARCH_SUPPORTS_BIG_ENDIAN
+ bool
+ help
+ This option specifies that the architecture can support big-endian
+ operation.
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 9e51be96f635..224a9cc09877 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o
obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o
obj-$(CONFIG_HIGHMEM) += highmem.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o
obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 5d777a567c35..8c48c5c22a33 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -32,9 +32,8 @@ ENTRY(v6_early_abort)
bne do_DataAbort
bic r1, r1, #1 << 11 @ clear bit 11 of FSR
ldr r3, [r4] @ read aborted ARM instruction
-#ifdef CONFIG_CPU_ENDIAN_BE8
- rev r3, r3
-#endif
+ ARM_BE8(rev r3, r3)
+
do_ldrd_abort tmp=ip, insn=r3
tst r3, #1 << 20 @ L = 0 -> write
orreq r1, r1, #1 << 11 @ yes.
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 1fe0bf5c7375..d301662b7b32 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -25,6 +25,7 @@
#include <asm/cp15.h>
#include <asm/system_info.h>
#include <asm/unaligned.h>
+#include <asm/opcodes.h>
#include "fault.h"
@@ -763,21 +764,25 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (thumb_mode(regs)) {
u16 *ptr = (u16 *)(instrptr & ~1);
fault = probe_kernel_address(ptr, tinstr);
+ tinstr = __mem_to_opcode_thumb16(tinstr);
if (!fault) {
if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
IS_T32(tinstr)) {
/* Thumb-2 32-bit */
u16 tinst2 = 0;
fault = probe_kernel_address(ptr + 1, tinst2);
- instr = (tinstr << 16) | tinst2;
+ tinst2 = __mem_to_opcode_thumb16(tinst2);
+ instr = __opcode_thumb32_compose(tinstr, tinst2);
thumb2_32b = 1;
} else {
isize = 2;
instr = thumb2arm(tinstr);
}
}
- } else
+ } else {
fault = probe_kernel_address(instrptr, instr);
+ instr = __mem_to_opcode_arm(instr);
+ }
if (fault) {
type = TYPE_FAULT;
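
The __mem_to_opcode_*() calls added above matter for BE8 kernels: with CONFIG_CPU_ENDIAN_BE8 data accesses are big-endian while instructions remain little-endian in memory, so an instruction word fetched via a data load must be byte-reversed before do_alignment() tests its bits. A sketch of the conversion under that assumption:

	#include <stdint.h>
	#include <stdio.h>

	#define CPU_ENDIAN_BE8 1	/* pretend the kernel was built big-endian */

	static uint32_t mem_to_opcode_arm(uint32_t x)
	{
	#if CPU_ENDIAN_BE8
		return __builtin_bswap32(x);	/* undo the data-access byte order */
	#else
		return x;
	#endif
	}

	int main(void)
	{
		/* an LDR encoding read back through a (big-endian) data load */
		uint32_t raw = __builtin_bswap32(0xe5901000);
		printf("%#x\n", mem_to_opcode_arm(raw));	/* 0xe5901000 again */
		return 0;
	}
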
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 515b00064da8..a84e0536ce74 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -146,18 +146,18 @@ flush_levels:
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
loop1:
- mov r9, r4 @ create working copy of max way size
+ mov r9, r7 @ create working copy of max index
loop2:
- ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
- THUMB( lsl r6, r9, r5 )
+ ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11
+ THUMB( lsl r6, r4, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
- ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
- THUMB( lsl r6, r7, r2 )
+ ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11
+ THUMB( lsl r6, r9, r2 )
THUMB( orr r11, r11, r6 ) @ factor index number into r11
mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
- subs r9, r9, #1 @ decrement the way
+ subs r9, r9, #1 @ decrement the index
bge loop2
- subs r7, r7, #1 @ decrement the index
+ subs r4, r4, #1 @ decrement the way
bge loop1
skip:
add r10, r10, #2 @ increment cache number
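
The cache-v7.S change above fixes which loop walks ways and which walks set indices: after the fix the outer loop counts ways down in r4 and the inner loop counts indices down in r9, both folded into the set/way operand. The corrected structure rendered in C, with the mcr reduced to a stub (the shifts are the CCSIDR-derived values the assembly keeps in r5 and r2):

	#include <stdint.h>

	/* stand-in for: mcr p15, 0, setway, c7, c14, 2 (clean & invalidate) */
	static void dccisw(uint32_t setway) { (void)setway; }

	static void flush_level(unsigned int level, int max_way, int max_index,
				unsigned int way_shift, unsigned int index_shift)
	{
		for (int way = max_way; way >= 0; way--)		/* loop1: ways */
			for (int idx = max_index; idx >= 0; idx--)	/* loop2: indices */
				dccisw(((uint32_t)way << way_shift) |
				       ((uint32_t)idx << index_shift) |
				       (level << 1));
	}

	int main(void)
	{
		flush_level(0, 3, 255, 30, 6);	/* e.g. a 4-way, 256-set L1 */
		return 0;
	}
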
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 6c9d7054d997..096179c4e0c9 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -250,7 +250,7 @@ static void __dma_free_buffer(struct page *page, size_t size)
#ifdef CONFIG_MMU
#ifdef CONFIG_HUGETLB_PAGE
-#error ARM Coherent DMA allocator does not (yet) support huge TLB
+#warning ARM Coherent DMA allocator does not (yet) support huge TLB
#endif
static void *__alloc_from_contiguous(struct device *dev, size_t size,
@@ -358,7 +358,7 @@ static int __init atomic_pool_init(void)
if (!pages)
goto no_pages;
- if (IS_ENABLED(CONFIG_CMA))
+ if (IS_ENABLED(CONFIG_DMA_CMA))
ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,
atomic_pool_init);
else
@@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
addr = __alloc_simple_buffer(dev, size, gfp, &page);
else if (!(gfp & __GFP_WAIT))
addr = __alloc_from_pool(size, &page);
- else if (!IS_ENABLED(CONFIG_CMA))
+ else if (!IS_ENABLED(CONFIG_DMA_CMA))
addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
else
addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
@@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
__dma_free_buffer(page, size);
} else if (__free_from_pool(cpu_addr, size)) {
return;
- } else if (!IS_ENABLED(CONFIG_CMA)) {
+ } else if (!IS_ENABLED(CONFIG_DMA_CMA)) {
__dma_free_remap(cpu_addr, size);
__dma_free_buffer(page, size);
} else {
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5dbf13f954f6..e207aa5f846f 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -446,8 +446,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
if (pud_none(*pud_k))
goto bad_area;
- if (!pud_present(*pud))
+ if (!pud_present(*pud)) {
set_pud(pud, *pud_k);
+ /*
+ * There is a small window during free_pgtables() where the
+ * user *pud entry is 0 but the TLB has not been invalidated
+ * and we get a level 2 (pmd) translation fault caused by the
+ * intermediate TLB caching of the old level 1 (pud) entry.
+ */
+ flush_tlb_kernel_page(addr);
+ }
pmd = pmd_offset(pud, addr);
pmd_k = pmd_offset(pud_k, addr);
@@ -470,8 +478,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
#endif
if (pmd_none(pmd_k[index]))
goto bad_area;
+ if (!pmd_present(pmd[index]))
+ copy_pmd(pmd, pmd_k);
- copy_pmd(pmd, pmd_k);
return 0;
bad_area:
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 32aa5861119f..c9e37aac450b 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -17,6 +17,7 @@
#include <asm/highmem.h>
#include <asm/smp_plat.h>
#include <asm/tlbflush.h>
+#include <linux/hugetlb.h>
#include "mm.h"
@@ -168,19 +169,23 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
* coherent with the kernels mapping.
*/
if (!PageHighMem(page)) {
- __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
+ size_t page_size = PAGE_SIZE << compound_order(page);
+ __cpuc_flush_dcache_area(page_address(page), page_size);
} else {
- void *addr;
-
+ unsigned long i;
if (cache_is_vipt_nonaliasing()) {
- addr = kmap_atomic(page);
- __cpuc_flush_dcache_area(addr, PAGE_SIZE);
- kunmap_atomic(addr);
- } else {
- addr = kmap_high_get(page);
- if (addr) {
+ for (i = 0; i < (1 << compound_order(page)); i++) {
+ void *addr = kmap_atomic(page + i);
__cpuc_flush_dcache_area(addr, PAGE_SIZE);
- kunmap_high(page);
+ kunmap_atomic(addr);
+ }
+ } else {
+ for (i = 0; i < (1 << compound_order(page)); i++) {
+ void *addr = kmap_high_get(page + i);
+ if (addr) {
+ __cpuc_flush_dcache_area(addr, PAGE_SIZE);
+ kunmap_high(page + i);
+ }
}
}
}
diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c
index 05a4e9431836..ab4409a2307e 100644
--- a/arch/arm/mm/fsr-3level.c
+++ b/arch/arm/mm/fsr-3level.c
@@ -9,11 +9,11 @@ static struct fsr_info fsr_info[] = {
{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
{ do_bad, SIGBUS, 0, "reserved access flag fault" },
{ do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
- { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
+ { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
{ do_bad, SIGBUS, 0, "reserved permission fault" },
{ do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
- { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
+ { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_bad, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "asynchronous external abort" },
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
new file mode 100644
index 000000000000..3d1e4a205b0b
--- /dev/null
+++ b/arch/arm/mm/hugetlbpage.c
@@ -0,0 +1,101 @@
+/*
+ * arch/arm/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+/*
+ * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot
+ * of type casting from pmd_t * to pte_t *.
+ */
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd = NULL;
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, addr);
+ if (pud_present(*pud))
+ pmd = pmd_offset(pud, addr);
+ }
+
+ return (pte_t *)pmd;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+ int write)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+ return 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pte_t *pte = NULL;
+
+ pgd = pgd_offset(mm, addr);
+ pud = pud_alloc(mm, pgd, addr);
+ if (pud)
+ pte = (pte_t *)pmd_alloc(mm, pud, addr);
+
+ return pte;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+ pmd_t *pmd, int write)
+{
+ struct page *page;
+
+ page = pte_page(*(pte_t *)pmd);
+ if (page)
+ page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+ return page;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
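
The pointer arithmetic in follow_huge_pmd() selects the tail page of a huge page: the offset of the address within the section, counted in small pages. With the 2 MiB sections this file targets (PMD_SHIFT of 21 and 4 KiB pages assumed):

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PMD_SHIFT  21
	#define PMD_MASK   (~((1UL << PMD_SHIFT) - 1))

	int main(void)
	{
		unsigned long address = 0x40123456;
		/* page += ((address & ~PMD_MASK) >> PAGE_SHIFT) in the code above */
		unsigned long tail = (address & ~PMD_MASK) >> PAGE_SHIFT;
		printf("tail page index = %lu\n", tail);	/* 0x123 = 291 */
		return 0;
	}
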
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index c61d2373408c..990837379112 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -10,6 +10,7 @@
#include <asm/system_info.h>
pgd_t *idmap_pgd;
+phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
#ifdef CONFIG_ARM_LPAE
static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
@@ -74,8 +75,8 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,
unsigned long addr, end;
unsigned long next;
- addr = virt_to_phys(text_start);
- end = virt_to_phys(text_end);
+ addr = virt_to_idmap(text_start);
+ end = virt_to_idmap(text_end);
prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 0ecc43fd6229..c12ae661d4ab 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -76,7 +76,7 @@ static int __init parse_tag_initrd2(const struct tag *tag)
__tagtable(ATAG_INITRD2, parse_tag_initrd2);
#ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(unsigned long start, unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
phys_initrd_start = start;
phys_initrd_size = end - start;
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index d07352819580..b96c6e64943e 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -219,9 +219,7 @@ __v6_setup:
@ complete invalidations
adr r5, v6_crval
ldmia r5, {r5, r6}
-#ifdef CONFIG_CPU_ENDIAN_BE8
- orr r6, r6, #1 << 25 @ big-endian page tables
-#endif
+ ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables
mrc p15, 0, r0, c1, c0, 0 @ read control register
bic r0, r0, r5 @ clear bits them
orr r0, r0, r6 @ set them
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 19da84172cc3..769496e6e8e9 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -352,9 +352,7 @@ __v7_setup:
#endif
adr r5, v7_crval
ldmia r5, {r5, r6}
-#ifdef CONFIG_CPU_ENDIAN_BE8
- orr r6, r6, #1 << 25 @ big-endian page tables
-#endif
+ ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables
#ifdef CONFIG_SWP_EMULATE
orr r5, r5, #(1 << 10) @ set SW bit in "clear"
bic r6, r6, #(1 << 10) @ clear it in "mmuset"
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6de423dbd385..78351ca8d51e 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -19,6 +19,7 @@
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
#include <asm/hwcap.h>
+#include <asm/opcodes.h>
#include "bpf_jit_32.h"
@@ -113,8 +114,11 @@ static u32 jit_udiv(u32 dividend, u32 divisor)
static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
+ inst |= (cond << 28);
+ inst = __opcode_to_mem_arm(inst);
+
if (ctx->target != NULL)
- ctx->target[ctx->idx] = inst | (cond << 28);
+ ctx->target[ctx->idx] = inst;
ctx->idx++;
}
diff --git a/arch/arm/plat-samsung/s5p-dev-mfc.c b/arch/arm/plat-samsung/s5p-dev-mfc.c
index a93fb6fb6606..586ca73d1059 100644
--- a/arch/arm/plat-samsung/s5p-dev-mfc.c
+++ b/arch/arm/plat-samsung/s5p-dev-mfc.c
@@ -116,8 +116,8 @@ device_initcall(s5p_mfc_memory_init);
int __init s5p_fdt_find_mfc_mem(unsigned long node, const char *uname,
int depth, void *data)
{
- __be32 *prop;
- unsigned long len;
+ const __be32 *prop;
+ int len;
struct s5p_mfc_dt_meminfo *mfc_mem = data;
if (!data)
diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S
index b178d44e9eaa..40f27e52de75 100644
--- a/arch/arm/plat-versatile/headsmp.S
+++ b/arch/arm/plat-versatile/headsmp.S
@@ -10,8 +10,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
-
- __INIT
+#include <asm/assembler.h>
/*
* Realview/Versatile Express specific entry point for secondary CPUs.
@@ -19,6 +18,7 @@
* until we're ready for them to initialise.
*/
ENTRY(versatile_secondary_startup)
+ ARM_BE8(setend be)
mrc p15, 0, r0, c0, c0, 5
bic r0, #0xff000000
adr r4, 1f
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0677ff4814fa..279594b83781 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,6 +1,9 @@
config ARM64
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_HAS_OPP
+ select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
@@ -8,30 +11,53 @@ config ARM64
select ARM_AMBA
select ARM_ARCH_TIMER
select ARM_GIC
+ select ARM_GIC_V3
+ select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
+ select CPU_PM if (SUSPEND || CPU_IDLE)
+ select DCACHE_WORD_ACCESS
select GENERIC_CLOCKEVENTS
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select GENERIC_CPU_AUTOPROBE
+ select GENERIC_EARLY_IOREMAP
select GENERIC_IOMAP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
select HARDIRQS_SW_RESEND
+ select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
+ select HAVE_DMA_CONTIGUOUS
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GENERIC_DMA_COHERENT
select HAVE_GENERIC_HARDIRQS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_MEMBLOCK
+ select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
+ select OF_RESERVED_MEM
select PERF_USE_VMALLOC
select POWER_RESET
select POWER_SUPPLY
@@ -62,11 +88,7 @@ config LOCKDEP_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config GENERIC_LOCKBREAK
- def_bool y
- depends on SMP && PREEMPT
-
-config RWSEM_GENERIC_SPINLOCK
+config RWSEM_XCHGADD_ALGORITHM
def_bool y
config GENERIC_HWEIGHT
@@ -78,7 +100,7 @@ config GENERIC_CSUM
config GENERIC_CALIBRATE_DELAY
def_bool y
-config ZONE_DMA32
+config ZONE_DMA
def_bool y
config ARCH_DMA_ADDR_T_64BIT
@@ -96,6 +118,9 @@ config SWIOTLB
config IOMMU_HELPER
def_bool SWIOTLB
+config FIX_EARLYCON_MEM
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -112,6 +137,11 @@ config ARCH_VEXPRESS
This enables support for the ARMv8 software model (Versatile
Express).
+config ARCH_XGENE
+ bool "AppliedMicro X-Gene SOC Family"
+ help
+ This enables support for the AppliedMicro X-Gene SOC family.
+
endmenu
menu "Bus support"
@@ -131,6 +161,11 @@ config ARM64_64K_PAGES
look-up. AArch32 emulation is not available when this feature
is enabled.
+config CPU_BIG_ENDIAN
+ bool "Build big-endian kernel"
+ help
+ Say Y if you plan on running a kernel in big-endian mode.
+
config SMP
bool "Symmetric Multi-Processing"
select USE_GENERIC_SMP_HELPERS
@@ -145,11 +180,131 @@ config SMP
If you don't know what to do here, say N.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+ bool "SMT scheduler support"
+ depends on SMP
+ help
+ Improves the CPU scheduler's decision making when dealing with
+ MultiThreading at a cost of slightly increased overhead in some
+ places. If unsure say N here.
+
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+ bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+ help
+ Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+ bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling"
+ depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+ help
+ Experimental scheduler optimizations for heterogeneous platforms.
+ Attempts to introspectively select task affinity to optimize power
+ and performance. Basic support for multiple (>2) cpu types is in place,
+ but it has only been tested with two types of cpus.
+ There is currently no support for migration of task groups, hence
+ !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+ between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+
+config SCHED_HMP_PRIO_FILTER
+ bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
+ depends on SCHED_HMP
+ help
+ Enables task priority based HMP migration filter. Any task with
+ a NICE value above the threshold will always be on low-power cpus
+ with less compute capacity.
+
+config SCHED_HMP_PRIO_FILTER_VAL
+ int "NICE priority threshold"
+ default 5
+ depends on SCHED_HMP_PRIO_FILTER
+
+config HMP_FAST_CPU_MASK
+ string "HMP scheduler fast CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the fast CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+ string "HMP scheduler slow CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the slow CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_VARIABLE_SCALE
+ bool "Allows changing the load tracking scale through sysfs"
+ depends on SCHED_HMP
+ help
+ When turned on, this option exports the thresholds and load average
+ period value for the load tracking patches through sysfs.
+ The values can be modified to change the rate of load accumulation
+ and the thresholds used for HMP migration.
+ The load_avg_period_ms is the time in ms it takes an idle task with
+ a load average ratio of 0 that starts a busy loop to reach a load
+ average of 0.5.
+ The up_threshold and down_threshold are the values used to decide
+ whether to move a task to a faster CPU or back to a slower one.
+ The {up,down}_threshold are divided by 1024 before being compared
+ to the load average.
+ For example, with load_avg_period_ms = 128 and up_threshold = 512,
+ a running task starting from a load of 0 will be migrated to a
+ bigger CPU after 128 ms, because after 128 ms its load_avg_ratio
+ is 0.5 and the real up_threshold is 0.5.
+ This patch has the same behaviour as changing the y of the load
+ average computation to
+ (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
+ but it removes intermediate overflows in the computation.
+
+config HMP_FREQUENCY_INVARIANT_SCALE
+ bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
+ depends on HMP_VARIABLE_SCALE && CPU_FREQ
+ help
+ Scales the current load contribution in line with the frequency
+ of the CPU that the task was executed on.
+ In this version, we use a simple linear scale derived from the
+ maximum frequency reported by CPUFreq.
+ Restricting tracked load to be scaled by the CPU's frequency
+ represents the consumption of possible compute capacity
+ (rather than consumption of actual instantaneous capacity as
+ normal) and allows the HMP migration's simple threshold
+ migration strategy to interact more predictably with CPUFreq's
+ asynchronous compute capacity changes.
+
+config SCHED_HMP_LITTLE_PACKING
+ bool "Small task packing for HMP"
+ depends on SCHED_HMP
+ default n
+ help
+ Allows the HMP Scheduler to pack small tasks into CPUs in the
+ smallest HMP domain.
+ Controlled by two sysfs files in sys/kernel/hmp.
+ packing_enable: 1 to enable, 0 to disable packing. Default 1.
+ packing_limit: runqueue load ratio where a RQ is considered
+ to be full. Default is NICE_0_LOAD * 9/8.
+
config NR_CPUS
int "Maximum number of CPUs (2-32)"
range 2 32
depends on SMP
- default "4"
+ # These have to remain sorted largest to smallest
+ default "8"
+
+config HOTPLUG_CPU
+ bool "Support for hot-pluggable CPUs"
+ depends on SMP
+ help
+ Say Y here to experiment with turning CPUs off and on. CPUs
+ can be controlled through /sys/devices/system/cpu.
source kernel/Kconfig.preempt
@@ -181,8 +336,25 @@ config HW_PERF_EVENTS
Enable hardware performance counter support for perf events. If
disabled, perf events will use software events only.
+config SYS_SUPPORTS_HUGETLBFS
+ def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+ def_bool y
+
+config ARCH_WANT_HUGE_PMD_SHARE
+ def_bool y if !ARM64_64K_PAGES
+
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ def_bool y
+
source "mm/Kconfig"
+config FORCE_MAX_ZONEORDER
+ int
+ default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+ default "11"
+
endmenu
menu "Boot options"
@@ -203,6 +375,20 @@ config CMDLINE_FORCE
This is useful if you cannot or don't want to change the
command-line options your boot loader passes to the kernel.
+config EFI
+ bool "UEFI runtime support"
+ depends on OF && !CPU_BIG_ENDIAN
+ select LIBFDT
+ select UCS2_STRING
+ select EFI_PARAMS_FROM_FDT
+ default y
+ help
+ This option provides support for runtime services provided
+ by UEFI firmware (such as non-volatile variables, realtime
+ clock, and platform reset). A UEFI stub is also provided to
+ allow the kernel to be booted as an EFI application. This
+ is only useful on systems that have UEFI firmware.
+
endmenu
menu "Userspace binary formats"
@@ -230,16 +416,42 @@ config SYSVIPC_COMPAT
endmenu
+menu "Power management options"
+
+source "kernel/power/Kconfig"
+
+source "drivers/cpufreq/Kconfig"
+config ARCH_SUSPEND_POSSIBLE
+ def_bool y
+
+config ARM64_CPU_SUSPEND
+ def_bool PM_SLEEP
+
+endmenu
+
+menu "CPU Power Management"
+
+source "drivers/cpuidle/Kconfig"
+
+endmenu
+
source "net/Kconfig"
source "drivers/Kconfig"
+source "drivers/firmware/Kconfig"
+
source "fs/Kconfig"
+source "arch/arm64/kvm/Kconfig"
+
source "arch/arm64/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm64/crypto/Kconfig"
+endif
source "lib/Kconfig"
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 1a6bfe954d49..e1b0c4601b3e 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -13,6 +13,20 @@ config DEBUG_STACK_USAGE
Enables the display of the minimum amount of free stack which each
task has ever had available in the sysrq-T output.
+config STRICT_DEVMEM
+ bool "Filter access to /dev/mem"
+ depends on MMU
+ help
+ If this option is disabled, you allow userspace (root) access to all
+ of memory, including kernel and userspace memory. Accidental
+ access to this is obviously disastrous, but specific access can
+ be used by people debugging the kernel.
+
+ If this option is switched on, the /dev/mem file only allows
+ userspace access to memory mapped peripherals.
+
+ If in doubt, say Y.
+
config EARLY_PRINTK
bool "Early printk support"
default y
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index c95c5cb212fd..8a311839e2d7 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -20,9 +20,15 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
KBUILD_DEFCONFIG := defconfig
KBUILD_CFLAGS += -mgeneral-regs-only
+ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
+KBUILD_CPPFLAGS += -mbig-endian
+AS += -EB
+LD += -EB
+else
KBUILD_CPPFLAGS += -mlittle-endian
AS += -EL
LD += -EL
+endif
comma = ,
@@ -37,6 +43,8 @@ TEXT_OFFSET := 0x00080000
export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
+core-$(CONFIG_KVM) += arch/arm64/kvm/
+core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)
@@ -60,6 +68,10 @@ zinstall install: vmlinux
dtbs: scripts
$(Q)$(MAKE) $(build)=$(boot)/dts dtbs
+PHONY += vdso_install
+vdso_install:
+ $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
+
# We use MRPROPER_FILES and CLEAN_FILES now
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index 68457e9e0975..ef388176116d 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -1,4 +1,7 @@
-dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb \
+ fvp-base-gicv2-psci.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb
+dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb
targets += dtbs
targets += $(dtb-y)
diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts
new file mode 100644
index 000000000000..6541962f5d70
--- /dev/null
+++ b/arch/arm64/boot/dts/apm-mustang.dts
@@ -0,0 +1,30 @@
+/*
+ * dts file for AppliedMicro (APM) Mustang Board
+ *
+ * Copyright (C) 2013, Applied Micro Circuits Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/dts-v1/;
+
+/include/ "apm-storm.dtsi"
+
+/ {
+ model = "APM X-Gene Mustang board";
+ compatible = "apm,mustang", "apm,xgene-storm";
+
+ chosen { };
+
+ memory {
+ device_type = "memory";
+ reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */
+ };
+};
+
+&serial0 {
+ status = "ok";
+};
diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi
new file mode 100644
index 000000000000..42edf193d084
--- /dev/null
+++ b/arch/arm64/boot/dts/apm-storm.dtsi
@@ -0,0 +1,398 @@
+/*
+ * dts file for AppliedMicro (APM) X-Gene Storm SOC
+ *
+ * Copyright (C) 2013, Applied Micro Circuits Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/ {
+ compatible = "apm,xgene-storm";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@000 {
+ device_type = "cpu";
+ compatible = "apm,potenza", "arm,armv8";
+ reg = <0x0 0x000>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x1 0x0000fff8>;
+ };
+ cpu@001 {
+ device_type = "cpu";
+ compatible = "apm,potenza", "arm,armv8";
+ reg = <0x0 0x001>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x1 0x0000fff8>;
+ };
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "apm,potenza", "arm,armv8";
+ reg = <0x0 0x100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x1 0x0000fff8>;
+ };
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "apm,potenza", "arm,armv8";
+ reg = <0x0 0x101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x1 0x0000fff8>;
+ };
+ cpu@200 {
+ device_type = "cpu";
+ compatible = "apm,potenza", "arm,armv8";
+ reg = <0x0 0x200>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x1 0x0000fff8>;
+ };
+ cpu@201 {
+ device_type = "cpu";
+ compatible = "apm,potenza", "arm,armv8";
+ reg = <0x0 0x201>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x1 0x0000fff8>;
+ };
+ cpu@300 {
+ device_type = "cpu";
+ compatible = "apm,potenza", "arm,armv8";
+ reg = <0x0 0x300>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x1 0x0000fff8>;
+ };
+ cpu@301 {
+ device_type = "cpu";
+ compatible = "apm,potenza", "arm,armv8";
+ reg = <0x0 0x301>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x1 0x0000fff8>;
+ };
+ };
+
+ gic: interrupt-controller@78010000 {
+ compatible = "arm,cortex-a15-gic";
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0x0 0x78010000 0x0 0x1000>, /* GIC Dist */
+ <0x0 0x78020000 0x0 0x1000>, /* GIC CPU */
+ <0x0 0x78040000 0x0 0x2000>, /* GIC VCPU Control */
+ <0x0 0x78060000 0x0 0x2000>; /* GIC VCPU */
+		interrupts = <1 9 0xf04>;	/* GIC Maintenance IRQ */
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <1 0 0xff01>, /* Secure Phys IRQ */
+ <1 13 0xff01>, /* Non-secure Phys IRQ */
+ <1 14 0xff01>, /* Virt IRQ */
+ <1 15 0xff01>; /* Hyp IRQ */
+ clock-frequency = <50000000>;
+ };
+
+ soc {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ clocks {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ refclk: refclk {
+ compatible = "fixed-clock";
+ #clock-cells = <1>;
+ clock-frequency = <100000000>;
+ clock-output-names = "refclk";
+ };
+
+ pcppll: pcppll@17000100 {
+ compatible = "apm,xgene-pcppll-clock";
+ #clock-cells = <1>;
+ clocks = <&refclk 0>;
+ clock-names = "pcppll";
+ reg = <0x0 0x17000100 0x0 0x1000>;
+ clock-output-names = "pcppll";
+ type = <0>;
+ };
+
+ socpll: socpll@17000120 {
+ compatible = "apm,xgene-socpll-clock";
+ #clock-cells = <1>;
+ clocks = <&refclk 0>;
+ clock-names = "socpll";
+ reg = <0x0 0x17000120 0x0 0x1000>;
+ clock-output-names = "socpll";
+ type = <1>;
+ };
+
+ socplldiv2: socplldiv2 {
+ compatible = "fixed-factor-clock";
+ #clock-cells = <1>;
+ clocks = <&socpll 0>;
+ clock-names = "socplldiv2";
+ clock-mult = <1>;
+ clock-div = <2>;
+ clock-output-names = "socplldiv2";
+ };
+
+ qmlclk: qmlclk {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ clock-names = "qmlclk";
+ reg = <0x0 0x1703C000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "qmlclk";
+ };
+
+ ethclk: ethclk {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ clock-names = "ethclk";
+ reg = <0x0 0x17000000 0x0 0x1000>;
+ reg-names = "div-reg";
+ divider-offset = <0x238>;
+ divider-width = <0x9>;
+ divider-shift = <0x0>;
+ clock-output-names = "ethclk";
+ };
+
+ eth8clk: eth8clk {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&ethclk 0>;
+ clock-names = "eth8clk";
+ reg = <0x0 0x1702C000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "eth8clk";
+ };
+
+ sataphy1clk: sataphy1clk@1f21c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f21c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sataphy1clk";
+ status = "disabled";
+ csr-offset = <0x4>;
+ csr-mask = <0x00>;
+ enable-offset = <0x0>;
+ enable-mask = <0x06>;
+ };
+
+ sataphy2clk: sataphy1clk@1f22c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f22c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sataphy2clk";
+ status = "ok";
+ csr-offset = <0x4>;
+ csr-mask = <0x3a>;
+ enable-offset = <0x0>;
+ enable-mask = <0x06>;
+ };
+
+ sataphy3clk: sataphy1clk@1f23c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f23c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sataphy3clk";
+ status = "ok";
+ csr-offset = <0x4>;
+ csr-mask = <0x3a>;
+ enable-offset = <0x0>;
+ enable-mask = <0x06>;
+ };
+
+ sata01clk: sata01clk@1f21c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f21c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata01clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ sata23clk: sata23clk@1f22c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f22c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata23clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ sata45clk: sata45clk@1f23c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f23c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata45clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ rtcclk: rtcclk@17000000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x17000000 0x0 0x2000>;
+ reg-names = "csr-reg";
+ csr-offset = <0xc>;
+ csr-mask = <0x2>;
+ enable-offset = <0x10>;
+ enable-mask = <0x2>;
+ clock-output-names = "rtcclk";
+ };
+ };
+
+ serial0: serial@1c020000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c020000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4c 0x4>;
+ };
+
+ serial1: serial@1c021000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c021000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4d 0x4>;
+ };
+
+ serial2: serial@1c022000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c022000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4e 0x4>;
+ };
+
+ serial3: serial@1c023000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c023000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4f 0x4>;
+ };
+
+ phy1: phy@1f21a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f21a000 0x0 0x100>;
+ #phy-cells = <1>;
+ clocks = <&sataphy1clk 0>;
+ status = "disabled";
+ apm,tx-boost-gain = <30 30 30 30 30 30>;
+ apm,tx-eye-tuning = <2 10 10 2 10 10>;
+ };
+
+ phy2: phy@1f22a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f22a000 0x0 0x100>;
+ #phy-cells = <1>;
+ clocks = <&sataphy2clk 0>;
+ status = "ok";
+ apm,tx-boost-gain = <30 30 30 30 30 30>;
+ apm,tx-eye-tuning = <1 10 10 2 10 10>;
+ };
+
+ phy3: phy@1f23a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f23a000 0x0 0x100>;
+ #phy-cells = <1>;
+ clocks = <&sataphy3clk 0>;
+ status = "ok";
+ apm,tx-boost-gain = <31 31 31 31 31 31>;
+ apm,tx-eye-tuning = <2 10 10 2 10 10>;
+ };
+
+ sata1: sata@1a000000 {
+ compatible = "apm,xgene-ahci";
+ reg = <0x0 0x1a000000 0x0 0x1000>,
+ <0x0 0x1f210000 0x0 0x1000>,
+ <0x0 0x1f21d000 0x0 0x1000>,
+ <0x0 0x1f21e000 0x0 0x1000>,
+ <0x0 0x1f217000 0x0 0x1000>;
+ interrupts = <0x0 0x86 0x4>;
+ status = "disabled";
+ clocks = <&sata01clk 0>;
+ phys = <&phy1 0>;
+ phy-names = "sata-phy";
+ };
+
+ sata2: sata@1a400000 {
+ compatible = "apm,xgene-ahci";
+ reg = <0x0 0x1a400000 0x0 0x1000>,
+ <0x0 0x1f220000 0x0 0x1000>,
+ <0x0 0x1f22d000 0x0 0x1000>,
+ <0x0 0x1f22e000 0x0 0x1000>,
+ <0x0 0x1f227000 0x0 0x1000>;
+ interrupts = <0x0 0x87 0x4>;
+ status = "ok";
+ clocks = <&sata23clk 0>;
+ phys = <&phy2 0>;
+ phy-names = "sata-phy";
+ };
+
+ sata3: sata@1a800000 {
+ compatible = "apm,xgene-ahci";
+ reg = <0x0 0x1a800000 0x0 0x1000>,
+ <0x0 0x1f230000 0x0 0x1000>,
+ <0x0 0x1f23d000 0x0 0x1000>,
+ <0x0 0x1f23e000 0x0 0x1000>;
+ interrupts = <0x0 0x88 0x4>;
+ status = "ok";
+ clocks = <&sata45clk 0>;
+ phys = <&phy3 0>;
+ phy-names = "sata-phy";
+ };
+
+ rtc: rtc@10510000 {
+ compatible = "apm,xgene-rtc";
+ reg = <0x0 0x10510000 0x0 0x400>;
+ interrupts = <0x0 0x46 0x4>;
+ #clock-cells = <1>;
+ clocks = <&rtcclk 0>;
+ };
+ };
+};
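
Every cpu node above uses enable-method = "spin-table" with the same
cpu-release-addr (the two cells <0x1 0x0000fff8> form the 64-bit physical
address 0x1_0000fff8). Under that boot protocol the secondary CPUs sit in a
firmware holding pen polling a zero-initialised 64-bit slot at that address;
the OS writes the secondary entry point there and wakes them. A minimal C
sketch of the pen, assuming the conventional wfe-based wait (the real code is
firmware assembly, so treat this as protocol illustration only):

	#include <stdint.h>

	/* Hypothetical holding-pen loop run by a powered-up secondary CPU
	 * waiting at the spin-table release address (0x1_0000fff8 above):
	 * poll a zeroed 64-bit slot until the OS writes the secondary
	 * entry point, then branch to it. */
	static void spin_table_wait(volatile uint64_t *release_addr)
	{
		uint64_t entry;

		while ((entry = *release_addr) == 0)
			;	/* real firmware would wfe here */

		((void (*)(void))(uintptr_t)entry)(); /* jump to the kernel */
	}
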
diff --git a/arch/arm64/boot/dts/clcd-panels.dtsi b/arch/arm64/boot/dts/clcd-panels.dtsi
new file mode 100644
index 000000000000..0b0ff6ead4b2
--- /dev/null
+++ b/arch/arm64/boot/dts/clcd-panels.dtsi
@@ -0,0 +1,52 @@
+/*
+ * ARM Ltd. Versatile Express
+ *
+ */
+
+/ {
+ panels {
+ panel@0 {
+ compatible = "panel";
+ mode = "VGA";
+ refresh = <60>;
+ xres = <640>;
+ yres = <480>;
+ pixclock = <39721>;
+ left_margin = <40>;
+ right_margin = <24>;
+ upper_margin = <32>;
+ lower_margin = <11>;
+ hsync_len = <96>;
+ vsync_len = <2>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+
+ panel@1 {
+ compatible = "panel";
+ mode = "XVGA";
+ refresh = <60>;
+ xres = <1024>;
+ yres = <768>;
+ pixclock = <15748>;
+ left_margin = <152>;
+ right_margin = <48>;
+ upper_margin = <23>;
+ lower_margin = <3>;
+ hsync_len = <104>;
+ vsync_len = <4>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
new file mode 100644
index 000000000000..ed55571e06dd
--- /dev/null
+++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2013, ARM Limited. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x80000000 0x00010000;
+
+/ {
+};
+
+/ {
+ model = "FVP Base";
+ compatible = "arm,vfp-base", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0x84000001>;
+ cpu_off = <0x84000002>;
+ cpu_on = <0xc4000003>;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ idle-states {
+ entry-method = "arm,psci";
+
+ CPU_SLEEP_0: cpu-sleep-0 {
+ compatible = "arm,idle-state";
+ entry-method-param = <0x0010000>;
+ entry-latency-us = <40>;
+ exit-latency-us = <100>;
+ min-residency-us = <150>;
+ };
+
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ entry-method-param = <0x1010000>;
+ entry-latency-us = <500>;
+ exit-latency-us = <1000>;
+ min-residency-us = <2500>;
+ };
+ };
+
+ big0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ };
+ big1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ };
+ big2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ };
+ big3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ };
+ little0: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ };
+ little1: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ };
+ little2: cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x102>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ };
+ little3: cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x103>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ };
+
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&big0>;
+ };
+ core1 {
+ cpu = <&big1>;
+ };
+ core2 {
+ cpu = <&big2>;
+ };
+ core3 {
+ cpu = <&big3>;
+ };
+ };
+ cluster1 {
+ core0 {
+ cpu = <&little0>;
+ };
+ core1 {
+ cpu = <&little1>;
+ };
+ core2 {
+ cpu = <&little2>;
+ };
+ core3 {
+ cpu = <&little3>;
+ };
+ };
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000 0 0x80000000>,
+ <0x00000008 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2f000000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x0 0x2f000000 0 0x10000>,
+ <0x0 0x2c000000 0 0x2000>,
+ <0x0 0x2c010000 0 0x2000>,
+ <0x0 0x2c02F000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <1 13 0xff01>,
+ <1 14 0xff01>,
+ <1 11 0xff01>,
+ <1 10 0xff01>;
+ clock-frequency = <100000000>;
+ };
+
+ timer@2a810000 {
+ compatible = "arm,armv7-timer-mem";
+ reg = <0x0 0x2a810000 0x0 0x10000>;
+ clock-frequency = <100000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ frame@2a820000 {
+ frame-number = <0>;
+ interrupts = <0 25 4>;
+ reg = <0x0 0x2a820000 0x0 0x10000>;
+ };
+ };
+
+ pmu {
+ compatible = "arm,armv8-pmuv3";
+ interrupts = <0 60 4>,
+ <0 61 4>,
+ <0 62 4>,
+ <0 63 4>;
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
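
The entry-method-param values in the idle-states node above are the raw
power_state arguments later handed to PSCI CPU_SUSPEND. Assuming the PSCI v0.2
layout (state ID in bits 15:0, state type in bit 16, affinity level in bits
25:24 -- an assumption, since the psci node itself marks the function IDs as
provisional), the two constants decompose as a core power-down and a cluster
power-down:

	#include <stdint.h>

	/* Assumed PSCI v0.2 power_state layout; the dts above warns the
	 * IDs are temporary, so this is a sketch, not a spec. */
	#define PSCI_POWER_STATE_TYPE_SHIFT	16 /* 0 = standby, 1 = power down */
	#define PSCI_POWER_STATE_AFFL_SHIFT	24 /* 0 = core, 1 = cluster */

	static inline uint32_t psci_power_state(uint32_t id, uint32_t type,
						uint32_t aff_level)
	{
		return id | (type << PSCI_POWER_STATE_TYPE_SHIFT) |
		       (aff_level << PSCI_POWER_STATE_AFFL_SHIFT);
	}

	/* psci_power_state(0, 1, 0) == 0x0010000 -> CPU_SLEEP_0
	 * psci_power_state(0, 1, 1) == 0x1010000 -> CLUSTER_SLEEP_0 */
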
diff --git a/arch/arm64/boot/dts/juno.dts b/arch/arm64/boot/dts/juno.dts
new file mode 100644
index 000000000000..f260d702041c
--- /dev/null
+++ b/arch/arm64/boot/dts/juno.dts
@@ -0,0 +1,498 @@
+/*
+ * ARM Ltd. Juno Platform
+ *
+ * Fast Models FVP v2 support
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+ model = "Juno";
+ compatible = "arm,juno", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ aliases {
+ serial0 = &soc_uart0;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ };
+
+ cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x102>;
+ enable-method = "psci";
+ };
+
+ cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x103>;
+ enable-method = "psci";
+ };
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000 0x0 0x7f000000>,
+ <0x00000008 0x80000000 0x1 0x80000000>;
+ };
+
+ /* memory@14000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x14000000 0x0 0x02000000>;
+ }; */
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x0 0x2c010000 0 0x1000>,
+ <0x0 0x2c02f000 0 0x1000>,
+ <0x0 0x2c04f000 0 0x2000>,
+ <0x0 0x2c06f000 0 0x2000>;
+ interrupts = <GIC_PPI 9 0xf04>;
+ };
+
+ msi0: msi@2c1c0000 {
+ compatible = "arm,gic-msi";
+ reg = <0x0 0x2c1c0000 0 0x10000
+ 0x0 0x2c1d0000 0 0x10000
+ 0x0 0x2c1e0000 0 0x10000
+ 0x0 0x2c1f0000 0 0x10000>;
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <GIC_PPI 13 0xff01>,
+ <GIC_PPI 14 0xff01>,
+ <GIC_PPI 11 0xff01>,
+ <GIC_PPI 10 0xff01>;
+ };
+
+ pmu {
+ compatible = "arm,armv8-pmuv3";
+ interrupts = <GIC_SPI 60 4>,
+ <GIC_SPI 61 4>,
+ <GIC_SPI 62 4>,
+ <GIC_SPI 63 4>;
+ };
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0xC4000001>;
+ cpu_off = <0x84000002>;
+ cpu_on = <0xC4000003>;
+ migrate = <0xC4000005>;
+ };
+
+ pci0: pci@30000000 {
+ compatible = "arm,pcie-xr3";
+ device_type = "pci";
+ reg = <0 0x7ff30000 0 0x1000
+ 0 0x7ff20000 0 0x10000
+ 0 0x40000000 0 0x10000000>;
+ bus-range = <0 255>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges = <0x01000000 0x0 0x00000000 0x00 0x5ff00000 0x0 0x00100000
+ 0x02000000 0x0 0x00000000 0x40 0x00000000 0x0 0x80000000
+ 0x42000000 0x0 0x80000000 0x40 0x80000000 0x0 0x80000000>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 7>;
+ interrupt-map = <0 0 0 1 &gic 0 136 4
+ 0 0 0 2 &gic 0 137 4
+ 0 0 0 3 &gic 0 138 4
+ 0 0 0 4 &gic 0 139 4>;
+ };
+
+ scpi: scpi@2b1f0000 {
+ compatible = "arm,scpi-mhu";
+ reg = <0x0 0x2b1f0000 0x0 0x10000>, /* MHU registers */
+ <0x0 0x2e000000 0x0 0x10000>; /* Payload area */
+ interrupts = <0 36 4>, /* low priority interrupt */
+ <0 35 4>, /* high priority interrupt */
+ <0 37 4>; /* secure channel interrupt */
+ #clock-cells = <1>;
+ clock-output-names = "a57", "a53", "gpu", "hdlcd0", "hdlcd1";
+ };
+
+ hdlcd0_osc: scpi_osc@3 {
+ compatible = "arm,scpi-osc";
+ #clock-cells = <0>;
+ clocks = <&scpi 3>;
+ frequency-range = <23000000 210000000>;
+ clock-output-names = "pxlclk0";
+ };
+
+ hdlcd1_osc: scpi_osc@4 {
+ compatible = "arm,scpi-osc";
+ #clock-cells = <0>;
+ clocks = <&scpi 4>;
+ frequency-range = <23000000 210000000>;
+ clock-output-names = "pxlclk1";
+ };
+
+ soc_uartclk: refclk72738khz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <7273800>;
+ clock-output-names = "juno:uartclk";
+ };
+
+ soc_refclk24mhz: clk24mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ clock-output-names = "juno:clk24mhz";
+ };
+
+ mb_eth25mhz: clk25mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ clock-output-names = "ethclk25mhz";
+ };
+
+ soc_usb48mhz: clk48mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <48000000>;
+ clock-output-names = "clk48mhz";
+ };
+
+ soc_smc50mhz: clk50mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <50000000>;
+ clock-output-names = "smc_clk";
+ };
+
+ soc_refclk100mhz: refclk100mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <100000000>;
+ clock-output-names = "apb_pclk";
+ };
+
+ soc_faxiclk: refclk533mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <533000000>;
+ clock-output-names = "faxi_clk";
+ };
+
+ soc_fixed_3v3: fixedregulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ memory-controller@7ffd0000 {
+ compatible = "arm,pl354", "arm,primecell";
+ reg = <0 0x7ffd0000 0 0x1000>;
+ interrupts = <0 86 4>,
+ <0 87 4>;
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+ chip5-memwidth = <16>;
+ };
+
+ dma0: dma@0x7ff00000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0x0 0x7ff00000 0 0x1000>;
+ interrupts = <0 95 4>,
+ <0 88 4>,
+ <0 89 4>,
+ <0 90 4>,
+ <0 91 4>,
+ <0 108 4>,
+ <0 109 4>,
+ <0 110 4>,
+ <0 111 4>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ clocks = <&soc_faxiclk>;
+ clock-names = "apb_pclk";
+ };
+
+ soc_uart0: uart@7ff80000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0 0x7ff80000 0x0 0x1000>;
+ interrupts = <0 83 4>;
+ clocks = <&soc_uartclk>, <&soc_refclk100mhz>;
+ clock-names = "uartclk", "apb_pclk";
+ dmas = <&dma0 1
+ &dma0 2>;
+ dma-names = "rx", "tx";
+ };
+
+	/* This UART is reserved for secure software.
+ soc_uart1: uart@7ff70000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0 0x7ff70000 0x0 0x1000>;
+ interrupts = <0 84 4>;
+ clocks = <&soc_uartclk>, <&soc_refclk100mhz>;
+ clock-names = "uartclk", "apb_pclk";
+ }; */
+
+ ulpi_phy: phy@0 {
+ compatible = "phy-ulpi-generic";
+ reg = <0x0 0x94 0x0 0x4>;
+ phy-id = <0>;
+ };
+
+ ehci@7ffc0000 {
+ compatible = "snps,ehci-h20ahb";
+ /* compatible = "arm,h20ahb-ehci"; */
+ reg = <0x0 0x7ffc0000 0x0 0x10000>;
+ interrupts = <0 117 4>;
+ clocks = <&soc_usb48mhz>;
+ clock-names = "otg";
+ phys = <&ulpi_phy>;
+ };
+
+ ohci@0x7ffb0000 {
+ compatible = "generic-ohci";
+ reg = <0x0 0x7ffb0000 0x0 0x10000>;
+ interrupts = <0 116 4>;
+ clocks = <&soc_usb48mhz>;
+ clock-names = "otg";
+ };
+
+ i2c@0x7ffa0000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x7ffa0000 0x0 0x1000>;
+ interrupts = <0 104 4>;
+ clock-frequency = <400000>;
+ i2c-sda-hold-time-ns = <500>;
+ clocks = <&soc_smc50mhz>;
+
+ dvi0: dvi-transmitter@70 {
+ compatible = "nxp,tda998x";
+ reg = <0x70>;
+ };
+
+ dvi1: dvi-transmitter@71 {
+ compatible = "nxp,tda998x";
+ reg = <0x71>;
+ };
+ };
+
+ /* mmci@1c050000 {
+ compatible = "arm,pl180", "arm,primecell";
+ reg = <0x0 0x1c050000 0x0 0x1000>;
+ interrupts = <0 73 4>,
+ <0 74 4>;
+ max-frequency = <12000000>;
+ vmmc-supply = <&soc_fixed_3v3>;
+ clocks = <&soc_refclk24mhz>, <&soc_refclk100mhz>;
+ clock-names = "mclk", "apb_pclk";
+ }; */
+
+ hdlcd@7ff60000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x7ff60000 0 0x1000>;
+ interrupts = <0 85 4>;
+ clocks = <&hdlcd0_osc>;
+ clock-names = "pxlclk";
+ i2c-slave = <&dvi0>;
+
+ /* display-timings {
+ native-mode = <&timing0>;
+ timing0: timing@0 {
+			/* 1024 x 768 framebuffer, standard VGA timings * /
+ clock-frequency = <65000>;
+ hactive = <1024>;
+ vactive = <768>;
+ hfront-porch = <24>;
+ hback-porch = <160>;
+ hsync-len = <136>;
+ vfront-porch = <3>;
+ vback-porch = <29>;
+ vsync-len = <6>;
+ };
+ }; */
+ };
+
+ hdlcd@7ff50000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x7ff50000 0 0x1000>;
+ interrupts = <0 93 4>;
+ clocks = <&hdlcd1_osc>;
+ clock-names = "pxlclk";
+ i2c-slave = <&dvi1>;
+
+ display-timings {
+ native-mode = <&timing1>;
+ timing1: timing@1 {
+				/* 1024 x 768 framebuffer, standard VGA timings */
+ clock-frequency = <65000>;
+ hactive = <1024>;
+ vactive = <768>;
+ hfront-porch = <24>;
+ hback-porch = <160>;
+ hsync-len = <136>;
+ vfront-porch = <3>;
+ vback-porch = <29>;
+ vsync-len = <6>;
+ };
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 15>;
+ interrupt-map = <0 0 0 &gic 0 68 4>,
+ <0 0 1 &gic 0 69 4>,
+ <0 0 2 &gic 0 70 4>,
+ <0 0 3 &gic 0 160 4>,
+ <0 0 4 &gic 0 161 4>,
+ <0 0 5 &gic 0 162 4>,
+ <0 0 6 &gic 0 163 4>,
+ <0 0 7 &gic 0 164 4>,
+ <0 0 8 &gic 0 165 4>,
+ <0 0 9 &gic 0 166 4>,
+ <0 0 10 &gic 0 167 4>,
+ <0 0 11 &gic 0 168 4>,
+ <0 0 12 &gic 0 169 4>;
+
+ motherboard {
+ model = "V2M-Juno";
+ arm,hbi = <0x252>;
+ arm,vexpress,site = <0>;
+ arm,v2m-memory-map = "rs1";
+ compatible = "arm,vexpress,v2p-p1", "simple-bus";
+ #address-cells = <2>; /* SMB chipselect number and offset */
+ #size-cells = <1>;
+ #interrupt-cells = <1>;
+ ranges;
+
+ usb@5,00000000 {
+ compatible = "nxp,usb-isp1763";
+ reg = <5 0x00000000 0x20000>;
+ bus-width = <16>;
+ interrupts = <4>;
+ };
+
+ ethernet@2,00000000 {
+ compatible = "smsc,lan9118", "smsc,lan9115";
+ reg = <2 0x00000000 0x10000>;
+ interrupts = <3>;
+ phy-mode = "mii";
+ reg-io-width = <4>;
+ smsc,irq-active-high;
+ smsc,irq-push-pull;
+ clocks = <&mb_eth25mhz>;
+ vdd33a-supply = <&soc_fixed_3v3>; /* change this */
+ vddvario-supply = <&soc_fixed_3v3>; /* and this */
+ };
+
+ iofpga@3,00000000 {
+ compatible = "arm,amba-bus", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 3 0 0x200000>;
+
+ kmi@060000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x060000 0x1000>;
+ interrupts = <8>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ kmi@070000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x070000 0x1000>;
+ interrupts = <8>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ wdt@0f0000 {
+ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0f0000 0x10000>;
+ interrupts = <7>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "wdogclk", "apb_pclk";
+ };
+
+ v2m_timer01: timer@110000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x110000 0x10000>;
+ interrupts = <9>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "timclken1", "apb_pclk";
+ };
+
+ v2m_timer23: timer@120000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x120000 0x10000>;
+ interrupts = <9>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "timclken1", "apb_pclk";
+ };
+
+ rtc@170000 {
+ compatible = "arm,pl031", "arm,primecell";
+ reg = <0x170000 0x10000>;
+ interrupts = <0>;
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+ };
+ };
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
index 572005ea2217..28ed4ba3391a 100644
--- a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
+++ b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
@@ -27,37 +27,70 @@
serial3 = &v2m_serial3;
};
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ /*
+ * Function IDs usage and compliancy with PSCI v0.2 still
+ * under discussion. Current IDs should be considered
+ * temporary for demonstration purposes.
+ */
+ cpu_suspend = <0x84000001>;
+ cpu_off = <0x84000002>;
+ cpu_on = <0x84000003>;
+ };
+
cpus {
#address-cells = <2>;
#size-cells = <0>;
+ idle-states {
+ entry-method = "arm,psci";
+
+ CPU_SLEEP_0: cpu-sleep-0 {
+ compatible = "arm,idle-state";
+ entry-method-param = <0x0010000>;
+ entry-latency-us = <40>;
+ exit-latency-us = <100>;
+ min-residency-us = <150>;
+ };
+
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ entry-method-param = <0x1010000>;
+ entry-latency-us = <500>;
+ exit-latency-us = <1000>;
+ min-residency-us = <2500>;
+ };
+ };
+
cpu@0 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x0>;
- enable-method = "spin-table";
- cpu-release-addr = <0x0 0x8000fff8>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
cpu@1 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x1>;
- enable-method = "spin-table";
- cpu-release-addr = <0x0 0x8000fff8>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
cpu@2 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x2>;
- enable-method = "spin-table";
- cpu-release-addr = <0x0 0x8000fff8>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
cpu@3 {
device_type = "cpu";
compatible = "arm,armv8";
reg = <0x0 0x3>;
- enable-method = "spin-table";
- cpu-release-addr = <0x0 0x8000fff8>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
};
@@ -157,3 +190,5 @@
/include/ "rtsm_ve-motherboard.dtsi"
};
};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
index b45e5f39f577..b683d4703582 100644
--- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
+++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
@@ -182,6 +182,15 @@
interrupts = <14>;
clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>;
clock-names = "clcdclk", "apb_pclk";
+ mode = "XVGA";
+ use_dma = <0>;
+ framebuffer = <0x18000000 0x00180000>;
+ };
+
+ virtio_block@0130000 {
+ compatible = "virtio,mmio";
+ reg = <0x130000 0x200>;
+ interrupts = <42>;
};
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 8d9696adb440..8e323147c375 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
@@ -19,13 +18,17 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
CONFIG_ARCH_VEXPRESS=y
+CONFIG_ARCH_XGENE=y
CONFIG_SMP=y
+CONFIG_PREEMPT=y
CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_CMA=y
CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y
@@ -42,29 +45,42 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
# CONFIG_BLK_DEV is not set
+CONFIG_DMA_CMA=y
CONFIG_SCSI=y
# CONFIG_SCSI_PROC_FS is not set
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_PATA_PLATFORM=y
+CONFIG_PATA_OF_PLATFORM=y
CONFIG_NETDEVICES=y
-CONFIG_MII=y
CONFIG_SMC91X=y
+CONFIG_SMSC911X=y
# CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
CONFIG_LEGACY_PTY_COUNT=16
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_FB=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
-# CONFIG_USB_SUPPORT is not set
+CONFIG_USB=y
+CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_ARMMMCI=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
@@ -86,3 +102,4 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO=y
# CONFIG_FTRACE is not set
CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_DMA_CMA=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
new file mode 100644
index 000000000000..5562652c5316
--- /dev/null
+++ b/arch/arm64/crypto/Kconfig
@@ -0,0 +1,53 @@
+
+menuconfig ARM64_CRYPTO
+ bool "ARM64 Accelerated Cryptographic Algorithms"
+ depends on ARM64
+ help
+ Say Y here to choose from a selection of cryptographic algorithms
+ implemented using ARM64 specific CPU features or instructions.
+
+if ARM64_CRYPTO
+
+config CRYPTO_SHA1_ARM64_CE
+ tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_SHA2_ARM64_CE
+ tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_GHASH_ARM64_CE
+ tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_AES_ARM64_CE
+ tristate "AES core cipher using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+
+config CRYPTO_AES_ARM64_CE_CCM
+ tristate "AES in CCM mode using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+ select CRYPTO_AEAD
+
+config CRYPTO_AES_ARM64_CE_BLK
+ tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+ select CRYPTO_ABLK_HELPER
+
+config CRYPTO_AES_ARM64_NEON_BLK
+ tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+ select CRYPTO_ABLK_HELPER
+
+endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
new file mode 100644
index 000000000000..2070a56ecc46
--- /dev/null
+++ b/arch/arm64/crypto/Makefile
@@ -0,0 +1,38 @@
+#
+# linux/arch/arm64/crypto/Makefile
+#
+# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
+sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
+sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
+
+obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
+CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
+aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
+aes-ce-blk-y := aes-glue-ce.o aes-ce.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
+aes-neon-blk-y := aes-glue-neon.o aes-neon.o
+
+AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE
+AFLAGS_aes-neon.o := -DINTERLEAVE=4
+
+CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
+
+$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
+ $(call if_changed_dep,cc_o_c)
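
The pattern rule above compiles the single aes-glue.c source twice: once with
-DUSE_V8_CRYPTO_EXTENSIONS into aes-glue-ce.o and once without into
aes-glue-neon.o. The glue file keeps the two objects' symbols distinct with
preprocessor aliases, as its own header block shows further down; a condensed
sketch of the technique (names taken from that file):

	/* Compiled once per object with different CPPFLAGS; the alias
	 * picks the ce_ or neon_ symbol name at preprocessing time. */
	#ifdef USE_V8_CRYPTO_EXTENSIONS
	#define aes_ecb_encrypt	ce_aes_ecb_encrypt   /* aes-glue-ce.o */
	#else
	#define aes_ecb_encrypt	neon_aes_ecb_encrypt /* aes-glue-neon.o */
	#endif

	void aes_ecb_encrypt(void); /* resolves to ce_... or neon_... */
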
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
new file mode 100644
index 000000000000..432e4841cd81
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -0,0 +1,222 @@
+/*
+ * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+ * u32 *macp, u8 const rk[], u32 rounds);
+ */
+ENTRY(ce_aes_ccm_auth_data)
+ ldr w8, [x3] /* leftover from prev round? */
+ ld1 {v0.2d}, [x0] /* load mac */
+ cbz w8, 1f
+ sub w8, w8, #16
+ eor v1.16b, v1.16b, v1.16b
+0: ldrb w7, [x1], #1 /* get 1 byte of input */
+ subs w2, w2, #1
+ add w8, w8, #1
+ ins v1.b[0], w7
+ ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
+ beq 8f /* out of input? */
+ cbnz w8, 0b
+ eor v0.16b, v0.16b, v1.16b
+1: ld1 {v3.2d}, [x4] /* load first round key */
+ prfm pldl1strm, [x1]
+ cmp w5, #12 /* which key size? */
+ add x6, x4, #16
+ sub w7, w5, #2 /* modified # of rounds */
+ bmi 2f
+ bne 5f
+ mov v5.16b, v3.16b
+ b 4f
+2: mov v4.16b, v3.16b
+ ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
+3: aese v0.16b, v4.16b
+ aesmc v0.16b, v0.16b
+4: ld1 {v3.2d}, [x6], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aesmc v0.16b, v0.16b
+5: ld1 {v4.2d}, [x6], #16 /* load next round key */
+ subs w7, w7, #3
+ aese v0.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ ld1 {v5.2d}, [x6], #16 /* load next round key */
+ bpl 3b
+ aese v0.16b, v4.16b
+ subs w2, w2, #16 /* last data? */
+ eor v0.16b, v0.16b, v5.16b /* final round */
+ bmi 6f
+ ld1 {v1.16b}, [x1], #16 /* load next input block */
+ eor v0.16b, v0.16b, v1.16b /* xor with mac */
+ bne 1b
+6: st1 {v0.2d}, [x0] /* store mac */
+ beq 10f
+ adds w2, w2, #16
+ beq 10f
+ mov w8, w2
+7: ldrb w7, [x1], #1
+ umov w6, v0.b[0]
+ eor w6, w6, w7
+ strb w6, [x0], #1
+ subs w2, w2, #1
+ beq 10f
+ ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
+ b 7b
+8: mov w7, w8
+ add w8, w8, #16
+9: ext v1.16b, v1.16b, v1.16b, #1
+ adds w7, w7, #1
+ bne 9b
+ eor v0.16b, v0.16b, v1.16b
+ st1 {v0.2d}, [x0]
+10: str w8, [x3]
+ ret
+ENDPROC(ce_aes_ccm_auth_data)
+
+ /*
+ * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
+ * u32 rounds);
+ */
+ENTRY(ce_aes_ccm_final)
+ ld1 {v3.2d}, [x2], #16 /* load first round key */
+ ld1 {v0.2d}, [x0] /* load mac */
+ cmp w3, #12 /* which key size? */
+ sub w3, w3, #2 /* modified # of rounds */
+ ld1 {v1.2d}, [x1] /* load 1st ctriv */
+ bmi 0f
+ bne 3f
+ mov v5.16b, v3.16b
+ b 2f
+0: mov v4.16b, v3.16b
+1: ld1 {v5.2d}, [x2], #16 /* load next round key */
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+2: ld1 {v3.2d}, [x2], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aese v1.16b, v5.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+3: ld1 {v4.2d}, [x2], #16 /* load next round key */
+ subs w3, w3, #3
+ aese v0.16b, v3.16b
+ aese v1.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ bpl 1b
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ /* final round key cancels out */
+ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
+ st1 {v0.2d}, [x0] /* store result */
+ ret
+ENDPROC(ce_aes_ccm_final)
+
+ .macro aes_ccm_do_crypt,enc
+ ldr x8, [x6, #8] /* load lower ctr */
+ ld1 {v0.2d}, [x5] /* load mac */
+ rev x8, x8 /* keep swabbed ctr in reg */
+0: /* outer loop */
+ ld1 {v1.1d}, [x6] /* load upper ctr */
+ prfm pldl1strm, [x1]
+ add x8, x8, #1
+ rev x9, x8
+ cmp w4, #12 /* which key size? */
+ sub w7, w4, #2 /* get modified # of rounds */
+ ins v1.d[1], x9 /* no carry in lower ctr */
+ ld1 {v3.2d}, [x3] /* load first round key */
+ add x10, x3, #16
+ bmi 1f
+ bne 4f
+ mov v5.16b, v3.16b
+ b 3f
+1: mov v4.16b, v3.16b
+ ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
+2: /* inner loop: 3 rounds, 2x interleaved */
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+3: ld1 {v3.2d}, [x10], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aese v1.16b, v5.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+4: ld1 {v4.2d}, [x10], #16 /* load next round key */
+ subs w7, w7, #3
+ aese v0.16b, v3.16b
+ aese v1.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ ld1 {v5.2d}, [x10], #16 /* load next round key */
+ bpl 2b
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ subs w2, w2, #16
+ bmi 6f /* partial block? */
+ ld1 {v2.16b}, [x1], #16 /* load next input block */
+ .if \enc == 1
+ eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
+ eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
+ .else
+ eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
+ eor v1.16b, v2.16b, v5.16b /* final round enc */
+ .endif
+ eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
+ st1 {v1.16b}, [x0], #16 /* write output block */
+ bne 0b
+ rev x8, x8
+ st1 {v0.2d}, [x5] /* store mac */
+ str x8, [x6, #8] /* store lsb end of ctr (BE) */
+5: ret
+
+6: eor v0.16b, v0.16b, v5.16b /* final round mac */
+ eor v1.16b, v1.16b, v5.16b /* final round enc */
+ st1 {v0.2d}, [x5] /* store mac */
+ add w2, w2, #16 /* process partial tail block */
+7: ldrb w9, [x1], #1 /* get 1 byte of input */
+ umov w6, v1.b[0] /* get top crypted ctr byte */
+ umov w7, v0.b[0] /* get top mac byte */
+ .if \enc == 1
+ eor w7, w7, w9
+ eor w9, w9, w6
+ .else
+ eor w9, w9, w6
+ eor w7, w7, w9
+ .endif
+ strb w9, [x0], #1 /* store out byte */
+ strb w7, [x5], #1 /* store mac byte */
+ subs w2, w2, #1
+ beq 5b
+ ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
+ ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
+ b 7b
+ .endm
+
+ /*
+ * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+ * u8 const rk[], u32 rounds, u8 mac[],
+ * u8 ctr[]);
+ * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+ * u8 const rk[], u32 rounds, u8 mac[],
+ * u8 ctr[]);
+ */
+ENTRY(ce_aes_ccm_encrypt)
+ aes_ccm_do_crypt 1
+ENDPROC(ce_aes_ccm_encrypt)
+
+ENTRY(ce_aes_ccm_decrypt)
+ aes_ccm_do_crypt 0
+ENDPROC(ce_aes_ccm_decrypt)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
new file mode 100644
index 000000000000..9e6cdde9b43d
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -0,0 +1,297 @@
+/*
+ * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+ u32 *macp, u32 const rk[], u32 rounds);
+
+asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+ u32 const rk[], u32 rounds, u8 mac[],
+ u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+ u32 const rk[], u32 rounds, u8 mac[],
+ u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
+ u32 rounds);
+
+static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
+ int ret;
+
+ ret = crypto_aes_expand_key(ctx, in_key, key_len);
+ if (!ret)
+ return 0;
+
+ tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ if ((authsize & 1) || authsize < 4)
+ return -EINVAL;
+ return 0;
+}
+
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
+ u32 l = req->iv[0] + 1;
+
+ /* verify that CCM dimension 'L' is set correctly in the IV */
+ if (l < 2 || l > 8)
+ return -EINVAL;
+
+ /* verify that msglen can in fact be represented in L bytes */
+ if (l < 4 && msglen >> (8 * l))
+ return -EOVERFLOW;
+
+ /*
+ * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
+ * uses a u32 type to represent msglen so the top 4 bytes are always 0.
+ */
+ n[0] = 0;
+ n[1] = cpu_to_be32(msglen);
+
+ memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
+
+ /*
+ * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
+ * - bits 0..2 : max # of bytes required to represent msglen, minus 1
+ * (already set by caller)
+ * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+ * - bit 6 : indicates presence of authenticate-only data
+ */
+ maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
+ if (req->assoclen)
+ maciv[0] |= 0x40;
+
+ memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
+ return 0;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct __packed { __be16 l; __be32 h; u16 len; } ltag;
+ struct scatter_walk walk;
+ u32 len = req->assoclen;
+ u32 macp = 0;
+
+ /* prepend the AAD with a length tag */
+ if (len < 0xff00) {
+ ltag.l = cpu_to_be16(len);
+ ltag.len = 2;
+ } else {
+ ltag.l = cpu_to_be16(0xfffe);
+ put_unaligned_be32(len, &ltag.h);
+ ltag.len = 6;
+ }
+
+ ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
+ num_rounds(ctx));
+ scatterwalk_start(&walk, req->assoc);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, len);
+ u8 *p;
+
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, len);
+ }
+ p = scatterwalk_map(&walk);
+ ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
+ num_rounds(ctx));
+ len -= n;
+
+ scatterwalk_unmap(p);
+ scatterwalk_advance(&walk, n);
+ scatterwalk_done(&walk, 0, len);
+ } while (len);
+}
+
+static int ccm_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct blkcipher_desc desc = { .info = req->iv };
+ struct blkcipher_walk walk;
+ u8 __aligned(8) mac[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 len = req->cryptlen;
+ int err;
+
+ err = ccm_init_mac(req, mac, len);
+ if (err)
+ return err;
+
+ kernel_neon_begin_partial(6);
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ /* preserve the original iv for the final round */
+ memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+ blkcipher_walk_init(&walk, req->dst, req->src, len);
+ err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+ AES_BLOCK_SIZE);
+
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == len)
+ tail = 0;
+
+ ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ len -= walk.nbytes - tail;
+ err = blkcipher_walk_done(&desc, &walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (err)
+ return err;
+
+ /* copy authtag to end of dst */
+ scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
+ crypto_aead_authsize(aead), 1);
+
+ return 0;
+}
+
+static int ccm_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ unsigned int authsize = crypto_aead_authsize(aead);
+ struct blkcipher_desc desc = { .info = req->iv };
+ struct blkcipher_walk walk;
+ u8 __aligned(8) mac[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 len = req->cryptlen - authsize;
+ int err;
+
+ err = ccm_init_mac(req, mac, len);
+ if (err)
+ return err;
+
+ kernel_neon_begin_partial(6);
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ /* preserve the original iv for the final round */
+ memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+ blkcipher_walk_init(&walk, req->dst, req->src, len);
+ err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+ AES_BLOCK_SIZE);
+
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == len)
+ tail = 0;
+
+ ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ len -= walk.nbytes - tail;
+ err = blkcipher_walk_done(&desc, &walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (err)
+ return err;
+
+ /* compare calculated auth tag with the stored one */
+ scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
+ authsize, 0);
+
+ if (memcmp(mac, buf, authsize))
+ return -EBADMSG;
+ return 0;
+}
+
+static struct crypto_alg ccm_aes_alg = {
+ .cra_name = "ccm(aes)",
+ .cra_driver_name = "ccm-aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_aead_type,
+ .cra_module = THIS_MODULE,
+ .cra_aead = {
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = ccm_setkey,
+ .setauthsize = ccm_setauthsize,
+ .encrypt = ccm_encrypt,
+ .decrypt = ccm_decrypt,
+ }
+};
+
+static int __init aes_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_AES))
+ return -ENODEV;
+ return crypto_register_alg(&ccm_aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+ crypto_unregister_alg(&ccm_aes_alg);
+}
+
+module_init(aes_mod_init);
+module_exit(aes_mod_exit);
+
+MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ccm(aes)");
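
For reference, the CCM framing that ccm_init_mac() and
ccm_calculate_auth_mac() implement (RFC 3610 / NIST 800-38C) can be checked
with a small worked example. The helper below is an illustration only, with
the flag layout taken from the comments in the code above; note the kernel's
`(authsize - 2) << 2` equals the spec's `((M - 2) / 2) << 3` because the tag
length is always even, and the AAD is prepended with a 2-byte big-endian
length when shorter than 0xff00 bytes, else a 0xfffe marker plus a 4-byte
length:

	#include <stdint.h>

	/* Worked example of the B0 flag byte:
	 *   bits 0..2: L - 1, size of the length field (set in iv[0])
	 *   bits 3..5: (tag_len - 2) / 2
	 *   bit 6:     set when associated data is present
	 * For L = 4, an 8-byte tag and non-empty AAD this yields
	 * 0x03 | 0x18 | 0x40 = 0x5b. */
	static uint8_t ccm_b0_flags(uint8_t l, uint8_t tag_len, int have_aad)
	{
		uint8_t flags = l - 1;		/* bits 0..2, from iv[0] */

		flags |= (tag_len - 2) << 2;	/* == ((tag_len-2)/2) << 3 */
		if (have_aad)
			flags |= 0x40;		/* bit 6 */
		return flags;
	}
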
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
new file mode 100644
index 000000000000..2075e1acae6b
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -0,0 +1,155 @@
+/*
+ * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+struct aes_block {
+ u8 b[AES_BLOCK_SIZE];
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aes_block *out = (struct aes_block *)dst;
+ struct aes_block const *in = (struct aes_block *)src;
+ void *dummy0;
+ int dummy1;
+
+ kernel_neon_begin_partial(4);
+
+ __asm__(" ld1 {v0.16b}, %[in] ;"
+ " ld1 {v1.2d}, [%[key]], #16 ;"
+ " cmp %w[rounds], #10 ;"
+ " bmi 0f ;"
+ " bne 3f ;"
+ " mov v3.16b, v1.16b ;"
+ " b 2f ;"
+ "0: mov v2.16b, v1.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ "1: aese v0.16b, v2.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ " aese v0.16b, v3.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ " subs %w[rounds], %w[rounds], #3 ;"
+ " aese v0.16b, v1.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ " bpl 1b ;"
+ " aese v0.16b, v2.16b ;"
+ " eor v0.16b, v0.16b, v3.16b ;"
+ " st1 {v0.16b}, %[out] ;"
+
+ : [out] "=Q"(*out),
+ [key] "=r"(dummy0),
+ [rounds] "=r"(dummy1)
+ : [in] "Q"(*in),
+ "1"(ctx->key_enc),
+ "2"(num_rounds(ctx) - 2)
+ : "cc");
+
+ kernel_neon_end();
+}
+
+static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aes_block *out = (struct aes_block *)dst;
+ struct aes_block const *in = (struct aes_block *)src;
+ void *dummy0;
+ int dummy1;
+
+ kernel_neon_begin_partial(4);
+
+ __asm__(" ld1 {v0.16b}, %[in] ;"
+ " ld1 {v1.2d}, [%[key]], #16 ;"
+ " cmp %w[rounds], #10 ;"
+ " bmi 0f ;"
+ " bne 3f ;"
+ " mov v3.16b, v1.16b ;"
+ " b 2f ;"
+ "0: mov v2.16b, v1.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ "1: aesd v0.16b, v2.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ " aesd v0.16b, v3.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ " subs %w[rounds], %w[rounds], #3 ;"
+ " aesd v0.16b, v1.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ " bpl 1b ;"
+ " aesd v0.16b, v2.16b ;"
+ " eor v0.16b, v0.16b, v3.16b ;"
+ " st1 {v0.16b}, %[out] ;"
+
+ : [out] "=Q"(*out),
+ [key] "=r"(dummy0),
+ [rounds] "=r"(dummy1)
+ : [in] "Q"(*in),
+ "1"(ctx->key_dec),
+ "2"(num_rounds(ctx) - 2)
+ : "cc");
+
+ kernel_neon_end();
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = crypto_aes_set_key,
+ .cia_encrypt = aes_cipher_encrypt,
+ .cia_decrypt = aes_cipher_decrypt
+ }
+};
+
+static int __init aes_mod_init(void)
+{
+ return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+ crypto_unregister_alg(&aes_alg);
+}
+
+module_cpu_feature_match(AES, aes_mod_init);
+module_exit(aes_mod_exit);
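
The round-count rule in num_rounds() above is simple enough to verify by
hand; a throwaway check (illustrative only) confirms the `6 + n/4` formula
matches the AES specification:

	#include <assert.h>

	/* An n-byte key gives 6 + n/4 rounds, per the AES spec. */
	int main(void)
	{
		assert(6 + 16 / 4 == 10);	/* AES-128 */
		assert(6 + 24 / 4 == 12);	/* AES-192 */
		assert(6 + 32 / 4 == 14);	/* AES-256 */
		return 0;
	}
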
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
new file mode 100644
index 000000000000..685a18f731eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce.S
@@ -0,0 +1,133 @@
+/*
+ * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
+ * Crypto Extensions
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func) ENTRY(ce_ ## func)
+#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
+
+ .arch armv8-a+crypto
+
+ /* preload all round keys */
+ .macro load_round_keys, rounds, rk
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ ld1 {v17.16b-v18.16b}, [\rk], #32
+1111: ld1 {v19.16b-v20.16b}, [\rk], #32
+2222: ld1 {v21.16b-v24.16b}, [\rk], #64
+ ld1 {v25.16b-v28.16b}, [\rk], #64
+ ld1 {v29.16b-v31.16b}, [\rk]
+ .endm
+
+ /* prepare for encryption with key in rk[] */
+ .macro enc_prepare, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ /* prepare for encryption (again) but with new key in rk[] */
+ .macro enc_switch_key, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ /* prepare for decryption with key in rk[] */
+ .macro dec_prepare, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
+ aes\de \i0\().16b, \k\().16b
+ .ifnb \i1
+ aes\de \i1\().16b, \k\().16b
+ .ifnb \i3
+ aes\de \i2\().16b, \k\().16b
+ aes\de \i3\().16b, \k\().16b
+ .endif
+ .endif
+ aes\mc \i0\().16b, \i0\().16b
+ .ifnb \i1
+ aes\mc \i1\().16b, \i1\().16b
+ .ifnb \i3
+ aes\mc \i2\().16b, \i2\().16b
+ aes\mc \i3\().16b, \i3\().16b
+ .endif
+ .endif
+ .endm
+
+ /* up to 4 interleaved encryption rounds with the same round key */
+ .macro round_Nx, enc, k, i0, i1, i2, i3
+ .ifc \enc, e
+ do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
+ .else
+ do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
+ .endif
+ .endm
+
+ /* up to 4 interleaved final rounds */
+ .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
+ aes\de \i0\().16b, \k\().16b
+ .ifnb \i1
+ aes\de \i1\().16b, \k\().16b
+ .ifnb \i3
+ aes\de \i2\().16b, \k\().16b
+ aes\de \i3\().16b, \k\().16b
+ .endif
+ .endif
+ eor \i0\().16b, \i0\().16b, \k2\().16b
+ .ifnb \i1
+ eor \i1\().16b, \i1\().16b, \k2\().16b
+ .ifnb \i3
+ eor \i2\().16b, \i2\().16b, \k2\().16b
+ eor \i3\().16b, \i3\().16b, \k2\().16b
+ .endif
+ .endif
+ .endm
+
+ /* up to 4 interleaved blocks */
+ .macro do_block_Nx, enc, rounds, i0, i1, i2, i3
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ round_Nx \enc, v17, \i0, \i1, \i2, \i3
+ round_Nx \enc, v18, \i0, \i1, \i2, \i3
+1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
+ round_Nx \enc, v20, \i0, \i1, \i2, \i3
+2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+ round_Nx \enc, \key, \i0, \i1, \i2, \i3
+ .endr
+ fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
+ .endm
+
+ .macro encrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \in
+ .endm
+
+ .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1
+ .endm
+
+ .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
+ .endm
+
+ .macro decrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \in
+ .endm
+
+ .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1
+ .endm
+
+ .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
+ .endm
+
+#include "aes-modes.S"
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
new file mode 100644
index 000000000000..60f2f4c12256
--- /dev/null
+++ b/arch/arm64/crypto/aes-glue.c
@@ -0,0 +1,446 @@
+/*
+ * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/hwcap.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+
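+/*
+ * This file is built twice: with USE_V8_CRYPTO_EXTENSIONS defined it is
+ * linked against the Crypto Extensions based aes-ce.S, and without it
+ * against the plain NEON aes-neon.S. A Makefile pattern rule along these
+ * lines (a sketch; the object names are illustrative) does the job:
+ *
+ *   CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
+ *   $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
+ *           $(call if_changed_rule,cc_o_c)
+ */
+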
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+#define MODE "ce"
+#define PRIO 300
+#define aes_ecb_encrypt ce_aes_ecb_encrypt
+#define aes_ecb_decrypt ce_aes_ecb_decrypt
+#define aes_cbc_encrypt ce_aes_cbc_encrypt
+#define aes_cbc_decrypt ce_aes_cbc_decrypt
+#define aes_ctr_encrypt ce_aes_ctr_encrypt
+#define aes_xts_encrypt ce_aes_xts_encrypt
+#define aes_xts_decrypt ce_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+#else
+#define MODE "neon"
+#define PRIO 200
+#define aes_ecb_encrypt neon_aes_ecb_encrypt
+#define aes_ecb_decrypt neon_aes_ecb_decrypt
+#define aes_cbc_encrypt neon_aes_cbc_encrypt
+#define aes_cbc_decrypt neon_aes_cbc_decrypt
+#define aes_ctr_encrypt neon_aes_ctr_encrypt
+#define aes_xts_encrypt neon_aes_xts_encrypt
+#define aes_xts_decrypt neon_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+MODULE_ALIAS("ecb(aes)");
+MODULE_ALIAS("cbc(aes)");
+MODULE_ALIAS("ctr(aes)");
+MODULE_ALIAS("xts(aes)");
+#endif
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* defined in aes-modes.S */
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, int first);
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, int first);
+
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], int first);
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], int first);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 ctr[], int first);
+
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 const rk2[], u8 iv[],
+ int first);
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 const rk2[], u8 iv[],
+ int first);
+
+struct crypto_aes_xts_ctx {
+ struct crypto_aes_ctx key1;
+ struct crypto_aes_ctx __aligned(8) key2;
+};
+
+static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int ret;
+
+ ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2);
+ if (!ret)
+ ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2],
+ key_len / 2);
+ if (!ret)
+ return 0;
+
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
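+ /* 6 + key_length / 4 yields 10/12/14 rounds for AES-128/192/256 */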
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, rounds, blocks, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+ first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, rounds, blocks, walk.iv,
+ first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+ first = 1;
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+ first);
+ first = 0;
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
+ break;
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (nbytes) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 __aligned(8) tail[AES_BLOCK_SIZE];
+
+ /*
+ * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
+ * to tell aes_ctr_encrypt() to only read half a block.
+ */
+ blocks = (nbytes <= 8) ? -1 : 1;
+
+ aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
+ blocks, walk.iv, first);
+ memcpy(tdst, tail, nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_enc, rounds, blocks,
+ (u8 *)ctx->key2.key_enc, walk.iv, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_dec, rounds, blocks,
+ (u8 *)ctx->key2.key_enc, walk.iv, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct crypto_alg aes_algs[] = { {
+ .cra_name = "__ecb-aes-" MODE,
+ .cra_driver_name = "__driver-ecb-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+}, {
+ .cra_name = "__cbc-aes-" MODE,
+ .cra_driver_name = "__driver-cbc-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+}, {
+ .cra_name = "__ctr-aes-" MODE,
+ .cra_driver_name = "__driver-ctr-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+ },
+}, {
+ .cra_name = "__xts-aes-" MODE,
+ .cra_driver_name = "__driver-xts-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_set_key,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+}, {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+} };
+
+static int __init aes_init(void)
+{
+ return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit aes_exit(void)
+{
+ crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+module_cpu_feature_match(AES, aes_init);
+#else
+module_init(aes_init);
+#endif
+module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
new file mode 100644
index 000000000000..f6e372c528eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-modes.S
@@ -0,0 +1,532 @@
+/*
+ * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* included by aes-ce.S and aes-neon.S */
+
+ .text
+ .align 4
+
+/*
+ * There are several ways to instantiate this code:
+ * - no interleave, all inline
+ * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
+ * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
+ * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
+ * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
+ *
+ * Macros imported by this code:
+ * - enc_prepare - setup NEON registers for encryption
+ * - dec_prepare - setup NEON registers for decryption
+ * - enc_switch_key - change to new key after having prepared for encryption
+ * - encrypt_block - encrypt a single block
+ * - decrypt_block - decrypt a single block
+ * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ */
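+
+/*
+ * As an illustration (not part of this patch), the 4-way interleaved
+ * variants could be selected from the Makefile with something like:
+ *
+ *   AFLAGS_aes-ce.o := -DINTERLEAVE=4
+ *   AFLAGS_aes-neon.o := -DINTERLEAVE=4 -DINTERLEAVE_INLINE
+ */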
+
+#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
+#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
+#define FRAME_POP ldp x29, x30, [sp],#16
+
+#if INTERLEAVE == 2
+
+aes_encrypt_block2x:
+ encrypt_block2x v0, v1, w3, x2, x6, w7
+ ret
+ENDPROC(aes_encrypt_block2x)
+
+aes_decrypt_block2x:
+ decrypt_block2x v0, v1, w3, x2, x6, w7
+ ret
+ENDPROC(aes_decrypt_block2x)
+
+#elif INTERLEAVE == 4
+
+aes_encrypt_block4x:
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ ret
+ENDPROC(aes_encrypt_block4x)
+
+aes_decrypt_block4x:
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ ret
+ENDPROC(aes_decrypt_block4x)
+
+#else
+#error INTERLEAVE should equal 2 or 4
+#endif
+
+ .macro do_encrypt_block2x
+ bl aes_encrypt_block2x
+ .endm
+
+ .macro do_decrypt_block2x
+ bl aes_decrypt_block2x
+ .endm
+
+ .macro do_encrypt_block4x
+ bl aes_encrypt_block4x
+ .endm
+
+ .macro do_decrypt_block4x
+ bl aes_decrypt_block4x
+ .endm
+
+#else
+#define FRAME_PUSH
+#define FRAME_POP
+
+ .macro do_encrypt_block2x
+ encrypt_block2x v0, v1, w3, x2, x6, w7
+ .endm
+
+ .macro do_decrypt_block2x
+ decrypt_block2x v0, v1, w3, x2, x6, w7
+ .endm
+
+ .macro do_encrypt_block4x
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ .endm
+
+ .macro do_decrypt_block4x
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ .endm
+
+#endif
+
+ /*
+ * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, int first)
+ * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, int first)
+ */
+
+AES_ENTRY(aes_ecb_encrypt)
+ FRAME_PUSH
+ cbz w5, .LecbencloopNx
+
+ enc_prepare w3, x2, x5
+
+.LecbencloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lecbenc1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
+ do_encrypt_block2x
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ do_encrypt_block4x
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LecbencloopNx
+.Lecbenc1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lecbencout
+#endif
+.Lecbencloop:
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lecbencloop
+.Lecbencout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ecb_encrypt)
+
+
+AES_ENTRY(aes_ecb_decrypt)
+ FRAME_PUSH
+ cbz w5, .LecbdecloopNx
+
+ dec_prepare w3, x2, x5
+
+.LecbdecloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lecbdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ do_decrypt_block2x
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ do_decrypt_block4x
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LecbdecloopNx
+.Lecbdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lecbdecout
+#endif
+.Lecbdecloop:
+ ld1 {v0.16b}, [x1], #16 /* get next ct block */
+ decrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lecbdecloop
+.Lecbdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ecb_decrypt)
+
+
+ /*
+ * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[], int first)
+ * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[], int first)
+ */
+
+AES_ENTRY(aes_cbc_encrypt)
+ cbz w6, .Lcbcencloop
+
+ ld1 {v0.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x5
+
+.Lcbcencloop:
+ ld1 {v1.16b}, [x1], #16 /* get next pt block */
+ eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lcbcencloop
+ ret
+AES_ENDPROC(aes_cbc_encrypt)
+
+
+AES_ENTRY(aes_cbc_decrypt)
+ FRAME_PUSH
+ cbz w6, .LcbcdecloopNx
+
+ ld1 {v7.16b}, [x5] /* get iv */
+ dec_prepare w3, x2, x5
+
+.LcbcdecloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lcbcdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ mov v2.16b, v0.16b
+ mov v3.16b, v1.16b
+ do_decrypt_block2x
+ eor v0.16b, v0.16b, v7.16b
+ eor v1.16b, v1.16b, v2.16b
+ mov v7.16b, v3.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ mov v4.16b, v0.16b
+ mov v5.16b, v1.16b
+ mov v6.16b, v2.16b
+ do_decrypt_block4x
+ sub x1, x1, #16
+ eor v0.16b, v0.16b, v7.16b
+ eor v1.16b, v1.16b, v4.16b
+ ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ eor v2.16b, v2.16b, v5.16b
+ eor v3.16b, v3.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LcbcdecloopNx
+.Lcbcdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lcbcdecout
+#endif
+.Lcbcdecloop:
+ ld1 {v1.16b}, [x1], #16 /* get next ct block */
+ mov v0.16b, v1.16b /* ...and copy to v0 */
+ decrypt_block v0, w3, x2, x5, w6
+ eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
+ mov v7.16b, v1.16b /* ct is next iv */
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lcbcdecloop
+.Lcbcdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_cbc_decrypt)
+
+
+ /*
+ * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 ctr[], int first)
+ */
+
+AES_ENTRY(aes_ctr_encrypt)
+ FRAME_PUSH
+ cbnz w6, .Lctrfirst /* 1st time around? */
+ umov x5, v4.d[1] /* keep swabbed ctr in reg */
+ rev x5, x5
+#if INTERLEAVE >= 2
+ cmn w5, w4 /* 32 bit overflow? */
+ bcs .Lctrinc
+ add x5, x5, #1 /* increment BE ctr */
+ b .LctrincNx
+#else
+ b .Lctrinc
+#endif
+.Lctrfirst:
+ enc_prepare w3, x2, x6
+ ld1 {v4.16b}, [x5]
+ umov x5, v4.d[1] /* keep swabbed ctr in reg */
+ rev x5, x5
+#if INTERLEAVE >= 2
+ cmn w5, w4 /* 32 bit overflow? */
+ bcs .Lctrloop
+.LctrloopNx:
+ subs w4, w4, #INTERLEAVE
+ bmi .Lctr1x
+#if INTERLEAVE == 2
+ mov v0.8b, v4.8b
+ mov v1.8b, v4.8b
+ rev x7, x5
+ add x5, x5, #1
+ ins v0.d[1], x7
+ rev x7, x5
+ add x5, x5, #1
+ ins v1.d[1], x7
+ ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
+ do_encrypt_block2x
+ eor v0.16b, v0.16b, v2.16b
+ eor v1.16b, v1.16b, v3.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
+ dup v7.4s, w5
+ mov v0.16b, v4.16b
+ add v7.4s, v7.4s, v8.4s
+ mov v1.16b, v4.16b
+ rev32 v8.16b, v7.16b
+ mov v2.16b, v4.16b
+ mov v3.16b, v4.16b
+ mov v1.s[3], v8.s[0]
+ mov v2.s[3], v8.s[1]
+ mov v3.s[3], v8.s[2]
+ ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
+ do_encrypt_block4x
+ eor v0.16b, v5.16b, v0.16b
+ ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ eor v1.16b, v6.16b, v1.16b
+ eor v2.16b, v7.16b, v2.16b
+ eor v3.16b, v5.16b, v3.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ add x5, x5, #INTERLEAVE
+#endif
+ cbz w4, .LctroutNx
+.LctrincNx:
+ rev x7, x5
+ ins v4.d[1], x7
+ b .LctrloopNx
+.LctroutNx:
+ sub x5, x5, #1
+ rev x7, x5
+ ins v4.d[1], x7
+ b .Lctrout
+.Lctr1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lctrout
+#endif
+.Lctrloop:
+ mov v0.16b, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+ subs w4, w4, #1
+ bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
+ ld1 {v3.16b}, [x1], #16
+ eor v3.16b, v0.16b, v3.16b
+ st1 {v3.16b}, [x0], #16
+ beq .Lctrout
+.Lctrinc:
+ adds x5, x5, #1 /* increment BE ctr */
+ rev x7, x5
+ ins v4.d[1], x7
+ bcc .Lctrloop /* no overflow? */
+ umov x7, v4.d[0] /* load upper word of ctr */
+ rev x7, x7 /* ... to handle the carry */
+ add x7, x7, #1
+ rev x7, x7
+ ins v4.d[0], x7
+ b .Lctrloop
+.Lctrhalfblock:
+ ld1 {v3.8b}, [x1]
+ eor v3.8b, v0.8b, v3.8b
+ st1 {v3.8b}, [x0]
+.Lctrout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ctr_encrypt)
+ .ltorg
+
+
+ /*
+ * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ */
+
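+ /*
+ * next_tweak computes the XTS tweak for the next block: the current
+ * tweak is doubled in GF(2^128), i.e. shifted left by one bit with
+ * 0x87 (from .Lxts_mul_x below) folded in when a bit falls out at
+ * the top.
+ */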
+ .macro next_tweak, out, in, const, tmp
+ sshr \tmp\().2d, \in\().2d, #63
+ and \tmp\().16b, \tmp\().16b, \const\().16b
+ add \out\().2d, \in\().2d, \in\().2d
+ ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+ eor \out\().16b, \out\().16b, \tmp\().16b
+ .endm
+
+.Lxts_mul_x:
+ .word 1, 0, 0x87, 0
+
+AES_ENTRY(aes_xts_encrypt)
+ FRAME_PUSH
+ cbz w7, .LxtsencloopNx
+
+ ld1 {v4.16b}, [x6]
+ enc_prepare w3, x5, x6
+ encrypt_block v4, w3, x5, x6, w7 /* first tweak */
+ enc_switch_key w3, x2, x6
+ ldr q7, .Lxts_mul_x
+ b .LxtsencNx
+
+.LxtsencloopNx:
+ ldr q7, .Lxts_mul_x
+ next_tweak v4, v4, v7, v8
+.LxtsencNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lxtsenc1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ do_encrypt_block2x
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+ cbz w4, .LxtsencoutNx
+ next_tweak v4, v5, v7, v8
+ b .LxtsencNx
+.LxtsencoutNx:
+ mov v4.16b, v5.16b
+ b .Lxtsencout
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8
+ eor v3.16b, v3.16b, v7.16b
+ do_encrypt_block4x
+ eor v3.16b, v3.16b, v7.16b
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v7.16b
+ cbz w4, .Lxtsencout
+ b .LxtsencloopNx
+#endif
+.Lxtsenc1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lxtsencout
+#endif
+.Lxtsencloop:
+ ld1 {v1.16b}, [x1], #16
+ eor v0.16b, v1.16b, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ beq .Lxtsencout
+ next_tweak v4, v4, v7, v8
+ b .Lxtsencloop
+.Lxtsencout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_xts_encrypt)
+
+
+AES_ENTRY(aes_xts_decrypt)
+ FRAME_PUSH
+ cbz w7, .LxtsdecloopNx
+
+ ld1 {v4.16b}, [x6]
+ enc_prepare w3, x5, x6
+ encrypt_block v4, w3, x5, x6, w7 /* first tweak */
+ dec_prepare w3, x2, x6
+ ldr q7, .Lxts_mul_x
+ b .LxtsdecNx
+
+.LxtsdecloopNx:
+ ldr q7, .Lxts_mul_x
+ next_tweak v4, v4, v7, v8
+.LxtsdecNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lxtsdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ do_decrypt_block2x
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+ cbz w4, .LxtsdecoutNx
+ next_tweak v4, v5, v7, v8
+ b .LxtsdecNx
+.LxtsdecoutNx:
+ mov v4.16b, v5.16b
+ b .Lxtsdecout
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8
+ eor v3.16b, v3.16b, v7.16b
+ do_decrypt_block4x
+ eor v3.16b, v3.16b, v7.16b
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v7.16b
+ cbz w4, .Lxtsdecout
+ b .LxtsdecloopNx
+#endif
+.Lxtsdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lxtsdecout
+#endif
+.Lxtsdecloop:
+ ld1 {v1.16b}, [x1], #16
+ eor v0.16b, v1.16b, v4.16b
+ decrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ beq .Lxtsdecout
+ next_tweak v4, v4, v7, v8
+ b .Lxtsdecloop
+.Lxtsdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_xts_decrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
new file mode 100644
index 000000000000..b93170e1cc93
--- /dev/null
+++ b/arch/arm64/crypto/aes-neon.S
@@ -0,0 +1,382 @@
+/*
+ * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func) ENTRY(neon_ ## func)
+#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
+
+ /* multiply by polynomial 'x' in GF(2^8) */
+ .macro mul_by_x, out, in, temp, const
+ sshr \temp, \in, #7
+ add \out, \in, \in
+ and \temp, \temp, \const
+ eor \out, \out, \temp
+ .endm
+
+ /* preload the entire Sbox */
+ .macro prepare, sbox, shiftrows, temp
+ adr \temp, \sbox
+ movi v12.16b, #0x40
+ ldr q13, \shiftrows
+ movi v14.16b, #0x1b
+ ld1 {v16.16b-v19.16b}, [\temp], #64
+ ld1 {v20.16b-v23.16b}, [\temp], #64
+ ld1 {v24.16b-v27.16b}, [\temp], #64
+ ld1 {v28.16b-v31.16b}, [\temp]
+ .endm
+
+ /* do preload for encryption */
+ .macro enc_prepare, ignore0, ignore1, temp
+ prepare .LForward_Sbox, .LForward_ShiftRows, \temp
+ .endm
+
+ .macro enc_switch_key, ignore0, ignore1, temp
+ /* do nothing */
+ .endm
+
+ /* do preload for decryption */
+ .macro dec_prepare, ignore0, ignore1, temp
+ prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
+ .endm
+
+ /* apply SubBytes transformation using the preloaded Sbox */
+ .macro sub_bytes, in
+ sub v9.16b, \in\().16b, v12.16b
+ tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
+ sub v10.16b, v9.16b, v12.16b
+ tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v11.16b, v10.16b, v12.16b
+ tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
+ tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
+ .endm
+
+ /* apply MixColumns transformation */
+ .macro mix_columns, in
+ mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
+ rev32 v8.8h, \in\().8h
+ eor \in\().16b, v10.16b, \in\().16b
+ shl v9.4s, v8.4s, #24
+ shl v11.4s, \in\().4s, #24
+ sri v9.4s, v8.4s, #8
+ sri v11.4s, \in\().4s, #8
+ eor v9.16b, v9.16b, v8.16b
+ eor v10.16b, v10.16b, v9.16b
+ eor \in\().16b, v10.16b, v11.16b
+ .endm
+
+ /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
+ .macro inv_mix_columns, in
+ mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
+ mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
+ eor \in\().16b, \in\().16b, v11.16b
+ rev32 v11.8h, v11.8h
+ eor \in\().16b, \in\().16b, v11.16b
+ mix_columns \in
+ .endm
+
+ .macro do_block, enc, in, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
+ sub_bytes \in
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns \in
+ .else
+ inv_mix_columns \in
+ .endif
+ b 1111b
+2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro encrypt_block, in, rounds, rk, rkp, i
+ do_block 1, \in, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block, in, rounds, rk, rkp, i
+ do_block 0, \in, \rounds, \rk, \rkp, \i
+ .endm
+
+ /*
+ * Interleaved versions: functionally equivalent to the
+ * ones above, but applied to 2 or 4 AES states in parallel.
+ */
+
+ .macro sub_bytes_2x, in0, in1
+ sub v8.16b, \in0\().16b, v12.16b
+ sub v9.16b, \in1\().16b, v12.16b
+ tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+ sub v10.16b, v8.16b, v12.16b
+ sub v11.16b, v9.16b, v12.16b
+ tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v8.16b, v10.16b, v12.16b
+ sub v9.16b, v11.16b, v12.16b
+ tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
+ tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
+ tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
+ tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
+ .endm
+
+ .macro sub_bytes_4x, in0, in1, in2, in3
+ sub v8.16b, \in0\().16b, v12.16b
+ tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ sub v9.16b, \in1\().16b, v12.16b
+ tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+ sub v10.16b, \in2\().16b, v12.16b
+ tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
+ sub v11.16b, \in3\().16b, v12.16b
+ tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
+ tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v8.16b, v8.16b, v12.16b
+ tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
+ sub v9.16b, v9.16b, v12.16b
+ tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
+ sub v10.16b, v10.16b, v12.16b
+ tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
+ sub v11.16b, v11.16b, v12.16b
+ tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
+ sub v8.16b, v8.16b, v12.16b
+ tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
+ sub v9.16b, v9.16b, v12.16b
+ tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
+ sub v10.16b, v10.16b, v12.16b
+ tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
+ sub v11.16b, v11.16b, v12.16b
+ tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
+ tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
+ tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
+ .endm
+
+ .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
+ sshr \tmp0\().16b, \in0\().16b, #7
+ add \out0\().16b, \in0\().16b, \in0\().16b
+ sshr \tmp1\().16b, \in1\().16b, #7
+ and \tmp0\().16b, \tmp0\().16b, \const\().16b
+ add \out1\().16b, \in1\().16b, \in1\().16b
+ and \tmp1\().16b, \tmp1\().16b, \const\().16b
+ eor \out0\().16b, \out0\().16b, \tmp0\().16b
+ eor \out1\().16b, \out1\().16b, \tmp1\().16b
+ .endm
+
+ .macro mix_columns_2x, in0, in1
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ rev32 v10.8h, \in0\().8h
+ rev32 v11.8h, \in1\().8h
+ eor \in0\().16b, v8.16b, \in0\().16b
+ eor \in1\().16b, v9.16b, \in1\().16b
+ shl v12.4s, v10.4s, #24
+ shl v13.4s, v11.4s, #24
+ eor v8.16b, v8.16b, v10.16b
+ sri v12.4s, v10.4s, #8
+ shl v10.4s, \in0\().4s, #24
+ eor v9.16b, v9.16b, v11.16b
+ sri v13.4s, v11.4s, #8
+ shl v11.4s, \in1\().4s, #24
+ sri v10.4s, \in0\().4s, #8
+ eor \in0\().16b, v8.16b, v12.16b
+ sri v11.4s, \in1\().4s, #8
+ eor \in1\().16b, v9.16b, v13.16b
+ eor \in0\().16b, v10.16b, \in0\().16b
+ eor \in1\().16b, v11.16b, \in1\().16b
+ .endm
+
+ .macro inv_mix_cols_2x, in0, in1
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ rev32 v8.8h, v8.8h
+ rev32 v9.8h, v9.8h
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ mix_columns_2x \in0, \in1
+ .endm
+
+ .macro inv_mix_cols_4x, in0, in1, in2, in3
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
+ mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
+ mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ eor \in2\().16b, \in2\().16b, v10.16b
+ eor \in3\().16b, \in3\().16b, v11.16b
+ rev32 v8.8h, v8.8h
+ rev32 v9.8h, v9.8h
+ rev32 v10.8h, v10.8h
+ rev32 v11.8h, v11.8h
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ eor \in2\().16b, \in2\().16b, v10.16b
+ eor \in3\().16b, \in3\().16b, v11.16b
+ mix_columns_2x \in0, \in1
+ mix_columns_2x \in2, \in3
+ .endm
+
+ .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ sub_bytes_2x \in0, \in1
+ tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
+ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns_2x \in0, \in1
+ ldr q13, .LForward_ShiftRows
+ .else
+ inv_mix_cols_2x \in0, \in1
+ ldr q13, .LReverse_ShiftRows
+ .endif
+ movi v12.16b, #0x40
+ b 1111b
+2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
+ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
+ sub_bytes_4x \in0, \in1, \in2, \in3
+ tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
+ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
+ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
+ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns_2x \in0, \in1
+ mix_columns_2x \in2, \in3
+ ldr q13, .LForward_ShiftRows
+ .else
+ inv_mix_cols_4x \in0, \in1, \in2, \in3
+ ldr q13, .LReverse_ShiftRows
+ .endif
+ movi v12.16b, #0x40
+ b 1111b
+2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
+ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
+ do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
+ do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+ do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+ do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+ .endm
+
+#include "aes-modes.S"
+
+ .text
+ .align 4
+.LForward_ShiftRows:
+ .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
+ .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+
+.LReverse_ShiftRows:
+ .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
+ .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+
+.LForward_Sbox:
+ .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+ .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+ .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+ .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+ .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+ .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+ .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+ .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+ .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+ .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+ .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+ .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+ .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+ .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+ .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+ .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+ .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+ .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+ .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+ .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+ .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+ .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+ .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+ .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+ .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+ .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+ .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+ .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+ .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+ .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+ .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+ .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+
+.LReverse_Sbox:
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..b9e6eaf41c9b
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -0,0 +1,95 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * Based on arch/x86/crypto/ghash-clmulni-intel_asm.S
+ *
+ * Copyright (c) 2009 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ * Vinodh Gopal
+ * Erdinc Ozturk
+ * Deniz Karakoyunlu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
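+ /*
+ * The 128-bit carry-less multiply is composed Karatsuba style from
+ * three 64x64 PMULL instructions, followed by a two-phase shift
+ * based reduction modulo the GHASH polynomial.
+ */
+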
+ DATA .req v0
+ SHASH .req v1
+ IN1 .req v2
+ T1 .req v2
+ T2 .req v3
+ T3 .req v4
+ VZR .req v5
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ */
+ENTRY(pmull_ghash_update)
+ ld1 {DATA.16b}, [x1]
+ ld1 {SHASH.16b}, [x3]
+ eor VZR.16b, VZR.16b, VZR.16b
+
+ /* do the head block first, if supplied */
+ cbz x4, 0f
+ ld1 {IN1.2d}, [x4]
+ b 1f
+
+0: ld1 {IN1.2d}, [x2], #16
+ sub w0, w0, #1
+1: ext IN1.16b, IN1.16b, IN1.16b, #8
+CPU_LE( rev64 IN1.16b, IN1.16b )
+ eor DATA.16b, DATA.16b, IN1.16b
+
+ /* multiply DATA by SHASH in GF(2^128) */
+ ext T2.16b, DATA.16b, DATA.16b, #8
+ ext T3.16b, SHASH.16b, SHASH.16b, #8
+ eor T2.16b, T2.16b, DATA.16b
+ eor T3.16b, T3.16b, SHASH.16b
+
+ pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
+ pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
+ pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
+ eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
+ eor T2.16b, T2.16b, DATA.16b
+
+ ext T3.16b, VZR.16b, T2.16b, #8
+ ext T2.16b, T2.16b, VZR.16b, #8
+ eor DATA.16b, DATA.16b, T3.16b
+ eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
+ // carry-less multiplication
+
+ /* first phase of the reduction */
+ shl T3.2d, DATA.2d, #1
+ eor T3.16b, T3.16b, DATA.16b
+ shl T3.2d, T3.2d, #5
+ eor T3.16b, T3.16b, DATA.16b
+ shl T3.2d, T3.2d, #57
+ ext T2.16b, VZR.16b, T3.16b, #8
+ ext T3.16b, T3.16b, VZR.16b, #8
+ eor DATA.16b, DATA.16b, T2.16b
+ eor T1.16b, T1.16b, T3.16b
+
+ /* second phase of the reduction */
+ ushr T2.2d, DATA.2d, #5
+ eor T2.16b, T2.16b, DATA.16b
+ ushr T2.2d, T2.2d, #1
+ eor T2.16b, T2.16b, DATA.16b
+ ushr T2.2d, T2.2d, #1
+ eor T1.16b, T1.16b, T2.16b
+ eor DATA.16b, DATA.16b, T1.16b
+
+ cbnz w0, 0b
+
+ st1 {DATA.16b}, [x1]
+ ret
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..b92baf3f68c7
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -0,0 +1,155 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+struct ghash_key {
+ u64 a;
+ u64 b;
+};
+
+struct ghash_desc_ctx {
+ u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+ u8 buf[GHASH_BLOCK_SIZE];
+ u32 count;
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ ctx->count += len;
+
+ if ((partial + len) >= GHASH_BLOCK_SIZE) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ int blocks;
+
+ if (partial) {
+ int p = GHASH_BLOCK_SIZE - partial;
+
+ memcpy(ctx->buf + partial, src, p);
+ src += p;
+ len -= p;
+ }
+
+ blocks = len / GHASH_BLOCK_SIZE;
+ len %= GHASH_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(6);
+ pmull_ghash_update(blocks, ctx->digest, src, key,
+ partial ? ctx->buf : NULL);
+ kernel_neon_end();
+ src += blocks * GHASH_BLOCK_SIZE;
+ }
+ if (len)
+ memcpy(ctx->buf + partial, src, len);
+ return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ if (partial) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+ memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+
+ kernel_neon_begin_partial(6);
+ pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+ kernel_neon_end();
+ }
+ put_unaligned_be64(ctx->digest[1], dst);
+ put_unaligned_be64(ctx->digest[0], dst + 8);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *inkey, unsigned int keylen)
+{
+ struct ghash_key *key = crypto_shash_ctx(tfm);
+ u64 a, b;
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ /* perform multiplication by 'x' in GF(2^128) */
+ b = get_unaligned_be64(inkey);
+ a = get_unaligned_be64(inkey + 8);
+
+ key->a = (a << 1) | (b >> 63);
+ key->b = (b << 1) | (a >> 63);
+
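+ /*
+ * 0xc2... is the GHASH reduction polynomial x^128 + x^7 + x^2 + x + 1
+ * in the bit-reflected representation; fold it in when the bit
+ * shifted out at the top was set.
+ */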
+ if (b >> 63)
+ key->b ^= 0xc200000000000000UL;
+
+ return 0;
+}
+
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_key),
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+ return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..09d57d98609c
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -0,0 +1,153 @@
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ k0 .req v0
+ k1 .req v1
+ k2 .req v2
+ k3 .req v3
+
+ t0 .req v4
+ t1 .req v5
+
+ dga .req q6
+ dgav .req v6
+ dgb .req s7
+ dgbv .req v7
+
+ dg0q .req q12
+ dg0s .req s12
+ dg0v .req v12
+ dg1s .req s13
+ dg1v .req v13
+ dg2s .req s14
+
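+ /*
+ * The round macros keep two copies of the rotated digest ('ev'/'od'
+ * rounds ping-pong between t0/t1 and dg1s/dg2s), and \op selects the
+ * sha1c, sha1p or sha1m instruction for the choose, parity and
+ * majority round functions respectively.
+ */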
+ .macro add_only, op, ev, rc, s0, dg1
+ .ifc \ev, ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha1h dg2s, dg0s
+ .ifnb \dg1
+ sha1\op dg0q, \dg1, t0.4s
+ .else
+ sha1\op dg0q, dg1s, t0.4s
+ .endif
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha1h dg1s, dg0s
+ sha1\op dg0q, dg2s, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
+ sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
+ add_only \op, \ev, \rc, \s1, \dg1
+ sha1su1 v\s0\().4s, v\s3\().4s
+ .endm
+
+ /*
+ * The SHA1 round constants
+ */
+ .align 4
+.Lsha1_rcon:
+ .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+
+ /*
+ * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ * u8 *head, long bytes)
+ */
+ENTRY(sha1_ce_transform)
+ /* load round constants */
+ adr x6, .Lsha1_rcon
+ ld1r {k0.4s}, [x6], #4
+ ld1r {k1.4s}, [x6], #4
+ ld1r {k2.4s}, [x6], #4
+ ld1r {k3.4s}, [x6]
+
+ /* load state */
+ ldr dga, [x2]
+ ldr dgb, [x2, #16]
+
+ /* load partial state (if supplied) */
+ cbz x3, 0f
+ ld1 {v8.4s-v11.4s}, [x3]
+ b 1f
+
+ /* load input */
+0: ld1 {v8.4s-v11.4s}, [x1], #64
+ sub w0, w0, #1
+
+1:
+CPU_LE( rev32 v8.16b, v8.16b )
+CPU_LE( rev32 v9.16b, v9.16b )
+CPU_LE( rev32 v10.16b, v10.16b )
+CPU_LE( rev32 v11.16b, v11.16b )
+
+2: add t0.4s, v8.4s, k0.4s
+ mov dg0v.16b, dgav.16b
+
+ add_update c, ev, k0, 8, 9, 10, 11, dgb
+ add_update c, od, k0, 9, 10, 11, 8
+ add_update c, ev, k0, 10, 11, 8, 9
+ add_update c, od, k0, 11, 8, 9, 10
+ add_update c, ev, k1, 8, 9, 10, 11
+
+ add_update p, od, k1, 9, 10, 11, 8
+ add_update p, ev, k1, 10, 11, 8, 9
+ add_update p, od, k1, 11, 8, 9, 10
+ add_update p, ev, k1, 8, 9, 10, 11
+ add_update p, od, k2, 9, 10, 11, 8
+
+ add_update m, ev, k2, 10, 11, 8, 9
+ add_update m, od, k2, 11, 8, 9, 10
+ add_update m, ev, k2, 8, 9, 10, 11
+ add_update m, od, k2, 9, 10, 11, 8
+ add_update m, ev, k3, 10, 11, 8, 9
+
+ add_update p, od, k3, 11, 8, 9, 10
+ add_only p, ev, k3, 9
+ add_only p, od, k3, 10
+ add_only p, ev, k3, 11
+ add_only p, od
+
+ /* update state */
+ add dgbv.2s, dgbv.2s, dg1v.2s
+ add dgav.4s, dgav.4s, dg0v.4s
+
+ cbnz w0, 0b
+
+ /*
+ * Final block: add padding and total bit count.
+ * Skip if we have no total byte count in x4. In that case, the input
+ * size was not a round multiple of the block size, and the padding is
+ * handled by the C code.
+ */
+ cbz x4, 3f
+ movi v9.2d, #0
+ mov x8, #0x80000000
+ movi v10.2d, #0
+ ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+ fmov d8, x8
+ mov x4, #0
+ mov v11.d[0], xzr
+ mov v11.d[1], x7
+ b 2b
+
+ /* store new state */
+3: str dga, [x2]
+ str dgb, [x2, #16]
+ ret
+ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..6fe83f37a750
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -0,0 +1,174 @@
+/*
+ * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ u8 *head, long bytes);
+
+static int sha1_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+ return 0;
+}
+
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+ sctx->count += len;
+
+ if ((partial + len) >= SHA1_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SHA1_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buffer + partial, data, p);
+ data += p;
+ len -= p;
+ }
+
+ blocks = len / SHA1_BLOCK_SIZE;
+ len %= SHA1_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(16);
+ sha1_ce_transform(blocks, data, sctx->state,
+ partial ? sctx->buffer : NULL, 0);
+ kernel_neon_end();
+
+ data += blocks * SHA1_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buffer + partial, data, len);
+ return 0;
+}
+
+static int sha1_final(struct shash_desc *desc, u8 *out)
+{
+ static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ __be64 bits = cpu_to_be64(sctx->count << 3);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
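+ /* pad so that count + padlen + sizeof(bits) is a block multiple */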
+ u32 padlen = SHA1_BLOCK_SIZE
+ - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
+
+ sha1_update(desc, padding, padlen);
+ sha1_update(desc, (const u8 *)&bits, sizeof(bits));
+
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha1_state){};
+ return 0;
+}
+
+static int sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int blocks;
+ int i;
+
+ if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
+ sha1_update(desc, data, len);
+ return sha1_final(desc, out);
+ }
+
+ /*
+ * Use a fast path if the input is a multiple of 64 bytes. In
+ * this case, there is no need to copy data around, and we can
+ * perform the entire digest calculation in a single invocation
+ * of sha1_ce_transform().
+ */
+ blocks = len / SHA1_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(16);
+ sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+ kernel_neon_end();
+
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha1_state){};
+ return 0;
+}
+
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct sha1_state *dst = out;
+
+ *dst = *sctx;
+ return 0;
+}
+
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct sha1_state const *src = in;
+
+ *sctx = *src;
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .init = sha1_init,
+ .update = sha1_update,
+ .final = sha1_final,
+ .finup = sha1_finup,
+ .export = sha1_export,
+ .import = sha1_import,
+ .descsize = sizeof(struct sha1_state),
+ .digestsize = SHA1_DIGEST_SIZE,
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_ce_mod_init(void)
+{
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_ce_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
+module_exit(sha1_ce_mod_fini);
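The shash registered above advertises the generic "sha1" name at priority 200, so it wins over the plain C implementation whenever this module is loaded. As a hedged sketch (not part of this patch; the helper name is hypothetical and error handling is minimal), a kernel user would reach it through the ordinary synchronous hash API:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

static int sha1_digest_example(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm = crypto_alloc_shash("sha1", 0, 0);
	struct shash_desc *desc;
	int err;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* descsize covers the struct sha1_state declared by the driver */
	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;
	desc->flags = 0;

	err = crypto_shash_digest(desc, data, len, out); /* init+update+final */

	kfree(desc);
	crypto_free_shash(tfm);
	return err;
}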
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..7f29fc031ea8
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -0,0 +1,156 @@
+/*
+ * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ dga .req q20
+ dgav .req v20
+ dgb .req q21
+ dgbv .req v21
+
+ t0 .req v22
+ t1 .req v23
+
+ dg0q .req q24
+ dg0v .req v24
+ dg1q .req q25
+ dg1v .req v25
+ dg2q .req q26
+ dg2v .req v26
+
+ .macro add_only, ev, rc, s0
+ mov dg2v.16b, dg0v.16b
+ .ifeq \ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha256h dg0q, dg1q, t0.4s
+ sha256h2 dg1q, dg2q, t0.4s
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha256h dg0q, dg1q, t1.4s
+ sha256h2 dg1q, dg2q, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, ev, rc, s0, s1, s2, s3
+ sha256su0 v\s0\().4s, v\s1\().4s
+ add_only \ev, \rc, \s1
+ sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s
+ .endm
+
+ /*
+ * The SHA-256 round constants
+ */
+ .align 4
+.Lsha2_rcon:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+ /*
+ * int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ * u8 *head, long bytes)
+ */
+ENTRY(sha2_ce_transform)
+ /* load round constants */
+ adr x8, .Lsha2_rcon
+ ld1 { v0.4s- v3.4s}, [x8], #64
+ ld1 { v4.4s- v7.4s}, [x8], #64
+ ld1 { v8.4s-v11.4s}, [x8], #64
+ ld1 {v12.4s-v15.4s}, [x8]
+
+ /* load state */
+ ldp dga, dgb, [x2]
+
+ /* load partial input (if supplied) */
+ cbz x3, 0f
+ ld1 {v16.4s-v19.4s}, [x3]
+ b 1f
+
+ /* load input */
+0: ld1 {v16.4s-v19.4s}, [x1], #64
+ sub w0, w0, #1
+
+1:
+CPU_LE( rev32 v16.16b, v16.16b )
+CPU_LE( rev32 v17.16b, v17.16b )
+CPU_LE( rev32 v18.16b, v18.16b )
+CPU_LE( rev32 v19.16b, v19.16b )
+
+2: add t0.4s, v16.4s, v0.4s
+ mov dg0v.16b, dgav.16b
+ mov dg1v.16b, dgbv.16b
+
+ add_update 0, v1, 16, 17, 18, 19
+ add_update 1, v2, 17, 18, 19, 16
+ add_update 0, v3, 18, 19, 16, 17
+ add_update 1, v4, 19, 16, 17, 18
+
+ add_update 0, v5, 16, 17, 18, 19
+ add_update 1, v6, 17, 18, 19, 16
+ add_update 0, v7, 18, 19, 16, 17
+ add_update 1, v8, 19, 16, 17, 18
+
+ add_update 0, v9, 16, 17, 18, 19
+ add_update 1, v10, 17, 18, 19, 16
+ add_update 0, v11, 18, 19, 16, 17
+ add_update 1, v12, 19, 16, 17, 18
+
+ add_only 0, v13, 17
+ add_only 1, v14, 18
+ add_only 0, v15, 19
+ add_only 1
+
+ /* update state */
+ add dgav.4s, dgav.4s, dg0v.4s
+ add dgbv.4s, dgbv.4s, dg1v.4s
+
+ /* handled all input blocks? */
+ cbnz w0, 0b
+
+ /*
+ * Final block: add padding and total bit count.
+ * Skip if we have no total byte count in x4. In that case, the input
+ * size was not a round multiple of the block size, and the padding is
+ * handled by the C code.
+ */
+ cbz x4, 3f
+ movi v17.2d, #0
+ mov x8, #0x80000000
+ movi v18.2d, #0
+ ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+ fmov d16, x8
+ mov x4, #0
+ mov v19.d[0], xzr
+ mov v19.d[1], x7
+ b 2b
+
+ /* store new state */
+3: stp dga, dgb, [x2]
+ ret
+ENDPROC(sha2_ce_transform)
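When x4 carries a total byte count, the tail above fabricates the single padding block directly in v16-v19 instead of bouncing it through memory; since it re-enters the loop after the rev32 fixups, the 0x80 byte and the bit count are materialised in their post-rev32 form (hence the 0x80000000 lane and the ror). A hedged user-space C rendering of the block's logical layout, for illustration only:

#include <stdint.h>
#include <string.h>

/*
 * The one padding block implied when the input length is an exact
 * multiple of 64 bytes: a 0x80 byte, 55 zero bytes, then the total
 * length in bits as a 64-bit big-endian value.
 */
static void sha256_pad_block(uint64_t total_bytes, uint8_t block[64])
{
	uint64_t bits = total_bytes << 3;
	int i;

	memset(block, 0, 64);
	block[0] = 0x80;
	for (i = 0; i < 8; i++)		/* store big-endian */
		block[56 + i] = (uint8_t)(bits >> (56 - 8 * i));
}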
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..c294e67d3925
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -0,0 +1,255 @@
+/*
+ * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ u8 *head, long bytes);
+
+static int sha224_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha256_state){
+ .state = {
+ SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+ SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+ }
+ };
+ return 0;
+}
+
+static int sha256_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha256_state){
+ .state = {
+ SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+ SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
+ }
+ };
+ return 0;
+}
+
+static int sha2_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+ sctx->count += len;
+
+ if ((partial + len) >= SHA256_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SHA256_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buf + partial, data, p);
+ data += p;
+ len -= p;
+ }
+
+ blocks = len / SHA256_BLOCK_SIZE;
+ len %= SHA256_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(28);
+ sha2_ce_transform(blocks, data, sctx->state,
+ partial ? sctx->buf : NULL, 0);
+ kernel_neon_end();
+
+ data += blocks * SHA256_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buf + partial, data, len);
+ return 0;
+}
+
+static void sha2_final(struct shash_desc *desc)
+{
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be64 bits = cpu_to_be64(sctx->count << 3);
+ u32 padlen = SHA256_BLOCK_SIZE
+ - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
+
+ sha2_update(desc, padding, padlen);
+ sha2_update(desc, (const u8 *)&bits, sizeof(bits));
+}
+
+static int sha224_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_final(desc);
+
+ for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_final(desc);
+
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static void sha2_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ int blocks;
+
+ if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
+ sha2_update(desc, data, len);
+ sha2_final(desc);
+ return;
+ }
+
+ /*
+ * Use a fast path if the input is a multiple of 64 bytes. In
+ * this case, there is no need to copy data around, and we can
+ * perform the entire digest calculation in a single invocation
+ * of sha2_ce_transform()
+ */
+ blocks = len / SHA256_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(28);
+ sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+ kernel_neon_end();
+}
+
+static int sha224_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_finup(desc, data, len);
+
+ for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_finup(desc, data, len);
+
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha2_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state *dst = out;
+
+ *dst = *sctx;
+ return 0;
+}
+
+static int sha2_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state const *src = in;
+
+ *sctx = *src;
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .init = sha224_init,
+ .update = sha2_update,
+ .final = sha224_final,
+ .finup = sha224_finup,
+ .export = sha2_export,
+ .import = sha2_import,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA224_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .init = sha256_init,
+ .update = sha2_update,
+ .final = sha256_final,
+ .finup = sha256_finup,
+ .export = sha2_export,
+ .import = sha2_import,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA256_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha2_ce_mod_init(void)
+{
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha2_ce_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
+module_exit(sha2_ce_mod_fini);
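Because sha2_export()/sha2_import() simply copy struct sha256_state, a caller can checkpoint a partially hashed stream and resume it on another descriptor. A hedged sketch (both descriptors assumed to be set up for the same "sha256" tfm; the function name is hypothetical):

#include <crypto/hash.h>
#include <crypto/sha.h>

static int sha256_checkpoint_example(struct shash_desc *a, struct shash_desc *b,
				     const u8 *p1, unsigned int l1,
				     const u8 *p2, unsigned int l2, u8 *out)
{
	struct sha256_state st;
	int err;

	err = crypto_shash_init(a);
	if (!err)
		err = crypto_shash_update(a, p1, l1);
	if (!err)
		err = crypto_shash_export(a, &st);	/* sha2_export() */
	if (!err)
		err = crypto_shash_import(b, &st);	/* sha2_import() */
	if (!err)
		err = crypto_shash_finup(b, p2, l2, out);
	return err;
}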
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 79a642d199f2..cfe9860b2076 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -10,6 +10,7 @@ generic-y += delay.h
generic-y += div64.h
generic-y += dma.h
+generic-y += early_ioremap.h
generic-y += emergency-restart.h
generic-y += errno.h
generic-y += ftrace.h
generic-y += hw_irq.h
@@ -26,16 +27,17 @@ generic-y += mman.h
generic-y += msgbuf.h
generic-y += mutex.h
generic-y += pci.h
-generic-y += percpu.h
generic-y += poll.h
generic-y += posix_types.h
generic-y += resource.h
+generic-y += rwsem.h
generic-y += scatterlist.h
generic-y += sections.h
generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index d56ed11ba9a3..be56d33c5dbf 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -26,7 +26,13 @@
#include <clocksource/arm_arch_timer.h>
-static inline void arch_timer_reg_write(int access, int reg, u32 val)
+/*
+ * These register accessors are marked inline so the compiler can
+ * nicely work out which register we want, and chuck away the rest of
+ * the code.
+ */
+static __always_inline
+void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
@@ -36,8 +42,6 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val)
case ARCH_TIMER_REG_TVAL:
asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
break;
- default:
- BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
@@ -47,17 +51,14 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val)
case ARCH_TIMER_REG_TVAL:
asm volatile("msr cntv_tval_el0, %0" : : "r" (val));
break;
- default:
- BUILD_BUG();
}
- } else {
- BUILD_BUG();
}
isb();
}
-static inline u32 arch_timer_reg_read(int access, int reg)
+static __always_inline
+u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
{
u32 val;
@@ -69,8 +70,6 @@ static inline u32 arch_timer_reg_read(int access, int reg)
case ARCH_TIMER_REG_TVAL:
asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
break;
- default:
- BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
@@ -80,11 +79,7 @@ static inline u32 arch_timer_reg_read(int access, int reg)
case ARCH_TIMER_REG_TVAL:
asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
break;
- default:
- BUILD_BUG();
}
- } else {
- BUILD_BUG();
}
return val;
@@ -97,19 +92,49 @@ static inline u32 arch_timer_get_cntfrq(void)
return val;
}
-static inline void __cpuinit arch_counter_set_user_access(void)
+static inline u32 arch_timer_get_cntkctl(void)
{
u32 cntkctl;
-
- /* Disable user access to the timers and the physical counter. */
asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl));
- cntkctl &= ~((3 << 8) | (1 << 0));
+ return cntkctl;
+}
- /* Enable user access to the virtual counter and frequency. */
- cntkctl |= (1 << 1);
+static inline void arch_timer_set_cntkctl(u32 cntkctl)
+{
asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl));
}
+static inline void __cpuinit arch_counter_set_user_access(void)
+{
+ u32 cntkctl = arch_timer_get_cntkctl();
+
+ /* Disable user access to the timers and the physical counter */
+ /* Also disable virtual event stream */
+ cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN
+ | ARCH_TIMER_USR_VT_ACCESS_EN
+ | ARCH_TIMER_VIRT_EVT_EN
+ | ARCH_TIMER_USR_PCT_ACCESS_EN);
+
+ /* Enable user access to the virtual counter */
+ cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+
+ arch_timer_set_cntkctl(cntkctl);
+}
+
+static inline void arch_timer_evtstrm_enable(int divider)
+{
+ u32 cntkctl = arch_timer_get_cntkctl();
+ cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
+ /* Set the divider and enable virtual event stream */
+ cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
+ | ARCH_TIMER_VIRT_EVT_EN;
+ arch_timer_set_cntkctl(cntkctl);
+ elf_hwcap |= HWCAP_EVTSTRM;
+#ifdef CONFIG_COMPAT
+ compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
+#endif
+}
+
static inline u64 arch_counter_get_cntvct(void)
{
u64 cval;
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 5aceb83b3f5c..fd3e3924041b 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -115,3 +115,34 @@ lr .req x30 // link register
.align 7
b \label
.endm
+
+/*
+ * Select code when configured for BE.
+ */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define CPU_BE(code...) code
+#else
+#define CPU_BE(code...)
+#endif
+
+/*
+ * Select code when configured for LE.
+ */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define CPU_LE(code...)
+#else
+#define CPU_LE(code...) code
+#endif
+
+/*
+ * Define a macro that constructs a 64-bit value by concatenating two
+ * 32-bit registers. Note that on big endian systems the order of the
+ * registers is swapped.
+ */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ .macro regs_to_64, rd, lbits, hbits
+#else
+ .macro regs_to_64, rd, hbits, lbits
+#endif
+ orr \rd, \lbits, \hbits, lsl #32
+ .endm
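A hedged C rendering of what regs_to_64 computes, to make the endianness handling explicit (illustration only, not part of the patch):

#include <stdint.h>

/*
 * Callers always pass the register holding the low 32 bits first as
 * laid out in memory; on a big-endian kernel the macro swaps its
 * parameter order instead, so the same call sites work unchanged.
 */
static inline uint64_t regs_to_64_c(uint32_t lbits, uint32_t hbits)
{
	return (uint64_t)lbits | ((uint64_t)hbits << 32);
}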
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 836364468571..a049bf7f5150 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -54,8 +54,7 @@ static inline void atomic_add(int i, atomic_t *v)
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc");
+ : "Ir" (i));
}
static inline int atomic_add_return(int i, atomic_t *v)
@@ -64,14 +63,15 @@ static inline int atomic_add_return(int i, atomic_t *v)
int result;
asm volatile("// atomic_add_return\n"
-"1: ldaxr %w0, %2\n"
+"1: ldxr %w0, %2\n"
" add %w0, %w0, %w3\n"
" stlxr %w1, %w0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: "Ir" (i)
- : "cc", "memory");
+ : "memory");
+ smp_mb();
return result;
}
@@ -86,8 +86,7 @@ static inline void atomic_sub(int i, atomic_t *v)
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc");
+ : "Ir" (i));
}
static inline int atomic_sub_return(int i, atomic_t *v)
@@ -96,14 +95,15 @@ static inline int atomic_sub_return(int i, atomic_t *v)
int result;
asm volatile("// atomic_sub_return\n"
-"1: ldaxr %w0, %2\n"
+"1: ldxr %w0, %2\n"
" sub %w0, %w0, %w3\n"
" stlxr %w1, %w0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: "Ir" (i)
- : "cc", "memory");
+ : "memory");
+ smp_mb();
return result;
}
@@ -112,17 +112,20 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
unsigned long tmp;
int oldval;
+ smp_mb();
+
asm volatile("// atomic_cmpxchg\n"
-"1: ldaxr %w1, %2\n"
+"1: ldxr %w1, %2\n"
" cmp %w1, %w3\n"
" b.ne 2f\n"
-" stlxr %w0, %w4, %2\n"
+" stxr %w0, %w4, %2\n"
" cbnz %w0, 1b\n"
"2:"
: "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter)
: "Ir" (old), "r" (new)
- : "cc", "memory");
+ : "cc");
+ smp_mb();
return oldval;
}
@@ -173,7 +176,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
*/
#define ATOMIC64_INIT(i) { (i) }
-#define atomic64_read(v) (*(volatile long long *)&(v)->counter)
+#define atomic64_read(v) (*(volatile long *)&(v)->counter)
#define atomic64_set(v,i) (((v)->counter) = (i))
static inline void atomic64_add(u64 i, atomic64_t *v)
@@ -187,8 +190,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc");
+ : "Ir" (i));
}
static inline long atomic64_add_return(long i, atomic64_t *v)
@@ -197,14 +199,15 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
unsigned long tmp;
asm volatile("// atomic64_add_return\n"
-"1: ldaxr %0, %2\n"
+"1: ldxr %0, %2\n"
" add %0, %0, %3\n"
" stlxr %w1, %0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: "Ir" (i)
- : "cc", "memory");
+ : "memory");
+ smp_mb();
return result;
}
@@ -219,8 +222,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc");
+ : "Ir" (i));
}
static inline long atomic64_sub_return(long i, atomic64_t *v)
@@ -229,14 +231,15 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
unsigned long tmp;
asm volatile("// atomic64_sub_return\n"
-"1: ldaxr %0, %2\n"
+"1: ldxr %0, %2\n"
" sub %0, %0, %3\n"
" stlxr %w1, %0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: "Ir" (i)
- : "cc", "memory");
+ : "memory");
+ smp_mb();
return result;
}
@@ -245,17 +248,20 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
long oldval;
unsigned long res;
+ smp_mb();
+
asm volatile("// atomic64_cmpxchg\n"
-"1: ldaxr %1, %2\n"
+"1: ldxr %1, %2\n"
" cmp %1, %3\n"
" b.ne 2f\n"
-" stlxr %w0, %4, %2\n"
+" stxr %w0, %4, %2\n"
" cbnz %w0, 1b\n"
"2:"
: "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter)
: "Ir" (old), "r" (new)
- : "cc", "memory");
+ : "cc");
+ smp_mb();
return oldval;
}
@@ -267,11 +273,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n"
-"1: ldaxr %0, %2\n"
+"1: ldxr %0, %2\n"
" subs %0, %0, #1\n"
" b.mi 2f\n"
" stlxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
+" dmb ish\n"
"2:"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
:
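The net effect of the ldxr/stlxr plus smp_mb() rework is that value-returning atomics keep their documented full-barrier semantics on both sides of the access. A hedged sketch of the ordering contract callers rely on (function names hypothetical):

#include <linux/atomic.h>

static atomic_t ready = ATOMIC_INIT(0);
static int payload;

static void example_writer(void)
{
	payload = 42;
	atomic_add_return(1, &ready);	/* full barrier: payload is published first */
}

static int example_reader(void)
{
	if (atomic_add_return(0, &ready))	/* full barrier on the read side too */
		return payload;			/* guaranteed to observe 42 */
	return -1;
}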
diff --git a/arch/arm64/include/asm/bL_switcher.h b/arch/arm64/include/asm/bL_switcher.h
new file mode 100644
index 000000000000..2bee500b7f54
--- /dev/null
+++ b/arch/arm64/include/asm/bL_switcher.h
@@ -0,0 +1,54 @@
+/*
+ * Based on the stubs for the ARM implementation which is:
+ *
+ * Created by: Nicolas Pitre, April 2012
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+#include <linux/notifier.h>
+#include <linux/types.h>
+
+typedef void (*bL_switch_completion_handler)(void *cookie);
+
+static inline int bL_switch_request(unsigned int cpu,
+ unsigned int new_cluster_id)
+{
+ return -ENOTSUPP;
+}
+
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE 0
+#define BL_NOTIFY_POST_ENABLE 1
+#define BL_NOTIFY_PRE_DISABLE 2
+#define BL_NOTIFY_POST_DISABLE 3
+
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
+
+#endif
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index d4a63338a53c..709f1f6d6bbd 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,20 +25,71 @@
#define wfi() asm volatile("wfi" : : : "memory")
#define isb() asm volatile("isb" : : : "memory")
-#define dsb() asm volatile("dsb sy" : : : "memory")
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
-#define mb() dsb()
-#define rmb() asm volatile("dsb ld" : : : "memory")
-#define wmb() asm volatile("dsb st" : : : "memory")
+#define mb() dsb(sy)
+#define rmb() dsb(ld)
+#define wmb() dsb(st)
#ifndef CONFIG_SMP
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
+
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
#else
-#define smp_mb() asm volatile("dmb ish" : : : "memory")
-#define smp_rmb() asm volatile("dmb ishld" : : : "memory")
-#define smp_wmb() asm volatile("dmb ishst" : : : "memory")
+
+#define smp_mb() dmb(ish)
+#define smp_rmb() dmb(ishld)
+#define smp_wmb() dmb(ishst)
+
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ switch (sizeof(*p)) { \
+ case 4: \
+ asm volatile ("stlr %w1, %0" \
+ : "=Q" (*p) : "r" (v) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("stlr %1, %0" \
+ : "=Q" (*p) : "r" (v) : "memory"); \
+ break; \
+ } \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1; \
+ compiletime_assert_atomic_type(*p); \
+ switch (sizeof(*p)) { \
+ case 4: \
+ asm volatile ("ldar %w0, %1" \
+ : "=r" (___p1) : "Q" (*p) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("ldar %0, %1" \
+ : "=r" (___p1) : "Q" (*p) : "memory"); \
+ break; \
+ } \
+ ___p1; \
+})
+
#endif
#define read_barrier_depends() do { } while(0)
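smp_store_release() and smp_load_acquire() pair up into the classic message-passing pattern; on SMP arm64 they now compile down to single stlr/ldar instructions. A hedged sketch (names hypothetical):

#include <asm/barrier.h>

static int msg_data;
static int msg_flag;

static void example_publish(int v)
{
	msg_data = v;
	smp_store_release(&msg_flag, 1);	/* stlr: orders the data store first */
}

static int example_consume(void)
{
	if (smp_load_acquire(&msg_flag))	/* ldar: pairs with the stlr above */
		return msg_data;		/* guaranteed to see v */
	return -1;
}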
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 0c13554965b8..f2defe1c380c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -85,6 +85,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
}
/*
+ * Cache maintenance functions used by the DMA API. Not to be used directly.
+ */
+extern void __dma_map_area(const void *, size_t, int);
+extern void __dma_unmap_area(const void *, size_t, int);
+extern void __dma_flush_range(const void *, const void *);
+
+/*
* Copy user data from/to a page which is mapped into a different
 * process's address space. Really, we want to allow our "user
* space" model to handle this.
@@ -116,7 +123,7 @@ extern void flush_dcache_page(struct page *);
static inline void __flush_icache_all(void)
{
asm("ic ialluis");
- dsb();
+ dsb(ish);
}
#define flush_dcache_mmap_lock(mapping) \
@@ -124,9 +131,6 @@ static inline void __flush_icache_all(void)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
-#define flush_icache_user_range(vma,page,addr,len) \
- flush_dcache_page(page)
-
/*
* We don't appear to need to do anything here. In fact, if we did, we'd
* duplicate cache flushing elsewhere performed by flush_dcache_page().
@@ -134,19 +138,10 @@ static inline void __flush_icache_all(void)
#define flush_icache_page(vma,page) do { } while (0)
/*
- * flush_cache_vmap() is used when creating mappings (eg, via vmap,
- * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT
- * caches, since the direct-mappings of these pages may contain cached
- * data, we need to do a full cache flush to ensure that writebacks
- * don't corrupt data placed into these pages via the new mappings.
+ * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache).
*/
static inline void flush_cache_vmap(unsigned long start, unsigned long end)
{
- /*
- * set_pte_at() called from vmap_pte_range() does not
- * have a DSB after cleaning the cache line.
- */
- dsb();
}
static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 8a8ce0e73a38..a84d4c8acbbe 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -29,49 +29,55 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
switch (size) {
case 1:
asm volatile("// __xchg1\n"
- "1: ldaxrb %w0, %2\n"
+ "1: ldxrb %w0, %2\n"
" stlxrb %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
: "r" (x)
- : "cc", "memory");
+ : "memory");
break;
case 2:
asm volatile("// __xchg2\n"
- "1: ldaxrh %w0, %2\n"
+ "1: ldxrh %w0, %2\n"
" stlxrh %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
: "r" (x)
- : "cc", "memory");
+ : "memory");
break;
case 4:
asm volatile("// __xchg4\n"
- "1: ldaxr %w0, %2\n"
+ "1: ldxr %w0, %2\n"
" stlxr %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
: "r" (x)
- : "cc", "memory");
+ : "memory");
break;
case 8:
asm volatile("// __xchg8\n"
- "1: ldaxr %0, %2\n"
+ "1: ldxr %0, %2\n"
" stlxr %w1, %3, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
: "r" (x)
- : "cc", "memory");
+ : "memory");
break;
default:
BUILD_BUG();
}
+ smp_mb();
return ret;
}
#define xchg(ptr,x) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+ __ret; \
+})
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
@@ -158,19 +164,27 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
return ret;
}
-#define cmpxchg(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
-
-#define cmpxchg_local(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
+#define cmpxchg(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
+ sizeof(*(ptr))); \
+ __ret; \
+})
+
+#define cmpxchg_local(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
+})
#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
+#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n))
+
#endif /* __ASM_CMPXCHG_H */
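The reworked macros are consumed by the usual compare-and-swap retry loop. A hedged sketch (function name hypothetical): a saturating increment built on atomic_cmpxchg(), which bottoms out in __cmpxchg_mb() above:

#include <linux/atomic.h>

static int saturating_inc(atomic_t *v, int limit)
{
	int old, new;

	do {
		old = atomic_read(v);
		if (old >= limit)
			return old;	/* already saturated, nothing to do */
		new = old + 1;
	} while (atomic_cmpxchg(v, old, new) != old);	/* retry on races */

	return new;
}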
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index c30a548cee56..56de5aadede2 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -26,7 +26,11 @@
#include <linux/ptrace.h>
#define COMPAT_USER_HZ 100
+#ifdef __AARCH64EB__
+#define COMPAT_UTS_MACHINE "armv8b\0\0"
+#else
#define COMPAT_UTS_MACHINE "armv8l\0\0"
+#endif
typedef u32 compat_size_t;
typedef s32 compat_ssize_t;
@@ -73,13 +77,23 @@ struct compat_timeval {
};
struct compat_stat {
+#ifdef __AARCH64EB__
+ short st_dev;
+ short __pad1;
+#else
compat_dev_t st_dev;
+#endif
compat_ino_t st_ino;
compat_mode_t st_mode;
compat_ushort_t st_nlink;
__compat_uid16_t st_uid;
__compat_gid16_t st_gid;
+#ifdef __AARCH64EB__
+ short st_rdev;
+ short __pad2;
+#else
compat_dev_t st_rdev;
+#endif
compat_off_t st_size;
compat_off_t st_blksize;
compat_off_t st_blocks;
@@ -214,7 +228,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
+#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
static inline void __user *arch_compat_alloc_user_space(long len)
{
@@ -291,11 +305,6 @@ static inline int is_compat_thread(struct thread_info *thread)
#else /* !CONFIG_COMPAT */
-static inline int is_compat_task(void)
-{
- return 0;
-}
-
static inline int is_compat_thread(struct thread_info *thread)
{
return 0;
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
new file mode 100644
index 000000000000..152413076503
--- /dev/null
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CPU_OPS_H
+#define __ASM_CPU_OPS_H
+
+#include <linux/init.h>
+#include <linux/threads.h>
+
+struct device_node;
+
+/**
+ * struct cpu_operations - Callback operations for hotplugging CPUs.
+ *
+ * @name: Name of the property as it appears in a devicetree cpu node's
+ * enable-method property.
+ * @cpu_init: Reads any data necessary for a specific enable-method from the
+ * devicetree, for a given cpu node and proposed logical id.
+ * @cpu_prepare: Early one-time preparation step for a cpu. If there is a
+ * mechanism for doing so, tests whether it is possible to boot
+ * the given CPU.
+ * @cpu_boot: Boots a cpu into the kernel.
+ * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary
+ * synchronisation. Called from the cpu being booted.
+ * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific
+ * reason, which will cause the hot unplug to be aborted. Called
+ * from the cpu to be killed.
+ * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
+ * cpu being killed.
+ * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
+ * to wrong parameters or error conditions. Called from the
+ * CPU being suspended. Must be called with IRQs disabled.
+ */
+struct cpu_operations {
+ const char *name;
+ int (*cpu_init)(struct device_node *, unsigned int);
+ int (*cpu_prepare)(unsigned int);
+ int (*cpu_boot)(unsigned int);
+ void (*cpu_postboot)(void);
+#ifdef CONFIG_HOTPLUG_CPU
+ int (*cpu_disable)(unsigned int cpu);
+ void (*cpu_die)(unsigned int cpu);
+#endif
+#ifdef CONFIG_ARM64_CPU_SUSPEND
+ int (*cpu_suspend)(unsigned long);
+#endif
+};
+
+extern const struct cpu_operations *cpu_ops[NR_CPUS];
+extern int __init cpu_read_ops(struct device_node *dn, int cpu);
+extern void __init cpu_read_bootcpu_ops(void);
+
+#endif /* ifndef __ASM_CPU_OPS_H */
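A hedged sketch of what a backend implementing this interface looks like; the method name and callback bodies are hypothetical (the real in-tree users are the spin-table and PSCI enable methods):

#include <linux/errno.h>
#include <linux/of.h>
#include <asm/cpu_ops.h>

static int example_cpu_init(struct device_node *dn, unsigned int cpu)
{
	return 0;	/* parse any per-cpu properties from dn */
}

static int example_cpu_prepare(unsigned int cpu)
{
	return 0;	/* check that this cpu can actually be booted */
}

static int example_cpu_boot(unsigned int cpu)
{
	return -ENODEV;	/* kick the cpu into the kernel entry point */
}

static const struct cpu_operations example_cpu_ops = {
	.name		= "example-method",	/* matches enable-method in the DT */
	.cpu_init	= example_cpu_init,
	.cpu_prepare	= example_cpu_prepare,
	.cpu_boot	= example_cpu_boot,
};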
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
new file mode 100644
index 000000000000..cd4ac0516488
--- /dev/null
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <asm/hwcap.h>
+
+/*
+ * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
+ * in the kernel and for user space to keep track of which optional features
+ * are supported by the current system. So let's map feature 'x' to HWCAP_x.
+ * Note that HWCAP_x constants are bit fields so we need to take the log.
+ */
+
+#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap))
+#define cpu_feature(x) ilog2(HWCAP_ ## x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+ return elf_hwcap & (1UL << num);
+}
+
+#endif
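A hedged example of the mapping: HWCAP_AES is a bit mask, cpu_feature(AES) is its bit index, and cpu_have_feature() tests it; this is the same round trip module_cpu_feature_match() performs for the crypto drivers earlier in the series:

#include <linux/types.h>
#include <asm/cpufeature.h>

static bool have_aes(void)
{
	/* cpu_feature(AES) == ilog2(HWCAP_AES) */
	return cpu_have_feature(cpu_feature(AES));
}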
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index cf2749488cd4..27f54a7cc81b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -16,32 +16,36 @@
#ifndef __ASM_CPUTYPE_H
#define __ASM_CPUTYPE_H
-#define ID_MIDR_EL1 "midr_el1"
-#define ID_MPIDR_EL1 "mpidr_el1"
-#define ID_CTR_EL0 "ctr_el0"
-
-#define ID_AA64PFR0_EL1 "id_aa64pfr0_el1"
-#define ID_AA64DFR0_EL1 "id_aa64dfr0_el1"
-#define ID_AA64AFR0_EL1 "id_aa64afr0_el1"
-#define ID_AA64ISAR0_EL1 "id_aa64isar0_el1"
-#define ID_AA64MMFR0_EL1 "id_aa64mmfr0_el1"
-
#define INVALID_HWID ULONG_MAX
#define MPIDR_HWID_BITMASK 0xff00ffffff
+#define MPIDR_LEVEL_BITS_SHIFT 3
+#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT)
+#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+
+#define MPIDR_LEVEL_SHIFT(level) \
+ (((1 << level) >> 1) << MPIDR_LEVEL_BITS_SHIFT)
+
+#define MPIDR_AFFINITY_LEVEL(mpidr, level) \
+ ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
+
#define read_cpuid(reg) ({ \
u64 __val; \
- asm("mrs %0, " reg : "=r" (__val)); \
+ asm("mrs %0, " #reg : "=r" (__val)); \
__val; \
})
#define ARM_CPU_IMP_ARM 0x41
+#define ARM_CPU_IMP_APM 0x50
#define ARM_CPU_PART_AEM_V8 0xD0F0
#define ARM_CPU_PART_FOUNDATION 0xD000
+#define ARM_CPU_PART_CORTEX_A53 0xD030
#define ARM_CPU_PART_CORTEX_A57 0xD070
+#define APM_CPU_PART_POTENZA 0x0000
+
#ifndef __ASSEMBLY__
/*
@@ -51,12 +55,12 @@
*/
static inline u32 __attribute_const__ read_cpuid_id(void)
{
- return read_cpuid(ID_MIDR_EL1);
+ return read_cpuid(MIDR_EL1);
}
static inline u64 __attribute_const__ read_cpuid_mpidr(void)
{
- return read_cpuid(ID_MPIDR_EL1);
+ return read_cpuid(MPIDR_EL1);
}
static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
@@ -71,7 +75,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
static inline u32 __attribute_const__ read_cpuid_cachetype(void)
{
- return read_cpuid(ID_CTR_EL0);
+ return read_cpuid(CTR_EL0);
}
#endif /* __ASSEMBLY__ */
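A hedged worked example of the new affinity accessors: with mpidr == 0x0102, MPIDR_LEVEL_SHIFT() yields 0, 8, 16 and 32 for levels 0 to 3, so Aff0 == 2 and Aff1 == 1:

#include <linux/types.h>
#include <asm/cputype.h>

static void mpidr_decode_example(u64 mpidr, u32 aff[4])
{
	aff[0] = MPIDR_AFFINITY_LEVEL(mpidr, 0);	/* 0x0102 -> 2 */
	aff[1] = MPIDR_AFFINITY_LEVEL(mpidr, 1);	/* 0x0102 -> 1 */
	aff[2] = MPIDR_AFFINITY_LEVEL(mpidr, 2);
	aff[3] = MPIDR_AFFINITY_LEVEL(mpidr, 3);	/* Aff3 lives at bits 39:32 */
}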
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7eaa0b302493..aab72ce22348 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -18,6 +18,15 @@
#ifdef __KERNEL__
+/* Low-level stepping controls. */
+#define DBG_MDSCR_SS (1 << 0)
+#define DBG_SPSR_SS (1 << 21)
+
+/* MDSCR_EL1 enabling bits */
+#define DBG_MDSCR_KDE (1 << 13)
+#define DBG_MDSCR_MDE (1 << 15)
+#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
+
#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7)
/* AArch64 */
@@ -26,10 +35,52 @@
#define DBG_ESR_EVT_HWWP 0x2
#define DBG_ESR_EVT_BRK 0x6
-enum debug_el {
- DBG_ACTIVE_EL0 = 0,
- DBG_ACTIVE_EL1,
-};
+/*
+ * Break point instruction encoding
+ */
+#define BREAK_INSTR_SIZE 4
+
+/*
+ * ESR values expected for dynamic and compile time BRK instruction
+ */
+#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xfffff))
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgdb are 0x400 - 0x7ff
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ */
+#define KGDB_DYN_DGB_BRK_IMM 0x400
+#define KDBG_COMPILED_DBG_BRK_IMM 0x401
+
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within the BRK instruction
+ */
+#define AARCH64_BREAK_MON 0xd4200000
+
+/*
+ * Extract byte from BRK instruction
+ */
+#define KGDB_DYN_DGB_BRK_INS_BYTE(x) \
+ ((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff)
+
+/*
+ * Extract byte from BRK #imm16
+ */
+#define KGBD_DYN_DGB_BRK_IMM_BYTE(x) \
+ (((((KGDB_DYN_DGB_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff)
+
+#define KGDB_DYN_DGB_BRK_BYTE(x) \
+ (KGDB_DYN_DGB_BRK_INS_BYTE(x) | KGBD_DYN_DGB_BRK_IMM_BYTE(x))
+
+#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DGB_BRK_BYTE(0)
+#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DGB_BRK_BYTE(1)
+#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DGB_BRK_BYTE(2)
+#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DGB_BRK_BYTE(3)
+
+#define CACHE_FLUSH_IS_SAFE 1
/* AArch32 */
#define DBG_ESR_EVT_BKPT 0x4
@@ -43,27 +94,36 @@ enum debug_el {
#ifndef __ASSEMBLY__
struct task_struct;
-#define local_dbg_save(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "mrs %0, daif // local_dbg_save\n" \
- "msr daifset, #8" \
- : "=r" (flags) : : "memory"); \
- } while (0)
-
-#define local_dbg_restore(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "msr daif, %0 // local_dbg_restore\n" \
- : : "r" (flags) : "memory"); \
- } while (0)
-
#define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */
+#define DBG_HOOK_HANDLED 0
+#define DBG_HOOK_ERROR 1
+
+struct step_hook {
+ struct list_head node;
+ int (*fn)(struct pt_regs *regs, unsigned int esr);
+};
+
+void register_step_hook(struct step_hook *hook);
+void unregister_step_hook(struct step_hook *hook);
+
+struct break_hook {
+ struct list_head node;
+ u32 esr_val;
+ u32 esr_mask;
+ int (*fn)(struct pt_regs *regs, unsigned int esr);
+};
+
+void register_break_hook(struct break_hook *hook);
+void unregister_break_hook(struct break_hook *hook);
+
u8 debug_monitors_arch(void);
+enum debug_el {
+ DBG_ACTIVE_EL0 = 0,
+ DBG_ACTIVE_EL1,
+};
+
void enable_debug_monitors(enum debug_el el);
void disable_debug_monitors(enum debug_el el);
@@ -83,6 +143,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs)
}
#endif
+#ifdef CONFIG_COMPAT
+int aarch32_break_handler(struct pt_regs *regs);
+#else
+static inline int aarch32_break_handler(struct pt_regs *regs)
+{
+ return -EFAULT;
+}
+#endif
+
#endif /* __ASSEMBLY */
#endif /* __KERNEL__ */
#endif /* __ASM_DEBUG_MONITORS_H */
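A hedged sketch of a consumer of the new hook lists; the BRK immediate used here is hypothetical (kgdb reserves 0x400 and 0x401 as noted above):

#include <linux/printk.h>
#include <asm/debug-monitors.h>
#include <asm/ptrace.h>

static int example_brk_fn(struct pt_regs *regs, unsigned int esr)
{
	pr_info("example BRK at %p\n", (void *)instruction_pointer(regs));
	return DBG_HOOK_HANDLED;	/* tell the trap handler we consumed it */
}

static struct break_hook example_break_hook = {
	.esr_val	= DBG_ESR_VAL_BRK(0x7ff),
	.esr_mask	= 0xffffffff,
	.fn		= example_brk_fn,
};

/* register_break_hook(&example_break_hook) would be called from an init path. */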
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
index 0d8453c755a8..cf98b362094b 100644
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -18,6 +18,9 @@
struct dev_archdata {
struct dma_map_ops *dma_ops;
+#ifdef CONFIG_IOMMU_API
+ void *iommu; /* private IOMMU data */
+#endif
};
struct pdev_archdata {
diff --git a/arch/arm64/include/asm/dma-contiguous.h b/arch/arm64/include/asm/dma-contiguous.h
new file mode 100644
index 000000000000..14c4c0ca7f2a
--- /dev/null
+++ b/arch/arm64/include/asm/dma-contiguous.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ASM_DMA_CONTIGUOUS_H
+#define _ASM_DMA_CONTIGUOUS_H
+
+#ifdef __KERNEL__
+#ifdef CONFIG_DMA_CMA
+
+#include <linux/types.h>
+
+static inline void
+dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { }
+
+#endif
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 994776894198..00a41aab4a37 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -25,7 +25,10 @@
#define ARCH_HAS_DMA_GET_REQUIRED_MASK
+#define DMA_ERROR_CODE (~(dma_addr_t)0)
extern struct dma_map_ops *dma_ops;
+extern struct dma_map_ops coherent_swiotlb_dma_ops;
+extern struct dma_map_ops noncoherent_swiotlb_dma_ops;
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
@@ -35,6 +38,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
return dev->archdata.dma_ops;
}
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+ dev->archdata.dma_ops = ops;
+}
+
#include <asm-generic/dma-mapping-common.h>
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
@@ -81,8 +89,12 @@ static inline void dma_mark_clean(void *addr, size_t size)
{
}
-static inline void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, h, f) dma_free_attrs(d, s, h, f, NULL)
+
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
{
struct dma_map_ops *ops = get_dma_ops(dev);
void *vaddr;
@@ -90,13 +102,14 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size,
if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr))
return vaddr;
- vaddr = ops->alloc(dev, size, dma_handle, flags, NULL);
+ vaddr = ops->alloc(dev, size, dma_handle, flags, attrs);
debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr);
return vaddr;
}
-static inline void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dev_addr)
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dev_addr,
+ struct dma_attrs *attrs)
{
struct dma_map_ops *ops = get_dma_ops(dev);
@@ -104,7 +117,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
return;
debug_dma_free_coherent(dev, size, vaddr, dev_addr);
- ops->free(dev, size, vaddr, dev_addr, NULL);
+ ops->free(dev, size, vaddr, dev_addr, attrs);
}
/*
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
new file mode 100644
index 000000000000..5a46c4e7f539
--- /dev/null
+++ b/arch/arm64/include/asm/efi.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_EFI_H
+#define _ASM_EFI_H
+
+#include <asm/io.h>
+
+#ifdef CONFIG_EFI
+extern void efi_init(void);
+extern void efi_idmap_init(void);
+#else
+#define efi_init()
+#define efi_idmap_init()
+#endif
+
+#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index fe32c0e4ac01..01d3aab64b79 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -33,8 +33,6 @@ typedef unsigned long elf_greg_t;
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
typedef struct user_fpsimd_state elf_fpregset_t;
-#define EM_AARCH64 183
-
/*
* AArch64 static relocation types.
*/
@@ -92,11 +90,24 @@ typedef struct user_fpsimd_state elf_fpregset_t;
* These are used to set parameters in the core dumps.
*/
#define ELF_CLASS ELFCLASS64
+#ifdef __AARCH64EB__
+#define ELF_DATA ELFDATA2MSB
+#else
#define ELF_DATA ELFDATA2LSB
+#endif
#define ELF_ARCH EM_AARCH64
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization. This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
#define ELF_PLATFORM_SIZE 16
+#ifdef __AARCH64EB__
+#define ELF_PLATFORM ("aarch64_be")
+#else
#define ELF_PLATFORM ("aarch64")
+#endif
/*
* This is used to ensure we don't load something for the wrong architecture.
@@ -151,8 +162,12 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm);
#define arch_randomize_brk arch_randomize_brk
#ifdef CONFIG_COMPAT
-#define EM_ARM 40
+
+#ifdef __AARCH64EB__
+#define COMPAT_ELF_PLATFORM ("v8b")
+#else
#define COMPAT_ELF_PLATFORM ("v8l")
+#endif
#define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3))
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 78834123a32e..c4a7f940b387 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -42,7 +42,7 @@
#define ESR_EL1_EC_SP_ALIGN (0x26)
#define ESR_EL1_EC_FP_EXC32 (0x28)
#define ESR_EL1_EC_FP_EXC64 (0x2C)
-#define ESR_EL1_EC_SERRROR (0x2F)
+#define ESR_EL1_EC_SERROR (0x2F)
#define ESR_EL1_EC_BREAKPT_EL0 (0x30)
#define ESR_EL1_EC_BREAKPT_EL1 (0x31)
#define ESR_EL1_EC_SOFTSTP_EL0 (0x32)
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
new file mode 100644
index 000000000000..5f7bfe6df723
--- /dev/null
+++ b/arch/arm64/include/asm/fixmap.h
@@ -0,0 +1,67 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright (C) 2013 Mark Salter <msalter@redhat.com>
+ *
+ * Adapted from arch/x86_64 version.
+ *
+ */
+
+#ifndef _ASM_ARM64_FIXMAP_H
+#define _ASM_ARM64_FIXMAP_H
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * page-sized. Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ */
+enum fixed_addresses {
+ FIX_EARLYCON_MEM_BASE,
+ __end_of_permanent_fixed_addresses,
+
+ /*
+ * Temporary boot-time mappings, used by early_ioremap(),
+ * before ioremap() is functional.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define NR_FIX_BTMAPS 4
+#else
+#define NR_FIX_BTMAPS 64
+#endif
+#define FIX_BTMAPS_SLOTS 7
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+ __end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
+
+extern void __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags);
+
+#define __set_fixmap __early_set_fixmap
+
+#include <asm-generic/fixmap.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_ARM64_FIXMAP_H */
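A hedged sketch of how the FIX_EARLYCON_MEM_BASE slot might be used to reach a UART before ioremap() works; set_fixmap_io() comes from the asm-generic header included above and applies the FIXMAP_PAGE_IO attributes:

#include <asm/fixmap.h>

static void __iomem *early_uart_map(phys_addr_t paddr)
{
	set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK);
	return (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE)
		+ (paddr & ~PAGE_MASK);
}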
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
new file mode 100644
index 000000000000..c5534facf941
--- /dev/null
+++ b/arch/arm64/include/asm/ftrace.h
@@ -0,0 +1,59 @@
+/*
+ * arch/arm64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_FTRACE_H
+#define __ASM_FTRACE_H
+
+#include <asm/insn.h>
+
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
+
+#ifndef __ASSEMBLY__
+#include <linux/compat.h>
+
+extern void _mcount(unsigned long);
+extern void *return_address(unsigned int);
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for arm64 */
+};
+
+extern unsigned long ftrace_graph_call;
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ /*
+ * addr is the address of the mcount call instruction.
+ * recordmcount does the necessary offset calculation.
+ */
+ return addr;
+}
+
+#define ftrace_return_address(n) return_address(n)
+
+/*
+ * Because AArch32 mode does not share the same syscall table with AArch64,
+ * tracing compat syscalls may result in reporting bogus syscalls or even
+ * hang-up, so just do not trace them.
+ * See kernel/trace/trace_syscalls.c
+ *
+ * x86 code says:
+ * If the user really wants these, then they should use the
+ * raw syscall tracepoints with filtering.
+ */
+#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
+static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
+{
+ return is_compat_task();
+}
+#endif /* ifndef __ASSEMBLY__ */
+
+#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index c582fa316366..5f750dc96e0f 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -24,12 +24,14 @@
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
asm volatile( \
-"1: ldaxr %w1, %2\n" \
+"1: ldxr %w1, %2\n" \
insn "\n" \
"2: stlxr %w3, %w0, %2\n" \
" cbnz %w3, 1b\n" \
+" dmb ish\n" \
"3:\n" \
" .pushsection .fixup,\"ax\"\n" \
+" .align 2\n" \
"4: mov %w0, %w5\n" \
" b 3b\n" \
" .popsection\n" \
@@ -39,7 +41,7 @@
" .popsection\n" \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
: "r" (oparg), "Ir" (-EFAULT) \
- : "cc", "memory")
+ : "memory")
static inline int
futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
@@ -110,11 +112,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
return -EFAULT;
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
-"1: ldaxr %w1, %2\n"
+"1: ldxr %w1, %2\n"
" sub %w3, %w1, %w4\n"
" cbnz %w3, 3f\n"
"2: stlxr %w3, %w5, %2\n"
" cbnz %w3, 1b\n"
+" dmb ish\n"
"3:\n"
" .pushsection .fixup,\"ax\"\n"
"4: mov %w0, %w6\n"
@@ -126,7 +129,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
" .popsection\n"
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
- : "cc", "memory");
+ : "memory");
*uval = val;
return ret;
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index 990c051e7829..ae4801d77514 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 4
+#define NR_IPI 5
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
new file mode 100644
index 000000000000..5b7ca8ace95f
--- /dev/null
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -0,0 +1,117 @@
+/*
+ * arch/arm64/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_HUGETLB_H
+#define __ASM_HUGETLB_H
+
+#include <asm-generic/hugetlb.h>
+#include <asm/page.h>
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+ return *ptep;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+ return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor,
+ unsigned long ceiling)
+{
+ free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+ unsigned long addr, unsigned long len)
+{
+ return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+ unsigned long addr, unsigned long len)
+{
+ struct hstate *h = hstate_file(file);
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (addr & ~huge_page_mask(h))
+ return -EINVAL;
+ return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+ return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+ return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+ return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+ clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6d4482fa35bc..024c46183c3c 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -30,6 +30,13 @@
#define COMPAT_HWCAP_IDIVA (1 << 17)
#define COMPAT_HWCAP_IDIVT (1 << 18)
#define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
+#define COMPAT_HWCAP_EVTSTRM (1 << 21)
+
+#define COMPAT_HWCAP2_AES (1 << 0)
+#define COMPAT_HWCAP2_PMULL (1 << 1)
+#define COMPAT_HWCAP2_SHA1 (1 << 2)
+#define COMPAT_HWCAP2_SHA2 (1 << 3)
+#define COMPAT_HWCAP2_CRC32 (1 << 4)
#ifndef __ASSEMBLY__
/*
@@ -37,12 +44,13 @@
* instruction set this cpu supports.
*/
#define ELF_HWCAP (elf_hwcap)
-#define COMPAT_ELF_HWCAP (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
- COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
- COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
- COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
- COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
-extern unsigned int elf_hwcap;
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP (compat_elf_hwcap)
+#define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2)
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
+#endif
+
+extern unsigned long elf_hwcap;
#endif
#endif
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
new file mode 100644
index 000000000000..62e7b8bcd2dc
--- /dev/null
+++ b/arch/arm64/include/asm/insn.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_INSN_H
+#define __ASM_INSN_H
+
+#include <linux/types.h>
+
+/* A64 instructions are always 32 bits. */
+#define AARCH64_INSN_SIZE 4
+
+#ifndef __ASSEMBLY__
+
+/*
+ * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
+ * Section C3.1 "A64 instruction index by encoding":
+ * AArch64 main encoding table
+ * Bit position
+ * 28 27 26 25 Encoding Group
+ * 0 0 - - Unallocated
+ * 1 0 0 - Data processing, immediate
+ * 1 0 1 - Branch, exception generation and system instructions
+ * - 1 - 0 Loads and stores
+ * - 1 0 1 Data processing - register
+ * 0 1 1 1 Data processing - SIMD and floating point
+ * 1 1 1 1 Data processing - SIMD and floating point
+ * "-" means "don't care"
+ */
+enum aarch64_insn_encoding_class {
+ AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */
+ AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */
+ AARCH64_INSN_CLS_DP_REG, /* Data processing - register */
+ AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */
+ AARCH64_INSN_CLS_LDST, /* Loads and stores */
+ AARCH64_INSN_CLS_BR_SYS, /* Branch, exception generation and
+ * system instructions */
+};
+
+enum aarch64_insn_hint_op {
+ AARCH64_INSN_HINT_NOP = 0x0 << 5,
+ AARCH64_INSN_HINT_YIELD = 0x1 << 5,
+ AARCH64_INSN_HINT_WFE = 0x2 << 5,
+ AARCH64_INSN_HINT_WFI = 0x3 << 5,
+ AARCH64_INSN_HINT_SEV = 0x4 << 5,
+ AARCH64_INSN_HINT_SEVL = 0x5 << 5,
+};
+
+enum aarch64_insn_imm_type {
+ AARCH64_INSN_IMM_ADR,
+ AARCH64_INSN_IMM_26,
+ AARCH64_INSN_IMM_19,
+ AARCH64_INSN_IMM_16,
+ AARCH64_INSN_IMM_14,
+ AARCH64_INSN_IMM_12,
+ AARCH64_INSN_IMM_9,
+ AARCH64_INSN_IMM_MAX
+};
+
+enum aarch64_insn_branch_type {
+ AARCH64_INSN_BRANCH_NOLINK,
+ AARCH64_INSN_BRANCH_LINK,
+};
+
+#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
+static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
+{ return (code & (mask)) == (val); } \
+static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
+{ return (val); }
+
+__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000)
+__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000)
+__AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001)
+__AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002)
+__AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003)
+__AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000)
+__AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F)
+
+#undef __AARCH64_INSN_FUNCS
+
+bool aarch64_insn_is_nop(u32 insn);
+
+int aarch64_insn_read(void *addr, u32 *insnp);
+int aarch64_insn_write(void *addr, u32 insn);
+enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+ u32 insn, u64 imm);
+u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op);
+u32 aarch64_insn_gen_nop(void);
+
+bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
+
+int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
+int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
+int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_INSN_H */
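
Each __AARCH64_INSN_FUNCS() line above expands to a mask-and-compare predicate plus a value accessor. A sketch of what the unconditional branch case expands to, with an illustrative encoding worked through:

    /* Expansion of __AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000): */
    static __always_inline bool aarch64_insn_is_b(u32 code)
    {
    	return (code & 0xFC000000) == 0x14000000;
    }

    /* 0x14000001 ("b .+4") matches: masking off the 26-bit immediate leaves
     * 0x14000000. 0x94000001 ("bl .+4") does not, since bit 31 selects BL. */
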
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 2e12258aa7e4..732e3d51c2cb 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -26,6 +26,7 @@
#include <asm/byteorder.h>
#include <asm/barrier.h>
#include <asm/pgtable.h>
+#include <asm/early_ioremap.h>
/*
* Generic IO read/write. These perform native-endian accesses.
@@ -118,7 +119,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
* I/O port access primitives.
*/
#define IO_SPACE_LIMIT 0xffff
-#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_2M))
+#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_32M))
static inline u8 inb(unsigned long addr)
{
@@ -224,19 +225,13 @@ extern void __memset_io(volatile void __iomem *, int, size_t);
*/
extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot);
extern void __iounmap(volatile void __iomem *addr);
-
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY)
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
+extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
#define iounmap __iounmap
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PTE_PXN | PTE_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-
#define ARCH_HAS_IOREMAP_WC
#include <asm-generic/iomap.h>
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 0332fc077f6e..e1f7ecdde11f 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -4,6 +4,7 @@
#include <asm-generic/irq.h>
extern void (*handle_arch_irq)(struct pt_regs *);
+extern void migrate_irqs(void);
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
#endif
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index aa11943b8502..0ed52c691868 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -87,5 +87,28 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
return flags & PSR_I_BIT;
}
+/*
+ * save and restore debug state
+ */
+#define local_dbg_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ asm volatile( \
+ "mrs %0, daif // local_dbg_save\n" \
+ "msr daifset, #8" \
+ : "=r" (flags) : : "memory"); \
+ } while (0)
+
+#define local_dbg_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ asm volatile( \
+ "msr daif, %0 // local_dbg_restore\n" \
+ : : "r" (flags) : "memory"); \
+ } while (0)
+
+#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory")
+#define local_dbg_disable() asm("msr daifset, #8" : : : "memory")
+
#endif
#endif
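
local_dbg_save() and local_dbg_restore() bracket a region with debug exceptions masked; the "#8" operand targets the D bit of DAIF. A hedged usage sketch:

    unsigned long flags;

    local_dbg_save(flags);		/* save DAIF, then set the D bit */
    /* ... reprogram hardware breakpoint/watchpoint registers safely ... */
    local_dbg_restore(flags);	/* restore the previous debug mask state */
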
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
new file mode 100644
index 000000000000..076a1c714049
--- /dev/null
+++ b/arch/arm64/include/asm/jump_label.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Based on arch/arm/include/asm/jump_label.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_JUMP_LABEL_H
+#define __ASM_JUMP_LABEL_H
+#include <linux/types.h>
+#include <asm/insn.h>
+
+#ifdef __KERNEL__
+
+#define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE
+
+static __always_inline bool arch_static_branch(struct static_key *key)
+{
+ asm goto("1: nop\n\t"
+ ".pushsection __jump_table, \"aw\"\n\t"
+ ".align 3\n\t"
+ ".quad 1b, %l[l_yes], %c0\n\t"
+ ".popsection\n\t"
+ : : "i"(key) : : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+#endif /* __KERNEL__ */
+
+typedef u64 jump_label_t;
+
+struct jump_entry {
+ jump_label_t code;
+ jump_label_t target;
+ jump_label_t key;
+};
+
+#endif /* __ASM_JUMP_LABEL_H */
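
arch_static_branch() emits a single NOP and records its address in the __jump_table section; enabling the key later patches the NOP into a branch to the out-of-line label. A sketch of the generic usage this arch hook supports, via the struct static_key API from <linux/jump_label.h> of this era (do_trace() is hypothetical):

    #include <linux/jump_label.h>

    static struct static_key trace_key = STATIC_KEY_INIT_FALSE;

    void maybe_trace(void)
    {
    	/* One NOP in the hot path until static_key_slow_inc(&trace_key)
    	 * patches it into a branch to the l_yes label. */
    	if (static_key_false(&trace_key))
    		do_trace();		/* hypothetical slow path */
    }
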
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
new file mode 100644
index 000000000000..3c8aafc1082f
--- /dev/null
+++ b/arch/arm64/include/asm/kgdb.h
@@ -0,0 +1,84 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/include/kgdb.h
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KGDB_H
+#define __ARM_KGDB_H
+
+#include <linux/ptrace.h>
+#include <asm/debug-monitors.h>
+
+#ifndef __ASSEMBLY__
+
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm ("brk %0" : : "I" (KDBG_COMPILED_DBG_BRK_IMM));
+}
+
+extern void kgdb_handle_bus_error(void);
+extern int kgdb_fault_expected;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * gdb expects the following register layout.
+ *
+ * General purpose regs:
+ * r0-r30: 64 bit
+ * sp,pc : 64 bit
+ * pstate : 64 bit
+ * Total: 34
+ * FPU regs:
+ * f0-f31: 128 bit
+ * Total: 32
+ * Extra regs
+ * fpsr & fpcr: 32 bit
+ * Total: 2
+ *
+ */
+
+#define _GP_REGS 34
+#define _FP_REGS 32
+#define _EXTRA_REGS 2
+/*
+ * General purpose register area size in bytes; gdb transfers pstate
+ * as a 64-bit quantity here, per the layout above.
+ */
+#define GP_REG_BYTES (_GP_REGS * 8)
+#define DBG_MAX_REG_NUM (_GP_REGS + _FP_REGS + _EXTRA_REGS)
+
+/*
+ * Size of the I/O buffer for gdb packets, chosen to be large enough
+ * to hold the contents of all registers.
+ */
+
+#define BUFMAX 2048
+
+/*
+ * Number of bytes required for the gdb_regs buffer: _GP_REGS entries are
+ * 8 bytes, _FP_REGS entries 16 bytes and _EXTRA_REGS entries 4 bytes each.
+ * GDB fails to connect if the size exceeds this, with the error
+ * "'g' packet reply is too long".
+ */
+
+#define NUMREGBYTES ((_GP_REGS * 8) + (_FP_REGS * 16) + \
+ (_EXTRA_REGS * 4))
+
+#endif /* __ARM_KGDB_H */
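
The sizes above check out arithmetically: 34 * 8 + 32 * 16 + 2 * 4 = 272 + 512 + 8 = 792 bytes for NUMREGBYTES, and since the gdb remote protocol hex-encodes each byte as two characters, a full 'g' reply needs 1584 characters, safely under BUFMAX. A sketch of the corresponding compile-time checks (BUILD_BUG_ON() needs function scope):

    BUILD_BUG_ON(NUMREGBYTES != (34 * 8) + (32 * 16) + (2 * 4));	/* 792 */
    BUILD_BUG_ON(NUMREGBYTES * 2 >= BUFMAX);	/* 1584 hex chars < 2048 */
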
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
new file mode 100644
index 000000000000..7fd3e27e3ccc
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_ARM_H__
+#define __ARM64_KVM_ARM_H__
+
+#include <asm/types.h>
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_ID (UL(1) << 33)
+#define HCR_CD (UL(1) << 32)
+#define HCR_RW_SHIFT 31
+#define HCR_RW (UL(1) << HCR_RW_SHIFT)
+#define HCR_TRVM (UL(1) << 30)
+#define HCR_HCD (UL(1) << 29)
+#define HCR_TDZ (UL(1) << 28)
+#define HCR_TGE (UL(1) << 27)
+#define HCR_TVM (UL(1) << 26)
+#define HCR_TTLB (UL(1) << 25)
+#define HCR_TPU (UL(1) << 24)
+#define HCR_TPC (UL(1) << 23)
+#define HCR_TSW (UL(1) << 22)
+#define HCR_TAC (UL(1) << 21)
+#define HCR_TIDCP (UL(1) << 20)
+#define HCR_TSC (UL(1) << 19)
+#define HCR_TID3 (UL(1) << 18)
+#define HCR_TID2 (UL(1) << 17)
+#define HCR_TID1 (UL(1) << 16)
+#define HCR_TID0 (UL(1) << 15)
+#define HCR_TWE (UL(1) << 14)
+#define HCR_TWI (UL(1) << 13)
+#define HCR_DC (UL(1) << 12)
+#define HCR_BSU (3 << 10)
+#define HCR_BSU_IS (UL(1) << 10)
+#define HCR_FB (UL(1) << 9)
+#define HCR_VA (UL(1) << 8)
+#define HCR_VI (UL(1) << 7)
+#define HCR_VF (UL(1) << 6)
+#define HCR_AMO (UL(1) << 5)
+#define HCR_IMO (UL(1) << 4)
+#define HCR_FMO (UL(1) << 3)
+#define HCR_PTW (UL(1) << 2)
+#define HCR_SWIO (UL(1) << 1)
+#define HCR_VM (UL(1) << 0)
+
+/*
+ * The bits we set in HCR:
+ * RW: 64bit by default, can be overridden for 32bit VMs
+ * TAC: Trap ACTLR
+ * TSC: Trap SMC
+ * TVM: Trap VM ops (until M+C set in SCTLR_EL1)
+ * TSW: Trap cache operations by set/way
+ * TWE: Trap WFE
+ * TWI: Trap WFI
+ * TIDCP: Trap L2CTLR/L2ECTLR
+ * BSU_IS: Upgrade barriers to the inner shareable domain
+ * FB: Force broadcast of all maintenance operations
+ * AMO: Override CPSR.A and enable signaling with VA
+ * IMO: Override CPSR.I and enable signaling with VI
+ * FMO: Override CPSR.F and enable signaling with VF
+ * SWIO: Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
+ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
+
+
+/* Hyp System Control Register (SCTLR_EL2) bits */
+#define SCTLR_EL2_EE (1 << 25)
+#define SCTLR_EL2_WXN (1 << 19)
+#define SCTLR_EL2_I (1 << 12)
+#define SCTLR_EL2_SA (1 << 3)
+#define SCTLR_EL2_C (1 << 2)
+#define SCTLR_EL2_A (1 << 1)
+#define SCTLR_EL2_M 1
+#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \
+ SCTLR_EL2_SA | SCTLR_EL2_I)
+
+/* TCR_EL2 Registers bits */
+#define TCR_EL2_TBI (1 << 20)
+#define TCR_EL2_PS (7 << 16)
+#define TCR_EL2_PS_40B (2 << 16)
+#define TCR_EL2_TG0 (1 << 14)
+#define TCR_EL2_SH0 (3 << 12)
+#define TCR_EL2_ORGN0 (3 << 10)
+#define TCR_EL2_IRGN0 (3 << 8)
+#define TCR_EL2_T0SZ 0x3f
+#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \
+ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+
+#define TCR_EL2_FLAGS (TCR_EL2_PS_40B)
+
+/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_PS_MASK (7 << 16)
+#define VTCR_EL2_TG0_MASK (1 << 14)
+#define VTCR_EL2_TG0_4K (0 << 14)
+#define VTCR_EL2_TG0_64K (1 << 14)
+#define VTCR_EL2_SH0_MASK (3 << 12)
+#define VTCR_EL2_SH0_INNER (3 << 12)
+#define VTCR_EL2_ORGN0_MASK (3 << 10)
+#define VTCR_EL2_ORGN0_WBWA (1 << 10)
+#define VTCR_EL2_IRGN0_MASK (3 << 8)
+#define VTCR_EL2_IRGN0_WBWA (1 << 8)
+#define VTCR_EL2_SL0_MASK (3 << 6)
+#define VTCR_EL2_SL0_LVL1 (1 << 6)
+#define VTCR_EL2_T0SZ_MASK 0x3f
+#define VTCR_EL2_T0SZ_40B 24
+
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input (T0SZ = 24)
+ * 64kB pages (TG0 = 1)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
+ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+ VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
+#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B)
+#else
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input (T0SZ = 24)
+ * 4kB pages (TG0 = 0)
+ * 3 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
+ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+ VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
+#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
+#endif
+
+#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
+#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT (48LLU)
+#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
+
+/* Hyp System Trap Register */
+#define HSTR_EL2_TTEE (1 << 16)
+#define HSTR_EL2_T(x) (1 << x)
+
+/* Hyp Coprocessor Trap Register */
+#define CPTR_EL2_TCPAC (1 << 31)
+#define CPTR_EL2_TTA (1 << 20)
+#define CPTR_EL2_TFP (1 << 10)
+
+/* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TDRA (1 << 11)
+#define MDCR_EL2_TDOSA (1 << 10)
+#define MDCR_EL2_TDA (1 << 9)
+#define MDCR_EL2_TDE (1 << 8)
+#define MDCR_EL2_HPME (1 << 7)
+#define MDCR_EL2_TPM (1 << 6)
+#define MDCR_EL2_TPMCR (1 << 5)
+#define MDCR_EL2_HPMN_MASK (0x1F)
+
+/* Exception Syndrome Register (ESR) bits */
+#define ESR_EL2_EC_SHIFT (26)
+#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT)
+#define ESR_EL2_IL (1U << 25)
+#define ESR_EL2_ISS (ESR_EL2_IL - 1)
+#define ESR_EL2_ISV_SHIFT (24)
+#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT)
+#define ESR_EL2_SAS_SHIFT (22)
+#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT)
+#define ESR_EL2_SSE (1 << 21)
+#define ESR_EL2_SRT_SHIFT (16)
+#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT)
+#define ESR_EL2_SF (1 << 15)
+#define ESR_EL2_AR (1 << 14)
+#define ESR_EL2_EA (1 << 9)
+#define ESR_EL2_CM (1 << 8)
+#define ESR_EL2_S1PTW (1 << 7)
+#define ESR_EL2_WNR (1 << 6)
+#define ESR_EL2_FSC (0x3f)
+#define ESR_EL2_FSC_TYPE (0x3c)
+
+#define ESR_EL2_CV_SHIFT (24)
+#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT)
+#define ESR_EL2_COND_SHIFT (20)
+#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT)
+
+
+#define FSC_FAULT (0x04)
+#define FSC_PERM (0x0c)
+
+/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+#define HPFAR_MASK (~0xFUL)
+
+#define ESR_EL2_EC_UNKNOWN (0x00)
+#define ESR_EL2_EC_WFI (0x01)
+#define ESR_EL2_EC_CP15_32 (0x03)
+#define ESR_EL2_EC_CP15_64 (0x04)
+#define ESR_EL2_EC_CP14_MR (0x05)
+#define ESR_EL2_EC_CP14_LS (0x06)
+#define ESR_EL2_EC_FP_ASIMD (0x07)
+#define ESR_EL2_EC_CP10_ID (0x08)
+#define ESR_EL2_EC_CP14_64 (0x0C)
+#define ESR_EL2_EC_ILL_ISS (0x0E)
+#define ESR_EL2_EC_SVC32 (0x11)
+#define ESR_EL2_EC_HVC32 (0x12)
+#define ESR_EL2_EC_SMC32 (0x13)
+#define ESR_EL2_EC_SVC64 (0x15)
+#define ESR_EL2_EC_HVC64 (0x16)
+#define ESR_EL2_EC_SMC64 (0x17)
+#define ESR_EL2_EC_SYS64 (0x18)
+#define ESR_EL2_EC_IABT (0x20)
+#define ESR_EL2_EC_IABT_HYP (0x21)
+#define ESR_EL2_EC_PC_ALIGN (0x22)
+#define ESR_EL2_EC_DABT (0x24)
+#define ESR_EL2_EC_DABT_HYP (0x25)
+#define ESR_EL2_EC_SP_ALIGN (0x26)
+#define ESR_EL2_EC_FP_EXC32 (0x28)
+#define ESR_EL2_EC_FP_EXC64 (0x2C)
+#define ESR_EL2_EC_SERROR (0x2F)
+#define ESR_EL2_EC_BREAKPT (0x30)
+#define ESR_EL2_EC_BREAKPT_HYP (0x31)
+#define ESR_EL2_EC_SOFTSTP (0x32)
+#define ESR_EL2_EC_SOFTSTP_HYP (0x33)
+#define ESR_EL2_EC_WATCHPT (0x34)
+#define ESR_EL2_EC_WATCHPT_HYP (0x35)
+#define ESR_EL2_EC_BKPT32 (0x38)
+#define ESR_EL2_EC_VECTOR32 (0x3A)
+#define ESR_EL2_EC_BRK64 (0x3C)
+
+#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10
+
+#define ESR_EL2_EC_WFI_ISS_WFE (1 << 0)
+
+#endif /* __ARM64_KVM_ARM_H__ */
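
The VTTBR layout implied by the definitions above: the stage-2 pgd base occupies the field covered by VTTBR_BADDR_MASK (so the pgd must be suitably aligned), and the 8-bit VMID sits at bits [55:48]. A hedged sketch of packing the two, mirroring what the VMID-allocation code does (the function name is illustrative):

    static u64 make_vttbr(phys_addr_t pgd_phys, u8 vmid)
    {
    	u64 vttbr = pgd_phys & VTTBR_BADDR_MASK;	/* stage-2 pgd base */

    	vttbr |= (u64)vmid << VTTBR_VMID_SHIFT;		/* VMID[7:0] at bit 48 */
    	return vttbr;
    }
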
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
new file mode 100644
index 000000000000..483842180f8f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+#include <asm/virt.h>
+
+/*
+ * 0 is reserved as an invalid value.
+ * Order *must* be kept in sync with the hyp switch code.
+ */
+#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */
+#define CSSELR_EL1 2 /* Cache Size Selection Register */
+#define SCTLR_EL1 3 /* System Control Register */
+#define ACTLR_EL1 4 /* Auxiliary Control Register */
+#define CPACR_EL1 5 /* Coprocessor Access Control */
+#define TTBR0_EL1 6 /* Translation Table Base Register 0 */
+#define TTBR1_EL1 7 /* Translation Table Base Register 1 */
+#define TCR_EL1 8 /* Translation Control Register */
+#define ESR_EL1 9 /* Exception Syndrome Register */
+#define AFSR0_EL1 10 /* Auxiliary Fault Status Register 0 */
+#define AFSR1_EL1 11 /* Auxiliary Fault Status Register 1 */
+#define FAR_EL1 12 /* Fault Address Register */
+#define MAIR_EL1 13 /* Memory Attribute Indirection Register */
+#define VBAR_EL1 14 /* Vector Base Address Register */
+#define CONTEXTIDR_EL1 15 /* Context ID Register */
+#define TPIDR_EL0 16 /* Thread ID, User R/W */
+#define TPIDRRO_EL0 17 /* Thread ID, User R/O */
+#define TPIDR_EL1 18 /* Thread ID, Privileged */
+#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
+#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
+#define PAR_EL1 21 /* Physical Address Register */
+#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
+#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
+#define DBGBCR15_EL1 38
+#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
+#define DBGBVR15_EL1 54
+#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */
+#define DBGWCR15_EL1 70
+#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */
+#define DBGWVR15_EL1 86
+#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */
+
+/* 32bit specific registers. Keep them at the end of the range */
+#define DACR32_EL2 88 /* Domain Access Control Register */
+#define IFSR32_EL2 89 /* Instruction Fault Status Register */
+#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */
+#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */
+#define TEECR32_EL1 92 /* ThumbEE Configuration Register */
+#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */
+#define NR_SYS_REGS 94
+
+/* 32bit mapping */
+#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
+#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */
+#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */
+#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */
+#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
+#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */
+#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
+#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */
+#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */
+#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */
+#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */
+#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */
+#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
+#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */
+#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */
+#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */
+#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */
+#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */
+#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */
+#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */
+#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */
+#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */
+#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */
+#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+
+#define cp14_DBGDSCRext (MDSCR_EL1 * 2)
+#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT (MDCCINT_EL1 * 2)
+
+#define NR_COPRO_REGS (NR_SYS_REGS * 2)
+
+#define ARM_EXCEPTION_IRQ 0
+#define ARM_EXCEPTION_TRAP 1
+
+#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
+#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+
+#ifndef __ASSEMBLY__
+struct kvm;
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_vector[];
+
+#define __kvm_hyp_code_start __hyp_text_start
+#define __kvm_hyp_code_end __hyp_text_end
+
+extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern u64 __vgic_v3_get_ich_vtr_el2(void);
+
+extern char __save_vgic_v2_state[];
+extern char __restore_vgic_v2_state[];
+extern char __save_vgic_v3_state[];
+extern char __restore_vgic_v3_state[];
+
+#endif
+
+#endif /* __ARM_KVM_ASM_H__ */
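
The 32-bit mapping works because NR_COPRO_REGS is exactly twice NR_SYS_REGS: each 64-bit sys_regs[] slot aliases two consecutive 32-bit copro[] slots. A worked example, assuming the little-endian layout (big-endian hosts swap the halves, see the vcpu_cp15_64_high/low macros in kvm_host.h):

    /* TTBR0_EL1 == 6, so:
     *   c2_TTBR0      == 12  -> copro[12], low 32 bits of sys_regs[6]
     *   c2_TTBR0_high == 13  -> copro[13], top 32 bits of sys_regs[6]
     */
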
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
new file mode 100644
index 000000000000..0b52377a6c11
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_coproc.h
+ * Copyright (C) 2012 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_COPROC_H__
+#define __ARM64_KVM_COPROC_H__
+
+#include <linux/kvm_host.h>
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+struct kvm_sys_reg_table {
+ const struct sys_reg_desc *table;
+ size_t num;
+};
+
+struct kvm_sys_reg_target_table {
+ struct kvm_sys_reg_table table64;
+ struct kvm_sys_reg_table table32;
+};
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+ struct kvm_sys_reg_target_table *table);
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#define kvm_coproc_table_init kvm_sys_reg_table_init
+void kvm_sys_reg_table_init(void);
+
+struct kvm_one_reg;
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
new file mode 100644
index 000000000000..5674a55b5518
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/kvm_emulate.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_EMULATE_H__
+#define __ARM64_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmio.h>
+#include <asm/ptrace.h>
+
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
+static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
+}
+
+static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
+}
+
+static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+}
+
+static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+{
+ return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
+}
+
+static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return kvm_condition_valid32(vcpu);
+
+ return true;
+}
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ kvm_skip_instr32(vcpu, is_wide_instr);
+ else
+ *vcpu_pc(vcpu) += 4;
+}
+
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+{
+ *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
+}
+
+static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return vcpu_reg32(vcpu, reg_num);
+
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+}
+
+/* Get vcpu SPSR for current mode */
+static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return vcpu_spsr32(vcpu);
+
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
+{
+ u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+
+ if (vcpu_mode_is_32bit(vcpu))
+ return mode > COMPAT_PSR_MODE_USR;
+
+ return mode != PSR_MODE_EL0t;
+}
+
+static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.fault.esr_el2;
+}
+
+static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.fault.far_el2;
+}
+
+static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+{
+ return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+}
+
+static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV);
+}
+
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR);
+}
+
+static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE);
+}
+
+static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+{
+ return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT;
+}
+
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA);
+}
+
+static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW);
+}
+
+static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+{
+ return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT);
+}
+
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL);
+}
+
+static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT;
+}
+
+static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
+}
+
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+ return vcpu_sys_reg(vcpu, MPIDR_EL1);
+}
+
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
+ else
+ vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+}
+
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
+
+ return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return be16_to_cpu(data & 0xffff);
+ case 4:
+ return be32_to_cpu(data & 0xffffffff);
+ default:
+ return be64_to_cpu(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return le16_to_cpu(data & 0xffff);
+ case 4:
+ return le32_to_cpu(data & 0xffffffff);
+ default:
+ return le64_to_cpu(data);
+ }
+ }
+
+ return data; /* not reached: every switch case above returns */
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_be16(data & 0xffff);
+ case 4:
+ return cpu_to_be32(data & 0xffffffff);
+ default:
+ return cpu_to_be64(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_le16(data & 0xffff);
+ case 4:
+ return cpu_to_le32(data & 0xffffffff);
+ default:
+ return cpu_to_le64(data);
+ }
+ }
+
+ return data; /* not reached: every switch case above returns */
+}
+
+#endif /* __ARM64_KVM_EMULATE_H__ */
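
vcpu_data_guest_to_host() is used on the data-abort path to bring a trapped store's payload into host byte order before it is handed to the MMIO machinery. A simplified, hedged sketch of that call-site shape (struct kvm_exit_mmio comes from asm/kvm_mmio.h below; the helper name is illustrative):

    static void decode_guest_store(struct kvm_vcpu *vcpu,
    			       struct kvm_exit_mmio *mmio)
    {
    	int rt = kvm_vcpu_dabt_get_rd(vcpu);		/* Rt from ESR_EL2.SRT */
    	unsigned int len = kvm_vcpu_dabt_get_as(vcpu);	/* 1, 2, 4 or 8 bytes */
    	unsigned long data;

    	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
    	memcpy(mmio->data, &data, len);			/* host-endian payload */
    	mmio->len = len;
    	mmio->is_write = true;
    }
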
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
new file mode 100644
index 000000000000..bcde41905746
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_host.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HOST_H__
+#define __ARM64_KVM_HOST_H__
+
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
+
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
+#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
+#define KVM_USER_MEM_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
+
+#define KVM_VCPU_MAX_FEATURES 3
+
+int __attribute_const__ kvm_target_cpu(void);
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+int kvm_arch_dev_ioctl_check_extension(long ext);
+
+struct kvm_arch {
+ /* The VMID generation used for the virt. memory system */
+ u64 vmid_gen;
+ u32 vmid;
+
+ /* 1-level 2nd stage table and lock */
+ spinlock_t pgd_lock;
+ pgd_t *pgd;
+
+ /* VTTBR value associated with above pgd and vmid */
+ u64 vttbr;
+
+ /* Interrupt controller */
+ struct vgic_dist vgic;
+
+ /* Timer */
+ struct arch_timer_kvm timer;
+};
+
+#define KVM_NR_MEM_OBJS 40
+
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
+struct kvm_mmu_memory_cache {
+ int nobjs;
+ void *objects[KVM_NR_MEM_OBJS];
+};
+
+struct kvm_vcpu_fault_info {
+ u32 esr_el2; /* Hyp Syndrome Register */
+ u64 far_el2; /* Hyp Fault Address Register */
+ u64 hpfar_el2; /* Hyp IPA Fault Address Register */
+};
+
+struct kvm_cpu_context {
+ struct kvm_regs gp_regs;
+ union {
+ u64 sys_regs[NR_SYS_REGS];
+ u32 copro[NR_COPRO_REGS];
+ };
+};
+
+typedef struct kvm_cpu_context kvm_cpu_context_t;
+
+struct kvm_vcpu_arch {
+ struct kvm_cpu_context ctxt;
+
+ /* HYP configuration */
+ u64 hcr_el2;
+
+ /* Exception Information */
+ struct kvm_vcpu_fault_info fault;
+
+ /* Debug state */
+ u64 debug_flags;
+
+ /* Pointer to host CPU context */
+ kvm_cpu_context_t *host_cpu_context;
+
+ /* VGIC state */
+ struct vgic_cpu vgic_cpu;
+ struct arch_timer_cpu timer_cpu;
+
+ /*
+ * Anything that is not used directly from assembly code goes
+ * here.
+ */
+ /* dcache set/way operation pending */
+ int last_pcpu;
+ cpumask_t require_dcache_flush;
+
+ /* Don't run the guest */
+ bool pause;
+
+ /* IO related fields */
+ struct kvm_decode mmio_decode;
+
+ /* Interrupt related fields */
+ u64 irq_lines; /* IRQ and FIQ levels */
+
+ /* Cache some mmu pages needed inside spinlock regions */
+ struct kvm_mmu_memory_cache mmu_page_cache;
+
+ /* Target CPU and feature flags */
+ int target;
+ DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+
+ /* Detect first run of a vcpu */
+ bool has_run_once;
+};
+
+#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
+#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+/*
+ * CP14 and CP15 live in the same array, as they are backed by the
+ * same system registers.
+ */
+#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)])
+#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)])
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r))
+#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1)
+#else
+#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1)
+#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r))
+#endif
+
+struct kvm_vm_stat {
+ u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+ u32 halt_wakeup;
+};
+
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+ const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
+
+u64 kvm_call_hyp(void *hypfn, ...);
+
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index);
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+ phys_addr_t pgd_ptr,
+ unsigned long hyp_stack_ptr,
+ unsigned long vector_ptr)
+{
+ /*
+ * Call initialization code, and switch to the full blown
+ * HYP code.
+ */
+ kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+ hyp_stack_ptr, vector_ptr);
+}
+
+struct vgic_sr_vectors {
+ void *save_vgic;
+ void *restore_vgic;
+};
+
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+ extern struct vgic_sr_vectors __vgic_sr_vectors;
+
+ switch (vgic->type) {
+ case VGIC_V2:
+ __vgic_sr_vectors.save_vgic = __save_vgic_v2_state;
+ __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state;
+ break;
+
+#ifdef CONFIG_ARM_GIC_V3
+ case VGIC_V3:
+ __vgic_sr_vectors.save_vgic = __save_vgic_v3_state;
+ __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state;
+ break;
+#endif
+
+ default:
+ BUG();
+ }
+}
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
+#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
new file mode 100644
index 000000000000..fc2f689c0694
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMIO_H__
+#define __ARM64_KVM_MMIO_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * This is annoying. The mmio code requires this, even if we don't
+ * need any decoding. To be fixed.
+ */
+struct kvm_decode {
+ unsigned long rt;
+ bool sign_extend;
+};
+
+/*
+ * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
+ * which is an anonymous type. Use our own type instead.
+ */
+struct kvm_exit_mmio {
+ phys_addr_t phys_addr;
+ u8 data[8];
+ u32 len;
+ bool is_write;
+};
+
+static inline void kvm_prepare_mmio(struct kvm_run *run,
+ struct kvm_exit_mmio *mmio)
+{
+ run->mmio.phys_addr = mmio->phys_addr;
+ run->mmio.len = mmio->len;
+ run->mmio.is_write = mmio->is_write;
+ memcpy(run->mmio.data, mmio->data, mmio->len);
+ run->exit_reason = KVM_EXIT_MMIO;
+}
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ phys_addr_t fault_ipa);
+
+#endif /* __ARM64_KVM_MMIO_H__ */
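
Once a kvm_exit_mmio has been filled in, kvm_prepare_mmio() copies it into run->mmio and flags the exit, so user space (e.g. QEMU) can perform the access and resume the vcpu. A hedged sketch of the handoff (fault_ipa would come from kvm_vcpu_get_fault_ipa()):

    struct kvm_exit_mmio mmio = {
    	.phys_addr = fault_ipa,		/* guest physical address */
    	.len = 4,
    	.is_write = false,		/* a guest load */
    };

    kvm_prepare_mmio(run, &mmio);	/* sets run->exit_reason = KVM_EXIT_MMIO */
    return 0;				/* return to user space for emulation */
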
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
new file mode 100644
index 000000000000..a030d163840b
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMU_H__
+#define __ARM64_KVM_MMU_H__
+
+#include <asm/page.h>
+#include <asm/memory.h>
+
+/*
+ * As we only have the TTBR0_EL2 register, we cannot express
+ * "negative" addresses. This makes it impossible to directly share
+ * mappings with the kernel.
+ *
+ * Instead, give the HYP mode its own VA region at a fixed offset from
+ * the kernel by just masking the top bits (which are all ones for a
+ * kernel address).
+ */
+#define HYP_PAGE_OFFSET_SHIFT VA_BITS
+#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
+#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
+
+/*
+ * Our virtual mapping for the idmap-ed MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the last
+ * possible page, where no kernel mapping will ever exist.
+ */
+#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK)
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Convert a kernel VA into a HYP VA.
+ * reg: VA to be converted.
+ */
+.macro kern_hyp_va reg
+ and \reg, \reg, #HYP_PAGE_OFFSET_MASK
+.endm
+
+#else
+
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+
+#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
+
+/*
+ * We currently only support a 40bit IPA.
+ */
+#define KVM_PHYS_SHIFT (40)
+#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
+#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
+
+/* Make sure we get the right size, and thus the right alignment */
+#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
+#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
+int create_hyp_mappings(void *from, void *to);
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);
+
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+ phys_addr_t pa, unsigned long size);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
+phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
+int kvm_mmu_init(void);
+void kvm_clear_hyp_idmap(void);
+
+#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
+#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
+
+static inline void kvm_clean_pgd(pgd_t *pgd) {}
+static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
+static inline void kvm_clean_pte(pte_t *pte) {}
+static inline void kvm_clean_pte_entry(pte_t *pte) {}
+
+static inline void kvm_set_s2pte_writable(pte_t *pte)
+{
+ pte_val(*pte) |= PTE_S2_RDWR;
+}
+
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+ pmd_val(*pmd) |= PMD_S2_RDWR;
+}
+
+#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end)
+#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
+#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
+
+static inline bool kvm_page_empty(void *ptr)
+{
+ struct page *ptr_page = virt_to_page(ptr);
+ return page_count(ptr_page) == 1;
+}
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#ifndef CONFIG_ARM64_64K_PAGES
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#else
+#define kvm_pmd_table_empty(pmdp) (0)
+#endif
+#define kvm_pud_table_empty(pudp) (0)
+
+
+struct kvm;
+
+#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+ return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+ unsigned long size)
+{
+ if (!vcpu_has_cache_enabled(vcpu))
+ kvm_flush_dcache_to_poc((void *)hva, size);
+
+ if (!icache_is_aliasing()) { /* PIPT */
+ flush_icache_range(hva, hva + size);
+ } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
+ /* any kind of VIPT cache */
+ __flush_icache_all();
+ }
+}
+
+#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x))
+
+void stage2_flush_vm(struct kvm *kvm);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ARM64_KVM_MMU_H__ */
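
A worked example of the kernel-to-HYP translation, assuming VA_BITS = 39 (4K pages): HYP_PAGE_OFFSET_MASK is 0x0000007fffffffff, PAGE_OFFSET is 0xffffffc000000000, hence HYP_PAGE_OFFSET is 0x0000004000000000. For linear-map addresses, offsetting and masking agree:

    unsigned long kva = 0xffffffc000100000UL;	/* illustrative kernel VA */
    unsigned long hyp = KERN_TO_HYP(kva);	/* 0x0000004000100000, same as
    						 * kva & HYP_PAGE_OFFSET_MASK */
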
diff --git a/arch/arm64/include/asm/sigcontext.h b/arch/arm64/include/asm/kvm_psci.h
index dca1094acc74..bc39e557c56c 100644
--- a/arch/arm64/include/asm/sigcontext.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -13,19 +14,14 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __ASM_SIGCONTEXT_H
-#define __ASM_SIGCONTEXT_H
-#include <uapi/asm/sigcontext.h>
+#ifndef __ARM64_KVM_PSCI_H__
+#define __ARM64_KVM_PSCI_H__
-/*
- * Auxiliary context saved in the sigcontext.__reserved array. Not exported to
- * user space as it will change with the addition of new context. User space
- * should check the magic/size information.
- */
-struct aux_context {
- struct fpsimd_context fpsimd;
- /* additional context to be added before "end" */
- struct _aarch64_ctx end;
-};
-#endif
+#define KVM_ARM_PSCI_0_1 1
+#define KVM_ARM_PSCI_0_2 2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index bf0838323896..71cd41644279 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -33,18 +33,23 @@
#define UL(x) _AC(x, UL)
/*
- * PAGE_OFFSET - the virtual address of the start of the kernel image.
+ * PAGE_OFFSET - the virtual address of the start of the kernel image,
+ * i.e. the top half of the VA space (all bits from (VA_BITS - 1) up set).
* VA_BITS - the maximum number of bits for virtual addresses.
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
* The module space lives between the addresses given by TASK_SIZE
* and PAGE_OFFSET - it must be within 128MB of the kernel text.
*/
-#define PAGE_OFFSET UL(0xffffffc000000000)
+#ifdef CONFIG_ARM64_64K_PAGES
+#define VA_BITS (42)
+#else
+#define VA_BITS (39)
+#endif
+#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1))
#define MODULES_END (PAGE_OFFSET)
#define MODULES_VADDR (MODULES_END - SZ_64M)
-#define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M)
-#define VA_BITS (39)
+#define FIXADDR_TOP (MODULES_VADDR - SZ_2M - PAGE_SIZE)
#define TASK_SIZE_64 (UL(1) << VA_BITS)
#ifdef CONFIG_COMPAT
@@ -92,6 +97,12 @@
#define MT_NORMAL_NC 3
#define MT_NORMAL 4
+/*
+ * Memory types for Stage-2 translation
+ */
+#define MT_S2_NORMAL 0xf
+#define MT_S2_DEVICE_nGnRE 0x1
+
#ifndef __ASSEMBLY__
extern phys_addr_t memstart_addr;
@@ -129,6 +140,7 @@ static inline void *phys_to_virt(phys_addr_t x)
#define __pa(x) __virt_to_phys((unsigned long)(x))
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys(x))
/*
* virt_to_page(k) convert a _valid_ virtual address to struct page *
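
A quick check that the new formula reproduces the old constant: with 4K pages, VA_BITS = 39, so PAGE_OFFSET = ~0UL << 38 = 0xffffffc000000000, exactly the literal the old define hard-coded; with 64K pages, VA_BITS = 42 gives PAGE_OFFSET = 0xfffffe0000000000, the upper half of a 42-bit kernel VA space.
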
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2494fc01896a..c2f006c48bdb 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -22,10 +22,16 @@ typedef struct {
void *vdso;
} mm_context_t;
+#define INIT_MM_CONTEXT(name) \
+ .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
+
#define ASID(mm) ((mm)->context.id & 0xffff)
extern void paging_init(void);
extern void setup_mm_for_reboot(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
+extern void init_mem_pgprot(void);
+/* create an identity mapping for memory (or io if map_io is true) */
+extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io);
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index e2bc385adb6b..a9eee33dfa62 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -151,12 +151,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
unsigned int cpu = smp_processor_id();
-#ifdef CONFIG_SMP
- /* check for possible thread migration */
- if (!cpumask_empty(mm_cpumask(next)) &&
- !cpumask_test_cpu(cpu, mm_cpumask(next)))
- __flush_icache_all();
-#endif
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
check_and_switch_context(next, tsk);
}
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 46bf66628b6a..a6331e6a92b5 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -31,6 +31,15 @@
/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
#define __HAVE_ARCH_GATE_AREA 1
+/*
+ * The idmap and swapper page tables need some space reserved in the kernel
+ * image. The idmap only requires a pgd and a next level table to (section) map
+ * the kernel, while the swapper also maps the FDT and requires an additional
+ * table to map an early UART. See __create_page_tables for more information.
+ */
+#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE)
+#define IDMAP_DIR_SIZE (2 * PAGE_SIZE)
+
#ifndef __ASSEMBLY__
#ifdef CONFIG_ARM64_64K_PAGES
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
new file mode 100644
index 000000000000..453a179469a3
--- /dev/null
+++ b/arch/arm64/include/asm/percpu.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+#ifdef CONFIG_SMP
+
+static inline void set_my_cpu_offset(unsigned long off)
+{
+ asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
+}
+
+static inline unsigned long __my_cpu_offset(void)
+{
+ unsigned long off;
+ register unsigned long *sp asm ("sp");
+
+ /*
+ * We want to allow caching the value, so avoid using volatile and
+ * instead use a fake stack read to hazard against barrier().
+ */
+ asm("mrs %0, tpidr_el1" : "=r" (off) : "Q" (*sp));
+
+ return off;
+}
+#define __my_cpu_offset __my_cpu_offset()
+
+#else /* !CONFIG_SMP */
+
+#define set_my_cpu_offset(x) do { } while (0)
+
+#endif /* CONFIG_SMP */
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */
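
set_my_cpu_offset() parks this CPU's per-cpu delta in TPIDR_EL1, and __my_cpu_offset() reads it back with a fake dependency on a stack slot so the compiler may cache the value across anything except a barrier(). A sketch of the generic usage this enables, via the standard <linux/percpu.h> interfaces:

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, irq_count);

    void count_irq(void)
    {
    	/* Compiles down to: mrs tpidr_el1, then a load/store at
    	 * (&irq_count + per-cpu offset). */
    	this_cpu_inc(irq_count);
    }
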
diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h
index 0a8ed3f94e93..2593b490c56a 100644
--- a/arch/arm64/include/asm/pgtable-2level-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-2level-hwdef.h
@@ -21,10 +21,10 @@
* 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not
* used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each
* entry representing 512MB. The user and kernel address spaces are limited to
- * 512GB and therefore we only use 1024 entries in the PGD.
+ * 4TB in the 64KB page configuration.
*/
#define PTRS_PER_PTE 8192
-#define PTRS_PER_PGD 1024
+#define PTRS_PER_PGD 8192
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map.
diff --git a/arch/arm64/include/asm/pgtable-2level-types.h b/arch/arm64/include/asm/pgtable-2level-types.h
index 3c3ca7d361e4..5f101e63dfc1 100644
--- a/arch/arm64/include/asm/pgtable-2level-types.h
+++ b/arch/arm64/include/asm/pgtable-2level-types.h
@@ -16,6 +16,8 @@
#ifndef __ASM_PGTABLE_2LEVEL_TYPES_H
#define __ASM_PGTABLE_2LEVEL_TYPES_H
+#include <asm/types.h>
+
typedef u64 pteval_t;
typedef u64 pgdval_t;
typedef pgdval_t pmdval_t;
diff --git a/arch/arm64/include/asm/pgtable-3level-types.h b/arch/arm64/include/asm/pgtable-3level-types.h
index 4489615f14a9..4e94424938a4 100644
--- a/arch/arm64/include/asm/pgtable-3level-types.h
+++ b/arch/arm64/include/asm/pgtable-3level-types.h
@@ -16,6 +16,8 @@
#ifndef __ASM_PGTABLE_3LEVEL_TYPES_H
#define __ASM_PGTABLE_3LEVEL_TYPES_H
+#include <asm/types.h>
+
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pgdval_t;
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 75fd13d289b9..f7af66b54cb2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -25,16 +25,27 @@
/*
* Hardware page table definitions.
*
+ * Level 1 descriptor (PUD).
+ */
+
+#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
+
+/*
* Level 2 descriptor (PMD).
*/
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
/*
* Section
*/
+#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58)
+#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
+#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11)
@@ -53,6 +64,7 @@
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
@@ -68,9 +80,29 @@
#define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2)
/*
- * 40-bit physical address supported.
+ * 2nd stage PTE definitions
+ */
+#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
+#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
+
+#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
+
+/*
+ * Memory Attribute override for Stage-2 (MemAttr[3:0])
+ */
+#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
+#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2)
+
+/*
+ * EL2/HYP PTE/PMD definitions
+ */
+#define PMD_HYP PMD_SECT_USER
+#define PTE_HYP PTE_USER
+
+/*
+ * Highest possible physical address supported.
*/
-#define PHYS_MASK_SHIFT (40)
+#define PHYS_MASK_SHIFT (48)
#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
/*
@@ -90,7 +122,7 @@
#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28))
#define TCR_TG0_64K (UL(1) << 14)
#define TCR_TG1_64K (UL(1) << 30)
-#define TCR_IPS_40BIT (UL(2) << 32)
#define TCR_ASID16 (UL(1) << 36)
+#define TCR_TBI0 (UL(1) << 37)
#endif
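The wider physical mask above is a straight bit pattern; as an illustrative check of how the output address comes out of a descriptor (this is the in-tree pte_pfn() pattern, shown here only as a sketch):

	/* PHYS_MASK == (1UL << 48) - 1 == 0x0000ffffffffffff */
	unsigned long pfn = (pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT;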
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 3a710d7b14ce..fb4b26509276 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -25,15 +25,16 @@
* Software defined PTE bits definition.
*/
#define PTE_VALID (_AT(pteval_t, 1) << 0)
-#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */
#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
+#define PTE_WRITE (_AT(pteval_t, 1) << 57)
+#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
/*
* VMALLOC and SPARSEMEM_VMEMMAP ranges.
*/
-#define VMALLOC_START UL(0xffffff8000000000)
+#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS)
#define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K)
#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
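Deriving VMALLOC_START from VA_BITS keeps it correct for every page-size configuration. As a sanity check, with VA_BITS = 39 (4KB pages, three levels):

	0xffffffffffffffff << 39 = 0xffffff8000000000

which is exactly the hard-coded constant being removed.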
@@ -51,60 +52,59 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#endif
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
-/*
- * The pgprot_* and protection_map entries will be fixed up at runtime to
- * include the cachable and bufferable bits based on memory policy, as well as
- * any architecture dependent bits like global/ASID and SMP shared mapping
- * bits.
- */
-#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF
-
-extern pgprot_t pgprot_default;
-
-#define __pgprot_modify(prot,mask,bits) \
- __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+#ifdef CONFIG_SMP
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#else
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
+#endif
-#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b)
-
-#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE)
-#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
-#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY)
-#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
-#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY)
-#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY)
-#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY)
-
-#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE)
-#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
-#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY)
-#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
-#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY)
-
-#endif /* __ASSEMBLY__ */
-
-#define __P000 __PAGE_NONE
-#define __P001 __PAGE_READONLY
-#define __P010 __PAGE_COPY
-#define __P011 __PAGE_COPY
-#define __P100 __PAGE_READONLY_EXEC
-#define __P101 __PAGE_READONLY_EXEC
-#define __P110 __PAGE_COPY_EXEC
-#define __P111 __PAGE_COPY_EXEC
-
-#define __S000 __PAGE_NONE
-#define __S001 __PAGE_READONLY
-#define __S010 __PAGE_SHARED
-#define __S011 __PAGE_SHARED
-#define __S100 __PAGE_READONLY_EXEC
-#define __S101 __PAGE_READONLY_EXEC
-#define __S110 __PAGE_SHARED_EXEC
-#define __S111 __PAGE_SHARED_EXEC
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+
+#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+
+#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
+#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
+
+#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
+
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
-#ifndef __ASSEMBLY__
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
@@ -119,7 +119,7 @@ extern struct page *empty_zero_page;
#define pte_none(pte) (!pte_val(pte))
#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
-#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
+#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
@@ -129,30 +129,72 @@ extern struct page *empty_zero_page;
/*
* The following only work if pte_present(). Undefined behaviour otherwise.
*/
-#define pte_present(pte) (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))
-#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY)
-#define pte_young(pte) (pte_val(pte) & PTE_AF)
-#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL)
-#define pte_write(pte) (!(pte_val(pte) & PTE_RDONLY))
+#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
+#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
+#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
+#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
+#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_valid_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
+#define pte_valid_not_user(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ pte_val(pte) &= ~PTE_WRITE;
+ return pte;
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ pte_val(pte) |= PTE_WRITE;
+ return pte;
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ pte_val(pte) &= ~PTE_DIRTY;
+ return pte;
+}
-#define PTE_BIT_FUNC(fn,op) \
-static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ pte_val(pte) |= PTE_DIRTY;
+ return pte;
+}
-PTE_BIT_FUNC(wrprotect, |= PTE_RDONLY);
-PTE_BIT_FUNC(mkwrite, &= ~PTE_RDONLY);
-PTE_BIT_FUNC(mkclean, &= ~PTE_DIRTY);
-PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY);
-PTE_BIT_FUNC(mkold, &= ~PTE_AF);
-PTE_BIT_FUNC(mkyoung, |= PTE_AF);
-PTE_BIT_FUNC(mkspecial, |= PTE_SPECIAL);
+static inline pte_t pte_mkold(pte_t pte)
+{
+ pte_val(pte) &= ~PTE_AF;
+ return pte;
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ pte_val(pte) |= PTE_AF;
+ return pte;
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ pte_val(pte) |= PTE_SPECIAL;
+ return pte;
+}
static inline void set_pte(pte_t *ptep, pte_t pte)
{
*ptep = pte;
+
+ /*
+ * Only if the new pte is valid and kernel, otherwise TLB maintenance
+ * or update_mmu_cache() have the necessary barriers.
+ */
+ if (pte_valid_not_user(pte)) {
+ dsb(ishst);
+ isb();
+ }
}
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
@@ -163,8 +205,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (pte_valid_user(pte)) {
if (!pte_special(pte) && pte_exec(pte))
__sync_icache_dcache(pte, addr);
- if (!pte_dirty(pte))
- pte = pte_wrprotect(pte);
+ if (pte_dirty(pte) && pte_write(pte))
+ pte_val(pte) &= ~PTE_RDONLY;
+ else
+ pte_val(pte) |= PTE_RDONLY;
}
set_pte(ptep, pte);
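The rework above implements a software dirty bit: ARMv8 at this point has no hardware-managed dirty flag, so a page becomes hardware-writable only once it is both writable and dirty. An illustrative summary of the mapping the code performs:

	/*
	 *   PTE_WRITE  PTE_DIRTY   =>  hardware PTE_RDONLY
	 *   -----------------------------------------------
	 *       0          *                1   read-only mapping
	 *       1          0                1   clean: first write faults,
	 *                                       fault handler marks it dirty
	 *       1          1                0   dirty: writable in hardware
	 */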
@@ -173,11 +217,69 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
/*
* Huge pte definitions.
*/
-#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE)
-#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE))
+#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
+/*
+ * Hugetlb definitions.
+ */
+#define HUGE_MAX_HSTATE 2
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define __HAVE_ARCH_PTE_SPECIAL
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+ return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+ return __pmd(pte_val(pte));
+}
+
+/*
+ * THP definitions.
+ */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd) pte_special(pmd_pte(pmd))
+#endif
+
+#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd)))
+#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) &= ~PMD_TYPE_MASK))
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd) pte_write(pmd_pte(pmd))
+
+#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
+
+#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+
+#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
+
+static inline int has_transparent_hugepage(void)
+{
+ return 1;
+}
+
+#define __pgprot_modify(prot,mask,bits) \
+ __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+
/*
* Mark the prot value as uncacheable and unbufferable.
*/
@@ -197,10 +299,17 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_bad(pmd) (!(pmd_val(pmd) & 2))
+#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_TABLE)
+#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_SECT)
+
+
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
- dsb();
+ dsb(ishst);
+ isb();
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -230,7 +339,8 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
static inline void set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
- dsb();
+ dsb(ishst);
+ isb();
}
static inline void pud_clear(pud_t *pudp)
@@ -263,36 +373,40 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
#endif
/* Find an entry in the third-level page table.. */
-#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
- PTE_PROT_NONE | PTE_VALID;
+ PTE_PROT_NONE | PTE_VALID | PTE_WRITE;
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
return pte;
}
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
+}
+
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
-#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE)
-#define IDMAP_DIR_SIZE (2 * PAGE_SIZE)
-
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
* bit 2: PTE_FILE
* bits 3-8: swap type
- * bits 9-63: swap offset
+ * bits 9-57: swap offset
*/
#define __SWP_TYPE_SHIFT 3
#define __SWP_TYPE_BITS 6
+#define __SWP_OFFSET_BITS 49
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
+#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1)
#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
-#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT)
+#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
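A worked round-trip of the encoding above (the type and offset values are arbitrary):

	swp_entry_t e = __swp_entry(5, 0x1234);
	/* e.val == (5 << 3) | (0x1234 << 9) == 0x246828 */
	/* __swp_type(e) == 5, __swp_offset(e) == 0x1234  */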
@@ -300,7 +414,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
/*
* Ensure that there are not more swap files than can be encoded in the kernel
- * the PTEs.
+ * PTEs.
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
@@ -308,13 +422,13 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
* Encode and decode a file entry:
* bits 0-1: present (must be zero)
* bit 2: PTE_FILE
- * bits 3-63: file offset / PAGE_SIZE
+ * bits 3-57: file offset / PAGE_SIZE
*/
#define pte_file(pte) (pte_val(pte) & PTE_FILE)
#define pte_to_pgoff(x) (pte_val(x) >> 3)
#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE)
-#define PTE_FILE_MAX_BITS 61
+#define PTE_FILE_MAX_BITS 55
extern int kern_addr_valid(unsigned long addr);
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 7cdf466fd0c5..0c657bb54597 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -26,11 +26,14 @@
#include <asm/page.h>
struct mm_struct;
+struct cpu_suspend_ctx;
extern void cpu_cache_off(void);
extern void cpu_do_idle(void);
extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
+extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
#include <asm/memory.h>
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ab239b2c456f..45b20cd6cbca 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -107,6 +107,11 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
regs->pstate = COMPAT_PSR_MODE_USR;
if (pc & 1)
regs->pstate |= COMPAT_PSR_T_BIT;
+
+#ifdef __AARCH64EB__
+ regs->pstate |= COMPAT_PSR_E_BIT;
+#endif
+
regs->compat_sp = sp;
}
#endif
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
index 0604237ecd99..9a4b663670ff 100644
--- a/arch/arm64/include/asm/psci.h
+++ b/arch/arm64/include/asm/psci.h
@@ -14,25 +14,10 @@
#ifndef __ASM_PSCI_H
#define __ASM_PSCI_H
-#define PSCI_POWER_STATE_TYPE_STANDBY 0
-#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
+struct cpuidle_driver;
+void psci_init(void);
-struct psci_power_state {
- u16 id;
- u8 type;
- u8 affinity_level;
-};
-
-struct psci_operations {
- int (*cpu_suspend)(struct psci_power_state state,
- unsigned long entry_point);
- int (*cpu_off)(struct psci_power_state state);
- int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
- int (*migrate)(unsigned long cpuid);
-};
-
-extern struct psci_operations psci_ops;
-
-int psci_init(void);
+int __init psci_dt_register_idle_states(struct cpuidle_driver *,
+ struct device_node *[]);
#endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 41a71ee4c3df..a429b5940be2 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -42,6 +42,7 @@
#define COMPAT_PSR_MODE_UND 0x0000001b
#define COMPAT_PSR_MODE_SYS 0x0000001f
#define COMPAT_PSR_T_BIT 0x00000020
+#define COMPAT_PSR_E_BIT 0x00000200
#define COMPAT_PSR_F_BIT 0x00000040
#define COMPAT_PSR_I_BIT 0x00000080
#define COMPAT_PSR_A_BIT 0x00000100
@@ -67,6 +68,7 @@
/* Architecturally defined mapping between AArch32 and AArch64 registers */
#define compat_usr(x) regs[(x)]
+#define compat_fp regs[11]
#define compat_sp regs[13]
#define compat_lr regs[14]
#define compat_sp_hyp regs[15]
@@ -131,7 +133,12 @@ struct pt_regs {
(!((regs)->pstate & PSR_F_BIT))
#define user_stack_pointer(regs) \
- ((regs)->sp)
+	(!compat_user_mode(regs) ? ((regs)->sp) : ((regs)->compat_sp))
+
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+ return regs->regs[0];
+}
/*
* Are the current registers suitable for user mode? (used to maintain
@@ -163,7 +170,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs)
return 0;
}
-#define instruction_pointer(regs) (regs)->pc
+#define instruction_pointer(regs) ((unsigned long)(regs)->pc)
#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *regs);
@@ -171,7 +178,5 @@ extern unsigned long profile_pc(struct pt_regs *regs);
#define profile_pc(regs) instruction_pointer(regs)
#endif
-extern int aarch32_break_trap(struct pt_regs *regs);
-
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 4b8023c5d146..a498f2cd2c2a 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -60,21 +60,14 @@ struct secondary_data {
void *stack;
};
extern struct secondary_data secondary_data;
-extern void secondary_holding_pen(void);
-extern volatile unsigned long secondary_holding_pen_release;
+extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-struct device_node;
+extern int __cpu_disable(void);
-struct smp_enable_ops {
- const char *name;
- int (*init_cpu)(struct device_node *, int);
- int (*prepare_cpu)(int);
-};
-
-extern const struct smp_enable_ops smp_spin_table_ops;
-extern const struct smp_enable_ops smp_psci_ops;
+extern void __cpu_die(unsigned int cpu);
+extern void cpu_die(void);
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h
index ed43a0d2b1b2..59e282311b58 100644
--- a/arch/arm64/include/asm/smp_plat.h
+++ b/arch/arm64/include/asm/smp_plat.h
@@ -21,6 +21,19 @@
#include <asm/types.h>
+struct mpidr_hash {
+ u64 mask;
+ u32 shift_aff[4];
+ u32 bits;
+};
+
+extern struct mpidr_hash mpidr_hash;
+
+static inline u32 mpidr_hash_size(void)
+{
+ return 1 << mpidr_hash.bits;
+}
+
/*
* Logical CPU mapping.
*/
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 0defa0728a9b..c45b7b1b7197 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -22,17 +22,10 @@
/*
* Spinlock implementation.
*
- * The old value is read exclusively and the new one, if unlocked, is written
- * exclusively. In case of failure, the loop is restarted.
- *
* The memory barriers are implicit with the load-acquire and store-release
* instructions.
- *
- * Unlocked value: 0
- * Locked value: 1
*/
-#define arch_spin_is_locked(x) ((x)->lock != 0)
#define arch_spin_unlock_wait(lock) \
do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
@@ -41,32 +34,51 @@
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int tmp;
+ arch_spinlock_t lockval, newval;
asm volatile(
- " sevl\n"
- "1: wfe\n"
- "2: ldaxr %w0, %1\n"
- " cbnz %w0, 1b\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 2b\n"
- : "=&r" (tmp), "+Q" (lock->lock)
- : "r" (1)
- : "cc", "memory");
+ /* Atomically increment the next ticket. */
+" prfm pstl1strm, %3\n"
+"1: ldaxr %w0, %3\n"
+" add %w1, %w0, %w5\n"
+" stxr %w2, %w1, %3\n"
+" cbnz %w2, 1b\n"
+ /* Did we get the lock? */
+" eor %w1, %w0, %w0, ror #16\n"
+" cbz %w1, 3f\n"
+ /*
+ * No: spin on the owner. Send a local event to avoid missing an
+ * unlock before the exclusive load.
+ */
+" sevl\n"
+"2: wfe\n"
+" ldaxrh %w2, %4\n"
+" eor %w1, %w2, %w0, lsr #16\n"
+" cbnz %w1, 2b\n"
+ /* We got the lock. Critical section starts here. */
+"3:"
+ : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
+ : "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
+ : "memory");
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int tmp;
+ arch_spinlock_t lockval;
asm volatile(
- "2: ldaxr %w0, %1\n"
- " cbnz %w0, 1f\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 2b\n"
- "1:\n"
- : "=&r" (tmp), "+Q" (lock->lock)
- : "r" (1)
- : "cc", "memory");
+" prfm pstl1strm, %2\n"
+"1: ldaxr %w0, %2\n"
+" eor %w1, %w0, %w0, ror #16\n"
+" cbnz %w1, 2f\n"
+" add %w0, %w0, %3\n"
+" stxr %w1, %w0, %2\n"
+" cbnz %w1, 1b\n"
+"2:"
+ : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
+ : "I" (1 << TICKET_SHIFT)
+ : "memory");
return !tmp;
}
@@ -74,9 +86,28 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
asm volatile(
- " stlr %w1, %0\n"
- : "=Q" (lock->lock) : "r" (0) : "memory");
+" stlrh %w1, %0\n"
+ : "=Q" (lock->owner)
+ : "r" (lock->owner + 1)
+ : "memory");
+}
+
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+ return lock.owner == lock.next;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+ return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+ arch_spinlock_t lockval = ACCESS_ONCE(*lock);
+ return (lockval.next - lockval.owner) > 1;
}
+#define arch_spin_is_contended arch_spin_is_contended
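The new lock is a ticket lock: next hands out tickets, owner announces whose turn it is, so waiters are served in FIFO order rather than racing for a freed lock. A plain-C sketch of what the LL/SC assembly above implements; fetch_and_inc() is a hypothetical stand-in for the atomic increment the asm performs:

	typedef struct { u16 owner, next; } ticket_lock_t;

	static void ticket_lock(ticket_lock_t *l)
	{
		u16 my_ticket = fetch_and_inc(&l->next);	/* take a ticket, atomically */

		while (READ_ONCE(l->owner) != my_ticket)
			cpu_relax();				/* spin until called */
	}

	static void ticket_unlock(ticket_lock_t *l)
	{
		smp_store_release(&l->owner, l->owner + 1);	/* serve the next ticket */
	}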
/*
* Write lock implementation.
@@ -101,7 +132,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
" cbnz %w0, 2b\n"
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
- : "cc", "memory");
+ : "memory");
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
@@ -115,7 +146,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
"1:\n"
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
- : "cc", "memory");
+ : "memory");
return !tmp;
}
@@ -156,7 +187,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
" cbnz %w1, 2b\n"
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
- : "cc", "memory");
+ : "memory");
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
@@ -170,7 +201,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
" cbnz %w1, 1b\n"
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
- : "cc", "memory");
+ : "memory");
}
static inline int arch_read_trylock(arch_rwlock_t *rw)
@@ -185,7 +216,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
"1:\n"
: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
:
- : "cc", "memory");
+ : "memory");
return !tmp2;
}
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index 9a494346efed..87692750ed94 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -20,14 +20,14 @@
# error "please don't include this file directly"
#endif
-/* We only require natural alignment for exclusive accesses. */
-#define __lock_aligned
+#define TICKET_SHIFT 16
typedef struct {
- volatile unsigned int lock;
-} arch_spinlock_t;
+ u16 owner;
+ u16 next;
+} __aligned(4) arch_spinlock_t;
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 }
typedef struct {
volatile unsigned int lock;
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
new file mode 100644
index 000000000000..e9c149c042e0
--- /dev/null
+++ b/arch/arm64/include/asm/suspend.h
@@ -0,0 +1,27 @@
+#ifndef __ASM_SUSPEND_H
+#define __ASM_SUSPEND_H
+
+#define NR_CTX_REGS 11
+
+/*
+ * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on
+ * the stack, which must be 16-byte aligned on v8
+ */
+struct cpu_suspend_ctx {
+ /*
+ * This struct must be kept in sync with
+ * cpu_do_{suspend/resume} in mm/proc.S
+ */
+ u64 ctx_regs[NR_CTX_REGS];
+ u64 sp;
+} __aligned(16);
+
+struct sleep_save_sp {
+ phys_addr_t *save_ptr_stash;
+ phys_addr_t save_ptr_stash_phys;
+};
+
+extern void cpu_resume(void);
+extern int cpu_suspend(unsigned long);
+
+#endif
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 70ba9d4ee978..383771eb0b87 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -18,6 +18,7 @@
#include <linux/err.h>
+extern const void *sys_call_table[];
static inline int syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
new file mode 100644
index 000000000000..5c89df0acbcb
--- /dev/null
+++ b/arch/arm64/include/asm/sysreg.h
@@ -0,0 +1,60 @@
+/*
+ * Macros for accessing system registers with older binutils.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_SYSREG_H
+#define __ASM_SYSREG_H
+
+#define sys_reg(op0, op1, crn, crm, op2) \
+ ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+
+#ifdef __ASSEMBLY__
+
+ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+ .equ __reg_num_x\num, \num
+ .endr
+ .equ __reg_num_xzr, 31
+
+ .macro mrs_s, rt, sreg
+ .inst 0xd5300000|(\sreg)|(__reg_num_\rt)
+ .endm
+
+ .macro msr_s, sreg, rt
+ .inst 0xd5100000|(\sreg)|(__reg_num_\rt)
+ .endm
+
+#else
+
+asm(
+" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
+" .equ __reg_num_x\\num, \\num\n"
+" .endr\n"
+" .equ __reg_num_xzr, 31\n"
+"\n"
+" .macro mrs_s, rt, sreg\n"
+" .inst 0xd5300000|(\\sreg)|(__reg_num_\\rt)\n"
+" .endm\n"
+"\n"
+" .macro msr_s, sreg, rt\n"
+" .inst 0xd5100000|(\\sreg)|(__reg_num_\\rt)\n"
+" .endm\n"
+);
+
+#endif
+
+#endif /* __ASM_SYSREG_H */
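A hedged usage example for the macros above: reading ID_AA64MMFR0_EL1 (op0=3, op1=0, CRn=0, CRm=7, op2=0) from C on an assembler without named system registers; __stringify() comes from <linux/stringify.h>:

	static inline u64 read_id_aa64mmfr0(void)
	{
		u64 val;

		/* Expands to a raw .inst via the mrs_s macro defined above. */
		asm volatile("mrs_s %0, " __stringify(sys_reg(3, 0, 0, 7, 0))
			     : "=r" (val));
		return val;
	}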
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 23a3c4791d86..59f151f8241d 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -97,6 +97,9 @@ static inline struct thread_info *current_thread_info(void)
/*
* thread information flags:
* TIF_SYSCALL_TRACE - syscall trace active
+ * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
+ * TIF_SYSCALL_AUDIT - syscall auditing
+ * TIF_SECCOMP - syscall secure computing
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
@@ -107,6 +110,9 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_SYSCALL_TRACE 8
+#define TIF_SYSCALL_AUDIT 9
+#define TIF_SYSCALL_TRACEPOINT 10
+#define TIF_SECCOMP 11
#define TIF_POLLING_NRFLAG 16
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
@@ -118,10 +124,17 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
+
#endif /* __KERNEL__ */
#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h
index b24a31a7e2c9..81a076eb37fa 100644
--- a/arch/arm64/include/asm/timex.h
+++ b/arch/arm64/include/asm/timex.h
@@ -16,14 +16,14 @@
#ifndef __ASM_TIMEX_H
#define __ASM_TIMEX_H
+#include <asm/arch_timer.h>
+
/*
* Use the current timer as a cycle counter since this is what we use for
* the delay loop.
*/
-#define get_cycles() ({ cycles_t c; read_current_timer(&c); c; })
+#define get_cycles() arch_counter_get_cntvct()
#include <asm-generic/timex.h>
-#define ARCH_HAS_READ_CURRENT_TIMER
-
#endif
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 5546653e5cc8..717031a762c2 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -190,4 +190,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
#define tlb_migrate_finish(mm) do { } while (0)
+static inline void
+tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
+{
+ tlb_add_flush(tlb, addr);
+}
+
#endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 122d6320f745..3796ea6bb734 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb;
*/
static inline void flush_tlb_all(void)
{
- dsb();
+ dsb(ishst);
asm("tlbi vmalle1is");
- dsb();
+ dsb(ish);
isb();
}
@@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
{
unsigned long asid = (unsigned long)ASID(mm) << 48;
- dsb();
+ dsb(ishst);
asm("tlbi aside1is, %0" : : "r" (asid));
- dsb();
+ dsb(ish);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -93,16 +93,37 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr = uaddr >> 12 |
((unsigned long)ASID(vma->vm_mm) << 48);
- dsb();
+ dsb(ishst);
asm("tlbi vae1is, %0" : : "r" (addr));
- dsb();
+ dsb(ish);
}
-/*
- * Convert calls to our calling convention.
- */
-#define flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma)
-#define flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e)
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+ unsigned long addr;
+ start = asid | (start >> 12);
+ end = asid | (end >> 12);
+
+ dsb(ishst);
+ for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+ asm("tlbi vae1is, %0" : : "r"(addr));
+ dsb(ish);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+ start >>= 12;
+ end >>= 12;
+
+ dsb(ishst);
+ for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+ asm("tlbi vaae1is, %0" : : "r"(addr));
+ dsb(ish);
+ isb();
+}
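The operand fed to "tlbi vae1is" above packs the ASID into bits [63:48] and VA[47:12] into the low bits. For example, invalidating the (hypothetical) user VA 0x0000007fb7351000 in an mm with ASID 42:

	addr = (0x0000007fb7351000 >> 12) | (42UL << 48)
	     = 0x002a000007fb7351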
/*
* On AArch64, the cache coherency is handled via the set_pte_at() function.
@@ -111,12 +132,14 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
/*
- * set_pte() does not have a DSB, so make sure that the page table
- * write is visible.
+ * set_pte() does not have a DSB for user mappings, so make sure that
+ * the page table write is visible.
*/
- dsb();
+ dsb(ishst);
}
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
#endif
#endif
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 000000000000..e0171b393a14
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,70 @@
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
+
+#ifdef CONFIG_SMP
+
+#include <linux/cpumask.h>
+
+struct cpu_topology {
+ int thread_id;
+ int core_id;
+ int cluster_id;
+ cpumask_t thread_sibling;
+ cpumask_t core_sibling;
+};
+
+extern struct cpu_topology cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id)
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
+#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+
+#define mc_capable() (cpu_topology[0].cluster_id != -1)
+#define smt_capable() (cpu_topology[0].thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 0, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
+ \
+ .flags = 0*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_BALANCE_WAKE \
+ | 1*SD_WAKE_AFFINE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+}
+#endif
+#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* __ASM_TOPOLOGY_H */
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 008f8481da65..3bf8f4e99a51 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -83,7 +83,7 @@ static inline void set_fs(mm_segment_t fs)
* Returns 1 if the range is valid, 0 otherwise.
*
* This is equivalent to the following test:
- * (u65)addr + (u65)size < (u65)current->addr_limit
+ * (u65)addr + (u65)size <= current->addr_limit
*
* This needs 65-bit arithmetic.
*/
@@ -91,7 +91,7 @@ static inline void set_fs(mm_segment_t fs)
({ \
unsigned long flag, roksum; \
__chk_user_ptr(addr); \
- asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, cc" \
+ asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \
: "=&r" (flag), "=&r" (roksum) \
: "1" (addr), "Ir" (size), \
"r" (current_thread_info()->addr_limit) \
@@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs)
})
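An annotated reading of the corrected sequence (illustrative, not part of the patch): adds computes addr + size and sets the carry flag on 64-bit overflow; the cset condition change from cc to ls is what turns the strict < into the <= documented above.

	adds  %1, %1, %3	/* roksum = addr + size; C = the 65th bit      */
	ccmp  %1, %4, #2, cc	/* if !C: compare roksum with addr_limit;
				   if  C: force flags to nzcv #2 (C=1, Z=0)   */
	cset  %0, ls		/* flag = (C==0 || Z==1)
				   == (addr + size <= addr_limit), 65-bit     */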
#define access_ok(type, addr, size) __range_ok(addr, size)
+#define user_addr_max get_fs
/*
* The "__xxx" versions of the user access functions do not verify the address
@@ -166,9 +167,10 @@ do { \
#define get_user(x, ptr) \
({ \
- might_sleep(); \
- access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ? \
- __get_user((x), (ptr)) : \
+ __typeof__(*(ptr)) __user *__p = (ptr); \
+ might_fault(); \
+ access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \
+ __get_user((x), __p) : \
((x) = 0, -EFAULT); \
})
@@ -227,9 +229,10 @@ do { \
#define put_user(x, ptr) \
({ \
- might_sleep(); \
- access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \
- __put_user((x), (ptr)) : \
+ __typeof__(*(ptr)) __user *__p = (ptr); \
+ might_fault(); \
+ access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \
+ __put_user((x), __p) : \
-EFAULT; \
})
@@ -238,9 +241,6 @@ extern unsigned long __must_check __copy_to_user(void __user *to, const void *fr
extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n);
extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
-extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count);
-extern unsigned long __must_check __strnlen_user(const char __user *s, long n);
-
static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
{
if (access_ok(VERIFY_READ, from, n))
@@ -274,24 +274,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
return n;
}
-static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res = -EFAULT;
- if (access_ok(VERIFY_READ, src, 1))
- res = __strncpy_from_user(dst, src, count);
- return res;
-}
-
-#define strlen_user(s) strnlen_user(s, ~0UL >> 1)
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
-static inline long __must_check strnlen_user(const char __user *s, long n)
-{
- unsigned long res = 0;
-
- if (__addr_ok(s))
- res = __strnlen_user(s, n);
-
- return res;
-}
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 82ce217e94cf..c335479c2638 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -28,3 +28,5 @@
#endif
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 26e310c54344..7a5df5252dd7 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -18,10 +18,10 @@
#ifndef __ASM__VIRT_H
#define __ASM__VIRT_H
-#define BOOT_CPU_MODE_EL2 (0x0e12b007)
+#define BOOT_CPU_MODE_EL1 (0xe11)
+#define BOOT_CPU_MODE_EL2 (0xe12)
#ifndef __ASSEMBLY__
-#include <asm/cacheflush.h>
/*
* __boot_cpu_mode records what mode CPUs were booted in.
@@ -37,20 +37,9 @@ extern u32 __boot_cpu_mode[2];
void __hyp_set_vectors(phys_addr_t phys_vector_base);
phys_addr_t __hyp_get_vectors(void);
-static inline void sync_boot_mode(void)
-{
- /*
- * As secondaries write to __boot_cpu_mode with caches disabled, we
- * must flush the corresponding cache entries to ensure the visibility
- * of their writes.
- */
- __flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode));
-}
-
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
- sync_boot_mode();
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
}
@@ -58,10 +47,13 @@ static inline bool is_hyp_mode_available(void)
/* Check if the bootloader has booted CPUs in different modes */
static inline bool is_hyp_mode_mismatched(void)
{
- sync_boot_mode();
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
+
#endif /* __ASSEMBLY__ */
#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..aab5bf09e9d9
--- /dev/null
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_WORD_AT_A_TIME_H
+#define __ASM_WORD_AT_A_TIME_H
+
+#ifndef __AARCH64EB__
+
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+ const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
+ const struct word_at_a_time *c)
+{
+ unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+ *bits = mask;
+ return mask;
+}
+
+#define prep_zero_mask(a, bits, c) (bits)
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+ return fls64(mask) >> 3;
+}
+
+#define zero_bytemask(mask) (mask)
+
+#else /* __AARCH64EB__ */
+#include <asm-generic/word-at-a-time.h>
+#endif
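A worked example of the classic zero-byte test above, for the little-endian word a = 0x4141414100414141 ('A' bytes with a NUL at byte index 3):

	((a - 0x0101010101010101) & ~a) & 0x8080808080808080
		== 0x0000000080000000		/* high bit of the NUL byte */

	create_zero_mask(0x0000000080000000) == 0x0000000000ffffff
	find_zero(0x0000000000ffffff) == fls64(0xffffff) >> 3 == 3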
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+ unsigned long ret, offset;
+
+ /* Load word from unaligned pointer addr */
+ asm(
+ "1: ldr %0, %3\n"
+ "2:\n"
+ " .pushsection .fixup,\"ax\"\n"
+ " .align 2\n"
+ "3: and %1, %2, #0x7\n"
+ " bic %2, %2, #0x7\n"
+ " ldr %0, [%2]\n"
+ " lsl %1, %1, #0x3\n"
+#ifndef __AARCH64EB__
+ " lsr %0, %0, %1\n"
+#else
+ " lsl %0, %0, %1\n"
+#endif
+ " b 2b\n"
+ " .popsection\n"
+ " .pushsection __ex_table,\"a\"\n"
+ " .align 3\n"
+ " .quad 1b, 3b\n"
+ " .popsection"
+ : "=&r" (ret), "=&r" (offset)
+ : "r" (addr), "Q" (*(unsigned long *)addr));
+
+ return ret;
+}
+
+#endif /* __ASM_WORD_AT_A_TIME_H */
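The fixup path above matters when a word load crosses into an unmapped page: the faulting load is replaced by an aligned load from the mapped part, shifted so the missing bytes read as zero. An illustrative scenario with hypothetical addresses:

	/* A string ends 3 bytes before an unmapped page:              */
	/*   addr = 0x...fffd; next page at 0x...10000 is not mapped.  */
	/* The ldr from addr faults; the fixup loads the aligned word  */
	/* at 0x...fff8 instead, then shifts it right (little-endian)  */
	/* by (addr & 7) * 8 = 40 bits, so the bytes that would have   */
	/* come from the unmapped page appear as zeroes.               */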
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index e4b78bdca19e..942376d37d22 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -9,6 +9,7 @@ header-y += byteorder.h
header-y += fcntl.h
header-y += hwcap.h
header-y += kvm_para.h
+header-y += perf_regs.h
header-y += param.h
header-y += ptrace.h
header-y += setup.h
diff --git a/arch/arm64/include/uapi/asm/byteorder.h b/arch/arm64/include/uapi/asm/byteorder.h
index 2b92046aafc5..dc19e9537f0d 100644
--- a/arch/arm64/include/uapi/asm/byteorder.h
+++ b/arch/arm64/include/uapi/asm/byteorder.h
@@ -16,6 +16,10 @@
#ifndef __ASM_BYTEORDER_H
#define __ASM_BYTEORDER_H
+#ifdef __AARCH64EB__
+#include <linux/byteorder/big_endian.h>
+#else
#include <linux/byteorder/little_endian.h>
+#endif
#endif /* __ASM_BYTEORDER_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index eea497578b87..73cf0f54d57c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -21,6 +21,11 @@
*/
#define HWCAP_FP (1 << 0)
#define HWCAP_ASIMD (1 << 1)
-
+#define HWCAP_EVTSTRM (1 << 2)
+#define HWCAP_AES (1 << 3)
+#define HWCAP_PMULL (1 << 4)
+#define HWCAP_SHA1 (1 << 5)
+#define HWCAP_SHA2 (1 << 6)
+#define HWCAP_CRC32 (1 << 7)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
new file mode 100644
index 000000000000..8e38878c87c6
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/uapi/asm/kvm.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#define KVM_SPSR_EL1 0
+#define KVM_SPSR_SVC KVM_SPSR_EL1
+#define KVM_SPSR_ABT 1
+#define KVM_SPSR_UND 2
+#define KVM_SPSR_IRQ 3
+#define KVM_SPSR_FIQ 4
+#define KVM_NR_SPSR 5
+
+#ifndef __ASSEMBLY__
+#include <linux/psci.h>
+#include <asm/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+struct kvm_regs {
+ struct user_pt_regs regs; /* sp = sp_el0 */
+
+ __u64 sp_el1;
+ __u64 elr_el1;
+
+ __u64 spsr[KVM_NR_SPSR];
+
+ struct user_fpsimd_state fp_regs;
+};
+
+/* Supported Processor Types */
+#define KVM_ARM_TARGET_AEM_V8 0
+#define KVM_ARM_TARGET_FOUNDATION_V8 1
+#define KVM_ARM_TARGET_CORTEX_A57 2
+#define KVM_ARM_TARGET_XGENE_POTENZA 3
+#define KVM_ARM_TARGET_CORTEX_A53 4
+
+#define KVM_ARM_NUM_TARGETS 5
+
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT 0
+#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT 16
+#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2 0
+
+/* Supported VGIC address types */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU 1
+
+#define KVM_VGIC_V2_DIST_SIZE 0x1000
+#define KVM_VGIC_V2_CPU_SIZE 0x2000
+
+#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
+
+struct kvm_vcpu_init {
+ __u32 target;
+ __u32 features[7];
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
+#define KVM_REG_ARM_COPROC_SHIFT 16
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / sizeof(__u32))
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0
+
+/* AArch64 system registers */
+#define KVM_REG_ARM64_SYSREG (0x0013 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM64_SYSREG_OP0_MASK 0x000000000000c000
+#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14
+#define KVM_REG_ARM64_SYSREG_OP1_MASK 0x0000000000003800
+#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11
+#define KVM_REG_ARM64_SYSREG_CRN_MASK 0x0000000000000780
+#define KVM_REG_ARM64_SYSREG_CRN_SHIFT 7
+#define KVM_REG_ARM64_SYSREG_CRM_MASK 0x0000000000000078
+#define KVM_REG_ARM64_SYSREG_CRM_SHIFT 3
+#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007
+#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0
+
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+ (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+ KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+ (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+ ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+ ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+ ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+ ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+ ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
+
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
+#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
+#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
+#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT 24
+#define KVM_ARM_IRQ_TYPE_MASK 0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT 16
+#define KVM_ARM_IRQ_VCPU_MASK 0xff
+#define KVM_ARM_IRQ_NUM_SHIFT 0
+#define KVM_ARM_IRQ_NUM_MASK 0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU 0
+#define KVM_ARM_IRQ_TYPE_SPI 1
+#define KVM_ARM_IRQ_TYPE_PPI 2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ 0
+#define KVM_ARM_IRQ_CPU_FIQ 1
+
+/* Highest supported SPI, from VGIC_NR_IRQS */
+#define KVM_ARM_IRQ_GIC_MAX 127
+
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE 0x95c1ba5e
+#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
+
+#endif
+
+#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..172b8317ee49
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_ARM64_PERF_REGS_H
+#define _ASM_ARM64_PERF_REGS_H
+
+enum perf_event_arm_regs {
+ PERF_REG_ARM64_X0,
+ PERF_REG_ARM64_X1,
+ PERF_REG_ARM64_X2,
+ PERF_REG_ARM64_X3,
+ PERF_REG_ARM64_X4,
+ PERF_REG_ARM64_X5,
+ PERF_REG_ARM64_X6,
+ PERF_REG_ARM64_X7,
+ PERF_REG_ARM64_X8,
+ PERF_REG_ARM64_X9,
+ PERF_REG_ARM64_X10,
+ PERF_REG_ARM64_X11,
+ PERF_REG_ARM64_X12,
+ PERF_REG_ARM64_X13,
+ PERF_REG_ARM64_X14,
+ PERF_REG_ARM64_X15,
+ PERF_REG_ARM64_X16,
+ PERF_REG_ARM64_X17,
+ PERF_REG_ARM64_X18,
+ PERF_REG_ARM64_X19,
+ PERF_REG_ARM64_X20,
+ PERF_REG_ARM64_X21,
+ PERF_REG_ARM64_X22,
+ PERF_REG_ARM64_X23,
+ PERF_REG_ARM64_X24,
+ PERF_REG_ARM64_X25,
+ PERF_REG_ARM64_X26,
+ PERF_REG_ARM64_X27,
+ PERF_REG_ARM64_X28,
+ PERF_REG_ARM64_X29,
+ PERF_REG_ARM64_LR,
+ PERF_REG_ARM64_SP,
+ PERF_REG_ARM64_PC,
+ PERF_REG_ARM64_MAX,
+};
+#endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b4b564961d4..ac389d32ccde 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -4,20 +4,34 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \
+ -I$(src)/../../../scripts/dtc/libfdt
+
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_insn.o = -pg
+CFLAGS_REMOVE_return_address.o = -pg
# Object file lists.
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o
+ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
+arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
-arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o
+arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o
+arm64-obj-$(CONFIG_SMP) += topology.o
+arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
-arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
+arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
+arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
+arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+arm64-obj-$(CONFIG_KGDB) += kgdb.o
+arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 41b4f626d554..7f0512feaa13 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -29,16 +29,14 @@
#include <asm/checksum.h>
- /* user mem (segment) */
-EXPORT_SYMBOL(__strnlen_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
+ /* user mem (segment) */
EXPORT_SYMBOL(__copy_from_user);
EXPORT_SYMBOL(__copy_to_user);
EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__copy_in_user);
/* physical memory */
EXPORT_SYMBOL(memstart_addr);
@@ -58,3 +56,7 @@ EXPORT_SYMBOL(clear_bit);
EXPORT_SYMBOL(test_and_clear_bit);
EXPORT_SYMBOL(change_bit);
EXPORT_SYMBOL(test_and_change_bit);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a2a4d810bea3..9a9fce090d58 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -21,9 +21,12 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
+#include <linux/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/cputable.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
#include <asm/vdso_datapage.h>
#include <linux/kbuild.h>
@@ -104,5 +107,59 @@ int main(void)
BLANK();
DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
+ BLANK();
+#ifdef CONFIG_KVM_ARM_HOST
+ DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
+ DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
+ DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
+ DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
+ DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1));
+ DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1));
+ DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr));
+ DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs));
+ DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
+ DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
+ DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+ DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
+ DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
+ DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
+ DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
+ DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
+ DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
+ DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
+ DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
+ DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic));
+ DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic));
+ DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors));
+ DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+ DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+ DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+ DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+ DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+ DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+ DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+ DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
+ DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
+ DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
+ DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
+ DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
+ DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
+ DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
+ DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
+ DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
+ DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
+ DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
+#endif
+#ifdef CONFIG_ARM64_CPU_SUSPEND
+ DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
+ DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
+ DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask));
+ DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff));
+ DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
+ DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
+ DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
+#endif
return 0;
}
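
The DEFINE() entries above are not ordinary C constants: asm-offsets.c is compiled to assembly, and Kbuild extracts marker lines from that output to generate asm-offsets.h, which assembly sources then include. A minimal sketch of the mechanism, using the DEFINE() macro as it appears in include/linux/kbuild.h (the example offset value below is hypothetical):

    /* From include/linux/kbuild.h: each DEFINE() emits a marker line
     * into the compiler's assembly output... */
    #define DEFINE(sym, val) \
            asm volatile("\n->" #sym " %0 " #val : : "i" (val))

    /* ...which Kbuild post-processes into generated/asm-offsets.h, e.g.
     * (hypothetical value):
     *
     *      #define VCPU_CONTEXT 16
     *
     * so that hypervisor assembly can write: ldr x2, [x0, #VCPU_CONTEXT]
     */

This is why the KVM and suspend blocks above only need the struct layouts at build time; no code is generated from this file itself.
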
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
new file mode 100644
index 000000000000..04efea8fe4bc
--- /dev/null
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -0,0 +1,99 @@
+/*
+ * CPU kernel entry/exit control
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/cpu_ops.h>
+#include <asm/smp_plat.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/string.h>
+
+extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations cpu_psci_ops;
+
+const struct cpu_operations *cpu_ops[NR_CPUS];
+
+static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_SMP
+ &smp_spin_table_ops,
+ &cpu_psci_ops,
+#endif
+ NULL,
+};
+
+static const struct cpu_operations * __init cpu_get_ops(const char *name)
+{
+ const struct cpu_operations **ops = supported_cpu_ops;
+
+ while (*ops) {
+ if (!strcmp(name, (*ops)->name))
+ return *ops;
+
+ ops++;
+ }
+
+ return NULL;
+}
+
+/*
+ * Read a cpu's enable method from the device tree and record it in cpu_ops.
+ */
+int __init cpu_read_ops(struct device_node *dn, int cpu)
+{
+ const char *enable_method = of_get_property(dn, "enable-method", NULL);
+ if (!enable_method) {
+ /*
+ * The boot CPU may not have an enable method (e.g. when
+ * spin-table is used for secondaries). Don't warn spuriously.
+ */
+ if (cpu != 0)
+ pr_err("%s: missing enable-method property\n",
+ dn->full_name);
+ return -ENOENT;
+ }
+
+ cpu_ops[cpu] = cpu_get_ops(enable_method);
+ if (!cpu_ops[cpu]) {
+ pr_warn("%s: unsupported enable-method property: %s\n",
+ dn->full_name, enable_method);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+void __init cpu_read_bootcpu_ops(void)
+{
+ struct device_node *dn = NULL;
+ u64 mpidr = cpu_logical_map(0);
+
+ while ((dn = of_find_node_by_type(dn, "cpu"))) {
+ u64 hwid;
+ const __be32 *prop;
+
+ prop = of_get_property(dn, "reg", NULL);
+ if (!prop)
+ continue;
+
+ hwid = of_read_number(prop, of_n_addr_cells(dn));
+ if (hwid == mpidr) {
+ cpu_read_ops(dn, 0);
+ of_node_put(dn);
+ return;
+ }
+ }
+}
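
For context, an enable-method provider referenced by supported_cpu_ops[] is just a named operations table. A minimal sketch follows; the field names and signatures are taken from asm/cpu_ops.h of this series and should be treated as assumptions, and the init helper is hypothetical:

    #include <linux/of.h>
    #include <asm/cpu_ops.h>

    /* Hypothetical provider matching an "example-method" enable-method
     * string, found via cpu_get_ops() above. */
    static int __init example_cpu_init(struct device_node *dn,
                                       unsigned int cpu)
    {
            /* e.g. parse a release-address property from the DT node */
            return 0;
    }

    const struct cpu_operations example_cpu_ops = {
            .name           = "example-method",
            .cpu_init       = example_cpu_init,
            /* .cpu_prepare and .cpu_boot would bring the CPU online */
    };

A table like this would then be added to supported_cpu_ops[] so that a matching "enable-method" DT property selects it.
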
diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c
index 63cfc4a43f4e..fd3993cb060f 100644
--- a/arch/arm64/kernel/cputable.c
+++ b/arch/arm64/kernel/cputable.c
@@ -22,7 +22,7 @@
extern unsigned long __cpu_setup(void);
-struct cpu_info __initdata cpu_table[] = {
+struct cpu_info cpu_table[] = {
{
.cpu_id_val = 0x000f0000,
.cpu_id_mask = 0x000f0000,
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index f4726dc054b3..b33051d501e6 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -24,21 +24,13 @@
#include <linux/init.h>
#include <linux/ptrace.h>
#include <linux/stat.h>
+#include <linux/uaccess.h>
#include <asm/debug-monitors.h>
#include <asm/local.h>
#include <asm/cputype.h>
#include <asm/system_misc.h>
-/* Low-level stepping controls. */
-#define DBG_MDSCR_SS (1 << 0)
-#define DBG_SPSR_SS (1 << 21)
-
-/* MDSCR_EL1 enabling bits */
-#define DBG_MDSCR_KDE (1 << 13)
-#define DBG_MDSCR_MDE (1 << 15)
-#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
-
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
{
@@ -137,7 +129,6 @@ void disable_debug_monitors(enum debug_el el)
static void clear_os_lock(void *unused)
{
asm volatile("msr oslar_el1, %0" : : "r" (0));
- isb();
}
static int __cpuinit os_lock_notify(struct notifier_block *self,
@@ -156,8 +147,9 @@ static struct notifier_block __cpuinitdata os_lock_nb = {
static int __cpuinit debug_monitors_init(void)
{
/* Clear the OS lock. */
- smp_call_function(clear_os_lock, NULL, 1);
- clear_os_lock(NULL);
+ on_each_cpu(clear_os_lock, NULL, 1);
+ isb();
+ local_dbg_enable();
/* Register hotplug handler. */
register_cpu_notifier(&os_lock_nb);
@@ -187,6 +179,48 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
regs->pstate = spsr;
}
+/* EL1 Single Step Handler hooks */
+static LIST_HEAD(step_hook);
+static DEFINE_RWLOCK(step_hook_lock);
+
+void register_step_hook(struct step_hook *hook)
+{
+ write_lock(&step_hook_lock);
+ list_add(&hook->node, &step_hook);
+ write_unlock(&step_hook_lock);
+}
+
+void unregister_step_hook(struct step_hook *hook)
+{
+ write_lock(&step_hook_lock);
+ list_del(&hook->node);
+ write_unlock(&step_hook_lock);
+}
+
+/*
+ * Call the registered single-step handlers.
+ * There is no syndrome information with which to determine the right
+ * handler, so we call each registered handler in turn until one
+ * handles the exception (i.e. returns DBG_HOOK_HANDLED).
+ */
+static int call_step_hook(struct pt_regs *regs, unsigned int esr)
+{
+ struct step_hook *hook;
+ int retval = DBG_HOOK_ERROR;
+
+ read_lock(&step_hook_lock);
+
+ list_for_each_entry(hook, &step_hook, node) {
+ retval = hook->fn(regs, esr);
+ if (retval == DBG_HOOK_HANDLED)
+ break;
+ }
+
+ read_unlock(&step_hook_lock);
+
+ return retval;
+}
+
static int single_step_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -214,7 +248,9 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
*/
user_rewind_single_step(current);
} else {
- /* TODO: route to KGDB */
+ if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
+ return 0;
+
pr_warning("Unexpected kernel single-step exception at EL1\n");
/*
* Re-enable stepping since we know that we will be
@@ -226,13 +262,113 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
return 0;
}
-static int __init single_step_init(void)
+/*
+ * The breakpoint handler is re-entrant, as another breakpoint can
+ * be hit from within the breakpoint handler, especially by kprobes.
+ * Use reader/writer locks instead of a plain spinlock.
+ */
+static LIST_HEAD(break_hook);
+static DEFINE_RWLOCK(break_hook_lock);
+
+void register_break_hook(struct break_hook *hook)
+{
+ write_lock(&break_hook_lock);
+ list_add(&hook->node, &break_hook);
+ write_unlock(&break_hook_lock);
+}
+
+void unregister_break_hook(struct break_hook *hook)
+{
+ write_lock(&break_hook_lock);
+ list_del(&hook->node);
+ write_unlock(&break_hook_lock);
+}
+
+static int call_break_hook(struct pt_regs *regs, unsigned int esr)
+{
+ struct break_hook *hook;
+ int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;
+
+ read_lock(&break_hook_lock);
+ list_for_each_entry(hook, &break_hook, node)
+ if ((esr & hook->esr_mask) == hook->esr_val)
+ fn = hook->fn;
+ read_unlock(&break_hook_lock);
+
+ return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
+}
+
+static int brk_handler(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs)
+{
+ siginfo_t info;
+
+ if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+ return 0;
+
+ if (!user_mode(regs))
+ return -EFAULT;
+
+ info = (siginfo_t) {
+ .si_signo = SIGTRAP,
+ .si_errno = 0,
+ .si_code = TRAP_BRKPT,
+ .si_addr = (void __user *)instruction_pointer(regs),
+ };
+
+ force_sig_info(SIGTRAP, &info, current);
+ return 0;
+}
+
+int aarch32_break_handler(struct pt_regs *regs)
+{
+ siginfo_t info;
+ unsigned int instr;
+ bool bp = false;
+ void __user *pc = (void __user *)instruction_pointer(regs);
+
+ if (!compat_user_mode(regs))
+ return -EFAULT;
+
+ if (compat_thumb_mode(regs)) {
+ /* get 16-bit Thumb instruction */
+ get_user(instr, (u16 __user *)pc);
+ if (instr == AARCH32_BREAK_THUMB2_LO) {
+ /* get second half of 32-bit Thumb-2 instruction */
+ get_user(instr, (u16 __user *)(pc + 2));
+ bp = instr == AARCH32_BREAK_THUMB2_HI;
+ } else {
+ bp = instr == AARCH32_BREAK_THUMB;
+ }
+ } else {
+ /* 32-bit ARM instruction */
+ get_user(instr, (u32 __user *)pc);
+ bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM;
+ }
+
+ if (!bp)
+ return -EFAULT;
+
+ info = (siginfo_t) {
+ .si_signo = SIGTRAP,
+ .si_errno = 0,
+ .si_code = TRAP_BRKPT,
+ .si_addr = pc,
+ };
+
+ force_sig_info(SIGTRAP, &info, current);
+ return 0;
+}
+
+static int __init debug_traps_init(void)
{
hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
TRAP_HWBKPT, "single-step handler");
+ hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
+ TRAP_BRKPT, "ptrace BRK handler");
return 0;
}
-arch_initcall(single_step_init);
+arch_initcall(debug_traps_init);
/* Re-enable single step for syscall restarting. */
void user_rewind_single_step(struct task_struct *task)
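
As a usage illustration of the hook interface added above, a debugger backend such as KGDB would register a BRK handler roughly as follows. This is a sketch, not code from this patch; the ESR mask/value are placeholders for a real BRK immediate encoding:

    #include <linux/init.h>
    #include <asm/debug-monitors.h>

    static int example_brk_fn(struct pt_regs *regs, unsigned int esr)
    {
            /* ... handle the trap if it is ours ... */
            return DBG_HOOK_HANDLED;    /* or DBG_HOOK_ERROR to defer */
    }

    static struct break_hook example_brk_hook = {
            .esr_mask       = 0xffffffff,   /* placeholder mask */
            .esr_val        = 0xf2000000,   /* placeholder BRK ESR value */
            .fn             = example_brk_fn,
    };

    static int __init example_debug_init(void)
    {
            register_break_hook(&example_brk_hook);
            return 0;
    }
    arch_initcall(example_debug_init);

Because call_break_hook() matches on (esr & esr_mask) == esr_val, distinct BRK immediates let several subsystems coexist on the same exception vector.
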
diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c
index fbb6e1843659..ffbbdde7aba1 100644
--- a/arch/arm64/kernel/early_printk.c
+++ b/arch/arm64/kernel/early_printk.c
@@ -26,6 +26,8 @@
#include <linux/amba/serial.h>
#include <linux/serial_reg.h>
+#include <asm/fixmap.h>
+
static void __iomem *early_base;
static void (*printch)(char ch);
@@ -141,8 +143,10 @@ static int __init setup_early_printk(char *buf)
}
/* no options parsing yet */
- if (paddr)
- early_base = early_io_map(paddr, EARLYCON_IOBASE);
+ if (paddr) {
+ set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr);
+ early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE);
+ }
printch = match->printch;
early_console = &early_console_dev;
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
new file mode 100644
index 000000000000..66716c9b9e5f
--- /dev/null
+++ b/arch/arm64/kernel/efi-entry.S
@@ -0,0 +1,109 @@
+/*
+ * EFI entry point.
+ *
+ * Copyright (C) 2013, 2014 Red Hat, Inc.
+ * Author: Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+#include <asm/assembler.h>
+
+#define EFI_LOAD_ERROR 0x8000000000000001
+
+ __INIT
+
+ /*
+ * We arrive here from the EFI boot manager with:
+ *
+ * * CPU in little-endian mode
+ * * MMU on with identity-mapped RAM
+ * * Icache and Dcache on
+ *
+ * We will most likely be running from some place other than where
+ * we want to be. The kernel image wants to be placed at TEXT_OFFSET
+ * from start of RAM.
+ */
+ENTRY(efi_stub_entry)
+ /*
+ * Create a stack frame to save FP/LR with extra space
+ * for image_addr variable passed to efi_entry().
+ */
+ stp x29, x30, [sp, #-32]!
+
+ /*
+ * Call efi_entry to do the real work.
+ * x0 and x1 are already set up by firmware. Current runtime
+ * address of image is calculated and passed via *image_addr.
+ *
+ * unsigned long efi_entry(void *handle,
+ * efi_system_table_t *sys_table,
+ * unsigned long *image_addr);
+ */
+ adrp x8, _text
+ add x8, x8, #:lo12:_text
+ add x2, sp, 16
+ str x8, [x2]
+ bl efi_entry
+ cmn x0, #1
+ b.eq efi_load_fail
+
+ /*
+ * efi_entry() will have relocated the kernel image if necessary,
+ * and we return here with the device tree address in x0 and the kernel
+ * entry point stored at *image_addr. Save those values in registers
+ * which are callee-saved.
+ */
+ mov x20, x0 // DTB address
+ ldr x0, [sp, #16] // relocated _text address
+ mov x21, x0
+
+ /*
+ * Flush dcache covering current runtime addresses
+ * of kernel text/data. Then flush all of icache.
+ */
+ adrp x1, _text
+ add x1, x1, #:lo12:_text
+ adrp x2, _edata
+ add x2, x2, #:lo12:_edata
+ sub x1, x2, x1
+
+ bl __flush_dcache_area
+ ic ialluis
+
+ /* Turn off Dcache and MMU */
+ mrs x0, CurrentEL
+ cmp x0, #PSR_MODE_EL2t
+ ccmp x0, #PSR_MODE_EL2h, #0x4, ne
+ b.ne 1f
+ mrs x0, sctlr_el2
+ bic x0, x0, #1 << 0 // clear SCTLR.M
+ bic x0, x0, #1 << 2 // clear SCTLR.C
+ msr sctlr_el2, x0
+ isb
+ b 2f
+1:
+ mrs x0, sctlr_el1
+ bic x0, x0, #1 << 0 // clear SCTLR.M
+ bic x0, x0, #1 << 2 // clear SCTLR.C
+ msr sctlr_el1, x0
+ isb
+2:
+ /* Jump to kernel entry point */
+ mov x0, x20
+ mov x1, xzr
+ mov x2, xzr
+ mov x3, xzr
+ br x21
+
+efi_load_fail:
+ mov x0, #EFI_LOAD_ERROR
+ ldp x29, x30, [sp], #32
+ ret
+
+ENDPROC(efi_stub_entry)
diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c
new file mode 100644
index 000000000000..e786e6cdc400
--- /dev/null
+++ b/arch/arm64/kernel/efi-stub.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@linaro.org>
+ *
+ * This file implements the EFI boot stub for the arm64 kernel.
+ * Adapted from ARM version by Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/efi.h>
+#include <linux/libfdt.h>
+#include <asm/sections.h>
+
+/*
+ * AArch64 requires the DTB to be 8-byte aligned and placed within the first
+ * 512MiB from the start of the kernel, and it may not cross a 2MiB boundary.
+ * We set the alignment to 2MiB so we know it cannot cross one.
+ */
+#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */
+#define MAX_FDT_OFFSET SZ_512M
+
+#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+
+static void efi_char16_printk(efi_system_table_t *sys_table_arg,
+ efi_char16_t *str);
+
+static efi_status_t efi_open_volume(efi_system_table_t *sys_table,
+ void *__image, void **__fh);
+static efi_status_t efi_file_close(void *handle);
+
+static efi_status_t
+efi_file_read(void *handle, unsigned long *size, void *addr);
+
+static efi_status_t
+efi_file_size(efi_system_table_t *sys_table, void *__fh,
+ efi_char16_t *filename_16, void **handle, u64 *file_sz);
+
+/* Include shared EFI stub code */
+#include "../../../drivers/firmware/efi/efi-stub-helper.c"
+#include "../../../drivers/firmware/efi/fdt.c"
+#include "../../../drivers/firmware/efi/arm-stub.c"
+
+
+static efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
+ unsigned long *image_addr,
+ unsigned long *image_size,
+ unsigned long *reserve_addr,
+ unsigned long *reserve_size,
+ unsigned long dram_base,
+ efi_loaded_image_t *image)
+{
+ efi_status_t status;
+ unsigned long kernel_size, kernel_memsize = 0;
+
+ /* Relocate the image, if required. */
+ kernel_size = _edata - _text;
+ if (*image_addr != (dram_base + TEXT_OFFSET)) {
+ kernel_memsize = kernel_size + (_end - _edata);
+ status = efi_relocate_kernel(sys_table, image_addr,
+ kernel_size, kernel_memsize,
+ dram_base + TEXT_OFFSET,
+ PAGE_SIZE);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table, "Failed to relocate kernel\n");
+ return status;
+ }
+ if (*image_addr != (dram_base + TEXT_OFFSET)) {
+ pr_efi_err(sys_table, "Failed to alloc kernel memory\n");
+ efi_free(sys_table, kernel_memsize, *image_addr);
+ return EFI_LOAD_ERROR;
+ }
+ *image_size = kernel_memsize;
+ }
+
+
+ return EFI_SUCCESS;
+}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
new file mode 100644
index 000000000000..14db1f6e8d7f
--- /dev/null
+++ b/arch/arm64/kernel/efi.c
@@ -0,0 +1,469 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 2.4
+ *
+ * Copyright (C) 2013, 2014 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/efi.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/bootmem.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/cacheflush.h>
+#include <asm/efi.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+
+struct efi_memory_map memmap;
+
+static efi_runtime_services_t *runtime;
+
+static u64 efi_system_table;
+
+static int uefi_debug __initdata;
+static int __init uefi_debug_setup(char *str)
+{
+ uefi_debug = 1;
+
+ return 0;
+}
+early_param("uefi_debug", uefi_debug_setup);
+
+static int __init is_normal_ram(efi_memory_desc_t *md)
+{
+ if (md->attribute & EFI_MEMORY_WB)
+ return 1;
+ return 0;
+}
+
+static void __init efi_setup_idmap(void)
+{
+ struct memblock_region *r;
+ efi_memory_desc_t *md;
+ u64 paddr, npages, size;
+
+ for_each_memblock(memory, r)
+ create_id_mapping(r->base, r->size, 0);
+
+ /* map runtime io spaces */
+ for_each_efi_memory_desc(&memmap, md) {
+ if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md))
+ continue;
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+ create_id_mapping(paddr, size, 1);
+ }
+}
+
+static int __init uefi_init(void)
+{
+ efi_char16_t *c16;
+ char vendor[100] = "unknown";
+ int i, retval;
+
+ efi.systab = early_memremap(efi_system_table,
+ sizeof(efi_system_table_t));
+ if (efi.systab == NULL) {
+ pr_warn("Unable to map EFI system table.\n");
+ return -ENOMEM;
+ }
+
+ set_bit(EFI_BOOT, &efi.flags);
+ set_bit(EFI_64BIT, &efi.flags);
+
+ /*
+ * Verify the EFI Table
+ */
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
+ pr_err("System table signature incorrect\n");
+ return -EINVAL;
+ }
+ if ((efi.systab->hdr.revision >> 16) < 2)
+ pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff);
+
+ /* Show what we know for posterity */
+ c16 = early_memremap(efi.systab->fw_vendor,
+ sizeof(vendor));
+ if (c16) {
+ for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
+ vendor[i] = c16[i];
+ vendor[i] = '\0';
+ }
+
+ pr_info("EFI v%u.%.02u by %s\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff, vendor);
+
+ retval = efi_config_init(NULL);
+ if (retval == 0)
+ set_bit(EFI_CONFIG_TABLES, &efi.flags);
+
+ early_memunmap(c16, sizeof(vendor));
+ early_memunmap(efi.systab, sizeof(efi_system_table_t));
+
+ return retval;
+}
+
+static __initdata char memory_type_name[][32] = {
+ {"Reserved"},
+ {"Loader Code"},
+ {"Loader Data"},
+ {"Boot Code"},
+ {"Boot Data"},
+ {"Runtime Code"},
+ {"Runtime Data"},
+ {"Conventional Memory"},
+ {"Unusable Memory"},
+ {"ACPI Reclaim Memory"},
+ {"ACPI Memory NVS"},
+ {"Memory Mapped I/O"},
+ {"MMIO Port Space"},
+ {"PAL Code"},
+};
+
+/*
+ * Return true for RAM regions we want to permanently reserve.
+ */
+static __init int is_reserve_region(efi_memory_desc_t *md)
+{
+ if (!is_normal_ram(md))
+ return 0;
+
+ if (md->attribute & EFI_MEMORY_RUNTIME)
+ return 1;
+
+ if (md->type == EFI_ACPI_RECLAIM_MEMORY ||
+ md->type == EFI_RESERVED_TYPE)
+ return 1;
+
+ return 0;
+}
+
+static __init void reserve_regions(void)
+{
+ efi_memory_desc_t *md;
+ u64 paddr, npages, size;
+
+ if (uefi_debug)
+ pr_info("Processing EFI memory map:\n");
+
+ for_each_efi_memory_desc(&memmap, md) {
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+
+ if (uefi_debug)
+ pr_info(" 0x%012llx-0x%012llx [%s]",
+ paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
+ memory_type_name[md->type]);
+
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+
+ if (is_normal_ram(md))
+ early_init_dt_add_memory_arch(paddr, size);
+
+ if (is_reserve_region(md) ||
+ md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA) {
+ memblock_reserve(paddr, size);
+ if (uefi_debug)
+ pr_cont("*");
+ }
+
+ if (uefi_debug)
+ pr_cont("\n");
+ }
+}
+
+
+static u64 __init free_one_region(u64 start, u64 end)
+{
+ u64 size = end - start;
+
+ if (uefi_debug)
+ pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1);
+
+ free_bootmem_late(start, size);
+ return size;
+}
+
+static u64 __init free_region(u64 start, u64 end)
+{
+ u64 map_start, map_end, total = 0;
+
+ if (end <= start)
+ return total;
+
+ map_start = (u64)memmap.phys_map;
+ map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map));
+ map_start &= PAGE_MASK;
+
+ if (start < map_end && end > map_start) {
+ /* region overlaps UEFI memmap */
+ if (start < map_start)
+ total += free_one_region(start, map_start);
+
+ if (map_end < end)
+ total += free_one_region(map_end, end);
+ } else
+ total += free_one_region(start, end);
+
+ return total;
+}
+
+static void __init free_boot_services(void)
+{
+ u64 total_freed = 0;
+ u64 keep_end, free_start, free_end;
+ efi_memory_desc_t *md;
+
+ /*
+ * If the kernel uses larger pages than UEFI, we have to be careful
+ * not to inadvertently free memory we want to keep if there is
+ * overlap at the kernel page-size alignment. We do not want to
+ * free is_reserve_region() memory, nor the UEFI memmap itself.
+ *
+ * The memory map is sorted, so we keep track of the end of
+ * any previous region we want to keep, remember any region
+ * we want to free and defer freeing it until we encounter
+ * the next region we want to keep. This way, before freeing
+ * it, we can clip it as needed to avoid freeing memory we
+ * want to keep for UEFI.
+ */
+
+ keep_end = 0;
+ free_start = 0;
+
+ for_each_efi_memory_desc(&memmap, md) {
+ u64 paddr, npages, size;
+
+ if (is_reserve_region(md)) {
+ /*
+ * We don't want to free any memory from this region.
+ */
+ if (free_start) {
+ /* adjust free_end then free region */
+ if (free_end > md->phys_addr)
+ free_end -= PAGE_SIZE;
+ total_freed += free_region(free_start, free_end);
+ free_start = 0;
+ }
+ keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+ continue;
+ }
+
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA) {
+ /* no need to free this region */
+ continue;
+ }
+
+ /*
+ * We want to free memory from this region.
+ */
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+
+ if (free_start) {
+ if (paddr <= free_end)
+ free_end = paddr + size;
+ else {
+ total_freed += free_region(free_start, free_end);
+ free_start = paddr;
+ free_end = paddr + size;
+ }
+ } else {
+ free_start = paddr;
+ free_end = paddr + size;
+ }
+ if (free_start < keep_end) {
+ free_start += PAGE_SIZE;
+ if (free_start >= free_end)
+ free_start = 0;
+ }
+ }
+ if (free_start)
+ total_freed += free_region(free_start, free_end);
+
+ if (total_freed)
+ pr_info("Freed 0x%llx bytes of EFI boot services memory\n",
+ total_freed);
+}
+
+void __init efi_init(void)
+{
+ struct efi_fdt_params params;
+
+ /* Grab UEFI information placed in FDT by stub */
+ if (!efi_get_fdt_params(&params, uefi_debug))
+ return;
+
+ efi_system_table = params.system_table;
+
+ memblock_reserve(params.mmap & PAGE_MASK,
+ PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK)));
+ memmap.phys_map = (void *)params.mmap;
+ memmap.map = early_memremap(params.mmap, params.mmap_size);
+ memmap.map_end = memmap.map + params.mmap_size;
+ memmap.desc_size = params.desc_size;
+ memmap.desc_version = params.desc_ver;
+
+ if (uefi_init() < 0)
+ return;
+
+ reserve_regions();
+}
+
+void __init efi_idmap_init(void)
+{
+ if (!efi_enabled(EFI_BOOT))
+ return;
+
+ /* boot time idmap_pg_dir is incomplete, so fill in missing parts */
+ efi_setup_idmap();
+}
+
+static int __init remap_region(efi_memory_desc_t *md, void **new)
+{
+ u64 paddr, vaddr, npages, size;
+
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+
+ if (is_normal_ram(md))
+ vaddr = (__force u64)ioremap_cache(paddr, size);
+ else
+ vaddr = (__force u64)ioremap(paddr, size);
+
+ if (!vaddr) {
+ pr_err("Unable to remap 0x%llx pages @ %p\n",
+ npages, (void *)paddr);
+ return 0;
+ }
+
+ /* adjust for any rounding when the EFI and system page sizes differ */
+ md->virt_addr = vaddr + (md->phys_addr - paddr);
+
+ if (uefi_debug)
+ pr_info(" EFI remap 0x%012llx => %p\n",
+ md->phys_addr, (void *)md->virt_addr);
+
+ memcpy(*new, md, memmap.desc_size);
+ *new += memmap.desc_size;
+
+ return 1;
+}
+
+/*
+ * Switch UEFI from an identity map to a kernel virtual map
+ */
+static int __init arm64_enter_virtual_mode(void)
+{
+ efi_memory_desc_t *md;
+ phys_addr_t virtmap_phys;
+ void *virtmap, *virt_md;
+ efi_status_t status;
+ u64 mapsize;
+ int count = 0;
+ unsigned long flags;
+
+ if (!efi_enabled(EFI_BOOT)) {
+ pr_info("EFI services will not be available.\n");
+ return -1;
+ }
+
+ pr_info("Remapping and enabling EFI services.\n");
+
+ /* replace early memmap mapping with permanent mapping */
+ mapsize = memmap.map_end - memmap.map;
+ early_memunmap(memmap.map, mapsize);
+ memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map,
+ mapsize);
+ memmap.map_end = memmap.map + mapsize;
+
+ efi.memmap = &memmap;
+
+ /* Map the runtime regions */
+ virtmap = kmalloc(mapsize, GFP_KERNEL);
+ if (!virtmap) {
+ pr_err("Failed to allocate EFI virtual memmap\n");
+ return -1;
+ }
+ virtmap_phys = virt_to_phys(virtmap);
+ virt_md = virtmap;
+
+ for_each_efi_memory_desc(&memmap, md) {
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
+ if (remap_region(md, &virt_md))
+ ++count;
+ }
+
+ efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table);
+ if (efi.systab)
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
+ local_irq_save(flags);
+ cpu_switch_mm(idmap_pg_dir, &init_mm);
+
+ /* Call SetVirtualAddressMap with the physical address of the map */
+ runtime = efi.systab->runtime;
+ efi.set_virtual_address_map = runtime->set_virtual_address_map;
+
+ status = efi.set_virtual_address_map(count * memmap.desc_size,
+ memmap.desc_size,
+ memmap.desc_version,
+ (efi_memory_desc_t *)virtmap_phys);
+ cpu_set_reserved_ttbr0();
+ flush_tlb_all();
+ local_irq_restore(flags);
+
+ kfree(virtmap);
+
+ free_boot_services();
+
+ if (status != EFI_SUCCESS) {
+ pr_err("Failed to set EFI virtual address map! [%lx]\n",
+ status);
+ return -1;
+ }
+
+ /* Set up runtime services function pointers */
+ runtime = efi.systab->runtime;
+ efi.get_time = runtime->get_time;
+ efi.set_time = runtime->set_time;
+ efi.get_wakeup_time = runtime->get_wakeup_time;
+ efi.set_wakeup_time = runtime->set_wakeup_time;
+ efi.get_variable = runtime->get_variable;
+ efi.get_next_variable = runtime->get_next_variable;
+ efi.set_variable = runtime->set_variable;
+ efi.query_variable_info = runtime->query_variable_info;
+ efi.update_capsule = runtime->update_capsule;
+ efi.query_capsule_caps = runtime->query_capsule_caps;
+ efi.get_next_high_mono_count = runtime->get_next_high_mono_count;
+ efi.reset_system = runtime->reset_system;
+
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
+ return 0;
+}
+early_initcall(arm64_enter_virtual_mode);
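
Once this initcall has completed and EFI_RUNTIME_SERVICES is set, generic code can reach the remapped firmware through the global efi table. A small sketch (not part of this patch) reading the UEFI real-time clock via the get_time pointer wired up above:

    #include <linux/efi.h>
    #include <linux/printk.h>

    /* Sketch: assumes arm64_enter_virtual_mode() succeeded. */
    static void example_read_efi_time(void)
    {
            efi_time_t tm;
            efi_time_cap_t cap;

            if (!efi_enabled(EFI_RUNTIME_SERVICES))
                    return;

            if (efi.get_time(&tm, &cap) == EFI_SUCCESS)
                    pr_info("EFI time: %04u-%02u-%02u %02u:%02u:%02u\n",
                            tm.year, tm.month, tm.day,
                            tm.hour, tm.minute, tm.second);
    }
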
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
new file mode 100644
index 000000000000..b051871f2965
--- /dev/null
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -0,0 +1,218 @@
+/*
+ * arch/arm64/kernel/entry-ftrace.S
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+/*
+ * GCC with -pg will put the following code at the beginning of each function:
+ * mov x0, x30
+ * bl _mcount
+ * [function's body ...]
+ * "bl _mcount" may be replaced with "bl ftrace_caller" or a NOP if dynamic
+ * ftrace is enabled.
+ *
+ * Note that the x0 argument is not actually used here: we can retrieve the
+ * instrumented function's lr (x30) at any time by unwinding the call stack,
+ * as long as the kernel is compiled without -fomit-frame-pointer
+ * (i.e. with CONFIG_FRAME_POINTER, which is forced on arm64).
+ *
+ * stack layout after mcount_enter in _mcount():
+ *
+ * current sp/fp => 0:+-----+
+ * in _mcount() | x29 | -> instrumented function's fp
+ * +-----+
+ * | x30 | -> _mcount()'s lr (= instrumented function's pc)
+ * old sp => +16:+-----+
+ * when instrumented | |
+ * function calls | ... |
+ * _mcount() | |
+ * | |
+ * instrumented => +xx:+-----+
+ * function's fp | x29 | -> parent's fp
+ * +-----+
+ * | x30 | -> instrumented function's lr (= parent's pc)
+ * +-----+
+ * | ... |
+ */
+
+ .macro mcount_enter
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ .endm
+
+ .macro mcount_exit
+ ldp x29, x30, [sp], #16
+ ret
+ .endm
+
+ .macro mcount_adjust_addr rd, rn
+ sub \rd, \rn, #AARCH64_INSN_SIZE
+ .endm
+
+ /* for instrumented function's parent */
+ .macro mcount_get_parent_fp reg
+ ldr \reg, [x29]
+ ldr \reg, [\reg]
+ .endm
+
+ /* for instrumented function */
+ .macro mcount_get_pc0 reg
+ mcount_adjust_addr \reg, x30
+ .endm
+
+ .macro mcount_get_pc reg
+ ldr \reg, [x29, #8]
+ mcount_adjust_addr \reg, \reg
+ .endm
+
+ .macro mcount_get_lr reg
+ ldr \reg, [x29]
+ ldr \reg, [\reg, #8]
+ mcount_adjust_addr \reg, \reg
+ .endm
+
+ .macro mcount_get_lr_addr reg
+ ldr \reg, [x29]
+ add \reg, \reg, #8
+ .endm
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+/*
+ * void _mcount(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function makes calls, if enabled, to:
+ * - tracer function to probe instrumented function's entry,
+ * - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(_mcount)
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ ldr x0, =ftrace_trace_stop
+ ldr x0, [x0] // if (ftrace_trace_stop)
+ cbz x0, 1f // (tracing not stopped)
+ ret // return;
+1:
+#endif
+ mcount_enter
+
+ ldr x0, =ftrace_trace_function
+ ldr x2, [x0]
+ adr x0, ftrace_stub
+ cmp x0, x2 // if (ftrace_trace_function
+ b.eq skip_ftrace_call // != ftrace_stub) {
+
+ mcount_get_pc x0 // function's pc
+ mcount_get_lr x1 // function's lr (= parent's pc)
+ blr x2 // (*ftrace_trace_function)(pc, lr);
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
+skip_ftrace_call: // return;
+ mcount_exit // }
+#else
+ mcount_exit // return;
+ // }
+skip_ftrace_call:
+ ldr x1, =ftrace_graph_return
+ ldr x2, [x1] // if ((ftrace_graph_return
+ cmp x0, x2 // != ftrace_stub)
+ b.ne ftrace_graph_caller
+
+ ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry
+ ldr x2, [x1] // != ftrace_graph_entry_stub))
+ ldr x0, =ftrace_graph_entry_stub
+ cmp x0, x2
+ b.ne ftrace_graph_caller // ftrace_graph_caller();
+
+ mcount_exit
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ENDPROC(_mcount)
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+/*
+ * _mcount() is used to build the kernel with the -pg option, but all the
+ * branch instructions to _mcount() are initially replaced with NOPs at
+ * kernel start-up. Later, each NOP is patched to branch to ftrace_caller()
+ * when tracing is enabled, or back to a NOP when disabled, per function.
+ */
+ENTRY(_mcount)
+ ret
+ENDPROC(_mcount)
+
+/*
+ * void ftrace_caller(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function is a counterpart of _mcount() in 'static' ftrace, and
+ * makes calls to:
+ * - tracer function to probe instrumented function's entry,
+ * - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(ftrace_caller)
+ mcount_enter
+
+ mcount_get_pc0 x0 // function's pc
+ mcount_get_lr x1 // function's lr
+
+ .global ftrace_call
+ftrace_call: // tracer(pc, lr);
+ nop // This will be replaced with "bl xxx"
+ // where xxx can be any kind of tracer.
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .global ftrace_graph_call
+ftrace_graph_call: // ftrace_graph_caller();
+ nop // If enabled, this will be replaced
+ // with "b ftrace_graph_caller"
+#endif
+
+ mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(ftrace_stub)
+ ret
+ENDPROC(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * void ftrace_graph_caller(void)
+ *
+ * Called from _mcount() or ftrace_caller() when function_graph tracer is
+ * selected.
+ * This function, together with prepare_ftrace_return(), fakes the link
+ * register's value on the call stack in order to intercept the instrumented
+ * function's return path and run return_to_handler() later on its exit.
+ */
+ENTRY(ftrace_graph_caller)
+ mcount_get_lr_addr x0 // pointer to function's saved lr
+ mcount_get_pc x1 // function's pc
+ mcount_get_parent_fp x2 // parent's fp
+ bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp)
+
+ mcount_exit
+ENDPROC(ftrace_graph_caller)
+
+/*
+ * void return_to_handler(void)
+ *
+ * Run ftrace_return_to_handler() before going back to parent.
+ * @fp is checked against the value passed by ftrace_graph_caller()
+ * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ */
+ENTRY(return_to_handler)
+ str x0, [sp, #-16]!
+ mov x0, x29 // parent's fp
+ bl ftrace_return_to_handler // addr = ftrace_return_to_handler(fp);
+ mov x30, x0 // restore the original return address
+ ldr x0, [sp], #16
+ ret
+END(return_to_handler)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
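
To make the calling convention above concrete: with -pg, the compiler effectively prefixes every traced function with a call to _mcount() carrying the caller's return address. A hypothetical C rendering, not actual kernel code:

    extern void _mcount(unsigned long return_address);

    void instrumented_function(void)
    {
            /* inserted by the compiler: mov x0, x30; bl _mcount */
            _mcount((unsigned long)__builtin_return_address(0));

            /* ... original function body ... */
    }

The frame pointer chain that mcount_get_pc/mcount_get_lr walk exists precisely because this call happens after the usual stp x29, x30 prologue.
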
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7cd589ebca2a..56ef569b2b62 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -287,6 +287,8 @@ el1_dbg:
/*
* Debug exception handling
*/
+ cmp x24, #ESR_EL1_EC_BRK64 // if BRK64
+ cinc x24, x24, eq // set bit '0'
tbz x24, #0, el1_inv // EL1 only
mrs x0, far_el1
mov x2, sp // struct pt_regs
@@ -310,14 +312,14 @@ el1_irq:
#endif
#ifdef CONFIG_PREEMPT
get_thread_info tsk
- ldr x24, [tsk, #TI_PREEMPT] // get preempt count
- add x0, x24, #1 // increment it
- str x0, [tsk, #TI_PREEMPT]
+ ldr w24, [tsk, #TI_PREEMPT] // get preempt count
+ add w0, w24, #1 // increment it
+ str w0, [tsk, #TI_PREEMPT]
#endif
irq_handler
#ifdef CONFIG_PREEMPT
- str x24, [tsk, #TI_PREEMPT] // restore preempt count
- cbnz x24, 1f // preempt count != 0
+ str w24, [tsk, #TI_PREEMPT] // restore preempt count
+ cbnz w24, 1f // preempt count != 0
ldr x0, [tsk, #TI_FLAGS] // get flags
tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
bl el1_preempt
@@ -422,6 +424,7 @@ el0_da:
* Data abort handling
*/
mrs x0, far_el1
+ bic x0, x0, #(0xff << 56)
disable_step x1
isb
enable_dbg
@@ -475,6 +478,8 @@ el0_undef:
* Undefined instruction
*/
mov x0, sp
+ // enable interrupts before calling the main handler
+ enable_irq
b do_undefinstr
el0_dbg:
/*
@@ -505,15 +510,15 @@ el0_irq_naked:
#endif
get_thread_info tsk
#ifdef CONFIG_PREEMPT
- ldr x24, [tsk, #TI_PREEMPT] // get preempt count
- add x23, x24, #1 // increment it
- str x23, [tsk, #TI_PREEMPT]
+ ldr w24, [tsk, #TI_PREEMPT] // get preempt count
+ add w23, w24, #1 // increment it
+ str w23, [tsk, #TI_PREEMPT]
#endif
irq_handler
#ifdef CONFIG_PREEMPT
- ldr x0, [tsk, #TI_PREEMPT]
- str x24, [tsk, #TI_PREEMPT]
- cmp x0, x23
+ ldr w0, [tsk, #TI_PREEMPT]
+ str w24, [tsk, #TI_PREEMPT]
+ cmp w0, w23
b.eq 1f
mov x1, #0
str x1, [x1] // BUG
@@ -640,8 +645,9 @@ el0_svc_naked: // compat entry point
enable_irq
get_thread_info tsk
- ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing
- tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls?
+ ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
+ tst x16, #_TIF_SYSCALL_WORK
+ b.ne __sys_trace
adr lr, ret_fast_syscall // return address
cmp scno, sc_nr // check upper syscall limit
b.hs ni_sys
@@ -657,9 +663,8 @@ ENDPROC(el0_svc)
* switches, and waiting for our parent to respond.
*/
__sys_trace:
- mov x1, sp
- mov w0, #0 // trace entry
- bl syscall_trace
+ mov x0, sp
+ bl syscall_trace_enter
adr lr, __sys_trace_return // return address
uxtw scno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs
@@ -674,9 +679,8 @@ __sys_trace:
__sys_trace_return:
str x0, [sp] // save returned x0
- mov x1, sp
- mov w0, #1 // trace exit
- bl syscall_trace
+ mov x0, sp
+ bl syscall_trace_exit
b ret_to_user
/*
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 2fa308e4a1fa..522df9c7f3a4 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/cpu_pm.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -85,6 +86,66 @@ void fpsimd_flush_thread(void)
preempt_enable();
}
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Kernel-side NEON support functions
+ */
+void kernel_neon_begin(void)
+{
+ /* Avoid using the NEON in interrupt context */
+ BUG_ON(in_interrupt());
+ preempt_disable();
+
+ if (current->mm)
+ fpsimd_save_state(&current->thread.fpsimd_state);
+}
+EXPORT_SYMBOL(kernel_neon_begin);
+
+void kernel_neon_end(void)
+{
+ if (current->mm)
+ fpsimd_load_state(&current->thread.fpsimd_state);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(kernel_neon_end);
+
+#endif /* CONFIG_KERNEL_MODE_NEON */
+
+#ifdef CONFIG_CPU_PM
+static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
+ unsigned long cmd, void *v)
+{
+ switch (cmd) {
+ case CPU_PM_ENTER:
+ if (current->mm)
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ break;
+ case CPU_PM_EXIT:
+ if (current->mm)
+ fpsimd_load_state(&current->thread.fpsimd_state);
+ break;
+ case CPU_PM_ENTER_FAILED:
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block fpsimd_cpu_pm_notifier_block = {
+ .notifier_call = fpsimd_cpu_pm_notifier,
+};
+
+static void fpsimd_pm_init(void)
+{
+ cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
+}
+
+#else
+static inline void fpsimd_pm_init(void) { }
+#endif /* CONFIG_CPU_PM */
+
/*
* FP/SIMD support code initialisation.
*/
@@ -103,6 +164,8 @@ static int __init fpsimd_init(void)
else
elf_hwcap |= HWCAP_ASIMD;
+ fpsimd_pm_init();
+
return 0;
}
late_initcall(fpsimd_init);
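
A typical consumer of the kernel-mode NEON support added above brackets its SIMD region with the begin/end calls; since they disable preemption and BUG in interrupt context, this is for task context only. A sketch, assuming the declarations are exposed via asm/neon.h in this tree:

    #include <asm/neon.h>

    /* Sketch: any kernel code touching FP/SIMD registers must be
     * bracketed by kernel_neon_begin()/kernel_neon_end() so the
     * current task's user FP/SIMD state is saved and restored. */
    static void example_neon_xor(unsigned long *dst,
                                 const unsigned long *src,
                                 unsigned long words)
    {
            kernel_neon_begin();
            /* ... NEON-accelerated XOR loop would go here ... */
            kernel_neon_end();
    }
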
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
new file mode 100644
index 000000000000..649890a3ac4e
--- /dev/null
+++ b/arch/arm64/kernel/ftrace.c
@@ -0,0 +1,177 @@
+/*
+ * arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/swab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Replace a single instruction, which may be a branch or NOP.
+ * If @validate == true, a replaced instruction is checked against 'old'.
+ */
+static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
+ bool validate)
+{
+ u32 replaced;
+
+ /*
+ * Note:
+ * Due to modules and __init, code can disappear and change;
+ * we need to protect against faulting as well as code changing.
+ * We do this via aarch64_insn_*(), which use probe_kernel_*().
+ *
+ * No lock is held here because all the modifications are run
+ * through stop_machine().
+ */
+ if (validate) {
+ if (aarch64_insn_read((void *)pc, &replaced))
+ return -EFAULT;
+
+ if (replaced != old)
+ return -EINVAL;
+ }
+ if (aarch64_insn_patch_text_nosync((void *)pc, new))
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * Replace tracer function in ftrace_caller()
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long pc;
+ u32 new;
+
+ pc = (unsigned long)&ftrace_call;
+ new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true);
+
+ return ftrace_modify_code(pc, 0, new, false);
+}
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_nop();
+ new = aarch64_insn_gen_branch_imm(pc, addr, true);
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * Turn off the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_branch_imm(pc, addr, true);
+ new = aarch64_insn_gen_nop();
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ *(unsigned long *)data = 0;
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * The function_graph tracer expects ftrace_return_to_handler() to be
+ * called on the way back to the parent. For this purpose, this function
+ * is called from _mcount() or ftrace_caller() to replace the return
+ * address (*parent) on the call stack with return_to_handler.
+ *
+ * Note that @frame_pointer is used only for sanity check later.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long old;
+ struct ftrace_graph_ent trace;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ /*
+ * Note:
+ * No protection against faulting at *parent, which may be seen
+ * on other archs. It's unlikely on AArch64.
+ */
+ old = *parent;
+ *parent = return_hooker;
+
+ trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ *parent = old;
+ return;
+ }
+
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer);
+ if (err == -EBUSY) {
+ *parent = old;
+ return;
+ }
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
+ * depending on @enable.
+ */
+static int ftrace_modify_graph_caller(bool enable)
+{
+ unsigned long pc = (unsigned long)&ftrace_graph_call;
+ u32 branch, nop;
+
+ branch = aarch64_insn_gen_branch_imm(pc,
+ (unsigned long)ftrace_graph_caller, false);
+ nop = aarch64_insn_gen_nop();
+
+ if (enable)
+ return ftrace_modify_code(pc, nop, branch, true);
+ else
+ return ftrace_modify_code(pc, branch, nop, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 53dcae49e729..bbc9fe1658fa 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -22,10 +22,12 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
#include <asm/cputype.h>
#include <asm/memory.h>
#include <asm/thread_info.h>
@@ -34,29 +36,17 @@
#include <asm/page.h>
#include <asm/virt.h>
-/*
- * swapper_pg_dir is the virtual address of the initial page table. We place
- * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has
- * 2 pages and is placed below swapper_pg_dir.
- */
#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)
#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000
#error KERNEL_RAM_VADDR must start at 0xXXX80000
#endif
-#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE)
-#define IDMAP_DIR_SIZE (2 * PAGE_SIZE)
-
- .globl swapper_pg_dir
- .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE
-
- .globl idmap_pg_dir
- .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE
-
- .macro pgtbl, ttb0, ttb1, phys
- add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE
- sub \ttb0, \ttb1, #IDMAP_DIR_SIZE
+ .macro pgtbl, ttb0, ttb1, virt_to_phys
+ ldr \ttb1, =swapper_pg_dir
+ ldr \ttb0, =idmap_pg_dir
+ add \ttb1, \ttb1, \virt_to_phys
+ add \ttb0, \ttb0, \virt_to_phys
.endm
#ifdef CONFIG_ARM64_64K_PAGES
@@ -107,16 +97,137 @@
/*
* DO NOT MODIFY. Image header expected by Linux boot-loaders.
*/
+#ifdef CONFIG_EFI
+efi_head:
+ /*
+ * This add instruction has no meaningful effect except that
+ * its opcode forms the magic "MZ" signature required by UEFI.
+ */
+ add x13, x18, #0x16
+ b stext
+#else
b stext // branch to kernel start, magic
.long 0 // reserved
+#endif
.quad TEXT_OFFSET // Image load offset from start of RAM
.quad 0 // reserved
.quad 0 // reserved
+ .quad 0 // reserved
+ .quad 0 // reserved
+ .quad 0 // reserved
+ .byte 0x41 // Magic number, "ARM\x64"
+ .byte 0x52
+ .byte 0x4d
+ .byte 0x64
+#ifdef CONFIG_EFI
+ .long pe_header - efi_head // Offset to the PE header.
+#else
+ .word 0 // reserved
+#endif
+
+#ifdef CONFIG_EFI
+ .align 3
+pe_header:
+ .ascii "PE"
+ .short 0
+coff_header:
+ .short 0xaa64 // AArch64
+ .short 2 // nr_sections
+ .long 0 // TimeDateStamp
+ .long 0 // PointerToSymbolTable
+ .long 1 // NumberOfSymbols
+ .short section_table - optional_header // SizeOfOptionalHeader
+ .short 0x206 // Characteristics.
+ // IMAGE_FILE_DEBUG_STRIPPED |
+ // IMAGE_FILE_EXECUTABLE_IMAGE |
+ // IMAGE_FILE_LINE_NUMS_STRIPPED
+optional_header:
+ .short 0x20b // PE32+ format
+ .byte 0x02 // MajorLinkerVersion
+ .byte 0x14 // MinorLinkerVersion
+ .long _edata - stext // SizeOfCode
+ .long 0 // SizeOfInitializedData
+ .long 0 // SizeOfUninitializedData
+ .long efi_stub_entry - efi_head // AddressOfEntryPoint
+ .long stext - efi_head // BaseOfCode
+
+extra_header_fields:
+ .quad 0 // ImageBase
+ .long 0x20 // SectionAlignment
+ .long 0x8 // FileAlignment
+ .short 0 // MajorOperatingSystemVersion
+ .short 0 // MinorOperatingSystemVersion
+ .short 0 // MajorImageVersion
+ .short 0 // MinorImageVersion
+ .short 0 // MajorSubsystemVersion
+ .short 0 // MinorSubsystemVersion
+ .long 0 // Win32VersionValue
+
+ .long _edata - efi_head // SizeOfImage
+
+ // Everything before the kernel image is considered part of the header
+ .long stext - efi_head // SizeOfHeaders
+ .long 0 // CheckSum
+ .short 0xa // Subsystem (EFI application)
+ .short 0 // DllCharacteristics
+ .quad 0 // SizeOfStackReserve
+ .quad 0 // SizeOfStackCommit
+ .quad 0 // SizeOfHeapReserve
+ .quad 0 // SizeOfHeapCommit
+ .long 0 // LoaderFlags
+ .long 0x6 // NumberOfRvaAndSizes
+
+ .quad 0 // ExportTable
+ .quad 0 // ImportTable
+ .quad 0 // ResourceTable
+ .quad 0 // ExceptionTable
+ .quad 0 // CertificateTable
+ .quad 0 // BaseRelocationTable
+
+ // Section table
+section_table:
+
+ /*
+ * The EFI application loader requires a relocation section
+ * because EFI applications must be relocatable. This is a
+ * dummy section as far as we are concerned.
+ */
+ .ascii ".reloc"
+ .byte 0
+ .byte 0 // end of 0 padding of section name
+ .long 0
+ .long 0
+ .long 0 // SizeOfRawData
+ .long 0 // PointerToRawData
+ .long 0 // PointerToRelocations
+ .long 0 // PointerToLineNumbers
+ .short 0 // NumberOfRelocations
+ .short 0 // NumberOfLineNumbers
+ .long 0x42100040 // Characteristics (section flags)
+
+
+ .ascii ".text"
+ .byte 0
+ .byte 0
+ .byte 0 // end of 0 padding of section name
+ .long _edata - stext // VirtualSize
+ .long stext - efi_head // VirtualAddress
+ .long _edata - stext // SizeOfRawData
+ .long stext - efi_head // PointerToRawData
+
+ .long 0 // PointerToRelocations (0 for executables)
+ .long 0 // PointerToLineNumbers (0 for executables)
+ .short 0 // NumberOfRelocations (0 for executables)
+ .short 0 // NumberOfLineNumbers (0 for executables)
+ .long 0xe0500020 // Characteristics (section flags)
+ .align 5
+#endif
ENTRY(stext)
mov x21, x0 // x21=FDT
+ bl el2_setup // Drop to EL1, w20=cpu_boot_mode
bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
- bl el2_setup // Drop to EL1
+ bl set_cpu_boot_mode_flag
mrs x22, midr_el1 // x22=cpuid
mov x0, x22
bl lookup_processor_type
@@ -142,21 +253,30 @@ ENDPROC(stext)
/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
+ *
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
+ * booted in EL1 or EL2 respectively.
*/
ENTRY(el2_setup)
mrs x0, CurrentEL
cmp x0, #PSR_MODE_EL2t
ccmp x0, #PSR_MODE_EL2h, #0x4, ne
- ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode
- add x0, x0, x28
- b.eq 1f
- str wzr, [x0] // Remember we don't have EL2...
+ b.ne 1f
+ mrs x0, sctlr_el2
+CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2
+CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2
+ msr sctlr_el2, x0
+ b 2f
+1: mrs x0, sctlr_el1
+CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1
+CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
+ msr sctlr_el1, x0
+ mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
+ isb
ret
/* Hyp configuration. */
-1: ldr w1, =BOOT_CPU_MODE_EL2
- str w1, [x0, #4] // This CPU has EL2
- mov x0, #(1 << 31) // 64-bit EL1
+2: mov x0, #(1 << 31) // 64-bit EL1
msr hcr_el2, x0
/* Generic timers. */
@@ -165,6 +285,23 @@ ENTRY(el2_setup)
msr cnthctl_el2, x0
msr cntvoff_el2, xzr // Clear virtual offset
+#ifdef CONFIG_ARM_GIC_V3
+ /* GICv3 system register access */
+ mrs x0, id_aa64pfr0_el1
+ ubfx x0, x0, #24, #4
+ cmp x0, #1
+ b.ne 3f
+
+ mrs_s x0, ICC_SRE_EL2
+ orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
+ orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
+ msr_s ICC_SRE_EL2, x0
+ isb // Make sure SRE is now set
+ msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
+
+3:
+#endif
+
/* Populate ID registers. */
mrs x0, midr_el1
mrs x1, mpidr_el1
@@ -173,7 +310,8 @@ ENTRY(el2_setup)
/* sctlr_el1 */
mov x0, #0x0800 // Set/clear RES{1,0} bits
- movk x0, #0x30d0, lsl #16
+CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems
+CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
msr sctlr_el1, x0
/* Coprocessor traps. */
@@ -196,18 +334,34 @@ ENTRY(el2_setup)
PSR_MODE_EL1h)
msr spsr_el2, x0
msr elr_el2, lr
+ mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
eret
ENDPROC(el2_setup)
/*
+ * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
+ * in x20. See arch/arm64/include/asm/virt.h for more info.
+ */
+ENTRY(set_cpu_boot_mode_flag)
+ ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode
+ add x1, x1, x28
+ cmp w20, #BOOT_CPU_MODE_EL2
+ b.ne 1f
+ add x1, x1, #4
+1: str w20, [x1] // Save CPU boot mode
+ ret
+ENDPROC(set_cpu_boot_mode_flag)
+
+/*
* We need to find out the CPU boot mode long after boot, so we need to
* store it in a writable variable.
*
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
- .pushsection .data
+ .pushsection .data..cacheline_aligned
ENTRY(__boot_cpu_mode)
+ .align L1_CACHE_SHIFT
.long BOOT_CPU_MODE_EL2
.long 0
.popsection
@@ -217,7 +371,6 @@ ENTRY(__boot_cpu_mode)
.quad PAGE_OFFSET
#ifdef CONFIG_SMP
- .pushsection .smp.pen.text, "ax"
.align 3
1: .quad .
.quad secondary_holding_pen_release
@@ -227,8 +380,9 @@ ENTRY(__boot_cpu_mode)
* cores are held until we're ready for them to initialise.
*/
ENTRY(secondary_holding_pen)
- bl __calc_phys_offset // x24=phys offset
- bl el2_setup // Drop to EL1
+ bl el2_setup // Drop to EL1, w20=cpu_boot_mode
+ bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
+ bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
ldr x1, =MPIDR_HWID_BITMASK
and x0, x0, x1
@@ -242,7 +396,16 @@ pen: ldr x4, [x3]
wfe
b pen
ENDPROC(secondary_holding_pen)
- .popsection
+
+ /*
+ * Secondary entry point that jumps straight into the kernel. Only to
+ * be used where CPUs are brought online dynamically by the kernel.
+ */
+ENTRY(secondary_entry)
+ bl __calc_phys_offset // x2=phys offset
+ bl el2_setup // Drop to EL1
+ b secondary_startup
+ENDPROC(secondary_entry)
ENTRY(secondary_startup)
/*
@@ -254,7 +417,7 @@ ENTRY(secondary_startup)
mov x23, x0 // x23=current cpu_table
cbz x23, __error_p // invalid processor (x23=0)?
- pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1
+ pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1
ldr x12, [x23, #CPU_INFO_SETUP]
add x12, x12, x28 // __virt_to_phys
blr x12 // initialise processor
@@ -296,8 +459,13 @@ ENDPROC(__enable_mmu)
* x27 = *virtual* address to jump to upon completion
*
* other registers depend on the function called upon completion
+ *
+ * We align the entire function to the smallest power of two larger than it to
+ * ensure it fits within a single block map entry. Otherwise, were PHYS_OFFSET
+ * close to the end of a 512MB or 1GB block, we might require an additional
+ * table to map the entire function.
*/
- .align 6
+ .align 4
__turn_mmu_on:
msr sctlr_el1, x0
isb
@@ -340,26 +508,18 @@ ENDPROC(__calc_phys_offset)
* Preserves: tbl, flags
* Corrupts: phys, start, end, pstate
*/
- .macro create_block_map, tbl, flags, phys, start, end, idmap=0
+ .macro create_block_map, tbl, flags, phys, start, end
lsr \phys, \phys, #BLOCK_SHIFT
- .if \idmap
- and \start, \phys, #PTRS_PER_PTE - 1 // table index
- .else
lsr \start, \start, #BLOCK_SHIFT
and \start, \start, #PTRS_PER_PTE - 1 // table index
- .endif
orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
- .ifnc \start,\end
lsr \end, \end, #BLOCK_SHIFT
and \end, \end, #PTRS_PER_PTE - 1 // table end index
- .endif
9999: str \phys, [\tbl, \start, lsl #3] // store the entry
- .ifnc \start,\end
add \start, \start, #1 // next entry
add \phys, \phys, #BLOCK_SIZE // next block
cmp \start, \end
b.ls 9999b
- .endif
.endm
/*
@@ -368,10 +528,19 @@ ENDPROC(__calc_phys_offset)
* - identity mapping to enable the MMU (low address, TTBR0)
* - first few MB of the kernel linear mapping to jump to once the MMU has
* been enabled, including the FDT blob (TTBR1)
- * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1)
+ * - pgd entry for fixed mappings (TTBR1)
*/
__create_page_tables:
- pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses
+ pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses
+ mov x27, lr
+
+ /*
+ * Invalidate the idmap and swapper page tables to avoid potential
+ * dirty cache lines being evicted.
+ */
+ mov x0, x25
+ add x1, x26, #SWAPPER_DIR_SIZE
+ bl __inval_cache_range
/*
* Clear the idmap and swapper page tables.
@@ -391,9 +560,13 @@ __create_page_tables:
* Create the identity mapping.
*/
add x0, x25, #PAGE_SIZE // section table address
- adr x3, __turn_mmu_on // virtual/physical address
+ ldr x3, =KERNEL_START
+ add x3, x3, x28 // __pa(KERNEL_START)
create_pgd_entry x25, x0, x3, x5, x6
- create_block_map x0, x7, x3, x5, x5, idmap=1
+ ldr x6, =KERNEL_END
+ mov x5, x3 // __pa(KERNEL_START)
+ add x6, x6, x28 // __pa(KERNEL_END)
+ create_block_map x0, x7, x3, x5, x6
/*
* Map the kernel image (starting with PHYS_OFFSET).
@@ -401,7 +574,7 @@ __create_page_tables:
add x0, x26, #PAGE_SIZE // section table address
mov x5, #PAGE_OFFSET
create_pgd_entry x26, x0, x5, x3, x6
- ldr x6, =KERNEL_END - 1
+ ldr x6, =KERNEL_END
mov x3, x24 // phys offset
create_block_map x0, x7, x3, x5, x6
@@ -421,15 +594,23 @@ __create_page_tables:
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6
1:
-#ifdef CONFIG_EARLY_PRINTK
/*
- * Create the pgd entry for the UART mapping. The full mapping is done
- * later based earlyprintk kernel parameter.
+ * Create the pgd entry for the fixed mappings.
*/
- ldr x5, =EARLYCON_IOBASE // UART virtual address
+ ldr x5, =FIXADDR_TOP // Fixed mapping virtual address
add x0, x26, #2 * PAGE_SIZE // section table address
create_pgd_entry x26, x0, x5, x6, x7
-#endif
+
+ /*
+ * Since the page tables have been populated with non-cacheable
+ * accesses (MMU disabled), invalidate the idmap and swapper page
+ * tables again to remove any speculatively loaded cache lines.
+ */
+ mov x0, x25
+ add x1, x26, #SWAPPER_DIR_SIZE
+ bl __inval_cache_range
+
+ mov lr, x27
ret
ENDPROC(__create_page_tables)
.ltorg
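
__create_page_tables now follows an invalidate/populate/invalidate pattern,
since the tables are written with the MMU (and hence cacheability) off. A
minimal C sketch of the pattern, with hypothetical helper names standing in
for the assembly routines used above:

#include <stddef.h>

void inval_cache_range(void *start, size_t size);	/* stand-in */
void populate_tables(void *tables);			/* stand-in */

static void build_tables_with_mmu_off(void *tables, size_t size)
{
	inval_cache_range(tables, size);	/* drop stale dirty lines */
	populate_tables(tables);		/* non-cacheable writes */
	inval_cache_range(tables, size);	/* drop speculatively loaded lines */
}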
@@ -438,10 +619,8 @@ ENDPROC(__create_page_tables)
.type __switch_data, %object
__switch_data:
.quad __mmap_switched
- .quad __data_loc // x4
- .quad _data // x5
.quad __bss_start // x6
- .quad _end // x7
+ .quad __bss_stop // x7
.quad processor_id // x4
.quad __fdt_pointer // x5
.quad memstart_addr // x6
@@ -454,15 +633,7 @@ __switch_data:
__mmap_switched:
adr x3, __switch_data + 8
- ldp x4, x5, [x3], #16
ldp x6, x7, [x3], #16
- cmp x4, x5 // Copy data segment if needed
-1: ccmp x5, x6, #4, ne
- b.eq 2f
- ldr x16, [x4], #8
- str x16, [x5], #8
- b 1b
-2:
1: cmp x6, x7
b.hs 2f
str xzr, [x6], #8 // Clear BSS
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 5ab825c59db9..6de3460ede4c 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -20,13 +20,14 @@
#define pr_fmt(fmt) "hw-breakpoint: " fmt
+#include <linux/compat.h>
+#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/smp.h>
-#include <asm/compat.h>
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h>
@@ -169,15 +170,68 @@ static enum debug_el debug_exception_level(int privilege)
}
}
-/*
- * Install a perf counter breakpoint.
+enum hw_breakpoint_ops {
+ HW_BREAKPOINT_INSTALL,
+ HW_BREAKPOINT_UNINSTALL,
+ HW_BREAKPOINT_RESTORE
+};
+
+/**
+ * hw_breakpoint_slot_setup - Find and setup a perf slot according to
+ * operations
+ *
+ * @slots: pointer to array of slots
+ * @max_slots: max number of slots
+ * @bp: perf_event to setup
+ * @ops: operation to be carried out on the slot
+ *
+ * Return:
+ * slot index on success
+ * -ENOSPC if no slot is available/matches
+ * -EINVAL on wrong operations parameter
*/
-int arch_install_hw_breakpoint(struct perf_event *bp)
+static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots,
+ struct perf_event *bp,
+ enum hw_breakpoint_ops ops)
+{
+ int i;
+ struct perf_event **slot;
+
+ for (i = 0; i < max_slots; ++i) {
+ slot = &slots[i];
+ switch (ops) {
+ case HW_BREAKPOINT_INSTALL:
+ if (!*slot) {
+ *slot = bp;
+ return i;
+ }
+ break;
+ case HW_BREAKPOINT_UNINSTALL:
+ if (*slot == bp) {
+ *slot = NULL;
+ return i;
+ }
+ break;
+ case HW_BREAKPOINT_RESTORE:
+ if (*slot == bp)
+ return i;
+ break;
+ default:
+ pr_warn_once("Unhandled hw breakpoint ops %d\n", ops);
+ return -EINVAL;
+ }
+ }
+ return -ENOSPC;
+}
+
+static int hw_breakpoint_control(struct perf_event *bp,
+ enum hw_breakpoint_ops ops)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
- struct perf_event **slot, **slots;
+ struct perf_event **slots;
struct debug_info *debug_info = &current->thread.debug;
int i, max_slots, ctrl_reg, val_reg, reg_enable;
+ enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege);
u32 ctrl;
if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
@@ -196,67 +250,54 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
reg_enable = !debug_info->wps_disabled;
}
- for (i = 0; i < max_slots; ++i) {
- slot = &slots[i];
-
- if (!*slot) {
- *slot = bp;
- break;
- }
- }
-
- if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot"))
- return -ENOSPC;
+ i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops);
- /* Ensure debug monitors are enabled at the correct exception level. */
- enable_debug_monitors(debug_exception_level(info->ctrl.privilege));
+ if (WARN_ONCE(i < 0, "Can't find any breakpoint slot"))
+ return i;
- /* Setup the address register. */
- write_wb_reg(val_reg, i, info->address);
+ switch (ops) {
+ case HW_BREAKPOINT_INSTALL:
+ /*
+ * Ensure debug monitors are enabled at the correct exception
+ * level.
+ */
+ enable_debug_monitors(dbg_el);
+ /* Fall through */
+ case HW_BREAKPOINT_RESTORE:
+ /* Setup the address register. */
+ write_wb_reg(val_reg, i, info->address);
+
+ /* Setup the control register. */
+ ctrl = encode_ctrl_reg(info->ctrl);
+ write_wb_reg(ctrl_reg, i,
+ reg_enable ? ctrl | 0x1 : ctrl & ~0x1);
+ break;
+ case HW_BREAKPOINT_UNINSTALL:
+ /* Reset the control register. */
+ write_wb_reg(ctrl_reg, i, 0);
- /* Setup the control register. */
- ctrl = encode_ctrl_reg(info->ctrl);
- write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1);
+ /*
+ * Release the debug monitors for the correct exception
+ * level.
+ */
+ disable_debug_monitors(dbg_el);
+ break;
+ }
return 0;
}
-void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+/*
+ * Install a perf counter breakpoint.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
{
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
- struct perf_event **slot, **slots;
- int i, max_slots, base;
-
- if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
- /* Breakpoint */
- base = AARCH64_DBG_REG_BCR;
- slots = __get_cpu_var(bp_on_reg);
- max_slots = core_num_brps;
- } else {
- /* Watchpoint */
- base = AARCH64_DBG_REG_WCR;
- slots = __get_cpu_var(wp_on_reg);
- max_slots = core_num_wrps;
- }
-
- /* Remove the breakpoint. */
- for (i = 0; i < max_slots; ++i) {
- slot = &slots[i];
-
- if (*slot == bp) {
- *slot = NULL;
- break;
- }
- }
-
- if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot"))
- return;
-
- /* Reset the control register. */
- write_wb_reg(base, i, 0);
+ return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL);
+}
- /* Release the debug monitors for the correct exception level. */
- disable_debug_monitors(debug_exception_level(info->ctrl.privilege));
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+ hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL);
}
static int get_hbp_len(u8 hbp_len)
@@ -806,18 +847,36 @@ void hw_breakpoint_thread_switch(struct task_struct *next)
/*
* CPU initialisation.
*/
-static void reset_ctrl_regs(void *unused)
+static void hw_breakpoint_reset(void *unused)
{
int i;
-
- for (i = 0; i < core_num_brps; ++i) {
- write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL);
- write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL);
+ struct perf_event **slots;
+ /*
+ * When a CPU goes through cold-boot, it does not have any installed
+ * slot, so it is safe to share the same function for restoring and
+ * resetting breakpoints; when a CPU is hotplugged in, it goes
+ * through the slots, which are all empty, hence it just resets control
+ * and value for debug registers.
+ * When this function is triggered on warm-boot through a CPU PM
+ * notifier some slots might be initialized; if so they are
+ * reprogrammed according to the debug slots content.
+ */
+ for (slots = __get_cpu_var(bp_on_reg), i = 0; i < core_num_brps; ++i) {
+ if (slots[i]) {
+ hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE);
+ } else {
+ write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL);
+ write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL);
+ }
}
- for (i = 0; i < core_num_wrps; ++i) {
- write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL);
- write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL);
+ for (slots = __get_cpu_var(wp_on_reg), i = 0; i < core_num_wrps; ++i) {
+ if (slots[i]) {
+ hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE);
+ } else {
+ write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL);
+ write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL);
+ }
}
}
@@ -827,7 +886,7 @@ static int __cpuinit hw_breakpoint_reset_notify(struct notifier_block *self,
{
int cpu = (long)hcpu;
if (action == CPU_ONLINE)
- smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1);
+ smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
return NOTIFY_OK;
}
@@ -835,6 +894,14 @@ static struct notifier_block __cpuinitdata hw_breakpoint_reset_nb = {
.notifier_call = hw_breakpoint_reset_notify,
};
+#ifdef CONFIG_ARM64_CPU_SUSPEND
+extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *));
+#else
+static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+{
+}
+#endif
+
/*
* One-time initialisation.
*/
@@ -850,8 +917,8 @@ static int __init arch_hw_breakpoint_init(void)
* Reset the breakpoint resources. We assume that a halting
* debugger will leave the world in a nice state for us.
*/
- smp_call_function(reset_ctrl_regs, NULL, 1);
- reset_ctrl_regs(NULL);
+ smp_call_function(hw_breakpoint_reset, NULL, 1);
+ hw_breakpoint_reset(NULL);
/* Register debug fault handlers. */
hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
@@ -861,6 +928,8 @@ static int __init arch_hw_breakpoint_init(void)
/* Register hotplug notifier. */
register_cpu_notifier(&hw_breakpoint_reset_nb);
+ /* Register cpu_suspend hw breakpoint restore hook */
+ cpu_suspend_set_dbg_restorer(hw_breakpoint_reset);
return 0;
}
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 0959611d9ff1..a272f335c289 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
#include <asm/ptrace.h>
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
new file mode 100644
index 000000000000..92f36835486b
--- /dev/null
+++ b/arch/arm64/kernel/insn.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/stop_machine.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/insn.h>
+
+static int aarch64_insn_encoding_class[] = {
+ AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_LDST,
+ AARCH64_INSN_CLS_DP_REG,
+ AARCH64_INSN_CLS_LDST,
+ AARCH64_INSN_CLS_DP_FPSIMD,
+ AARCH64_INSN_CLS_DP_IMM,
+ AARCH64_INSN_CLS_DP_IMM,
+ AARCH64_INSN_CLS_BR_SYS,
+ AARCH64_INSN_CLS_BR_SYS,
+ AARCH64_INSN_CLS_LDST,
+ AARCH64_INSN_CLS_DP_REG,
+ AARCH64_INSN_CLS_LDST,
+ AARCH64_INSN_CLS_DP_FPSIMD,
+};
+
+enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn)
+{
+ return aarch64_insn_encoding_class[(insn >> 25) & 0xf];
+}
+
+/* NOP is an alias of HINT */
+bool __kprobes aarch64_insn_is_nop(u32 insn)
+{
+ if (!aarch64_insn_is_hint(insn))
+ return false;
+
+ switch (insn & 0xFE0) {
+ case AARCH64_INSN_HINT_YIELD:
+ case AARCH64_INSN_HINT_WFE:
+ case AARCH64_INSN_HINT_WFI:
+ case AARCH64_INSN_HINT_SEV:
+ case AARCH64_INSN_HINT_SEVL:
+ return false;
+ default:
+ return true;
+ }
+}
+
+/*
+ * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
+ * little-endian.
+ */
+int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
+{
+ int ret;
+ u32 val;
+
+ ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE);
+ if (!ret)
+ *insnp = le32_to_cpu(val);
+
+ return ret;
+}
+
+int __kprobes aarch64_insn_write(void *addr, u32 insn)
+{
+ insn = cpu_to_le32(insn);
+ return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE);
+}
+
+static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
+{
+ if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS)
+ return false;
+
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_svc(insn) ||
+ aarch64_insn_is_hvc(insn) ||
+ aarch64_insn_is_smc(insn) ||
+ aarch64_insn_is_brk(insn) ||
+ aarch64_insn_is_nop(insn);
+}
+
+/*
+ * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
+ * Section B2.6.5 "Concurrent modification and execution of instructions":
+ * Concurrent modification and execution of instructions can lead to the
+ * resulting instruction performing any behavior that can be achieved by
+ * executing any sequence of instructions that can be executed from the
+ * same Exception level, except where the instruction before modification
+ * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC,
+ * or SMC instruction.
+ */
+bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn)
+{
+ return __aarch64_insn_hotpatch_safe(old_insn) &&
+ __aarch64_insn_hotpatch_safe(new_insn);
+}
+
+int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
+{
+ u32 *tp = addr;
+ int ret;
+
+ /* A64 instructions must be word aligned */
+ if ((uintptr_t)tp & 0x3)
+ return -EINVAL;
+
+ ret = aarch64_insn_write(tp, insn);
+ if (ret == 0)
+ flush_icache_range((uintptr_t)tp,
+ (uintptr_t)tp + AARCH64_INSN_SIZE);
+
+ return ret;
+}
+
+struct aarch64_insn_patch {
+ void **text_addrs;
+ u32 *new_insns;
+ int insn_cnt;
+ atomic_t cpu_count;
+};
+
+static int __kprobes aarch64_insn_patch_text_cb(void *arg)
+{
+ int i, ret = 0;
+ struct aarch64_insn_patch *pp = arg;
+
+ /* The first CPU becomes master */
+ if (atomic_inc_return(&pp->cpu_count) == 1) {
+ for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
+ ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+ pp->new_insns[i]);
+ /*
+ * aarch64_insn_patch_text_nosync() calls flush_icache_range(),
+ * which ends with "dsb; isb" pair guaranteeing global
+ * visibility.
+ */
+ atomic_set(&pp->cpu_count, -1);
+ } else {
+ while (atomic_read(&pp->cpu_count) != -1)
+ cpu_relax();
+ isb();
+ }
+
+ return ret;
+}
+
+int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
+{
+ struct aarch64_insn_patch patch = {
+ .text_addrs = addrs,
+ .new_insns = insns,
+ .insn_cnt = cnt,
+ .cpu_count = ATOMIC_INIT(0),
+ };
+
+ if (cnt <= 0)
+ return -EINVAL;
+
+ return stop_machine(aarch64_insn_patch_text_cb, &patch,
+ cpu_online_mask);
+}
+
+int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
+{
+ int ret;
+ u32 insn;
+
+ /* Unsafe to patch multiple instructions without synchronization */
+ if (cnt == 1) {
+ ret = aarch64_insn_read(addrs[0], &insn);
+ if (ret)
+ return ret;
+
+ if (aarch64_insn_hotpatch_safe(insn, insns[0])) {
+ /*
+ * ARMv8 architecture doesn't guarantee all CPUs see
+ * the new instruction after returning from function
+ * aarch64_insn_patch_text_nosync(). So send IPIs to
+ * all other CPUs to achieve instruction
+ * synchronization.
+ */
+ ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]);
+ kick_all_cpus_sync();
+ return ret;
+ }
+ }
+
+ return aarch64_insn_patch_text_sync(addrs, insns, cnt);
+}
+
+u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+ u32 insn, u64 imm)
+{
+ u32 immlo, immhi, lomask, himask, mask;
+ int shift;
+
+ switch (type) {
+ case AARCH64_INSN_IMM_ADR:
+ lomask = 0x3;
+ himask = 0x7ffff;
+ immlo = imm & lomask;
+ imm >>= 2;
+ immhi = imm & himask;
+ imm = (immlo << 24) | (immhi);
+ mask = (lomask << 24) | (himask);
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_26:
+ mask = BIT(26) - 1;
+ shift = 0;
+ break;
+ case AARCH64_INSN_IMM_19:
+ mask = BIT(19) - 1;
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_16:
+ mask = BIT(16) - 1;
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_14:
+ mask = BIT(14) - 1;
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_12:
+ mask = BIT(12) - 1;
+ shift = 10;
+ break;
+ case AARCH64_INSN_IMM_9:
+ mask = BIT(9) - 1;
+ shift = 12;
+ break;
+ default:
+ pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
+ type);
+ return 0;
+ }
+
+ /* Update the immediate field. */
+ insn &= ~(mask << shift);
+ insn |= (imm & mask) << shift;
+
+ return insn;
+}
+
+u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_branch_type type)
+{
+ u32 insn;
+ long offset;
+
+ /*
+ * PC: A 64-bit Program Counter holding the address of the current
+ * instruction. A64 instructions must be word-aligned.
+ */
+ BUG_ON((pc & 0x3) || (addr & 0x3));
+
+ /*
+ * B/BL support a [-128M, 128M) offset.
+ * ARM64 virtual address arrangement guarantees all kernel and module
+ * texts are within +/-128M.
+ */
+ offset = ((long)addr - (long)pc);
+ BUG_ON(offset < -SZ_128M || offset >= SZ_128M);
+
+ if (type == AARCH64_INSN_BRANCH_LINK)
+ insn = aarch64_insn_get_bl_value();
+ else
+ insn = aarch64_insn_get_b_value();
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn,
+ offset >> 2);
+}
+
+u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op)
+{
+ return aarch64_insn_get_hint_value() | op;
+}
+
+u32 __kprobes aarch64_insn_gen_nop(void)
+{
+ return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
+}
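
A hypothetical usage sketch (not part of the patch): turning a NOP into an
unconditional branch with the helpers above. Both the old NOP and the new B
are hotpatch-safe under the quoted ARMv8 concurrent-modification rules, so
aarch64_insn_patch_text() can take the kick_all_cpus_sync() fast path
instead of stop_machine():

static int patch_nop_to_branch(void *addr, unsigned long target)
{
	u32 insn = aarch64_insn_gen_branch_imm((unsigned long)addr, target,
					       AARCH64_INSN_BRANCH_NOLINK);

	return aarch64_insn_patch_text(&addr, &insn, 1);
}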
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index ecb3354292ed..473e5dbf8f39 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -81,3 +81,64 @@ void __init init_IRQ(void)
if (!handle_arch_irq)
panic("No interrupt controller found.");
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static bool migrate_one_irq(struct irq_desc *desc)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ const struct cpumask *affinity = d->affinity;
+ struct irq_chip *c;
+ bool ret = false;
+
+ /*
+ * If this is a per-CPU interrupt, or the affinity does not
+ * include this CPU, then we have nothing to do.
+ */
+ if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
+ return false;
+
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ affinity = cpu_online_mask;
+ ret = true;
+ }
+
+ c = irq_data_get_irq_chip(d);
+ if (!c->irq_set_affinity)
+ pr_debug("IRQ%u: unable to set affinity\n", d->irq);
+ else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret)
+ cpumask_copy(d->affinity, affinity);
+
+ return ret;
+}
+
+/*
+ * The current CPU has been marked offline. Migrate IRQs off this CPU.
+ * If the affinity settings do not allow other CPUs, force them onto any
+ * available CPU.
+ *
+ * Note: we must iterate over all IRQs, whether they have an attached
+ * action structure or not, as we need to get chained interrupts too.
+ */
+void migrate_irqs(void)
+{
+ unsigned int i;
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ for_each_irq_desc(i, desc) {
+ bool affinity_broken;
+
+ raw_spin_lock(&desc->lock);
+ affinity_broken = migrate_one_irq(desc);
+ raw_spin_unlock(&desc->lock);
+
+ if (affinity_broken)
+ pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
+ i, smp_processor_id());
+ }
+
+ local_irq_restore(flags);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
new file mode 100644
index 000000000000..263a166291fb
--- /dev/null
+++ b/arch/arm64/kernel/jump_label.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Based on arch/arm/kernel/jump_label.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/insn.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+static void __arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ bool is_static)
+{
+ void *addr = (void *)entry->code;
+ u32 insn;
+
+ if (type == JUMP_LABEL_ENABLE) {
+ insn = aarch64_insn_gen_branch_imm(entry->code,
+ entry->target,
+ AARCH64_INSN_BRANCH_NOLINK);
+ } else {
+ insn = aarch64_insn_gen_nop();
+ }
+
+ if (is_static)
+ aarch64_insn_patch_text_nosync(addr, insn);
+ else
+ aarch64_insn_patch_text(&addr, &insn, 1);
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ __arch_jump_label_transform(entry, type, false);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ __arch_jump_label_transform(entry, type, true);
+}
+
+#endif /* HAVE_JUMP_LABEL */
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
new file mode 100644
index 000000000000..75c9cf1aafee
--- /dev/null
+++ b/arch/arm64/kernel/kgdb.c
@@ -0,0 +1,336 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/kernel/kgdb.c
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irq.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <asm/traps.h>
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+ { "x0", 8, offsetof(struct pt_regs, regs[0])},
+ { "x1", 8, offsetof(struct pt_regs, regs[1])},
+ { "x2", 8, offsetof(struct pt_regs, regs[2])},
+ { "x3", 8, offsetof(struct pt_regs, regs[3])},
+ { "x4", 8, offsetof(struct pt_regs, regs[4])},
+ { "x5", 8, offsetof(struct pt_regs, regs[5])},
+ { "x6", 8, offsetof(struct pt_regs, regs[6])},
+ { "x7", 8, offsetof(struct pt_regs, regs[7])},
+ { "x8", 8, offsetof(struct pt_regs, regs[8])},
+ { "x9", 8, offsetof(struct pt_regs, regs[9])},
+ { "x10", 8, offsetof(struct pt_regs, regs[10])},
+ { "x11", 8, offsetof(struct pt_regs, regs[11])},
+ { "x12", 8, offsetof(struct pt_regs, regs[12])},
+ { "x13", 8, offsetof(struct pt_regs, regs[13])},
+ { "x14", 8, offsetof(struct pt_regs, regs[14])},
+ { "x15", 8, offsetof(struct pt_regs, regs[15])},
+ { "x16", 8, offsetof(struct pt_regs, regs[16])},
+ { "x17", 8, offsetof(struct pt_regs, regs[17])},
+ { "x18", 8, offsetof(struct pt_regs, regs[18])},
+ { "x19", 8, offsetof(struct pt_regs, regs[19])},
+ { "x20", 8, offsetof(struct pt_regs, regs[20])},
+ { "x21", 8, offsetof(struct pt_regs, regs[21])},
+ { "x22", 8, offsetof(struct pt_regs, regs[22])},
+ { "x23", 8, offsetof(struct pt_regs, regs[23])},
+ { "x24", 8, offsetof(struct pt_regs, regs[24])},
+ { "x25", 8, offsetof(struct pt_regs, regs[25])},
+ { "x26", 8, offsetof(struct pt_regs, regs[26])},
+ { "x27", 8, offsetof(struct pt_regs, regs[27])},
+ { "x28", 8, offsetof(struct pt_regs, regs[28])},
+ { "x29", 8, offsetof(struct pt_regs, regs[29])},
+ { "x30", 8, offsetof(struct pt_regs, regs[30])},
+ { "sp", 8, offsetof(struct pt_regs, sp)},
+ { "pc", 8, offsetof(struct pt_regs, pc)},
+ { "pstate", 8, offsetof(struct pt_regs, pstate)},
+ { "v0", 16, -1 },
+ { "v1", 16, -1 },
+ { "v2", 16, -1 },
+ { "v3", 16, -1 },
+ { "v4", 16, -1 },
+ { "v5", 16, -1 },
+ { "v6", 16, -1 },
+ { "v7", 16, -1 },
+ { "v8", 16, -1 },
+ { "v9", 16, -1 },
+ { "v10", 16, -1 },
+ { "v11", 16, -1 },
+ { "v12", 16, -1 },
+ { "v13", 16, -1 },
+ { "v14", 16, -1 },
+ { "v15", 16, -1 },
+ { "v16", 16, -1 },
+ { "v17", 16, -1 },
+ { "v18", 16, -1 },
+ { "v19", 16, -1 },
+ { "v20", 16, -1 },
+ { "v21", 16, -1 },
+ { "v22", 16, -1 },
+ { "v23", 16, -1 },
+ { "v24", 16, -1 },
+ { "v25", 16, -1 },
+ { "v26", 16, -1 },
+ { "v27", 16, -1 },
+ { "v28", 16, -1 },
+ { "v29", 16, -1 },
+ { "v30", 16, -1 },
+ { "v31", 16, -1 },
+ { "fpsr", 4, -1 },
+ { "fpcr", 4, -1 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return NULL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+ dbg_reg_def[regno].size);
+ else
+ memset(mem, 0, dbg_reg_def[regno].size);
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return -EINVAL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+ dbg_reg_def[regno].size);
+ return 0;
+}
+
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+ struct pt_regs *thread_regs;
+
+ /* Initialize to zero */
+ memset((char *)gdb_regs, 0, NUMREGBYTES);
+ thread_regs = task_pt_regs(task);
+ memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->pc = pc;
+}
+
+static int compiled_break;
+
+static void kgdb_arch_update_addr(struct pt_regs *regs,
+ char *remcom_in_buffer)
+{
+ unsigned long addr;
+ char *ptr;
+
+ ptr = &remcom_in_buffer[1];
+ if (kgdb_hex2long(&ptr, &addr))
+ kgdb_arch_set_pc(regs, addr);
+ else if (compiled_break == 1)
+ kgdb_arch_set_pc(regs, regs->pc + 4);
+
+ compiled_break = 0;
+}
+
+int kgdb_arch_handle_exception(int exception_vector, int signo,
+ int err_code, char *remcom_in_buffer,
+ char *remcom_out_buffer,
+ struct pt_regs *linux_regs)
+{
+ int err;
+
+ switch (remcom_in_buffer[0]) {
+ case 'D':
+ case 'k':
+ /*
+ * Packet D (Detach), k (kill). No special handling
+ * is required here. Handle same as c packet.
+ */
+ case 'c':
+ /*
+ * Packet c (Continue) to continue executing.
+ * Set pc to required address.
+ * Try to read optional parameter and set pc.
+ * If this was a compiled breakpoint, we need to move
+ * to the next instruction; otherwise we will just hit
+ * the same breakpoint over and over again.
+ */
+ kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+ kgdb_single_step = 0;
+
+ /*
+ * Received continue command, disable single step
+ */
+ if (kernel_active_single_step())
+ kernel_disable_single_step();
+
+ err = 0;
+ break;
+ case 's':
+ /*
+ * Update step address value with address passed
+ * with step packet.
+ * On debug exception return, PC is copied to ELR,
+ * so just update PC.
+ * If no step address is passed, resume from the address
+ * pointed to by PC and do not update PC.
+ */
+ kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+ atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+ kgdb_single_step = 1;
+
+ /*
+ * Enable single step handling
+ */
+ if (!kernel_active_single_step())
+ kernel_enable_single_step(linux_regs);
+ err = 0;
+ break;
+ default:
+ err = -1;
+ }
+ return err;
+}
+
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+ return 0;
+}
+
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ compiled_break = 1;
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+
+ return 0;
+}
+
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+ return 0;
+}
+
+static struct break_hook kgdb_brkpt_hook = {
+ .esr_mask = 0xffffffff,
+ .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM),
+ .fn = kgdb_brk_fn
+};
+
+static struct break_hook kgdb_compiled_brkpt_hook = {
+ .esr_mask = 0xffffffff,
+ .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM),
+ .fn = kgdb_compiled_brk_fn
+};
+
+static struct step_hook kgdb_step_hook = {
+ .fn = kgdb_step_brk_fn
+};
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+ kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ local_irq_enable();
+ smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+ local_irq_disable();
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+ struct pt_regs *regs = args->regs;
+
+ if (kgdb_handle_exception(1, args->signr, cmd, regs))
+ return NOTIFY_DONE;
+ return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __kgdb_notify(ptr, cmd);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_notify,
+ /*
+ * Want to be lowest priority
+ */
+ .priority = -INT_MAX,
+};
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initialization.
+ * This function will handle the initialization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+ int ret = register_die_notifier(&kgdb_notifier);
+
+ if (ret != 0)
+ return ret;
+
+ register_break_hook(&kgdb_brkpt_hook);
+ register_break_hook(&kgdb_compiled_brkpt_hook);
+ register_step_hook(&kgdb_step_hook);
+ return 0;
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitialization.
+ * This function will handle the uninitialization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+ unregister_break_hook(&kgdb_brkpt_hook);
+ unregister_break_hook(&kgdb_compiled_brkpt_hook);
+ unregister_step_hook(&kgdb_step_hook);
+ unregister_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * AArch64 instructions are always little-endian, so the break
+ * instruction is encoded in LE format.
+ */
+struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = {
+ KGDB_DYN_BRK_INS_BYTE0,
+ KGDB_DYN_BRK_INS_BYTE1,
+ KGDB_DYN_BRK_INS_BYTE2,
+ KGDB_DYN_BRK_INS_BYTE3,
+ }
+};
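
For reference, a sketch of how these byte values relate to the A64 BRK
encoding (BRK #imm16 is 0xd4200000 | imm16 << 5). The macro names below are
illustrative; the real definitions live in the asm/kgdb.h header added
elsewhere in this series:

#define AARCH64_BRK_INSN(imm16)	(0xd4200000 | ((u32)(imm16) << 5))

/* little-endian byte n of the dynamic breakpoint instruction */
#define KGDB_BRK_INS_BYTE(n, imm16) \
	((AARCH64_BRK_INSN(imm16) >> ((n) * 8)) & 0xff)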
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 8b69ecb1d8bc..7787208e8cc6 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -27,6 +27,9 @@
*
* See Documentation/arm/kernel_user_helpers.txt for formal definitions.
*/
+
+#include <asm/unistd32.h>
+
.align 5
.globl __kuser_helper_start
__kuser_helper_start:
@@ -35,33 +38,32 @@ __kuser_cmpxchg64: // 0xffff0f60
.inst 0xe92d00f0 // push {r4, r5, r6, r7}
.inst 0xe1c040d0 // ldrd r4, r5, [r0]
.inst 0xe1c160d0 // ldrd r6, r7, [r1]
- .inst 0xf57ff05f // dmb sy
.inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2]
.inst 0xe0303004 // eors r3, r0, r4
.inst 0x00313005 // eoreqs r3, r1, r5
- .inst 0x01a23f96 // strexdeq r3, r6, [r2]
+ .inst 0x01a23e96 // stlexdeq r3, r6, [r2]
.inst 0x03330001 // teqeq r3, #1
.inst 0x0afffff9 // beq 1b
- .inst 0xf57ff05f // dmb sy
+ .inst 0xf57ff05b // dmb ish
.inst 0xe2730000 // rsbs r0, r3, #0
.inst 0xe8bd00f0 // pop {r4, r5, r6, r7}
.inst 0xe12fff1e // bx lr
.align 5
__kuser_memory_barrier: // 0xffff0fa0
- .inst 0xf57ff05f // dmb sy
+ .inst 0xf57ff05b // dmb ish
.inst 0xe12fff1e // bx lr
.align 5
__kuser_cmpxchg: // 0xffff0fc0
- .inst 0xf57ff05f // dmb sy
.inst 0xe1923f9f // 1: ldrex r3, [r2]
.inst 0xe0533000 // subs r3, r3, r0
- .inst 0x01823f91 // strexeq r3, r1, [r2]
+ .inst 0x01823e91 // stlexeq r3, r1, [r2]
.inst 0x03330001 // teqeq r3, #1
.inst 0x0afffffa // beq 1b
+ .inst 0xf57ff05b // dmb ish
.inst 0xe2730000 // rsbs r0, r3, #0
- .inst 0xeaffffef // b <__kuser_memory_barrier>
+ .inst 0xe12fff1e // bx lr
.align 5
__kuser_get_tls: // 0xffff0fe0
@@ -75,3 +77,42 @@ __kuser_helper_version: // 0xffff0ffc
.word ((__kuser_helper_end - __kuser_helper_start) >> 5)
.globl __kuser_helper_end
__kuser_helper_end:
+
+/*
+ * AArch32 sigreturn code
+ *
+ * For ARM syscalls, the syscall number has to be loaded into r7.
+ * We do not support an OABI userspace.
+ *
+ * For Thumb syscalls, we also pass the syscall number via r7. We therefore
+ * need two 16-bit instructions.
+ */
+ .globl __aarch32_sigret_code_start
+__aarch32_sigret_code_start:
+
+ /*
+ * ARM code
+ */
+ .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn
+ .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn
+
+ /*
+ * Thumb code
+ */
+ .byte __NR_compat_sigreturn, 0x27 // mov r7, #__NR_compat_sigreturn
+ .byte __NR_compat_sigreturn, 0xdf // svc #__NR_compat_sigreturn
+
+ /*
+ * ARM code
+ */
+ .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn
+ .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn
+
+ /*
+ * Thumb code
+ */
+ .byte __NR_compat_rt_sigreturn, 0x27 // mov r7, #__NR_compat_rt_sigreturn
+ .byte __NR_compat_rt_sigreturn, 0xdf // svc #__NR_compat_rt_sigreturn
+
+ .globl __aarch32_sigret_code_end
+__aarch32_sigret_code_end:
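
A worked example of the encodings, assuming __NR_compat_sigreturn is 119
(0x77, the ARM EABI value). The .byte sequences above then assemble,
little-endian, into:

	ARM:	e3a07077	mov  r7, #0x77
		ef000077	svc  0x77
	Thumb:	2777		movs r7, #0x77
		df77		svc  0x77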
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index ca0e3d55da99..df08a6e0287d 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -25,6 +25,10 @@
#include <linux/mm.h>
#include <linux/moduleloader.h>
#include <linux/vmalloc.h>
+#include <asm/insn.h>
+
+#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX
+#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16
void *module_alloc(unsigned long size)
{
@@ -94,25 +98,18 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
return 0;
}
-enum aarch64_imm_type {
- INSN_IMM_MOVNZ,
- INSN_IMM_MOVK,
- INSN_IMM_ADR,
- INSN_IMM_26,
- INSN_IMM_19,
- INSN_IMM_16,
- INSN_IMM_14,
- INSN_IMM_12,
- INSN_IMM_9,
-};
-
-static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm)
+static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
+ int lsb, enum aarch64_insn_imm_type imm_type)
{
- u32 immlo, immhi, lomask, himask, mask;
- int shift;
+ u64 imm, limit = 0;
+ s64 sval;
+ u32 insn = le32_to_cpu(*(u32 *)place);
+
+ sval = do_reloc(op, place, val);
+ sval >>= lsb;
+ imm = sval & 0xffff;
- switch (type) {
- case INSN_IMM_MOVNZ:
+ if (imm_type == AARCH64_INSN_IMM_MOVNZ) {
/*
* For signed MOVW relocations, we have to manipulate the
* instruction encoding depending on whether or not the
@@ -131,70 +128,12 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm)
*/
imm = ~imm;
}
- case INSN_IMM_MOVK:
- mask = BIT(16) - 1;
- shift = 5;
- break;
- case INSN_IMM_ADR:
- lomask = 0x3;
- himask = 0x7ffff;
- immlo = imm & lomask;
- imm >>= 2;
- immhi = imm & himask;
- imm = (immlo << 24) | (immhi);
- mask = (lomask << 24) | (himask);
- shift = 5;
- break;
- case INSN_IMM_26:
- mask = BIT(26) - 1;
- shift = 0;
- break;
- case INSN_IMM_19:
- mask = BIT(19) - 1;
- shift = 5;
- break;
- case INSN_IMM_16:
- mask = BIT(16) - 1;
- shift = 5;
- break;
- case INSN_IMM_14:
- mask = BIT(14) - 1;
- shift = 5;
- break;
- case INSN_IMM_12:
- mask = BIT(12) - 1;
- shift = 10;
- break;
- case INSN_IMM_9:
- mask = BIT(9) - 1;
- shift = 12;
- break;
- default:
- pr_err("encode_insn_immediate: unknown immediate encoding %d\n",
- type);
- return 0;
+ imm_type = AARCH64_INSN_IMM_MOVK;
}
- /* Update the immediate field. */
- insn &= ~(mask << shift);
- insn |= (imm & mask) << shift;
-
- return insn;
-}
-
-static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
- int lsb, enum aarch64_imm_type imm_type)
-{
- u64 imm, limit = 0;
- s64 sval;
- u32 insn = *(u32 *)place;
-
- sval = do_reloc(op, place, val);
- sval >>= lsb;
- imm = sval & 0xffff;
-
/* Update the instruction with the new encoding. */
- *(u32 *)place = encode_insn_immediate(imm_type, insn, imm);
+ insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
+ *(u32 *)place = cpu_to_le32(insn);
/* Shift out the immediate field. */
sval >>= 16;
@@ -203,9 +142,9 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
* For unsigned immediates, the overflow check is straightforward.
* For signed immediates, the sign bit is actually the bit past the
* most significant bit of the field.
- * The INSN_IMM_16 immediate type is unsigned.
+ * The AARCH64_INSN_IMM_16 immediate type is unsigned.
*/
- if (imm_type != INSN_IMM_16) {
+ if (imm_type != AARCH64_INSN_IMM_16) {
sval++;
limit++;
}
@@ -218,11 +157,11 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
}
static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
- int lsb, int len, enum aarch64_imm_type imm_type)
+ int lsb, int len, enum aarch64_insn_imm_type imm_type)
{
u64 imm, imm_mask;
s64 sval;
- u32 insn = *(u32 *)place;
+ u32 insn = le32_to_cpu(*(u32 *)place);
/* Calculate the relocation value. */
sval = do_reloc(op, place, val);
@@ -233,7 +172,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
imm = sval & imm_mask;
/* Update the instruction's immediate field. */
- *(u32 *)place = encode_insn_immediate(imm_type, insn, imm);
+ insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
+ *(u32 *)place = cpu_to_le32(insn);
/*
* Extract the upper value bits (including the sign bit) and
@@ -315,125 +255,125 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
overflow_check = false;
case R_AARCH64_MOVW_UABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- INSN_IMM_16);
+ AARCH64_INSN_IMM_16);
break;
case R_AARCH64_MOVW_UABS_G1_NC:
overflow_check = false;
case R_AARCH64_MOVW_UABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- INSN_IMM_16);
+ AARCH64_INSN_IMM_16);
break;
case R_AARCH64_MOVW_UABS_G2_NC:
overflow_check = false;
case R_AARCH64_MOVW_UABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- INSN_IMM_16);
+ AARCH64_INSN_IMM_16);
break;
case R_AARCH64_MOVW_UABS_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
- INSN_IMM_16);
+ AARCH64_INSN_IMM_16);
break;
case R_AARCH64_MOVW_SABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_SABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_SABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_PREL_G0_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- INSN_IMM_MOVK);
+ AARCH64_INSN_IMM_MOVK);
break;
case R_AARCH64_MOVW_PREL_G0:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_PREL_G1_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- INSN_IMM_MOVK);
+ AARCH64_INSN_IMM_MOVK);
break;
case R_AARCH64_MOVW_PREL_G1:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_PREL_G2_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- INSN_IMM_MOVK);
+ AARCH64_INSN_IMM_MOVK);
break;
case R_AARCH64_MOVW_PREL_G2:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_PREL_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
/* Immediate instruction relocations. */
case R_AARCH64_LD_PREL_LO19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- INSN_IMM_19);
+ AARCH64_INSN_IMM_19);
break;
case R_AARCH64_ADR_PREL_LO21:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
- INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR);
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
case R_AARCH64_ADR_PREL_PG_HI21:
ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
- INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR);
break;
case R_AARCH64_ADD_ABS_LO12_NC:
case R_AARCH64_LDST8_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_LDST16_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_LDST32_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_LDST64_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_LDST128_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_TSTBR14:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
- INSN_IMM_14);
+ AARCH64_INSN_IMM_14);
break;
case R_AARCH64_CONDBR19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- INSN_IMM_19);
+ AARCH64_INSN_IMM_19);
break;
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
- INSN_IMM_26);
+ AARCH64_INSN_IMM_26);
break;
default:
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index cea1594ff933..dfcd8fadde3c 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -22,6 +22,7 @@
#include <linux/bitmap.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/perf_event.h>
@@ -363,26 +364,53 @@ validate_group(struct perf_event *event)
}
static void
+armpmu_disable_percpu_irq(void *data)
+{
+ unsigned int irq = *(unsigned int *)data;
+ disable_percpu_irq(irq);
+}
+
+static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
- int i, irq, irqs;
+ int irq;
+ unsigned int i, irqs;
struct platform_device *pmu_device = armpmu->plat_device;
irqs = min(pmu_device->num_resources, num_possible_cpus());
+ if (!irqs)
+ return;
- for (i = 0; i < irqs; ++i) {
- if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
- continue;
- irq = platform_get_irq(pmu_device, i);
- if (irq >= 0)
- free_irq(irq, armpmu);
+ irq = platform_get_irq(pmu_device, 0);
+ if (irq <= 0)
+ return;
+
+ if (irq_is_percpu(irq)) {
+ on_each_cpu(armpmu_disable_percpu_irq, &irq, 1);
+ free_percpu_irq(irq, &cpu_hw_events);
+ } else {
+ for (i = 0; i < irqs; ++i) {
+ if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
+ continue;
+ irq = platform_get_irq(pmu_device, i);
+ if (irq > 0)
+ free_irq(irq, armpmu);
+ }
}
}
+static void
+armpmu_enable_percpu_irq(void *data)
+{
+ unsigned int irq = *(unsigned int *)data;
+ enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
- int i, err, irq, irqs;
+ int err, irq;
+ unsigned int i, irqs;
struct platform_device *pmu_device = armpmu->plat_device;
if (!pmu_device) {
@@ -391,39 +419,59 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
}
irqs = min(pmu_device->num_resources, num_possible_cpus());
- if (irqs < 1) {
+ if (!irqs) {
pr_err("no irqs for PMUs defined\n");
return -ENODEV;
}
- for (i = 0; i < irqs; ++i) {
- err = 0;
- irq = platform_get_irq(pmu_device, i);
- if (irq < 0)
- continue;
+ irq = platform_get_irq(pmu_device, 0);
+ if (irq <= 0) {
+ pr_err("failed to get valid irq for PMU device\n");
+ return -ENODEV;
+ }
- /*
- * If we have a single PMU interrupt that we can't shift,
- * assume that we're running on a uniprocessor machine and
- * continue. Otherwise, continue without this interrupt.
- */
- if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
- pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
- irq, i);
- continue;
- }
+ if (irq_is_percpu(irq)) {
+ err = request_percpu_irq(irq, armpmu->handle_irq,
+ "arm-pmu", &cpu_hw_events);
- err = request_irq(irq, armpmu->handle_irq,
- IRQF_NOBALANCING,
- "arm-pmu", armpmu);
if (err) {
- pr_err("unable to request IRQ%d for ARM PMU counters\n",
- irq);
+ pr_err("unable to request percpu IRQ%d for ARM PMU counters\n",
+ irq);
armpmu_release_hardware(armpmu);
return err;
}
- cpumask_set_cpu(i, &armpmu->active_irqs);
+ on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
+ } else {
+ for (i = 0; i < irqs; ++i) {
+ err = 0;
+ irq = platform_get_irq(pmu_device, i);
+ if (irq <= 0)
+ continue;
+
+ /*
+ * If we have a single PMU interrupt that we can't shift,
+ * assume that we're running on a uniprocessor machine and
+ * continue. Otherwise, continue without this interrupt.
+ */
+ if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+ pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
+ irq, i);
+ continue;
+ }
+
+ err = request_irq(irq, armpmu->handle_irq,
+ IRQF_NOBALANCING,
+ "arm-pmu", armpmu);
+ if (err) {
+ pr_err("unable to request IRQ%d for ARM PMU counters\n",
+ irq);
+ armpmu_release_hardware(armpmu);
+ return err;
+ }
+
+ cpumask_set_cpu(i, &armpmu->active_irqs);
+ }
}
return 0;
@@ -1299,8 +1347,8 @@ early_initcall(init_hw_perf_events);
* Callchain handling code.
*/
struct frame_tail {
- struct frame_tail __user *fp;
- unsigned long lr;
+ struct frame_tail __user *fp;
+ unsigned long lr;
} __attribute__((packed));
/*
@@ -1337,22 +1385,84 @@ user_backtrace(struct frame_tail __user *tail,
return buftail.fp;
}
+#ifdef CONFIG_COMPAT
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct compat_frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct compat_frame_tail {
+ compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
+ u32 sp;
+ u32 lr;
+} __attribute__((packed));
+
+static struct compat_frame_tail __user *
+compat_user_backtrace(struct compat_frame_tail __user *tail,
+ struct perf_callchain_entry *entry)
+{
+ struct compat_frame_tail buftail;
+ unsigned long err;
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+ return NULL;
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
+ return NULL;
+
+ perf_callchain_store(entry, buftail.lr);
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (tail + 1 >= (struct compat_frame_tail __user *)
+ compat_ptr(buftail.fp))
+ return NULL;
+
+ return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+}
+#endif /* CONFIG_COMPAT */
+
void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
- struct frame_tail __user *tail;
-
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
perf_callchain_store(entry, regs->pc);
- tail = (struct frame_tail __user *)regs->regs[29];
- while (entry->nr < PERF_MAX_STACK_DEPTH &&
- tail && !((unsigned long)tail & 0xf))
- tail = user_backtrace(tail, entry);
+ if (!compat_user_mode(regs)) {
+ /* AARCH64 mode */
+ struct frame_tail __user *tail;
+
+ tail = (struct frame_tail __user *)regs->regs[29];
+
+ while (entry->nr < PERF_MAX_STACK_DEPTH &&
+ tail && !((unsigned long)tail & 0xf))
+ tail = user_backtrace(tail, entry);
+ } else {
+#ifdef CONFIG_COMPAT
+ /* AARCH32 compat mode */
+ struct compat_frame_tail __user *tail;
+
+ tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
+
+ while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+ tail && !((unsigned long)tail & 0x3))
+ tail = compat_user_backtrace(tail, entry);
+#endif
+ }
}
/*
@@ -1380,6 +1490,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
frame.fp = regs->regs[29];
frame.sp = regs->sp;
frame.pc = regs->pc;
+
walk_stackframe(&frame, callchain_trace, entry);
}
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
new file mode 100644
index 000000000000..422ebd63b619
--- /dev/null
+++ b/arch/arm64/kernel/perf_regs.c
@@ -0,0 +1,46 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+
+#include <asm/compat.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX))
+ return 0;
+
+ /*
+ * Compat (i.e. 32 bit) mode:
+ * - PC has been set in the pt_regs struct in kernel_entry,
+ * - Handle SP and LR here.
+ */
+ if (compat_user_mode(regs)) {
+ if ((u32)idx == PERF_REG_ARM64_SP)
+ return regs->compat_sp;
+ if ((u32)idx == PERF_REG_ARM64_LR)
+ return regs->compat_lr;
+ }
+
+ return regs->regs[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ if (is_compat_thread(task_thread_info(task)))
+ return PERF_SAMPLE_REGS_ABI_32;
+ else
+ return PERF_SAMPLE_REGS_ABI_64;
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 0860fc3077fc..75c6b1e0d606 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -20,6 +20,7 @@
#include <stdarg.h>
+#include <linux/compat.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -33,6 +34,7 @@
#include <linux/kallsyms.h>
#include <linux/init.h>
#include <linux/cpu.h>
+#include <linux/cpuidle.h>
#include <linux/elfcore.h>
#include <linux/pm.h>
#include <linux/tick.h>
@@ -71,8 +73,17 @@ static void setup_restart(void)
void soft_restart(unsigned long addr)
{
+ typedef void (*phys_reset_t)(unsigned long);
+ phys_reset_t phys_reset;
+
setup_restart();
- cpu_reset(addr);
+
+ /* Switch to the identity mapping */
+ phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
+ phys_reset(addr);
+
+ /* Should never get here */
+ BUG();
}
/*
@@ -98,10 +109,19 @@ void arch_cpu_idle(void)
* This should do all the clock switching and wait for interrupt
* tricks
*/
- cpu_do_idle();
- local_irq_enable();
+ if (cpuidle_idle_call()) {
+ cpu_do_idle();
+ local_irq_enable();
+ }
}
+#ifdef CONFIG_HOTPLUG_CPU
+void arch_cpu_idle_dead(void)
+{
+ cpu_die();
+}
+#endif
+
void machine_shutdown(void)
{
#ifdef CONFIG_SMP
@@ -143,15 +163,26 @@ void machine_restart(char *cmd)
void __show_regs(struct pt_regs *regs)
{
- int i;
+ int i, top_reg;
+ u64 lr, sp;
+
+ if (compat_user_mode(regs)) {
+ lr = regs->compat_lr;
+ sp = regs->compat_sp;
+ top_reg = 12;
+ } else {
+ lr = regs->regs[30];
+ sp = regs->sp;
+ top_reg = 29;
+ }
show_regs_print_info(KERN_DEFAULT);
print_symbol("PC is at %s\n", instruction_pointer(regs));
- print_symbol("LR is at %s\n", regs->regs[30]);
+ print_symbol("LR is at %s\n", lr);
printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
- regs->pc, regs->regs[30], regs->pstate);
- printk("sp : %016llx\n", regs->sp);
- for (i = 29; i >= 0; i--) {
+ regs->pc, lr, regs->pstate);
+ printk("sp : %016llx\n", sp);
+ for (i = top_reg; i >= 0; i--) {
printk("x%-2d: %016llx ", i, regs->regs[i]);
if (i % 2 == 0)
printk("\n");
@@ -297,7 +328,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
* Complete any pending TLB or cache maintenance on this CPU in case
* the thread migrates to a different CPU.
*/
- dsb();
+ dsb(ish);
/* the actual thread switch */
last = cpu_switch_to(prev, next);
@@ -308,6 +339,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
unsigned long get_wchan(struct task_struct *p)
{
struct stackframe frame;
+ unsigned long stack_page;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
@@ -315,9 +347,11 @@ unsigned long get_wchan(struct task_struct *p)
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
frame.pc = thread_saved_pc(p);
+ stack_page = (unsigned long)task_stack_page(p);
do {
- int ret = unwind_frame(&frame);
- if (ret < 0)
+ if (frame.sp < stack_page ||
+ frame.sp >= stack_page + THREAD_SIZE ||
+ unwind_frame(&frame))
return 0;
if (!in_sched_functions(frame.pc))
return frame.pc;
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 14f73c445ff5..0e32ab453e5b 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -15,14 +15,37 @@
#define pr_fmt(fmt) "psci: " fmt
+#include <linux/cpuidle.h>
#include <linux/init.h>
#include <linux/of.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
#include <asm/compiler.h>
+#include <asm/cpu_ops.h>
#include <asm/errno.h>
#include <asm/psci.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
-struct psci_operations psci_ops;
+#define PSCI_POWER_STATE_TYPE_STANDBY 0
+#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
+
+struct psci_power_state {
+ u16 id;
+ u8 type;
+ u8 affinity_level;
+};
+
+struct psci_operations {
+ int (*cpu_suspend)(struct psci_power_state state,
+ unsigned long entry_point);
+ int (*cpu_off)(struct psci_power_state state);
+ int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
+ int (*migrate)(unsigned long cpuid);
+};
+
+static struct psci_operations psci_ops;
static int (*invoke_psci_fn)(u64, u64, u64, u64);
@@ -34,6 +57,8 @@ enum psci_function {
PSCI_FN_MAX,
};
+static DEFINE_PER_CPU_READ_MOSTLY(struct psci_power_state *, psci_power_state);
+
static u32 psci_function_id[PSCI_FN_MAX];
#define PSCI_RET_SUCCESS 0
@@ -74,6 +99,17 @@ static u32 psci_power_state_pack(struct psci_power_state state)
<< PSCI_POWER_STATE_AFFL_SHIFT);
}
+static void psci_power_state_unpack(u32 power_state,
+ struct psci_power_state *state)
+{
+ state->id = (power_state >> PSCI_POWER_STATE_ID_SHIFT)
+ & PSCI_POWER_STATE_ID_MASK;
+ state->type = (power_state >> PSCI_POWER_STATE_TYPE_SHIFT)
+ & PSCI_POWER_STATE_TYPE_MASK;
+ state->affinity_level = (power_state >> PSCI_POWER_STATE_AFFL_SHIFT)
+ & PSCI_POWER_STATE_AFFL_MASK;
+}
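
For reference, the power_state bit layout that the pack/unpack helpers
assume; the shift/mask constants are those defined in asm/psci.h, shown
here only as an illustration:

	bits [15:0]	state ID	(PSCI_POWER_STATE_ID_SHIFT = 0)
	bit  [16]	type		(PSCI_POWER_STATE_TYPE_SHIFT = 16)
	bits [25:24]	affinity level	(PSCI_POWER_STATE_AFFL_SHIFT = 24)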
+
/*
* The following two functions are invoked via the invoke_psci_fn pointer
* and will not be inlined, allowing us to piggyback on the AAPCS.
@@ -156,22 +192,91 @@ static const struct of_device_id psci_of_match[] __initconst = {
{},
};
-int __init psci_init(void)
+int __init psci_dt_register_idle_states(struct cpuidle_driver *drv,
+ struct device_node *state_nodes[])
+{
+ int cpu, i;
+ struct psci_power_state *psci_states;
+ const struct cpu_operations *cpu_ops_ptr;
+
+ if (!state_nodes)
+ return -EINVAL;
+ /*
+ * This is belt-and-braces: make sure that if the specified
+ * idle protocol is psci, the cpu_ops have been
+ * initialized to psci operations. Anything else is
+ * a recipe for mayhem.
+ */
+ for_each_cpu(cpu, drv->cpumask) {
+ cpu_ops_ptr = cpu_ops[cpu];
+ if (WARN_ON(!cpu_ops_ptr || strcmp(cpu_ops_ptr->name, "psci")))
+ return -EOPNOTSUPP;
+ }
+
+ psci_states = kcalloc(drv->state_count, sizeof(*psci_states),
+ GFP_KERNEL);
+
+ if (!psci_states) {
+ pr_warn("psci idle state allocation failed\n");
+ return -ENOMEM;
+ }
+
+ for_each_cpu(cpu, drv->cpumask) {
+ if (per_cpu(psci_power_state, cpu)) {
+ pr_warn("idle states already initialized on cpu %u\n",
+ cpu);
+ continue;
+ }
+ per_cpu(psci_power_state, cpu) = psci_states;
+ }
+
+ for (i = 0; i < drv->state_count; i++) {
+ u32 psci_power_state;
+
+ if (!state_nodes[i]) {
+ /*
+ * An index with a missing node pointer falls back to
+ * simple STANDBYWFI
+ */
+ psci_states[i].type = PSCI_POWER_STATE_TYPE_STANDBY;
+ continue;
+ }
+
+ if (of_property_read_u32(state_nodes[i], "entry-method-param",
+ &psci_power_state)) {
+ pr_warn(" * %s missing entry-method-param property\n",
+ state_nodes[i]->full_name);
+ /*
+ * If entry-method-param property is missing, fall
+ * back to STANDBYWFI state
+ */
+ psci_states[i].type = PSCI_POWER_STATE_TYPE_STANDBY;
+ continue;
+ }
+
+ pr_debug("psci-power-state %#x index %u\n",
+ psci_power_state, i);
+ psci_power_state_unpack(psci_power_state, &psci_states[i]);
+ }
+
+ return 0;
+}
+
+void __init psci_init(void)
{
struct device_node *np;
const char *method;
u32 id;
- int err = 0;
np = of_find_matching_node(NULL, psci_of_match);
if (!np)
- return -ENODEV;
+ return;
pr_info("probing function IDs from device-tree\n");
if (of_property_read_string(np, "method", &method)) {
pr_warning("missing \"method\" property\n");
- err = -ENXIO;
goto out_put_node;
}
@@ -181,7 +286,6 @@ int __init psci_init(void)
invoke_psci_fn = __invoke_psci_fn_smc;
} else {
pr_warning("invalid \"method\" property: %s\n", method);
- err = -EINVAL;
goto out_put_node;
}
@@ -207,5 +311,85 @@ int __init psci_init(void)
out_put_node:
of_node_put(np);
+ return;
+}
+
+#ifdef CONFIG_SMP
+
+static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu)
+{
+ return 0;
+}
+
+static int __init cpu_psci_cpu_prepare(unsigned int cpu)
+{
+ if (!psci_ops.cpu_on) {
+ pr_err("no cpu_on method, not booting CPU%d\n", cpu);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int cpu_psci_cpu_boot(unsigned int cpu)
+{
+ int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
+ if (err)
+ pr_err("failed to boot CPU%d (%d)\n", cpu, err);
+
return err;
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int cpu_psci_cpu_disable(unsigned int cpu)
+{
+ /* Fail early if we don't have CPU_OFF support */
+ if (!psci_ops.cpu_off)
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static void cpu_psci_cpu_die(unsigned int cpu)
+{
+ int ret;
+ /*
+ * There are no known implementations of PSCI actually using the
+ * power state field; pass a sensible default for now.
+ */
+ struct psci_power_state state = {
+ .type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
+ };
+
+ ret = psci_ops.cpu_off(state);
+
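+ /*
+ * cpu_off() does not return on success, so reaching this
+ * point means the power down request failed.
+ */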
+ pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
+}
+#endif
+
+#ifdef CONFIG_ARM64_CPU_SUSPEND
+static int cpu_psci_cpu_suspend(unsigned long index)
+{
+ struct psci_power_state *state = __get_cpu_var(psci_power_state);
+
+ if (!state)
+ return -EOPNOTSUPP;
+
+ return psci_ops.cpu_suspend(state[index], virt_to_phys(cpu_resume));
+}
+#endif
+
+const struct cpu_operations cpu_psci_ops = {
+ .name = "psci",
+ .cpu_init = cpu_psci_cpu_init,
+ .cpu_prepare = cpu_psci_cpu_prepare,
+ .cpu_boot = cpu_psci_cpu_boot,
+#ifdef CONFIG_HOTPLUG_CPU
+ .cpu_disable = cpu_psci_cpu_disable,
+ .cpu_die = cpu_psci_cpu_die,
+#endif
+#ifdef CONFIG_ARM64_CPU_SUSPEND
+ .cpu_suspend = cpu_psci_cpu_suspend,
+#endif
+};
+
+#endif
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 33a74fc45959..bc09a147454f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -19,6 +19,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/compat.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -41,6 +42,9 @@
#include <asm/traps.h>
#include <asm/system_misc.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
/*
* TODO: does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -53,28 +57,6 @@ void ptrace_disable(struct task_struct *child)
{
}
-/*
- * Handle hitting a breakpoint.
- */
-static int ptrace_break(struct pt_regs *regs)
-{
- siginfo_t info = {
- .si_signo = SIGTRAP,
- .si_errno = 0,
- .si_code = TRAP_BRKPT,
- .si_addr = (void __user *)instruction_pointer(regs),
- };
-
- force_sig_info(SIGTRAP, &info, current);
- return 0;
-}
-
-static int arm64_break_trap(unsigned long addr, unsigned int esr,
- struct pt_regs *regs)
-{
- return ptrace_break(regs);
-}
-
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
* Handle hitting a HW-breakpoint.
@@ -657,28 +639,27 @@ static int compat_gpr_get(struct task_struct *target,
for (i = 0; i < num_regs; ++i) {
unsigned int idx = start + i;
- void *reg;
+ compat_ulong_t reg;
switch (idx) {
case 15:
- reg = (void *)&task_pt_regs(target)->pc;
+ reg = task_pt_regs(target)->pc;
break;
case 16:
- reg = (void *)&task_pt_regs(target)->pstate;
+ reg = task_pt_regs(target)->pstate;
break;
case 17:
- reg = (void *)&task_pt_regs(target)->orig_x0;
+ reg = task_pt_regs(target)->orig_x0;
break;
default:
- reg = (void *)&task_pt_regs(target)->regs[idx];
+ reg = task_pt_regs(target)->regs[idx];
}
- ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t));
-
+ ret = copy_to_user(ubuf, &reg, sizeof(reg));
if (ret)
break;
- else
- ubuf += sizeof(compat_ulong_t);
+
+ ubuf += sizeof(reg);
}
return ret;
@@ -706,28 +687,28 @@ static int compat_gpr_set(struct task_struct *target,
for (i = 0; i < num_regs; ++i) {
unsigned int idx = start + i;
- void *reg;
+ compat_ulong_t reg;
+
+ ret = copy_from_user(&reg, ubuf, sizeof(reg));
+ if (ret)
+ return ret;
+
+ ubuf += sizeof(reg);
switch (idx) {
case 15:
- reg = (void *)&newregs.pc;
+ newregs.pc = reg;
break;
case 16:
- reg = (void *)&newregs.pstate;
+ newregs.pstate = reg;
break;
case 17:
- reg = (void *)&newregs.orig_x0;
+ newregs.orig_x0 = reg;
break;
default:
- reg = (void *)&newregs.regs[idx];
+ newregs.regs[idx] = reg;
}
- ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t));
-
- if (ret)
- goto out;
- else
- ubuf += sizeof(compat_ulong_t);
}
if (valid_user_regs(&newregs.user_regs))
@@ -735,7 +716,6 @@ static int compat_gpr_set(struct task_struct *target,
else
ret = -EINVAL;
-out:
return ret;
}
@@ -816,33 +796,6 @@ static const struct user_regset_view user_aarch32_view = {
.regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets)
};
-int aarch32_break_trap(struct pt_regs *regs)
-{
- unsigned int instr;
- bool bp = false;
- void __user *pc = (void __user *)instruction_pointer(regs);
-
- if (compat_thumb_mode(regs)) {
- /* get 16-bit Thumb instruction */
- get_user(instr, (u16 __user *)pc);
- if (instr == AARCH32_BREAK_THUMB2_LO) {
- /* get second half of 32-bit Thumb-2 instruction */
- get_user(instr, (u16 __user *)(pc + 2));
- bp = instr == AARCH32_BREAK_THUMB2_HI;
- } else {
- bp = instr == AARCH32_BREAK_THUMB;
- }
- } else {
- /* 32-bit ARM instruction */
- get_user(instr, (u32 __user *)pc);
- bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM;
- }
-
- if (bp)
- return ptrace_break(regs);
- return 1;
-}
-
static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
compat_ulong_t __user *ret)
{
@@ -1114,45 +1067,49 @@ long arch_ptrace(struct task_struct *child, long request,
return ptrace_request(child, request, addr, data);
}
+enum ptrace_syscall_dir {
+ PTRACE_SYSCALL_ENTER = 0,
+ PTRACE_SYSCALL_EXIT,
+};
-static int __init ptrace_break_init(void)
-{
- hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP,
- TRAP_BRKPT, "ptrace BRK handler");
- return 0;
-}
-core_initcall(ptrace_break_init);
-
-
-asmlinkage int syscall_trace(int dir, struct pt_regs *regs)
+static void tracehook_report_syscall(struct pt_regs *regs,
+ enum ptrace_syscall_dir dir)
{
+ int regno;
unsigned long saved_reg;
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
- return regs->syscallno;
-
- if (is_compat_task()) {
- /* AArch32 uses ip (r12) for scratch */
- saved_reg = regs->regs[12];
- regs->regs[12] = dir;
- } else {
- /*
- * Save X7. X7 is used to denote syscall entry/exit:
- * X7 = 0 -> entry, = 1 -> exit
- */
- saved_reg = regs->regs[7];
- regs->regs[7] = dir;
- }
+ /*
+ * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
+ * used to denote syscall entry/exit: 0 on entry, 1 on exit.
+ */
+ regno = (is_compat_task() ? 12 : 7);
+ saved_reg = regs->regs[regno];
+ regs->regs[regno] = dir;
- if (dir)
+ if (dir == PTRACE_SYSCALL_EXIT)
tracehook_report_syscall_exit(regs, 0);
else if (tracehook_report_syscall_entry(regs))
regs->syscallno = ~0UL;
- if (is_compat_task())
- regs->regs[12] = saved_reg;
- else
- regs->regs[7] = saved_reg;
+ regs->regs[regno] = saved_reg;
+}
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_enter(regs, regs->syscallno);
return regs->syscallno;
}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_exit(regs, regs_return_value(regs));
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
new file mode 100644
index 000000000000..89102a6ffad5
--- /dev/null
+++ b/arch/arm64/kernel/return_address.c
@@ -0,0 +1,55 @@
+/*
+ * arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/ftrace.h>
+
+#include <asm/stacktrace.h>
+
+struct return_address_data {
+ unsigned int level;
+ void *addr;
+};
+
+static int save_return_addr(struct stackframe *frame, void *d)
+{
+ struct return_address_data *data = d;
+
+ if (!data->level) {
+ data->addr = (void *)frame->pc;
+ return 1;
+ } else {
+ --data->level;
+ return 0;
+ }
+}
+
+void *return_address(unsigned int level)
+{
+ struct return_address_data data;
+ struct stackframe frame;
+ register unsigned long current_sp asm ("sp");
+
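+ /* level + 2 skips the frames the walk itself introduces */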
+ data.level = level + 2;
+ data.addr = NULL;
+
+ frame.fp = (unsigned long)__builtin_frame_address(0);
+ frame.sp = current_sp;
+ frame.pc = (unsigned long)return_address; /* dummy */
+
+ walk_stackframe(&frame, save_return_addr, &data);
+
+ if (!data.level)
+ return data.addr;
+ else
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index add6ea616843..e87b5fd07b8c 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -41,10 +41,13 @@
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
+#include <linux/efi.h>
+#include <asm/fixmap.h>
#include <asm/cputype.h>
#include <asm/elf.h>
#include <asm/cputable.h>
+#include <asm/cpu_ops.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -53,13 +56,25 @@
#include <asm/traps.h>
#include <asm/memblock.h>
#include <asm/psci.h>
+#include <asm/efi.h>
unsigned int processor_id;
EXPORT_SYMBOL(processor_id);
-unsigned int elf_hwcap __read_mostly;
+unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP_DEFAULT \
+ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
+ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
+ COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
+ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
+ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
+unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
+#endif
+
static const char *cpu_name;
static const char *machine_name;
phys_addr_t __fdt_pointer __initdata;
@@ -97,15 +112,95 @@ void __init early_print(const char *str, ...)
printk("%s", buf);
}
-static void __init setup_processor(void)
+void __init smp_setup_processor_id(void)
{
- struct cpu_info *cpu_info;
+ /*
+ * clear __my_cpu_offset on the boot CPU to avoid hangs caused by
+ * using percpu variables early; for example, lockdep will
+ * access a percpu variable inside lock_release
+ */
+ set_my_cpu_offset(0);
+}
+
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+ return phys_id == cpu_logical_map(cpu);
+}
+struct mpidr_hash mpidr_hash;
+#ifdef CONFIG_SMP
+/**
+ * smp_build_mpidr_hash - Pre-compute shifts required at each affinity
+ * level in order to build a linear index from an
+ * MPIDR value. Resulting algorithm is a collision
+ * free hash carried out through shifting and ORing
+ */
+static void __init smp_build_mpidr_hash(void)
+{
+ u32 i, affinity, fs[4], bits[4], ls;
+ u64 mask = 0;
+ /*
+ * Pre-scan the list of MPIDRs and filter out bits that do
+ * not contribute to affinity levels, i.e. they never toggle.
+ */
+ for_each_possible_cpu(i)
+ mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));
+ pr_debug("mask of set bits %#llx\n", mask);
/*
- * locate processor in the list of supported processor
- * types. The linker builds this table for us from the
- * entries in arch/arm/mm/proc.S
+ * Find and stash the last and first bit set at all affinity levels to
+ * check how many bits are required to represent them.
*/
+ for (i = 0; i < 4; i++) {
+ affinity = MPIDR_AFFINITY_LEVEL(mask, i);
+ /*
+ * Find the MSB and LSB positions
+ * to determine how many bits are required
+ * to express the affinity level.
+ */
+ ls = fls(affinity);
+ fs[i] = affinity ? ffs(affinity) - 1 : 0;
+ bits[i] = ls - fs[i];
+ }
+ /*
+ * An index can be created from the MPIDR_EL1 by isolating the
+ * significant bits at each affinity level and by shifting
+ * them in order to compress the 32-bit value space into a
+ * compact set of values. This is equivalent to hashing
+ * the MPIDR_EL1 through shifting and ORing. It is a collision-free
+ * hash, though not minimal, since some levels might contain a number
+ * of CPUs that is not an exact power of 2 and their bit
+ * representation might contain holes, e.g. MPIDR_EL1[7:0] = {0x2, 0x80}.
+ */
+ mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0];
+ mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0];
+ mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] -
+ (bits[1] + bits[0]);
+ mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) +
+ fs[3] - (bits[2] + bits[1] + bits[0]);
+ mpidr_hash.mask = mask;
+ mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0];
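+ /*
+ * Worked example: MPIDR values {0x0, 0x1, 0x100, 0x101} give
+ * mask = 0x101, one significant bit at affinity levels 0 and 1,
+ * and hash indices {0, 1, 2, 3} with mpidr_hash_size() == 4.
+ */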
+ pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n",
+ mpidr_hash.shift_aff[0],
+ mpidr_hash.shift_aff[1],
+ mpidr_hash.shift_aff[2],
+ mpidr_hash.shift_aff[3],
+ mpidr_hash.mask,
+ mpidr_hash.bits);
+ /*
+ * 4x is an arbitrary value used to warn on a hash table much bigger
+ * than expected on most systems.
+ */
+ if (mpidr_hash_size() > 4 * num_possible_cpus())
+ pr_warn("Large number of MPIDR hash buckets detected\n");
+ __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
+}
+#endif
+
+static void __init setup_processor(void)
+{
+ struct cpu_info *cpu_info;
+ u64 features, block;
+
cpu_info = lookup_processor_type(read_cpuid_id());
if (!cpu_info) {
printk("CPU configuration botched (ID %08x), unable to continue.\n",
@@ -118,8 +213,71 @@ static void __init setup_processor(void)
printk("CPU: %s [%08x] revision %d\n",
cpu_name, read_cpuid_id(), read_cpuid_id() & 15);
- sprintf(init_utsname()->machine, "aarch64");
+ sprintf(init_utsname()->machine, ELF_PLATFORM);
elf_hwcap = 0;
+
+ /*
+ * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
+ * The blocks we test below represent incremental functionality
+ * for non-negative values. Negative values are reserved.
+ */
+ features = read_cpuid(ID_AA64ISAR0_EL1);
+ block = (features >> 4) & 0xf;
+ if (!(block & 0x8)) {
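+ /*
+ * Intentional fall-through: a block value of 2 implies both
+ * PMULL and AES, a value of 1 implies AES only.
+ */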
+ switch (block) {
+ default:
+ case 2:
+ elf_hwcap |= HWCAP_PMULL;
+ case 1:
+ elf_hwcap |= HWCAP_AES;
+ case 0:
+ break;
+ }
+ }
+
+ block = (features >> 8) & 0xf;
+ if (block && !(block & 0x8))
+ elf_hwcap |= HWCAP_SHA1;
+
+ block = (features >> 12) & 0xf;
+ if (block && !(block & 0x8))
+ elf_hwcap |= HWCAP_SHA2;
+
+ block = (features >> 16) & 0xf;
+ if (block && !(block & 0x8))
+ elf_hwcap |= HWCAP_CRC32;
+
+#ifdef CONFIG_COMPAT
+ /*
+ * ID_ISAR5_EL1 carries similar information to the above, but
+ * pertaining to the AArch32 32-bit execution state.
+ */
+ features = read_cpuid(ID_ISAR5_EL1);
+ block = (features >> 4) & 0xf;
+ if (!(block & 0x8)) {
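+ /* same intentional fall-through as for the native hwcaps above */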
+ switch (block) {
+ default:
+ case 2:
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
+ case 1:
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
+ case 0:
+ break;
+ }
+ }
+
+ block = (features >> 8) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
+
+ block = (features >> 12) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
+
+ block = (features >> 16) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
+#endif
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
@@ -257,20 +415,27 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;
+ early_ioremap_init();
+
parse_early_param();
+ efi_init();
arm64_memblock_init();
paging_init();
request_standard_resources();
+ efi_idmap_init();
+
unflatten_device_tree();
psci_init();
cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
+ cpu_read_bootcpu_ops();
#ifdef CONFIG_SMP
smp_init_cpus();
+ smp_build_mpidr_hash();
#endif
#ifdef CONFIG_VT
@@ -288,7 +453,7 @@ static int __init arm64_device_init(void)
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
return 0;
}
-arch_initcall(arm64_device_init);
+arch_initcall_sync(arm64_device_init);
static DEFINE_PER_CPU(struct cpu, cpu_data);
@@ -309,6 +474,12 @@ subsys_initcall(topology_init);
static const char *hwcap_str[] = {
"fp",
"asimd",
+ "evtstrm",
+ "aes",
+ "pmull",
+ "sha1",
+ "sha2",
+ "crc32",
NULL
};
@@ -328,9 +499,6 @@ static int c_show(struct seq_file *m, void *v)
#ifdef CONFIG_SMP
seq_printf(m, "processor\t: %d\n", i);
#endif
- seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n",
- loops_per_jiffy / (500000UL/HZ),
- loops_per_jiffy / (5000UL/HZ) % 100);
}
/* dump out the processor features */
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 890a591f75dd..e3cf09626245 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/personality.h>
@@ -25,7 +26,6 @@
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
-#include <asm/compat.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
@@ -100,8 +100,7 @@ static int restore_sigframe(struct pt_regs *regs,
{
sigset_t set;
int i, err;
- struct aux_context __user *aux =
- (struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
+ void *aux = sf->uc.uc_mcontext.__reserved;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (err == 0)
@@ -121,8 +120,11 @@ static int restore_sigframe(struct pt_regs *regs,
err |= !valid_user_regs(&regs->user_regs);
- if (err == 0)
- err |= restore_fpsimd_context(&aux->fpsimd);
+ if (err == 0) {
+ struct fpsimd_context *fpsimd_ctx =
+ container_of(aux, struct fpsimd_context, head);
+ err |= restore_fpsimd_context(fpsimd_ctx);
+ }
return err;
}
@@ -167,8 +169,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
struct pt_regs *regs, sigset_t *set)
{
int i, err = 0;
- struct aux_context __user *aux =
- (struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
+ void *aux = sf->uc.uc_mcontext.__reserved;
+ struct _aarch64_ctx *end;
/* set up the stack frame for unwinding */
__put_user_error(regs->regs[29], &sf->fp, err);
@@ -185,12 +187,17 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
- if (err == 0)
- err |= preserve_fpsimd_context(&aux->fpsimd);
+ if (err == 0) {
+ struct fpsimd_context *fpsimd_ctx =
+ container_of(aux, struct fpsimd_context, head);
+ err |= preserve_fpsimd_context(fpsimd_ctx);
+ aux += sizeof(*fpsimd_ctx);
+ }
/* set the "end" magic */
- __put_user_error(0, &aux->end.magic, err);
- __put_user_error(0, &aux->end.size, err);
+ end = aux;
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
return err;
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index e393174fe859..e51bbe79f5b5 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -100,34 +100,6 @@ struct compat_rt_sigframe {
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-/*
- * For ARM syscalls, the syscall number has to be loaded into r7.
- * We do not support an OABI userspace.
- */
-#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_compat_sigreturn)
-#define SVC_SYS_SIGRETURN (0xef000000 | __NR_compat_sigreturn)
-#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_compat_rt_sigreturn)
-#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_compat_rt_sigreturn)
-
-/*
- * For Thumb syscalls, we also pass the syscall number via r7. We therefore
- * need two 16-bit instructions.
- */
-#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_compat_sigreturn) << 16) | \
- 0x2700 | __NR_compat_sigreturn)
-#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \
- 0x2700 | __NR_compat_rt_sigreturn)
-
-const compat_ulong_t aarch32_sigret_code[6] = {
- /*
- * AArch32 sigreturn code.
- * We don't construct an OABI SWI - instead we just set the imm24 field
- * to the EABI syscall number so that we create a sane disassembly.
- */
- MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN,
- MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN,
-};
-
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
{
compat_sigset_t cset;
@@ -474,12 +446,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
/* Check if the handler is written for ARM or Thumb */
thumb = handler & 1;
- if (thumb) {
+ if (thumb)
spsr |= COMPAT_PSR_T_BIT;
- spsr &= ~COMPAT_PSR_IT_MASK;
- } else {
+ else
spsr &= ~COMPAT_PSR_T_BIT;
- }
+
+ /* The IT state must be cleared for both ARM and Thumb-2 */
+ spsr &= ~COMPAT_PSR_IT_MASK;
if (ka->sa.sa_flags & SA_RESTORER) {
retcode = ptr_to_compat(ka->sa.sa_restorer);
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
new file mode 100644
index 000000000000..b1925729c692
--- /dev/null
+++ b/arch/arm64/kernel/sleep.S
@@ -0,0 +1,184 @@
+#include <linux/errno.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+
+ .text
+/*
+ * Implementation of MPIDR_EL1 hash algorithm through shifting
+ * and OR'ing.
+ *
+ * @dst: register containing hash result
+ * @rs0: register containing affinity level 0 bit shift
+ * @rs1: register containing affinity level 1 bit shift
+ * @rs2: register containing affinity level 2 bit shift
+ * @rs3: register containing affinity level 3 bit shift
+ * @mpidr: register containing MPIDR_EL1 value
+ * @mask: register containing MPIDR mask
+ *
+ * Pseudo C-code:
+ *
+ *u32 dst;
+ *
+ *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) {
+ * u32 aff0, aff1, aff2, aff3;
+ * u64 mpidr_masked = mpidr & mask;
+ * aff0 = mpidr_masked & 0xff;
+ * aff1 = mpidr_masked & 0xff00;
+ * aff2 = mpidr_masked & 0xff0000;
+ * aff3 = mpidr_masked & 0xff00000000;
+ * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3);
+ *}
+ * Input registers: rs0, rs1, rs2, rs3, mpidr, mask
+ * Output register: dst
+ * Note: input and output registers must be disjoint register sets
+ * (e.g. a macro instance with mpidr = x1 and dst = x1 is invalid)
+ */
+ .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask
+ and \mpidr, \mpidr, \mask // mask out MPIDR bits
+ and \dst, \mpidr, #0xff // dst = aff0
+ lsr \dst, \dst, \rs0 // dst = aff0 >> rs0
+ and \mask, \mpidr, #0xff00 // mask = aff1
+ lsr \mask, \mask, \rs1
+ orr \dst, \dst, \mask // dst |= (aff1 >> rs1)
+ and \mask, \mpidr, #0xff0000 // mask = aff2
+ lsr \mask, \mask, \rs2
+ orr \dst, \dst, \mask // dst |= (aff2 >> rs2)
+ and \mask, \mpidr, #0xff00000000 // mask = aff3
+ lsr \mask, \mask, \rs3
+ orr \dst, \dst, \mask // dst |= (aff3 >> rs3)
+ .endm
+/*
+ * Save CPU state for a suspend. This saves the callee-saved registers and
+ * allocates space on the kernel stack to save the CPU-specific registers +
+ * some other data for resume.
+ *
+ * x0 = suspend finisher argument
+ */
+ENTRY(__cpu_suspend)
+ stp x29, lr, [sp, #-96]!
+ stp x19, x20, [sp,#16]
+ stp x21, x22, [sp,#32]
+ stp x23, x24, [sp,#48]
+ stp x25, x26, [sp,#64]
+ stp x27, x28, [sp,#80]
+ mov x2, sp
+ sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
+ mov x1, sp
+ /*
+ * x1 now points to struct cpu_suspend_ctx allocated on the stack
+ */
+ str x2, [x1, #CPU_CTX_SP]
+ ldr x2, =sleep_save_sp
+ ldr x2, [x2, #SLEEP_SAVE_SP_VIRT]
+#ifdef CONFIG_SMP
+ mrs x7, mpidr_el1
+ ldr x9, =mpidr_hash
+ ldr x10, [x9, #MPIDR_HASH_MASK]
+ /*
+ * The following code relies on the sizes of the
+ * struct mpidr_hash members.
+ */
+ ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS]
+ ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
+ compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
+ add x2, x2, x8, lsl #3
+#endif
+ bl __cpu_suspend_finisher
+ /*
+ * Never gets here, unless suspend fails.
+ * Successful cpu_suspend should return from cpu_resume, returning
+ * through this code path is considered an error
+ * If the return value is set to 0 force x0 = -EOPNOTSUPP
+ * to make sure a proper error condition is propagated
+ */
+ cmp x0, #0
+ mov x3, #-EOPNOTSUPP
+ csel x0, x3, x0, eq
+ add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer
+ ldp x19, x20, [sp, #16]
+ ldp x21, x22, [sp, #32]
+ ldp x23, x24, [sp, #48]
+ ldp x25, x26, [sp, #64]
+ ldp x27, x28, [sp, #80]
+ ldp x29, lr, [sp], #96
+ ret
+ENDPROC(__cpu_suspend)
+ .ltorg
+
+/*
+ * x0 must contain the sctlr value retrieved from restored context
+ */
+ENTRY(cpu_resume_mmu)
+ ldr x3, =cpu_resume_after_mmu
+ msr sctlr_el1, x0 // restore sctlr_el1
+ isb
+ br x3 // global jump to virtual address
+ENDPROC(cpu_resume_mmu)
+cpu_resume_after_mmu:
+ mov x0, #0 // return zero on success
+ ldp x19, x20, [sp, #16]
+ ldp x21, x22, [sp, #32]
+ ldp x23, x24, [sp, #48]
+ ldp x25, x26, [sp, #64]
+ ldp x27, x28, [sp, #80]
+ ldp x29, lr, [sp], #96
+ ret
+ENDPROC(cpu_resume_after_mmu)
+
+ .data
+ENTRY(cpu_resume)
+ bl el2_setup // if in EL2 drop to EL1 cleanly
+#ifdef CONFIG_SMP
+ mrs x1, mpidr_el1
+ adr x4, mpidr_hash_ptr
+ ldr x5, [x4]
+ add x8, x4, x5 // x8 = struct mpidr_hash phys address
+ /* retrieve mpidr_hash members to compute the hash */
+ ldr x2, [x8, #MPIDR_HASH_MASK]
+ ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS]
+ ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
+ compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
+ /* x7 contains hash index, let's use it to grab context pointer */
+#else
+ mov x7, xzr
+#endif
+ adr x0, sleep_save_sp
+ ldr x0, [x0, #SLEEP_SAVE_SP_PHYS]
+ ldr x0, [x0, x7, lsl #3]
+ /* load sp from context */
+ ldr x2, [x0, #CPU_CTX_SP]
+ adr x1, sleep_idmap_phys
+ /* load physical address of identity map page table in x1 */
+ ldr x1, [x1]
+ mov sp, x2
+ /*
+ * cpu_do_resume expects x0 to contain context physical address
+ * pointer and x1 to contain physical address of 1:1 page tables
+ */
+ bl cpu_do_resume // PC relative jump, MMU off
+ b cpu_resume_mmu // Resume MMU, never returns
+ENDPROC(cpu_resume)
+
+ .align 3
+mpidr_hash_ptr:
+ /*
+ * offset of mpidr_hash symbol from current location
+ * used to obtain run-time mpidr_hash address with MMU off
+ */
+ .quad mpidr_hash - .
+/*
+ * physical address of identity mapped page tables
+ */
+ .type sleep_idmap_phys, #object
+ENTRY(sleep_idmap_phys)
+ .quad 0
+/*
+ * struct sleep_save_sp {
+ * phys_addr_t *save_ptr_stash;
+ * phys_addr_t save_ptr_stash_phys;
+ * };
+ */
+ .type sleep_save_sp, #object
+ENTRY(sleep_save_sp)
+ .space SLEEP_SAVE_SP_SZ // struct sleep_save_sp
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 9c93e126328c..7c868a2ac38b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -39,6 +39,7 @@
#include <asm/atomic.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
+#include <asm/cpu_ops.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -48,76 +49,34 @@
#include <asm/tlbflush.h>
#include <asm/ptrace.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/arm-ipi.h>
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
* where to place its SVC stack
*/
struct secondary_data secondary_data;
-volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
+ IPI_TIMER,
};
-static DEFINE_RAW_SPINLOCK(boot_lock);
-
-/*
- * Write secondary_holding_pen_release in a way that is guaranteed to be
- * visible to all observers, irrespective of whether they're taking part
- * in coherency or not. This is necessary for the hotplug code to work
- * reliably.
- */
-static void __cpuinit write_pen_release(u64 val)
-{
- void *start = (void *)&secondary_holding_pen_release;
- unsigned long size = sizeof(secondary_holding_pen_release);
-
- secondary_holding_pen_release = val;
- __flush_dcache_area(start, size);
-}
-
/*
* Boot a secondary CPU, and assign it the specified idle task.
* This also gives us the initial stack to use for this CPU.
*/
static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
{
- unsigned long timeout;
-
- /*
- * Set synchronisation state between this boot processor
- * and the secondary one
- */
- raw_spin_lock(&boot_lock);
-
- /*
- * Update the pen release flag.
- */
- write_pen_release(cpu_logical_map(cpu));
+ if (cpu_ops[cpu]->cpu_boot)
+ return cpu_ops[cpu]->cpu_boot(cpu);
- /*
- * Send an event, causing the secondaries to read pen_release.
- */
- sev();
-
- timeout = jiffies + (1 * HZ);
- while (time_before(jiffies, timeout)) {
- if (secondary_holding_pen_release == INVALID_HWID)
- break;
- udelay(10);
- }
-
- /*
- * Now the secondary core is starting up let it run its
- * calibrations, then wait for it to finish
- */
- raw_spin_unlock(&boot_lock);
-
- return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
+ return -EOPNOTSUPP;
}
static DECLARE_COMPLETION(cpu_running);
@@ -158,6 +117,11 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
return ret;
}
+static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+ store_cpu_topology(cpuid);
+}
+
/*
* This is the secondary CPU boot entry. We're using this CPUs
* idle thread stack, but a set of temporary page tables.
@@ -167,8 +131,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
struct mm_struct *mm = &init_mm;
unsigned int cpu = smp_processor_id();
- printk("CPU%u: Booted secondary processor\n", cpu);
-
/*
* All kernel threads share the same mm context; grab a
* reference and switch to it.
@@ -177,6 +139,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
current->active_mm = mm;
cpumask_set_cpu(cpu, mm_cpumask(mm));
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+ printk("CPU%u: Booted secondary processor\n", cpu);
+
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
@@ -187,17 +152,15 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
preempt_disable();
trace_hardirqs_off();
- /*
- * Let the primary processor know we're out of the
- * pen, then head off into the C entry point
- */
- write_pen_release(INVALID_HWID);
+ if (cpu_ops[cpu]->cpu_postboot)
+ cpu_ops[cpu]->cpu_postboot();
/*
- * Synchronise with the boot thread.
+ * Enable GIC and timers.
*/
- raw_spin_lock(&boot_lock);
- raw_spin_unlock(&boot_lock);
+ notify_cpu_starting(cpu);
+
+ smp_store_cpu_info(cpu);
/*
* OK, now it's safe to let the boot CPU continue. Wait for
@@ -207,11 +170,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
set_cpu_online(cpu, true);
complete(&cpu_running);
- /*
- * Enable GIC and timers.
- */
- notify_cpu_starting(cpu);
-
+ local_dbg_enable();
local_irq_enable();
local_fiq_enable();
@@ -221,42 +180,117 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
cpu_startup_entry(CPUHP_ONLINE);
}
-void __init smp_cpus_done(unsigned int max_cpus)
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_disable(unsigned int cpu)
{
- unsigned long bogosum = loops_per_jiffy * num_online_cpus();
+ /*
+ * If we don't have a cpu_die method, abort before we reach the point
+ * of no return. CPU0 may not have a cpu_ops, so test for it.
+ */
+ if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
+ return -EOPNOTSUPP;
- pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- num_online_cpus(), bogosum / (500000/HZ),
- (bogosum / (5000/HZ)) % 100);
+ /*
+ * We may need to abort a hot unplug for some other mechanism-specific
+ * reason.
+ */
+ if (cpu_ops[cpu]->cpu_disable)
+ return cpu_ops[cpu]->cpu_disable(cpu);
+
+ return 0;
}
-void __init smp_prepare_boot_cpu(void)
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ */
+int __cpu_disable(void)
{
-}
+ unsigned int cpu = smp_processor_id();
+ int ret;
-static void (*smp_cross_call)(const struct cpumask *, unsigned int);
+ ret = op_cpu_disable(cpu);
+ if (ret)
+ return ret;
-static const struct smp_enable_ops *enable_ops[] __initconst = {
- &smp_spin_table_ops,
- &smp_psci_ops,
- NULL,
-};
+ /*
+ * Take this CPU offline. Once we clear this, we can't return,
+ * and we must not schedule until we're ready to give up the cpu.
+ */
+ set_cpu_online(cpu, false);
-static const struct smp_enable_ops *smp_enable_ops[NR_CPUS];
+ /*
+ * OK - migrate IRQs away from this CPU
+ */
+ migrate_irqs();
-static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name)
-{
- const struct smp_enable_ops **ops = enable_ops;
+ /*
+ * Remove this CPU from the vm mask set of all processes.
+ */
+ clear_tasks_mm_cpumask(cpu);
- while (*ops) {
- if (!strcmp(name, (*ops)->name))
- return *ops;
+ return 0;
+}
+
+static DECLARE_COMPLETION(cpu_died);
- ops++;
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ */
+void __cpu_die(unsigned int cpu)
+{
+ if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+ pr_crit("CPU%u: cpu didn't die\n", cpu);
+ return;
}
+ pr_notice("CPU%u: shutdown\n", cpu);
+}
- return NULL;
+/*
+ * Called from the idle thread for the CPU which has been shut down.
+ *
+ * Note that we disable IRQs here, but do not re-enable them
+ * before returning to the caller. This is also the behaviour
+ * of the other hotplug-cpu capable cores, so presumably coming
+ * out of idle fixes this.
+ */
+void cpu_die(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ idle_task_exit();
+
+ local_irq_disable();
+
+ /* Tell __cpu_die() that this CPU is now safe to dispose of */
+ complete(&cpu_died);
+
+ /*
+ * Actually shutdown the CPU. This must never fail. The specific hotplug
+ * mechanism must perform all required cache maintenance to ensure that
+ * no dirty lines are lost in the process of shutting down the CPU.
+ */
+ cpu_ops[cpu]->cpu_die(cpu);
+
+ BUG();
}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+ unsigned long bogosum = loops_per_jiffy * num_online_cpus();
+
+ pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ num_online_cpus(), bogosum / (500000/HZ),
+ (bogosum / (5000/HZ)) % 100);
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+}
+
+static void (*smp_cross_call)(const struct cpumask *, unsigned int);
/*
* Enumerate the possible CPU set from the device tree and build the
@@ -265,9 +299,8 @@ static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name)
*/
void __init smp_init_cpus(void)
{
- const char *enable_method;
struct device_node *dn = NULL;
- int i, cpu = 1;
+ unsigned int i, cpu = 1;
bool bootcpu_valid = false;
while ((dn = of_find_node_by_type(dn, "cpu"))) {
@@ -336,25 +369,10 @@ void __init smp_init_cpus(void)
if (cpu >= NR_CPUS)
goto next;
- /*
- * We currently support only the "spin-table" enable-method.
- */
- enable_method = of_get_property(dn, "enable-method", NULL);
- if (!enable_method) {
- pr_err("%s: missing enable-method property\n",
- dn->full_name);
+ if (cpu_read_ops(dn, cpu) != 0)
goto next;
- }
-
- smp_enable_ops[cpu] = smp_get_enable_ops(enable_method);
- if (!smp_enable_ops[cpu]) {
- pr_err("%s: invalid enable-method property: %s\n",
- dn->full_name, enable_method);
- goto next;
- }
-
- if (smp_enable_ops[cpu]->init_cpu(dn, cpu))
+ if (cpu_ops[cpu]->cpu_init(dn, cpu))
goto next;
pr_debug("cpu logical map 0x%llx\n", hwid);
@@ -384,8 +402,12 @@ next:
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- int cpu, err;
- unsigned int ncores = num_possible_cpus();
+ int err;
+ unsigned int cpu, ncores = num_possible_cpus();
+
+ init_cpu_topology();
+
+ smp_store_cpu_info(smp_processor_id());
/*
* are we trying to boot more cores than exist?
@@ -412,10 +434,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (cpu == smp_processor_id())
continue;
- if (!smp_enable_ops[cpu])
+ if (!cpu_ops[cpu])
continue;
- err = smp_enable_ops[cpu]->prepare_cpu(cpu);
+ err = cpu_ops[cpu]->cpu_prepare(cpu);
if (err)
continue;
@@ -446,6 +468,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC, "Function call interrupts"),
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
+ S(IPI_TIMER, "Timer broadcast interrupts"),
};
void show_ipi_list(struct seq_file *p, int prec)
@@ -455,7 +478,7 @@ void show_ipi_list(struct seq_file *p, int prec)
for (i = 0; i < NR_IPI; i++) {
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE,
prec >= 4 ? " " : "");
- for_each_present_cpu(cpu)
+ for_each_online_cpu(cpu)
seq_printf(p, "%10u ",
__get_irq_stat(cpu, ipi_irqs[i]));
seq_printf(p, " %s\n", ipi_types[i]);
@@ -531,6 +554,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+ case IPI_TIMER:
+ irq_enter();
+ tick_receive_broadcast();
+ irq_exit();
+ break;
+#endif
+
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
break;
@@ -543,6 +574,13 @@ void smp_send_reschedule(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
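+/* Called by the clockevents broadcast code to tick the CPUs in @mask via IPI */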
+void tick_broadcast(const struct cpumask *mask)
+{
+ smp_cross_call(mask, IPI_TIMER);
+}
+#endif
+
void smp_send_stop(void)
{
unsigned long timeout;
diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c
deleted file mode 100644
index 0c533301be77..000000000000
--- a/arch/arm64/kernel/smp_psci.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * PSCI SMP initialisation
- *
- * Copyright (C) 2013 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-
-#include <asm/psci.h>
-#include <asm/smp_plat.h>
-
-static int __init smp_psci_init_cpu(struct device_node *dn, int cpu)
-{
- return 0;
-}
-
-static int __init smp_psci_prepare_cpu(int cpu)
-{
- int err;
-
- if (!psci_ops.cpu_on) {
- pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu);
- return -ENODEV;
- }
-
- err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen));
- if (err) {
- pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err);
- return err;
- }
-
- return 0;
-}
-
-const struct smp_enable_ops smp_psci_ops __initconst = {
- .name = "psci",
- .init_cpu = smp_psci_init_cpu,
- .prepare_cpu = smp_psci_prepare_cpu,
-};
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 7c35fa682f76..0347d38eea29 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -16,15 +16,38 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/delay.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+
+extern void secondary_holding_pen(void);
+volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
-static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu)
+/*
+ * Write secondary_holding_pen_release in a way that is guaranteed to be
+ * visible to all observers, irrespective of whether they're taking part
+ * in coherency or not. This is necessary for the hotplug code to work
+ * reliably.
+ */
+static void write_pen_release(u64 val)
+{
+ void *start = (void *)&secondary_holding_pen_release;
+ unsigned long size = sizeof(secondary_holding_pen_release);
+
+ secondary_holding_pen_release = val;
+ __flush_dcache_area(start, size);
+}
+
+
+static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu)
{
/*
* Determine the address from which the CPU is polling.
@@ -40,7 +63,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu)
return 0;
}
-static int __init smp_spin_table_prepare_cpu(int cpu)
+static int smp_spin_table_cpu_prepare(unsigned int cpu)
{
void **release_addr;
@@ -48,7 +71,16 @@ static int __init smp_spin_table_prepare_cpu(int cpu)
return -ENODEV;
release_addr = __va(cpu_release_addr[cpu]);
- release_addr[0] = (void *)__pa(secondary_holding_pen);
+
+ /*
+ * We write the release address as LE regardless of the native
+ * endianness of the kernel. Therefore, any boot-loader that
+ * reads this address must convert it to its own endianness
+ * before jumping. This is mandated by the boot protocol.
+ */
+ release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen));
+
__flush_dcache_area(release_addr, sizeof(release_addr[0]));
/*
@@ -59,8 +91,24 @@ static int __init smp_spin_table_prepare_cpu(int cpu)
return 0;
}
-const struct smp_enable_ops smp_spin_table_ops __initconst = {
+static int smp_spin_table_cpu_boot(unsigned int cpu)
+{
+ /*
+ * Update the pen release flag.
+ */
+ write_pen_release(cpu_logical_map(cpu));
+
+ /*
+ * Send an event, causing the secondaries to read pen_release.
+ */
+ sev();
+
+ return 0;
+}
+
+const struct cpu_operations smp_spin_table_ops = {
.name = "spin-table",
- .init_cpu = smp_spin_table_init_cpu,
- .prepare_cpu = smp_spin_table_prepare_cpu,
+ .cpu_init = smp_spin_table_cpu_init,
+ .cpu_prepare = smp_spin_table_cpu_prepare,
+ .cpu_boot = smp_spin_table_cpu_boot,
};
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 048334bb2651..55437ba1f5a4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -35,7 +35,7 @@
* ldp x29, x30, [sp]
* add sp, sp, #0x10
*/
-int unwind_frame(struct stackframe *frame)
+int notrace unwind_frame(struct stackframe *frame)
{
unsigned long high, low;
unsigned long fp = frame->fp;
@@ -43,7 +43,7 @@ int unwind_frame(struct stackframe *frame)
low = frame->sp;
high = ALIGN(low, THREAD_SIZE);
- if (fp < low || fp > high || fp & 0xf)
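+ /* keep the whole frame record within the current stack bounds */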
+ if (fp < low || fp > high - 0x18 || fp & 0xf)
return -EINVAL;
frame->sp = fp + 0x10;
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
new file mode 100644
index 000000000000..1fa9ce4afd8f
--- /dev/null
+++ b/arch/arm64/kernel/suspend.c
@@ -0,0 +1,140 @@
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/debug-monitors.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+extern int __cpu_suspend(unsigned long);
+/*
+ * This is called by __cpu_suspend() to save the state, and do whatever
+ * flushing is required to ensure that when the CPU goes to sleep we have
+ * the necessary data available when the caches are not searched.
+ *
+ * @arg: Argument to pass to suspend operations
+ * @ptr: CPU context virtual address
+ * @save_ptr: address of the location where the context physical address
+ * must be saved
+ */
+int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr,
+ phys_addr_t *save_ptr)
+{
+ int cpu = smp_processor_id();
+
+ *save_ptr = virt_to_phys(ptr);
+
+ cpu_do_suspend(ptr);
+ /*
+ * Only flush the context that must be retrieved with the MMU
+ * off. VA primitives ensure the flush is applied to all
+ * cache levels so context is pushed to DRAM.
+ */
+ __flush_dcache_area(ptr, sizeof(*ptr));
+ __flush_dcache_area(save_ptr, sizeof(*save_ptr));
+
+ return cpu_ops[cpu]->cpu_suspend(arg);
+}
+
+/*
+ * This hook is provided so that cpu_suspend code can restore HW
+ * breakpoints as early as possible in the resume path, before re-enabling
+ * debug exceptions. This cannot be done from a CPU PM notifier since by the
+ * time the notifier runs debug exceptions might have been enabled already,
+ * with the HW breakpoint registers still in an unknown state.
+ */
+void (*hw_breakpoint_restore)(void *);
+void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+{
+ /* Prevent multiple restore hook initializations */
+ if (WARN_ON(hw_breakpoint_restore))
+ return;
+ hw_breakpoint_restore = hw_bp_restore;
+}
+
+/**
+ * cpu_suspend() - suspend the calling CPU via its cpu_ops suspend method
+ *
+ * @arg: argument to pass to the finisher function
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+int cpu_suspend(unsigned long arg)
+{
+ struct mm_struct *mm = current->active_mm;
+ int ret, cpu = smp_processor_id();
+ unsigned long flags;
+
+ /*
+ * If cpu_ops have not been registered or suspend
+ * has not been initialized, cpu_suspend call fails early.
+ */
+ if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
+ return -EOPNOTSUPP;
+
+ /*
+ * From this point debug exceptions are disabled to prevent
+ * updates to mdscr register (saved and restored along with
+ * general purpose registers) from kernel debuggers.
+ */
+ local_dbg_save(flags);
+
+ /*
+ * The mm context is saved on the stack; it will be restored when
+ * the cpu comes out of reset through the identity-mapped
+ * page tables, so that the thread address space is properly
+ * set up on function return.
+ */
+ ret = __cpu_suspend(arg);
+ if (ret == 0) {
+ cpu_switch_mm(mm->pgd, mm);
+ flush_tlb_all();
+
+ /*
+ * Restore per-cpu offset before any kernel
+ * subsystem relying on it has a chance to run.
+ */
+ set_my_cpu_offset(per_cpu_offset(cpu));
+
+ /*
+ * Restore HW breakpoint registers to sane values
+ * before debug exceptions are possibly reenabled
+ * through local_dbg_restore.
+ */
+ if (hw_breakpoint_restore)
+ hw_breakpoint_restore(NULL);
+ }
+
+ /*
+ * Restore pstate flags. The OS lock and mdscr have already been
+ * restored, so from this point onwards, debugging is fully
+ * re-enabled if it was enabled when the core started shutdown.
+ */
+ local_dbg_restore(flags);
+
+ return ret;
+}
+
+extern struct sleep_save_sp sleep_save_sp;
+extern phys_addr_t sleep_idmap_phys;
+
+static int cpu_suspend_init(void)
+{
+ void *ctx_ptr;
+
+ /* ctx_ptr is an array of physical addresses */
+ ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
+
+ if (WARN_ON(!ctx_ptr))
+ return -ENOMEM;
+
+ sleep_save_sp.save_ptr_stash = ctx_ptr;
+ sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
+ sleep_idmap_phys = virt_to_phys(idmap_pg_dir);
+ __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
+ __flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys));
+
+ return 0;
+}
+early_initcall(cpu_suspend_init);
diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S
index a1b19ed7467c..423a5b3fc2be 100644
--- a/arch/arm64/kernel/sys32.S
+++ b/arch/arm64/kernel/sys32.S
@@ -59,48 +59,48 @@ ENDPROC(compat_sys_fstatfs64_wrapper)
* extension.
*/
compat_sys_pread64_wrapper:
- orr x3, x4, x5, lsl #32
+ regs_to_64 x3, x4, x5
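+ /* regs_to_64 (asm/assembler.h) is endianness-aware, unlike the old orr */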
b sys_pread64
ENDPROC(compat_sys_pread64_wrapper)
compat_sys_pwrite64_wrapper:
- orr x3, x4, x5, lsl #32
+ regs_to_64 x3, x4, x5
b sys_pwrite64
ENDPROC(compat_sys_pwrite64_wrapper)
compat_sys_truncate64_wrapper:
- orr x1, x2, x3, lsl #32
+ regs_to_64 x1, x2, x3
b sys_truncate
ENDPROC(compat_sys_truncate64_wrapper)
compat_sys_ftruncate64_wrapper:
- orr x1, x2, x3, lsl #32
+ regs_to_64 x1, x2, x3
b sys_ftruncate
ENDPROC(compat_sys_ftruncate64_wrapper)
compat_sys_readahead_wrapper:
- orr x1, x2, x3, lsl #32
+ regs_to_64 x1, x2, x3
mov w2, w4
b sys_readahead
ENDPROC(compat_sys_readahead_wrapper)
compat_sys_fadvise64_64_wrapper:
mov w6, w1
- orr x1, x2, x3, lsl #32
- orr x2, x4, x5, lsl #32
+ regs_to_64 x1, x2, x3
+ regs_to_64 x2, x4, x5
mov w3, w6
b sys_fadvise64_64
ENDPROC(compat_sys_fadvise64_64_wrapper)
compat_sys_sync_file_range2_wrapper:
- orr x2, x2, x3, lsl #32
- orr x3, x4, x5, lsl #32
+ regs_to_64 x2, x2, x3
+ regs_to_64 x3, x4, x5
b sys_sync_file_range2
ENDPROC(compat_sys_sync_file_range2_wrapper)
compat_sys_fallocate_wrapper:
- orr x2, x2, x3, lsl #32
- orr x3, x4, x5, lsl #32
+ regs_to_64 x2, x2, x3
+ regs_to_64 x3, x4, x5
b sys_fallocate
ENDPROC(compat_sys_fallocate_wrapper)
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index a551f88ae2c1..03dc3718eb13 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -68,12 +68,6 @@ unsigned long long notrace sched_clock(void)
return arch_timer_read_counter() * sched_clock_mult;
}
-int read_current_timer(unsigned long *timer_value)
-{
- *timer_value = arch_timer_read_counter();
- return 0;
-}
-
void __init time_init(void)
{
u32 arch_timer_rate;
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 000000000000..db8bb29c3852
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,590 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2011,2013,2014 Linaro Limited.
+ *
+ * Based on the arm32 version written by Vincent Guittot in turn based on
+ * arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/cputype.h>
+#include <asm/topology.h>
+#include <asm/smp_plat.h>
+
+
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heterogeneous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_power field so the scheduler can
+ * take this difference into account during load balance. A per cpu structure
+ * is preferred because each CPU updates its own cpu_power field during the
+ * load balance except for idle cores. One idle core is selected to run the
+ * rebalance_domains for all idle cores and the cpu_power can be updated
+ * during this sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+ per_cpu(cpu_scale, cpu) = power;
+}
+
+static int __init get_cpu_for_node(struct device_node *node)
+{
+ struct device_node *cpu_node;
+ int cpu;
+
+ cpu_node = of_parse_phandle(node, "cpu", 0);
+ if (!cpu_node)
+ return -1;
+
+ for_each_possible_cpu(cpu) {
+ if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+ of_node_put(cpu_node);
+ return cpu;
+ }
+ }
+
+ pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+
+ of_node_put(cpu_node);
+ return -1;
+}
+
+static int __init parse_core(struct device_node *core, int cluster_id,
+ int core_id)
+{
+ char name[10];
+ bool leaf = true;
+ int i = 0;
+ int cpu;
+ struct device_node *t;
+
+ do {
+ snprintf(name, sizeof(name), "thread%d", i);
+ t = of_get_child_by_name(core, name);
+ if (t) {
+ leaf = false;
+ cpu = get_cpu_for_node(t);
+ if (cpu >= 0) {
+ cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ cpu_topology[cpu].thread_id = i;
+ } else {
+ pr_err("%s: Can't get CPU for thread\n",
+ t->full_name);
+ of_node_put(t);
+ return -EINVAL;
+ }
+ of_node_put(t);
+ }
+ i++;
+ } while (t);
+
+ cpu = get_cpu_for_node(core);
+ if (cpu >= 0) {
+ if (!leaf) {
+ pr_err("%s: Core has both threads and CPU\n",
+ core->full_name);
+ return -EINVAL;
+ }
+
+ cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ } else if (leaf) {
+ pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __init parse_cluster(struct device_node *cluster, int depth)
+{
+ char name[10];
+ bool leaf = true;
+ bool has_cores = false;
+ struct device_node *c;
+ static int cluster_id __initdata;
+ int core_id = 0;
+ int i, ret;
+
+ /*
+ * First check for child clusters; we currently ignore any
+ * information about the nesting of clusters and present the
+ * scheduler with a flat list of them.
+ */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "cluster%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ leaf = false;
+ ret = parse_cluster(c, depth + 1);
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
+ i++;
+ } while (c);
+
+ /* Now check for cores */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "core%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ has_cores = true;
+
+ if (depth == 0) {
+ pr_err("%s: cpu-map children should be clusters\n",
+ c->full_name);
+ of_node_put(c);
+ return -EINVAL;
+ }
+
+ if (leaf) {
+ ret = parse_core(c, cluster_id, core_id++);
+ } else {
+ pr_err("%s: Non-leaf cluster with core %s\n",
+ cluster->full_name, name);
+ ret = -EINVAL;
+ }
+
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
+ i++;
+ } while (c);
+
+ if (leaf && !has_cores)
+ pr_warn("%s: empty cluster\n", cluster->full_name);
+
+ if (leaf)
+ cluster_id++;
+
+ return 0;
+}
+
+struct cpu_efficiency {
+ const char *compatible;
+ unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processor.
+ * The efficiency value must fit in 20 bits and the final
+ * cpu_scale value must be in the range
+ * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+ { "arm,cortex-a57", 3891 },
+ { "arm,cortex-a53", 2048 },
+ { NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu) __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+
+/*
+ * Iterate over all CPUs' descriptors in the DT and compute the efficiency
+ * (as per table_efficiency). Also calculate a middle efficiency,
+ * as close as possible to (max{eff_i} + min{eff_i}) / 2.
+ * This is later used to scale the cpu_power field such that an
+ * 'average' CPU is of middle power. Also see the comments near
+ * table_efficiency[] and update_cpu_power().
+ */
+static int __init parse_dt_topology(void)
+{
+ struct device_node *cn, *map;
+ int ret = 0;
+ int cpu;
+
+ cn = of_find_node_by_path("/cpus");
+ if (!cn) {
+ pr_err("No CPU information found in DT\n");
+ return 0;
+ }
+
+ /*
+ * When topology is provided cpu-map is essentially a root
+ * cluster with restricted subnodes.
+ */
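+ /*
+ * Illustrative cpu-map snippet (names per the DT topology binding):
+ *
+ * cpu-map {
+ * cluster0 {
+ * core0 { cpu = <&CPU0>; };
+ * core1 { cpu = <&CPU1>; };
+ * };
+ * };
+ */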
+ map = of_get_child_by_name(cn, "cpu-map");
+ if (!map)
+ goto out;
+
+ ret = parse_cluster(map, 0);
+ if (ret != 0)
+ goto out_map;
+
+ /*
+ * Check that all cores are in the topology; the SMP code will
+ * only mark cores described in the DT as possible.
+ */
+ for_each_possible_cpu(cpu) {
+ if (cpu_topology[cpu].cluster_id == -1) {
+ pr_err("CPU%d: No topology information specified\n",
+ cpu);
+ ret = -EINVAL;
+ }
+ }
+
+out_map:
+ of_node_put(map);
+out:
+ of_node_put(cn);
+ return ret;
+}
+
+static void __init parse_dt_cpu_power(void)
+{
+ const struct cpu_efficiency *cpu_eff;
+ struct device_node *cn;
+ unsigned long min_capacity = ULONG_MAX;
+ unsigned long max_capacity = 0;
+ unsigned long capacity = 0;
+ int cpu;
+
+ __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+ GFP_NOWAIT);
+
+ for_each_possible_cpu(cpu) {
+ const u32 *rate;
+ int len;
+
+ /* Too early to use cpu->of_node */
+ cn = of_get_cpu_node(cpu, NULL);
+ if (!cn) {
+ pr_err("Missing device node for CPU %d\n", cpu);
+ continue;
+ }
+
+ for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+ if (of_device_is_compatible(cn, cpu_eff->compatible))
+ break;
+
+ if (cpu_eff->compatible == NULL) {
+ pr_warn("%s: Unknown CPU type\n", cn->full_name);
+ continue;
+ }
+
+ rate = of_get_property(cn, "clock-frequency", &len);
+ if (!rate || len != 4) {
+ pr_err("%s: Missing clock-frequency property\n",
+ cn->full_name);
+ continue;
+ }
+
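+ /* capacity ~= (clock rate, Hz >> 20 ~ MHz) * relative efficiency */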
+ capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+ /* Save min capacity of the system */
+ if (capacity < min_capacity)
+ min_capacity = capacity;
+
+ /* Save max capacity of the system */
+ if (capacity > max_capacity)
+ max_capacity = capacity;
+
+ cpu_capacity(cpu) = capacity;
+ }
+
+	/*
+	 * If min and max capacities are equal we bypass the update of the
+	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
+	 * compute a middle_capacity factor that will ensure that the capacity
+	 * of an 'average' CPU of the system will be as close as possible to
+	 * SCHED_POWER_SCALE, which is the default value, but with the
+	 * constraint explained near table_efficiency[].
+	 */
+ if (min_capacity == max_capacity)
+ return;
+ else if (4 * max_capacity < (3 * (max_capacity + min_capacity)))
+ middle_capacity = (min_capacity + max_capacity)
+ >> (SCHED_POWER_SHIFT+1);
+ else
+ middle_capacity = ((max_capacity / 3)
+ >> (SCHED_POWER_SHIFT-1)) + 1;
+}
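
As a rough illustration (not part of this patch), the following standalone
sketch reproduces the arithmetic of parse_dt_cpu_power() for a hypothetical
system with one Cortex-A57 at 1.8 GHz and one Cortex-A53 at 1.3 GHz; the
clock rates and SCHED_POWER_SHIFT == 10 are assumptions, not values taken
from this series:

	#include <stdio.h>

	#define SCHED_POWER_SHIFT	10	/* assumed scheduler default */

	int main(void)
	{
		/* capacity = (clock-frequency >> 20) * efficiency */
		unsigned long a57 = (1800000000UL >> 20) * 3891;
		unsigned long a53 = (1300000000UL >> 20) * 2048;
		unsigned long min = a53, max = a57, middle;

		/* Same branch selection as parse_dt_cpu_power() above */
		if (4 * max < 3 * (max + min))
			middle = (min + max) >> (SCHED_POWER_SHIFT + 1);
		else
			middle = ((max / 3) >> (SCHED_POWER_SHIFT - 1)) + 1;

		/* prints a57=6676956 a53=2537472 middle=4499 */
		printf("a57=%lu a53=%lu middle=%lu\n", a57, a53, middle);
		return 0;
	}

update_cpu_power() would then scale these capacities to roughly 1484 and
564, so an 'average' CPU lands near SCHED_POWER_SCALE (1024), as the
comment near table_efficiency[] requires.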
+
+/*
+ * Look up the custom capacity of a CPU in the __cpu_capacity table during
+ * boot. Updating all CPUs is O(n^2) on a heterogeneous system, but the
+ * function returns immediately on an SMP (homogeneous) system.
+ */
+static void update_cpu_power(unsigned int cpu)
+{
+ if (!cpu_capacity(cpu))
+ return;
+
+ set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+
+ pr_info("CPU%u: update cpu_power %lu\n",
+ cpu, arch_scale_freq_power(NULL, cpu));
+}
+
+/*
+ * cpu topology table
+ */
+struct cpu_topology cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+ struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+ int cpu;
+
+ if (cpuid_topo->cluster_id == -1) {
+ /*
+ * DT does not contain topology information for this cpu.
+ */
+ pr_debug("CPU%u: No topology information configured\n", cpuid);
+ return;
+ }
+
+ /* update core and thread sibling masks */
+ for_each_possible_cpu(cpu) {
+ cpu_topo = &cpu_topology[cpu];
+
+ if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+ if (cpuid_topo->core_id != cpu_topo->core_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+ }
+}
+
+#ifdef CONFIG_SCHED_HMP
+
+/*
+ * Retrieve logical cpu index corresponding to a given MPIDR[23:0]
+ * - mpidr: MPIDR[23:0] to be used for the look-up
+ *
+ * Returns the cpu logical index or -EINVAL on look-up error
+ */
+static inline int get_logical_index(u32 mpidr)
+{
+ int cpu;
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (cpu_logical_map(cpu) == mpidr)
+ return cpu;
+ return -EINVAL;
+}
+
+static const char * const little_cores[] = {
+ "arm,cortex-a53",
+ NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+ const char * const *lc;
+ for (lc = little_cores; *lc; lc++)
+ if (of_device_is_compatible(cn, *lc))
+ return true;
+ return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+ struct cpumask *slow)
+{
+ struct device_node *cn = NULL;
+ int cpu;
+
+ cpumask_clear(fast);
+ cpumask_clear(slow);
+
+ /*
+ * Use the config options if they are given. This helps testing
+ * HMP scheduling on systems without a big.LITTLE architecture.
+ */
+ if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+ if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+ WARN(1, "Failed to parse HMP fast cpu mask!\n");
+ if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+ WARN(1, "Failed to parse HMP slow cpu mask!\n");
+ return;
+ }
+
+	/*
+	 * Otherwise, parse the device tree for little cores.
+	 */
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+ const u32 *mpidr;
+ int len;
+
+ mpidr = of_get_property(cn, "reg", &len);
+ if (!mpidr || len != 8) {
+ pr_err("%s missing reg property\n", cn->full_name);
+ continue;
+ }
+
+ cpu = get_logical_index(be32_to_cpup(mpidr+1));
+ if (cpu == -EINVAL) {
+ pr_err("couldn't get logical index for mpidr %x\n",
+ be32_to_cpup(mpidr+1));
+ break;
+ }
+
+ if (is_little_cpu(cn))
+ cpumask_set_cpu(cpu, slow);
+ else
+ cpumask_set_cpu(cpu, fast);
+ }
+
+ if (!cpumask_empty(fast) && !cpumask_empty(slow))
+ return;
+
+	/*
+	 * We didn't find both big and little cores, so label all cores
+	 * fast; this keeps the system running, with all cores treated
+	 * equally.
+	 */
+ cpumask_setall(fast);
+ cpumask_clear(slow);
+}
+
+struct cpumask hmp_slow_cpu_mask;
+
+void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
+{
+ struct cpumask hmp_fast_cpu_mask;
+ struct hmp_domain *domain;
+
+ arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
+
+	/*
+	 * Initialize hmp_domains.
+	 * Must be ordered with respect to compute capacity;
+	 * fastest domain at head of list.
+	 */
+	if (!cpumask_empty(&hmp_slow_cpu_mask)) {
+		domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+		if (WARN_ON(!domain))
+			return;
+		cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
+		cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+		list_add(&domain->hmp_domains, hmp_domains_list);
+	}
+	domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+	if (WARN_ON(!domain))
+		return;
+ cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+}
+#endif /* CONFIG_SCHED_HMP */
+
+/*
+ * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
+ * @socket_id: cluster HW identifier
+ * @cluster_mask: the cpumask location to be initialized, modified by the
+ * function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+ int cpu;
+
+ if (!cluster_mask)
+ return -EINVAL;
+
+ for_each_online_cpu(cpu) {
+ if (socket_id == topology_physical_package_id(cpu)) {
+ cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
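
As a usage illustration (a minimal kernel-context sketch, assuming the
declaration of cluster_to_logical_mask() is in scope; the helper name
dump_cluster0 is hypothetical):

	#include <linux/cpumask.h>
	#include <linux/printk.h>

	/* Print the logical CPUs of hardware cluster 0, if any are online. */
	static void dump_cluster0(void)
	{
		cpumask_t mask;
		int cpu;

		if (cluster_to_logical_mask(0, &mask))
			return;		/* no online CPU reported socket 0 */

		for_each_cpu(cpu, &mask)
			pr_info("cluster 0 contains CPU%d\n", cpu);
	}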
+
+void store_cpu_topology(unsigned int cpuid)
+{
+ update_siblings_masks(cpuid);
+ update_cpu_power(cpuid);
+}
+
+static void __init reset_cpu_topology(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+ cpu_topo->thread_id = -1;
+ cpu_topo->core_id = 0;
+ cpu_topo->cluster_id = -1;
+
+ cpumask_clear(&cpu_topo->core_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
+ cpumask_clear(&cpu_topo->thread_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+ }
+}
+
+static void __init reset_cpu_power(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ set_power_scale(cpu, SCHED_POWER_SCALE);
+}
+
+void __init init_cpu_topology(void)
+{
+ reset_cpu_topology();
+
+ /*
+ * Discard anything that was parsed if we hit an error so we
+ * don't use partial information.
+ */
+ if (parse_dt_topology())
+ reset_cpu_topology();
+
+ reset_cpu_power();
+ parse_dt_cpu_power();
+}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f30852d28590..7ffadddb645d 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -32,6 +32,7 @@
#include <linux/syscalls.h>
#include <asm/atomic.h>
+#include <asm/debug-monitors.h>
#include <asm/traps.h>
#include <asm/stacktrace.h>
#include <asm/exception.h>
@@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
siginfo_t info;
void __user *pc = (void __user *)instruction_pointer(regs);
-#ifdef CONFIG_COMPAT
/* check for AArch32 breakpoint instructions */
- if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0)
+ if (!aarch32_break_handler(regs))
return;
-#endif
if (show_unhandled_signals && unhandled_signal(current, SIGILL) &&
printk_ratelimit()) {
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 0ea7a22bcdf2..84cafbc3eb54 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -58,7 +58,10 @@ static struct page *vectors_page[1];
static int alloc_vectors_page(void)
{
extern char __kuser_helper_start[], __kuser_helper_end[];
+ extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
+
int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+ int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
unsigned long vpage;
vpage = get_zeroed_page(GFP_ATOMIC);
@@ -72,7 +75,7 @@ static int alloc_vectors_page(void)
/* sigreturn code */
memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET,
- aarch32_sigret_code, sizeof(aarch32_sigret_code));
+ __aarch32_sigret_code_start, sigret_sz);
flush_icache_range(vpage, vpage + PAGE_SIZE);
vectors_page[0] = virt_to_page(vpage);
@@ -103,49 +106,31 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
static int __init vdso_init(void)
{
- struct page *pg;
- char *vbase;
- int i, ret = 0;
+ int i;
+
+ if (memcmp(&vdso_start, "\177ELF", 4)) {
+ pr_err("vDSO is not a valid ELF object!\n");
+ return -EINVAL;
+ }
vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n",
vdso_pages + 1, vdso_pages, 1L, &vdso_start);
/* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1),
+ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
GFP_KERNEL);
- if (vdso_pagelist == NULL) {
- pr_err("Failed to allocate vDSO pagelist!\n");
+ if (vdso_pagelist == NULL)
return -ENOMEM;
- }
/* Grab the vDSO code pages. */
- for (i = 0; i < vdso_pages; i++) {
- pg = virt_to_page(&vdso_start + i*PAGE_SIZE);
- ClearPageReserved(pg);
- get_page(pg);
- vdso_pagelist[i] = pg;
- }
-
- /* Sanity check the shared object header. */
- vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL);
- if (vbase == NULL) {
- pr_err("Failed to map vDSO pagelist!\n");
- return -ENOMEM;
- } else if (memcmp(vbase, "\177ELF", 4)) {
- pr_err("vDSO is not a valid ELF object!\n");
- ret = -EINVAL;
- goto unmap;
- }
+ for (i = 0; i < vdso_pages; i++)
+ vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE);
/* Grab the vDSO data page. */
- pg = virt_to_page(vdso_data);
- get_page(pg);
- vdso_pagelist[i] = pg;
+ vdso_pagelist[i] = virt_to_page(vdso_data);
-unmap:
- vunmap(vbase);
- return ret;
+ return 0;
}
arch_initcall(vdso_init);
@@ -153,11 +138,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp)
{
struct mm_struct *mm = current->mm;
- unsigned long vdso_base, vdso_mapping_len;
+ unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
int ret;
+ vdso_text_len = vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
- vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT;
+ vdso_mapping_len = vdso_text_len + PAGE_SIZE;
down_write(&mm->mmap_sem);
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
@@ -167,35 +153,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
}
mm->context.vdso = (void *)vdso_base;
- ret = install_special_mapping(mm, vdso_base, vdso_mapping_len,
+ ret = install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso_pagelist);
- if (ret) {
- mm->context.vdso = NULL;
+ if (ret)
+ goto up_fail;
+
+ vdso_base += vdso_text_len;
+ ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ VM_READ|VM_MAYREAD,
+ vdso_pagelist + vdso_pages);
+ if (ret)
goto up_fail;
- }
-up_fail:
up_write(&mm->mmap_sem);
+ return 0;
+up_fail:
+ mm->context.vdso = NULL;
+ up_write(&mm->mmap_sem);
return ret;
}
const char *arch_vma_name(struct vm_area_struct *vma)
{
+ unsigned long vdso_text;
+
+ if (!vma->vm_mm)
+ return NULL;
+
+ vdso_text = (unsigned long)vma->vm_mm->context.vdso;
+
/*
* We can re-use the vdso pointer in mm_context_t for identifying
* the vectors page for compat applications. The vDSO will always
* sit above TASK_UNMAPPED_BASE and so we don't need to worry about
* it conflicting with the vectors base.
*/
- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) {
+ if (vma->vm_start == vdso_text) {
#ifdef CONFIG_COMPAT
if (vma->vm_start == AARCH32_VECTORS_BASE)
return "[vectors]";
#endif
return "[vdso]";
+ } else if (vma->vm_start == (vdso_text + (vdso_pages << PAGE_SHIFT))) {
+ return "[vvar]";
}
return NULL;
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 6d20b7d162d8..84b942612051 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -47,9 +47,9 @@ $(obj-vdso): %.o: %.S
$(call if_changed_dep,vdsoas)
# Actual build commands
-quiet_cmd_vdsold = VDSOL $@
+quiet_cmd_vdsold = VDSOL $@
cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
-quiet_cmd_vdsoas = VDSOA $@
+quiet_cmd_vdsoas = VDSOA $@
cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
# Install commands for the unstripped file
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3fae2be8b016..55d0e035205f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -17,6 +17,19 @@ ENTRY(stext)
jiffies = jiffies_64;
+#define HYPERVISOR_TEXT \
+ /* \
+ * Force the alignment to be compatible with \
+ * the vectors requirements \
+ */ \
+ . = ALIGN(2048); \
+ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
+ *(.hyp.idmap.text) \
+ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \
+ VMLINUX_SYMBOL(__hyp_text_start) = .; \
+ *(.hyp.text) \
+ VMLINUX_SYMBOL(__hyp_text_end) = .;
+
SECTIONS
{
/*
@@ -41,7 +54,6 @@ SECTIONS
}
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
- *(.smp.pen.text)
__exception_text_start = .;
*(.exception.text)
__exception_text_end = .;
@@ -49,6 +61,7 @@ SECTIONS
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
+ HYPERVISOR_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
@@ -56,7 +69,8 @@ SECTIONS
}
RO_DATA(PAGE_SIZE)
-
+ EXCEPTION_TABLE(8)
+ NOTES
_etext = .; /* End of text and rodata section */
. = ALIGN(PAGE_SIZE);
@@ -82,45 +96,29 @@ SECTIONS
PERCPU_SECTION(64)
__init_end = .;
- . = ALIGN(THREAD_SIZE);
- __data_loc = .;
-
- .data : AT(__data_loc) {
- _data = .; /* address in memory */
- _sdata = .;
-
- /*
- * first, the init task union, aligned
- * to an 8192 byte boundary.
- */
- INIT_TASK_DATA(THREAD_SIZE)
- NOSAVE_DATA
- CACHELINE_ALIGNED_DATA(64)
- READ_MOSTLY_DATA(64)
-
- /*
- * The exception fixup table (might need resorting at runtime)
- */
- . = ALIGN(32);
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
-
- /*
- * and the usual data section
- */
- DATA_DATA
- CONSTRUCTORS
-
- _edata = .;
- }
- _edata_loc = __data_loc + SIZEOF(.data);
- NOTES
+ . = ALIGN(PAGE_SIZE);
+ _data = .;
+ _sdata = .;
+ RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
+ _edata = .;
BSS_SECTION(0, 0, 0)
+
+ . = ALIGN(PAGE_SIZE);
+ idmap_pg_dir = .;
+ . += IDMAP_DIR_SIZE;
+ swapper_pg_dir = .;
+ . += SWAPPER_DIR_SIZE;
+
_end = .;
STABS_DEBUG
.comment 0 : { *(.comment) }
}
+
+/*
+ * The HYP init code can't be more than a page long.
+ */
+ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end),
+ "HYP init code too big")
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
new file mode 100644
index 000000000000..8ba85e9ea388
--- /dev/null
+++ b/arch/arm64/kvm/Kconfig
@@ -0,0 +1,63 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ bool "Kernel-based Virtual Machine (KVM) support"
+ select MMU_NOTIFIER
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select KVM_MMIO
+ select KVM_ARM_HOST
+ select KVM_ARM_VGIC
+ select KVM_ARM_TIMER
+ ---help---
+ Support hosting virtualized guest machines.
+
+ If unsure, say N.
+
+config KVM_ARM_HOST
+ bool
+ ---help---
+ Provides host support for ARM processors.
+
+config KVM_ARM_MAX_VCPUS
+	int "Maximum number of supported virtual CPUs per VM"
+ depends on KVM_ARM_HOST
+ default 4
+ help
+	  Static maximum number of virtual CPUs supported per VM.
+
+ If you choose a high number, the vcpu structures will be quite
+ large, so only choose a reasonable number that you expect to
+ actually use.
+
+config KVM_ARM_VGIC
+ bool
+ depends on KVM_ARM_HOST && OF
+ select HAVE_KVM_IRQCHIP
+ ---help---
+	  Adds support for a hardware-assisted, in-kernel GIC emulation.
+
+config KVM_ARM_TIMER
+ bool
+ depends on KVM_ARM_VGIC
+ ---help---
+ Adds support for the Architected Timers in virtual machines.
+
+endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
new file mode 100644
index 000000000000..32a096174b94
--- /dev/null
+++ b/arch/arm64/kvm/Makefile
@@ -0,0 +1,27 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+KVM=../../../virt/kvm
+ARM=../../../arch/arm/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
+
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
+kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c
new file mode 100644
index 000000000000..124418d17049
--- /dev/null
+++ b/arch/arm64/kvm/emulate.c
@@ -0,0 +1,158 @@
+/*
+ * (not much of an) Emulation layer for 32bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+
+/*
+ * stolen from arch/arm/kernel/opcodes.c
+ *
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+ 0xF0F0, /* EQ == Z set */
+ 0x0F0F, /* NE */
+ 0xCCCC, /* CS == C set */
+ 0x3333, /* CC */
+ 0xFF00, /* MI == N set */
+ 0x00FF, /* PL */
+ 0xAAAA, /* VS == V set */
+ 0x5555, /* VC */
+ 0x0C0C, /* HI == C set && Z clear */
+ 0xF3F3, /* LS == C clear || Z set */
+ 0xAA55, /* GE == (N==V) */
+ 0x55AA, /* LT == (N!=V) */
+ 0x0A05, /* GT == (!Z && (N==V)) */
+ 0xF5FA, /* LE == (Z || (N!=V)) */
+ 0xFFFF, /* AL always */
+ 0 /* NV */
+};
+
+static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+{
+ u32 esr = kvm_vcpu_get_hsr(vcpu);
+
+ if (esr & ESR_EL2_CV)
+ return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT;
+
+ return -1;
+}
+
+/*
+ * Check if a trapped instruction should have been executed or not.
+ */
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+{
+ unsigned long cpsr;
+ u32 cpsr_cond;
+ int cond;
+
+ /* Top two bits non-zero? Unconditional. */
+ if (kvm_vcpu_get_hsr(vcpu) >> 30)
+ return true;
+
+ /* Is condition field valid? */
+ cond = kvm_vcpu_get_condition(vcpu);
+ if (cond == 0xE)
+ return true;
+
+ cpsr = *vcpu_cpsr(vcpu);
+
+ if (cond < 0) {
+ /* This can happen in Thumb mode: examine IT state. */
+ unsigned long it;
+
+ it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+ /* it == 0 => unconditional. */
+ if (it == 0)
+ return true;
+
+ /* The cond for this insn works out as the top 4 bits. */
+ cond = (it >> 4);
+ }
+
+ cpsr_cond = cpsr >> 28;
+
+ if (!((cc_map[cond] >> cpsr_cond) & 1))
+ return false;
+
+ return true;
+}
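
To see how the cc_map lookup works, here is a standalone worked example
(an illustration only, with a hand-picked flag value): condition EQ is
index 0, and with the guest's NZCV flags reading 0b0100 (Z set) the
instruction passes its condition check:

	#include <stdio.h>

	static const unsigned short cc_map_eq = 0xF0F0;	/* EQ row of cc_map */

	int main(void)
	{
		unsigned int cpsr_cond = 0x4;	/* NZCV = 0100: Z set */

		/* Same test as kvm_condition_valid32() above; prints 1 */
		printf("EQ passes with Z set: %d\n",
		       (cc_map_eq >> cpsr_cond) & 1);
		return 0;
	}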
+
+/**
+ * kvm_adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * @vcpu: The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long itbits, cond;
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ bool is_arm = !(cpsr & COMPAT_PSR_T_BIT);
+
+ BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK));
+
+ if (!(cpsr & COMPAT_PSR_IT_MASK))
+ return;
+
+ cond = (cpsr & 0xe000) >> 13;
+ itbits = (cpsr & 0x1c00) >> (10 - 2);
+ itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+ /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */
+ if ((itbits & 0x7) == 0)
+ itbits = cond = 0;
+ else
+ itbits = (itbits << 1) & 0x1f;
+
+ cpsr &= ~COMPAT_PSR_IT_MASK;
+ cpsr |= cond << 13;
+ cpsr |= (itbits & 0x1c) << (10 - 2);
+ cpsr |= (itbits & 0x3) << 25;
+ *vcpu_cpsr(vcpu) = cpsr;
+}
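
A standalone sketch of one ITAdvance step, using a hypothetical ITSTATE
with cond = 0b101 and itbits = 0b01010 (values chosen for illustration):

	#include <stdio.h>

	int main(void)
	{
		unsigned long cond = 0x5, itbits = 0xA;

		/* One ITAdvance step, as in kvm_adjust_itstate() above */
		if ((itbits & 0x7) == 0)
			itbits = cond = 0;
		else
			itbits = (itbits << 1) & 0x1f;

		/* prints cond=0x5 itbits=0x14 */
		printf("cond=%#lx itbits=%#lx\n", cond, itbits);
		return 0;
	}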
+
+/**
+ * kvm_skip_instr32 - skip a trapped instruction and proceed to the next
+ * @vcpu: The vcpu pointer
+ */
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+ bool is_thumb;
+
+ is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT);
+ if (is_thumb && !is_wide_instr)
+ *vcpu_pc(vcpu) += 2;
+ else
+ *vcpu_pc(vcpu) += 4;
+ kvm_adjust_itstate(vcpu);
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
new file mode 100644
index 000000000000..76794692c20b
--- /dev/null
+++ b/arch/arm64/kvm/guest.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/guest.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputype.h>
+#include <asm/uaccess.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+ return 0;
+}
+
+static u64 core_reg_offset_from_id(u64 id)
+{
+ return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
+}
+
+static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/*
+	 * Because the kvm_regs structure is a mix of 32-, 64- and
+	 * 128-bit fields, we index it as if it were a 32-bit
+	 * array. Hence below, nr_regs is the number of entries and
+	 * off is the index into that "array".
+	 */
+ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+ struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+ int nr_regs = sizeof(*regs) / sizeof(__u32);
+ u32 off;
+
+ /* Our ID is an index into the kvm_regs struct. */
+ off = core_reg_offset_from_id(reg->id);
+ if (off >= nr_regs ||
+ (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+ return -ENOENT;
+
+ if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+ struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+ int nr_regs = sizeof(*regs) / sizeof(__u32);
+ __uint128_t tmp;
+ void *valp = &tmp;
+ u64 off;
+ int err = 0;
+
+ /* Our ID is an index into the kvm_regs struct. */
+ off = core_reg_offset_from_id(reg->id);
+ if (off >= nr_regs ||
+ (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+ return -ENOENT;
+
+ if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
+ return -EINVAL;
+
+ if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
+ u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK;
+ switch (mode) {
+ case COMPAT_PSR_MODE_USR:
+ case COMPAT_PSR_MODE_FIQ:
+ case COMPAT_PSR_MODE_IRQ:
+ case COMPAT_PSR_MODE_SVC:
+ case COMPAT_PSR_MODE_ABT:
+ case COMPAT_PSR_MODE_UND:
+ case PSR_MODE_EL0t:
+ case PSR_MODE_EL1t:
+ case PSR_MODE_EL1h:
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+out:
+ return err;
+}
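
From userspace, these handlers back the KVM_GET_ONE_REG/KVM_SET_ONE_REG
ioctls. A minimal sketch (assuming vcpu_fd is a vcpu file descriptor the
caller already opened, and that the UAPI headers providing
KVM_REG_ARM_CORE_REG are installed):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Read the guest PC through KVM_GET_ONE_REG. */
	static int read_guest_pc(int vcpu_fd, uint64_t *pc)
	{
		struct kvm_one_reg reg = {
			.id = KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
			      KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
			.addr = (uint64_t)(uintptr_t)pc,
		};

		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}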
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+static unsigned long num_core_regs(void)
+{
+ return sizeof(struct kvm_regs) / sizeof(__u32);
+}
+
+/*
+ * ARM64 versions of the TIMER registers, always available on arm64.
+ */
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+ switch (index) {
+ case KVM_REG_ARM_TIMER_CTL:
+ case KVM_REG_ARM_TIMER_CNT:
+ case KVM_REG_ARM_TIMER_CVAL:
+ return true;
+ }
+ return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int ret;
+
+ ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+ if (ret != 0)
+ return -EFAULT;
+
+ return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ val = kvm_arm_timer_get_reg(vcpu, reg->id);
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
+/**
+ * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
+{
+ return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
+ + NUM_TIMER_REGS;
+}
+
+/**
+ * kvm_arm_copy_reg_indices - get indices of all registers.
+ *
+ * We do core registers right here, then we append system regs.
+ */
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ unsigned int i;
+ const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+ int ret;
+
+ for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
+ if (put_user(core_reg | i, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ ret = copy_timer_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += NUM_TIMER_REGS;
+
+ return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
+}
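
Userspace discovers the full register list through KVM_GET_REG_LIST; a
first call with n == 0 fails with E2BIG but reports the required count.
A hedged sketch (error handling trimmed to the essentials):

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static void list_regs(int vcpu_fd)
	{
		struct kvm_reg_list probe = { .n = 0 }, *list;
		unsigned long long i;

		/* First pass only fills in the required count */
		ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe);

		list = calloc(1, sizeof(*list) + probe.n * sizeof(list->reg[0]));
		if (!list)
			return;
		list->n = probe.n;

		if (!ioctl(vcpu_fd, KVM_GET_REG_LIST, list))
			for (i = 0; i < list->n; i++)
				printf("reg %#llx\n",
				       (unsigned long long)list->reg[i]);
		free(list);
	}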
+
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ /* We currently use nothing arch-specific in upper 32 bits */
+ if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+ return -EINVAL;
+
+ /* Register group 16 means we want a core register. */
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+ return get_core_reg(vcpu, reg);
+
+ if (is_timer_reg(reg->id))
+ return get_timer_reg(vcpu, reg);
+
+ return kvm_arm_sys_reg_get_reg(vcpu, reg);
+}
+
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ /* We currently use nothing arch-specific in upper 32 bits */
+ if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+ return -EINVAL;
+
+ /* Register group 16 means we set a core register. */
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+ return set_core_reg(vcpu, reg);
+
+ if (is_timer_reg(reg->id))
+ return set_timer_reg(vcpu, reg);
+
+ return kvm_arm_sys_reg_set_reg(vcpu, reg);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+ unsigned long implementor = read_cpuid_implementor();
+ unsigned long part_number = read_cpuid_part_number();
+
+ switch (implementor) {
+ case ARM_CPU_IMP_ARM:
+ switch (part_number) {
+ case ARM_CPU_PART_AEM_V8:
+ return KVM_ARM_TARGET_AEM_V8;
+ case ARM_CPU_PART_FOUNDATION:
+ return KVM_ARM_TARGET_FOUNDATION_V8;
+ case ARM_CPU_PART_CORTEX_A53:
+ return KVM_ARM_TARGET_CORTEX_A53;
+ case ARM_CPU_PART_CORTEX_A57:
+ return KVM_ARM_TARGET_CORTEX_A57;
+ };
+ break;
+ case ARM_CPU_IMP_APM:
+ switch (part_number) {
+ case APM_CPU_PART_POTENZA:
+ return KVM_ARM_TARGET_XGENE_POTENZA;
+ };
+ break;
+ };
+
+ return -EINVAL;
+}
+
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+ const struct kvm_vcpu_init *init)
+{
+ unsigned int i;
+ int phys_target = kvm_target_cpu();
+
+ if (init->target != phys_target)
+ return -EINVAL;
+
+ vcpu->arch.target = phys_target;
+ bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+
+ /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+ for (i = 0; i < sizeof(init->features) * 8; i++) {
+ if (init->features[i / 32] & (1 << (i % 32))) {
+ if (i >= KVM_VCPU_MAX_FEATURES)
+ return -ENOENT;
+ set_bit(i, vcpu->arch.features);
+ }
+ }
+
+ /* Now we know what it is, we can reset it. */
+ return kvm_reset_vcpu(vcpu);
+}
+
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+ int target = kvm_target_cpu();
+
+ if (target < 0)
+ return -ENODEV;
+
+ memset(init, 0, sizeof(*init));
+
+ /*
+ * For now, we don't return any features.
+ * In future, we might use features to return target
+ * specific features available for the preferred
+ * target type.
+ */
+ init->target = (__u32)target;
+
+ return 0;
+}
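
Together, these two functions back the KVM_ARM_VCPU_INIT and
KVM_ARM_PREFERRED_TARGET ioctls. A minimal userspace sketch (vm_fd and
vcpu_fd are assumed to be open VM/vcpu file descriptors):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Initialize a vcpu for whatever target the host prefers. */
	static int init_vcpu(int vm_fd, int vcpu_fd)
	{
		struct kvm_vcpu_init init;

		if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init))
			return -1;

		/* init.features[] comes back zeroed: no optional features */
		return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
	}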
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
new file mode 100644
index 000000000000..e28be510380c
--- /dev/null
+++ b/arch/arm64/kvm/handle_exit.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/handle_exit.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_psci.h>
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int ret;
+
+ ret = kvm_psci_call(vcpu);
+ if (ret < 0) {
+ kvm_inject_undefined(vcpu);
+ return 1;
+ }
+
+ return ret;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+/**
+ * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
+ * instruction executed by a guest
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE)
+ kvm_vcpu_on_spin(vcpu);
+ else
+ kvm_vcpu_block(vcpu);
+
+ return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+ [ESR_EL2_EC_WFI] = kvm_handle_wfx,
+ [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32,
+ [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64,
+ [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32,
+ [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store,
+ [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64,
+ [ESR_EL2_EC_HVC32] = handle_hvc,
+ [ESR_EL2_EC_SMC32] = handle_smc,
+ [ESR_EL2_EC_HVC64] = handle_hvc,
+ [ESR_EL2_EC_SMC64] = handle_smc,
+ [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg,
+ [ESR_EL2_EC_IABT] = kvm_handle_guest_abort,
+ [ESR_EL2_EC_DABT] = kvm_handle_guest_abort,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+ u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+ if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
+ !arm_exit_handlers[hsr_ec]) {
+ kvm_err("Unknown exception class: hsr: %#08x\n",
+ (unsigned int)kvm_vcpu_get_hsr(vcpu));
+ BUG();
+ }
+
+ return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index)
+{
+ exit_handle_fn exit_handler;
+
+ switch (exception_index) {
+ case ARM_EXCEPTION_IRQ:
+ return 1;
+ case ARM_EXCEPTION_TRAP:
+ /*
+ * See ARM ARM B1.14.1: "Hyp traps on instructions
+ * that fail their condition code check"
+ */
+ if (!kvm_condition_valid(vcpu)) {
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ return 1;
+ }
+
+ exit_handler = kvm_get_exit_handler(vcpu);
+
+ return exit_handler(vcpu, run);
+ default:
+ kvm_pr_unimpl("Unsupported exception type: %d",
+ exception_index);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return 0;
+ }
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
new file mode 100644
index 000000000000..d968796f4b2d
--- /dev/null
+++ b/arch/arm64/kvm/hyp-init.S
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+ .text
+ .pushsection .hyp.idmap.text, "ax"
+
+ .align 11
+
+ENTRY(__kvm_hyp_init)
+ ventry __invalid // Synchronous EL2t
+ ventry __invalid // IRQ EL2t
+ ventry __invalid // FIQ EL2t
+ ventry __invalid // Error EL2t
+
+ ventry __invalid // Synchronous EL2h
+ ventry __invalid // IRQ EL2h
+ ventry __invalid // FIQ EL2h
+ ventry __invalid // Error EL2h
+
+ ventry __do_hyp_init // Synchronous 64-bit EL1
+ ventry __invalid // IRQ 64-bit EL1
+ ventry __invalid // FIQ 64-bit EL1
+ ventry __invalid // Error 64-bit EL1
+
+ ventry __invalid // Synchronous 32-bit EL1
+ ventry __invalid // IRQ 32-bit EL1
+ ventry __invalid // FIQ 32-bit EL1
+ ventry __invalid // Error 32-bit EL1
+
+__invalid:
+ b .
+
+ /*
+ * x0: HYP boot pgd
+ * x1: HYP pgd
+ * x2: HYP stack
+ * x3: HYP vectors
+ */
+__do_hyp_init:
+
+ msr ttbr0_el2, x0
+
+ mrs x4, tcr_el1
+ ldr x5, =TCR_EL2_MASK
+ and x4, x4, x5
+ ldr x5, =TCR_EL2_FLAGS
+ orr x4, x4, x5
+ msr tcr_el2, x4
+
+ ldr x4, =VTCR_EL2_FLAGS
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+ * VTCR_EL2.
+ */
+ mrs x5, ID_AA64MMFR0_EL1
+ bfi x4, x5, #16, #3
+ msr vtcr_el2, x4
+
+ mrs x4, mair_el1
+ msr mair_el2, x4
+ isb
+
+ mrs x4, sctlr_el2
+ and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2
+ ldr x5, =SCTLR_EL2_FLAGS
+ orr x4, x4, x5
+ msr sctlr_el2, x4
+ isb
+
+ /* MMU is now enabled. Get ready for the trampoline dance */
+ ldr x4, =TRAMPOLINE_VA
+ adr x5, target
+ bfi x4, x5, #0, #PAGE_SHIFT
+ br x4
+
+target: /* We're now in the trampoline code, switch page tables */
+ msr ttbr0_el2, x1
+ isb
+
+ /* Invalidate the old TLBs */
+ tlbi alle2
+ dsb sy
+
+ /* Set the stack and new vectors */
+ kern_hyp_va x2
+ mov sp, x2
+ kern_hyp_va x3
+ msr vbar_el2, x3
+
+ /* Hello, World! */
+ eret
+ENDPROC(__kvm_hyp_init)
+
+ .ltorg
+
+ .popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
new file mode 100644
index 000000000000..b72aa9f9215c
--- /dev/null
+++ b/arch/arm64/kvm/hyp.S
@@ -0,0 +1,1274 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/debug-monitors.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
+#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x)
+
+ .text
+ .pushsection .hyp.text, "ax"
+ .align PAGE_SHIFT
+
+.macro save_common_regs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ add x3, x2, #CPU_XREG_OFFSET(19)
+ stp x19, x20, [x3]
+ stp x21, x22, [x3, #16]
+ stp x23, x24, [x3, #32]
+ stp x25, x26, [x3, #48]
+ stp x27, x28, [x3, #64]
+ stp x29, lr, [x3, #80]
+
+ mrs x19, sp_el0
+ mrs x20, elr_el2 // EL1 PC
+ mrs x21, spsr_el2 // EL1 pstate
+
+ stp x19, x20, [x3, #96]
+ str x21, [x3, #112]
+
+ mrs x22, sp_el1
+ mrs x23, elr_el1
+ mrs x24, spsr_el1
+
+ str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+ str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+ str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+.endm
+
+.macro restore_common_regs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+ ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+ ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+
+ msr sp_el1, x22
+ msr elr_el1, x23
+ msr spsr_el1, x24
+
+ add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
+ ldp x19, x20, [x3]
+ ldr x21, [x3, #16]
+
+ msr sp_el0, x19
+ msr elr_el2, x20 // EL1 PC
+ msr spsr_el2, x21 // EL1 pstate
+
+ add x3, x2, #CPU_XREG_OFFSET(19)
+ ldp x19, x20, [x3]
+ ldp x21, x22, [x3, #16]
+ ldp x23, x24, [x3, #32]
+ ldp x25, x26, [x3, #48]
+ ldp x27, x28, [x3, #64]
+ ldp x29, lr, [x3, #80]
+.endm
+
+.macro save_host_regs
+ save_common_regs
+.endm
+
+.macro restore_host_regs
+ restore_common_regs
+.endm
+
+.macro save_fpsimd
+ // x2: cpu context address
+ // x3, x4: tmp regs
+ add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+ fpsimd_save x3, 4
+.endm
+
+.macro restore_fpsimd
+ // x2: cpu context address
+ // x3, x4: tmp regs
+ add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+ fpsimd_restore x3, 4
+.endm
+
+.macro save_guest_regs
+ // x0 is the vcpu address
+ // x1 is the return code, do not corrupt!
+ // x2 is the cpu context
+ // x3 is a tmp register
+ // Guest's x0-x3 are on the stack
+
+ // Compute base to save registers
+ add x3, x2, #CPU_XREG_OFFSET(4)
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+ stp x8, x9, [x3, #32]
+ stp x10, x11, [x3, #48]
+ stp x12, x13, [x3, #64]
+ stp x14, x15, [x3, #80]
+ stp x16, x17, [x3, #96]
+ str x18, [x3, #112]
+
+ pop x6, x7 // x2, x3
+ pop x4, x5 // x0, x1
+
+ add x3, x2, #CPU_XREG_OFFSET(0)
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+
+ save_common_regs
+.endm
+
+.macro restore_guest_regs
+ // x0 is the vcpu address.
+ // x2 is the cpu context
+ // x3 is a tmp register
+
+ // Prepare x0-x3 for later restore
+ add x3, x2, #CPU_XREG_OFFSET(0)
+ ldp x4, x5, [x3]
+ ldp x6, x7, [x3, #16]
+ push x4, x5 // Push x0-x3 on the stack
+ push x6, x7
+
+ // x4-x18
+ ldp x4, x5, [x3, #32]
+ ldp x6, x7, [x3, #48]
+ ldp x8, x9, [x3, #64]
+ ldp x10, x11, [x3, #80]
+ ldp x12, x13, [x3, #96]
+ ldp x14, x15, [x3, #112]
+ ldp x16, x17, [x3, #128]
+ ldr x18, [x3, #144]
+
+ // x19-x29, lr, sp*, elr*, spsr*
+ restore_common_regs
+
+ // Last bits of the 64bit state
+ pop x2, x3
+ pop x0, x1
+
+ // Do not touch any register after this!
+.endm
+
+/*
+ * Macros to perform system register save/restore.
+ *
+ * Ordering here is absolutely critical, and must be kept consistent
+ * in {save,restore}_sysregs, {save,restore}_guest_32bit_state,
+ * and in kvm_asm.h.
+ *
+ * In other words, don't touch any of these unless you know what
+ * you are doing.
+ */
+.macro save_sysregs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+ mrs x4, vmpidr_el2
+ mrs x5, csselr_el1
+ mrs x6, sctlr_el1
+ mrs x7, actlr_el1
+ mrs x8, cpacr_el1
+ mrs x9, ttbr0_el1
+ mrs x10, ttbr1_el1
+ mrs x11, tcr_el1
+ mrs x12, esr_el1
+ mrs x13, afsr0_el1
+ mrs x14, afsr1_el1
+ mrs x15, far_el1
+ mrs x16, mair_el1
+ mrs x17, vbar_el1
+ mrs x18, contextidr_el1
+ mrs x19, tpidr_el0
+ mrs x20, tpidrro_el0
+ mrs x21, tpidr_el1
+ mrs x22, amair_el1
+ mrs x23, cntkctl_el1
+ mrs x24, par_el1
+ mrs x25, mdscr_el1
+
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+ stp x8, x9, [x3, #32]
+ stp x10, x11, [x3, #48]
+ stp x12, x13, [x3, #64]
+ stp x14, x15, [x3, #80]
+ stp x16, x17, [x3, #96]
+ stp x18, x19, [x3, #112]
+ stp x20, x21, [x3, #128]
+ stp x22, x23, [x3, #144]
+ stp x24, x25, [x3, #160]
+.endm
+
+.macro save_debug
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
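+	// Each numbered block below lists all 16 register accessors from
+	// the highest register down to the lowest. Every line is a single
+	// 4-byte instruction, so branching to "1f + skip * 4" lands on
+	// the accessor for the highest register this CPU implements.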
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ mrs x20, dbgbcr15_el1
+ mrs x19, dbgbcr14_el1
+ mrs x18, dbgbcr13_el1
+ mrs x17, dbgbcr12_el1
+ mrs x16, dbgbcr11_el1
+ mrs x15, dbgbcr10_el1
+ mrs x14, dbgbcr9_el1
+ mrs x13, dbgbcr8_el1
+ mrs x12, dbgbcr7_el1
+ mrs x11, dbgbcr6_el1
+ mrs x10, dbgbcr5_el1
+ mrs x9, dbgbcr4_el1
+ mrs x8, dbgbcr3_el1
+ mrs x7, dbgbcr2_el1
+ mrs x6, dbgbcr1_el1
+ mrs x5, dbgbcr0_el1
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ mrs x20, dbgbvr15_el1
+ mrs x19, dbgbvr14_el1
+ mrs x18, dbgbvr13_el1
+ mrs x17, dbgbvr12_el1
+ mrs x16, dbgbvr11_el1
+ mrs x15, dbgbvr10_el1
+ mrs x14, dbgbvr9_el1
+ mrs x13, dbgbvr8_el1
+ mrs x12, dbgbvr7_el1
+ mrs x11, dbgbvr6_el1
+ mrs x10, dbgbvr5_el1
+ mrs x9, dbgbvr4_el1
+ mrs x8, dbgbvr3_el1
+ mrs x7, dbgbvr2_el1
+ mrs x6, dbgbvr1_el1
+ mrs x5, dbgbvr0_el1
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ mrs x20, dbgwcr15_el1
+ mrs x19, dbgwcr14_el1
+ mrs x18, dbgwcr13_el1
+ mrs x17, dbgwcr12_el1
+ mrs x16, dbgwcr11_el1
+ mrs x15, dbgwcr10_el1
+ mrs x14, dbgwcr9_el1
+ mrs x13, dbgwcr8_el1
+ mrs x12, dbgwcr7_el1
+ mrs x11, dbgwcr6_el1
+ mrs x10, dbgwcr5_el1
+ mrs x9, dbgwcr4_el1
+ mrs x8, dbgwcr3_el1
+ mrs x7, dbgwcr2_el1
+ mrs x6, dbgwcr1_el1
+ mrs x5, dbgwcr0_el1
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ mrs x20, dbgwvr15_el1
+ mrs x19, dbgwvr14_el1
+ mrs x18, dbgwvr13_el1
+ mrs x17, dbgwvr12_el1
+ mrs x16, dbgwvr11_el1
+ mrs x15, dbgwvr10_el1
+ mrs x14, dbgwvr9_el1
+ mrs x13, dbgwvr8_el1
+ mrs x12, dbgwvr7_el1
+ mrs x11, dbgwvr6_el1
+ mrs x10, dbgwvr5_el1
+ mrs x9, dbgwvr4_el1
+ mrs x8, dbgwvr3_el1
+ mrs x7, dbgwvr2_el1
+ mrs x6, dbgwvr1_el1
+ mrs x5, dbgwvr0_el1
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ mrs x21, mdccint_el1
+ str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+.endm
+
+.macro restore_sysregs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+ ldp x4, x5, [x3]
+ ldp x6, x7, [x3, #16]
+ ldp x8, x9, [x3, #32]
+ ldp x10, x11, [x3, #48]
+ ldp x12, x13, [x3, #64]
+ ldp x14, x15, [x3, #80]
+ ldp x16, x17, [x3, #96]
+ ldp x18, x19, [x3, #112]
+ ldp x20, x21, [x3, #128]
+ ldp x22, x23, [x3, #144]
+ ldp x24, x25, [x3, #160]
+
+ msr vmpidr_el2, x4
+ msr csselr_el1, x5
+ msr sctlr_el1, x6
+ msr actlr_el1, x7
+ msr cpacr_el1, x8
+ msr ttbr0_el1, x9
+ msr ttbr1_el1, x10
+ msr tcr_el1, x11
+ msr esr_el1, x12
+ msr afsr0_el1, x13
+ msr afsr1_el1, x14
+ msr far_el1, x15
+ msr mair_el1, x16
+ msr vbar_el1, x17
+ msr contextidr_el1, x18
+ msr tpidr_el0, x19
+ msr tpidrro_el0, x20
+ msr tpidr_el1, x21
+ msr amair_el1, x22
+ msr cntkctl_el1, x23
+ msr par_el1, x24
+ msr mdscr_el1, x25
+.endm
+
+.macro restore_debug
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ msr dbgbcr15_el1, x20
+ msr dbgbcr14_el1, x19
+ msr dbgbcr13_el1, x18
+ msr dbgbcr12_el1, x17
+ msr dbgbcr11_el1, x16
+ msr dbgbcr10_el1, x15
+ msr dbgbcr9_el1, x14
+ msr dbgbcr8_el1, x13
+ msr dbgbcr7_el1, x12
+ msr dbgbcr6_el1, x11
+ msr dbgbcr5_el1, x10
+ msr dbgbcr4_el1, x9
+ msr dbgbcr3_el1, x8
+ msr dbgbcr2_el1, x7
+ msr dbgbcr1_el1, x6
+ msr dbgbcr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ msr dbgbvr15_el1, x20
+ msr dbgbvr14_el1, x19
+ msr dbgbvr13_el1, x18
+ msr dbgbvr12_el1, x17
+ msr dbgbvr11_el1, x16
+ msr dbgbvr10_el1, x15
+ msr dbgbvr9_el1, x14
+ msr dbgbvr8_el1, x13
+ msr dbgbvr7_el1, x12
+ msr dbgbvr6_el1, x11
+ msr dbgbvr5_el1, x10
+ msr dbgbvr4_el1, x9
+ msr dbgbvr3_el1, x8
+ msr dbgbvr2_el1, x7
+ msr dbgbvr1_el1, x6
+ msr dbgbvr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ msr dbgwcr15_el1, x20
+ msr dbgwcr14_el1, x19
+ msr dbgwcr13_el1, x18
+ msr dbgwcr12_el1, x17
+ msr dbgwcr11_el1, x16
+ msr dbgwcr10_el1, x15
+ msr dbgwcr9_el1, x14
+ msr dbgwcr8_el1, x13
+ msr dbgwcr7_el1, x12
+ msr dbgwcr6_el1, x11
+ msr dbgwcr5_el1, x10
+ msr dbgwcr4_el1, x9
+ msr dbgwcr3_el1, x8
+ msr dbgwcr2_el1, x7
+ msr dbgwcr1_el1, x6
+ msr dbgwcr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ msr dbgwvr15_el1, x20
+ msr dbgwvr14_el1, x19
+ msr dbgwvr13_el1, x18
+ msr dbgwvr12_el1, x17
+ msr dbgwvr11_el1, x16
+ msr dbgwvr10_el1, x15
+ msr dbgwvr9_el1, x14
+ msr dbgwvr8_el1, x13
+ msr dbgwvr7_el1, x12
+ msr dbgwvr6_el1, x11
+ msr dbgwvr5_el1, x10
+ msr dbgwvr4_el1, x9
+ msr dbgwvr3_el1, x8
+ msr dbgwvr2_el1, x7
+ msr dbgwvr1_el1, x6
+ msr dbgwvr0_el1, x5
+
+ ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+ msr mdccint_el1, x21
+.endm
+
+.macro skip_32bit_state tmp, target
+ // Skip 32bit state if not needed
+ mrs \tmp, hcr_el2
+ tbnz \tmp, #HCR_RW_SHIFT, \target
+.endm
+
+.macro skip_tee_state tmp, target
+ // Skip ThumbEE state if not needed
+ mrs \tmp, id_pfr0_el1
+ tbz \tmp, #12, \target
+.endm
+
+.macro skip_debug_state tmp, target
+ ldr \tmp, [x0, #VCPU_DEBUG_FLAGS]
+ tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
+.endm
+
+.macro compute_debug_state target
+ // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
+ // is set, we do a full save/restore cycle and disable trapping.
+ add x25, x0, #VCPU_CONTEXT
+
+ // Check the state of MDSCR_EL1
+ ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
+ and x26, x25, #DBG_MDSCR_KDE
+ and x25, x25, #DBG_MDSCR_MDE
+ adds xzr, x25, x26
+ b.eq 9998f // Nothing to see there
+
+	// If any interesting bits were set, we must set the flag
+ mov x26, #KVM_ARM64_DEBUG_DIRTY
+ str x26, [x0, #VCPU_DEBUG_FLAGS]
+ b 9999f // Don't skip restore
+
+9998:
+ // Otherwise load the flags from memory in case we recently
+ // trapped
+ skip_debug_state x25, \target
+9999:
+.endm
+
+.macro save_guest_32bit_state
+ skip_32bit_state x3, 1f
+
+ add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+ mrs x4, spsr_abt
+ mrs x5, spsr_und
+ mrs x6, spsr_irq
+ mrs x7, spsr_fiq
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+ mrs x4, dacr32_el2
+ mrs x5, ifsr32_el2
+ mrs x6, fpexc32_el2
+ stp x4, x5, [x3]
+ str x6, [x3, #16]
+
+ skip_debug_state x8, 2f
+ mrs x7, dbgvcr32_el2
+ str x7, [x3, #24]
+2:
+ skip_tee_state x8, 1f
+
+ add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+ mrs x4, teecr32_el1
+ mrs x5, teehbr32_el1
+ stp x4, x5, [x3]
+1:
+.endm
+
+.macro restore_guest_32bit_state
+ skip_32bit_state x3, 1f
+
+ add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+ ldp x4, x5, [x3]
+ ldp x6, x7, [x3, #16]
+ msr spsr_abt, x4
+ msr spsr_und, x5
+ msr spsr_irq, x6
+ msr spsr_fiq, x7
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+ ldp x4, x5, [x3]
+ ldr x6, [x3, #16]
+ msr dacr32_el2, x4
+ msr ifsr32_el2, x5
+ msr fpexc32_el2, x6
+
+ skip_debug_state x8, 2f
+ ldr x7, [x3, #24]
+ msr dbgvcr32_el2, x7
+2:
+ skip_tee_state x8, 1f
+
+ add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+ ldp x4, x5, [x3]
+ msr teecr32_el1, x4
+ msr teehbr32_el1, x5
+1:
+.endm
+
+.macro activate_traps
+ ldr x2, [x0, #VCPU_HCR_EL2]
+ msr hcr_el2, x2
+ ldr x2, =(CPTR_EL2_TTA)
+ msr cptr_el2, x2
+
+ ldr x2, =(1 << 15) // Trap CP15 Cr=15
+ msr hstr_el2, x2
+
+ mrs x2, mdcr_el2
+ and x2, x2, #MDCR_EL2_HPMN_MASK
+ orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+ orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
+
+ // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
+ // if not dirty.
+ ldr x3, [x0, #VCPU_DEBUG_FLAGS]
+ tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
+ orr x2, x2, #MDCR_EL2_TDA
+1:
+ msr mdcr_el2, x2
+.endm
+
+.macro deactivate_traps
+ mov x2, #HCR_RW
+ msr hcr_el2, x2
+ msr cptr_el2, xzr
+ msr hstr_el2, xzr
+
+ mrs x2, mdcr_el2
+ and x2, x2, #MDCR_EL2_HPMN_MASK
+ msr mdcr_el2, x2
+.endm
+
+.macro activate_vm
+ ldr x1, [x0, #VCPU_KVM]
+ kern_hyp_va x1
+ ldr x2, [x1, #KVM_VTTBR]
+ msr vttbr_el2, x2
+.endm
+
+.macro deactivate_vm
+ msr vttbr_el2, xzr
+.endm
+
+/*
+ * Call into the vgic backend for state saving
+ */
+.macro save_vgic_state
+ adr x24, __vgic_sr_vectors
+ ldr x24, [x24, VGIC_SAVE_FN]
+ kern_hyp_va x24
+ blr x24
+ mrs x24, hcr_el2
+ mov x25, #HCR_INT_OVERRIDE
+ neg x25, x25
+ and x24, x24, x25
+ msr hcr_el2, x24
+.endm
+
+/*
+ * Call into the vgic backend for state restoring
+ */
+.macro restore_vgic_state
+ mrs x24, hcr_el2
+ ldr x25, [x0, #VCPU_IRQ_LINES]
+ orr x24, x24, #HCR_INT_OVERRIDE
+ orr x24, x24, x25
+ msr hcr_el2, x24
+ adr x24, __vgic_sr_vectors
+ ldr x24, [x24, #VGIC_RESTORE_FN]
+ kern_hyp_va x24
+ blr x24
+.endm
+
+.macro save_timer_state
+ // x0: vcpu pointer
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr w3, [x2, #KVM_TIMER_ENABLED]
+ cbz w3, 1f
+
+ mrs x3, cntv_ctl_el0
+ and x3, x3, #3
+ str w3, [x0, #VCPU_TIMER_CNTV_CTL]
+ bic x3, x3, #1 // Clear Enable
+ msr cntv_ctl_el0, x3
+
+ isb
+
+ mrs x3, cntv_cval_el0
+ str x3, [x0, #VCPU_TIMER_CNTV_CVAL]
+
+1:
+ // Allow physical timer/counter access for the host
+ mrs x2, cnthctl_el2
+ orr x2, x2, #3
+ msr cnthctl_el2, x2
+
+ // Clear cntvoff for the host
+ msr cntvoff_el2, xzr
+.endm
+
+.macro restore_timer_state
+ // x0: vcpu pointer
+ // Disallow physical timer access for the guest
+ // Physical counter access is allowed
+ mrs x2, cnthctl_el2
+ orr x2, x2, #1
+ bic x2, x2, #2
+ msr cnthctl_el2, x2
+
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr w3, [x2, #KVM_TIMER_ENABLED]
+ cbz w3, 1f
+
+ ldr x3, [x2, #KVM_TIMER_CNTVOFF]
+ msr cntvoff_el2, x3
+ ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
+ msr cntv_cval_el0, x2
+ isb
+
+ ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
+ and x2, x2, #3
+ msr cntv_ctl_el0, x2
+1:
+.endm
+
+__save_sysregs:
+ save_sysregs
+ ret
+
+__restore_sysregs:
+ restore_sysregs
+ ret
+
+__save_debug:
+ save_debug
+ ret
+
+__restore_debug:
+ restore_debug
+ ret
+
+__save_fpsimd:
+ save_fpsimd
+ ret
+
+__restore_fpsimd:
+ restore_fpsimd
+ ret
+
+/*
+ * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+ *
+ * This is the world switch. The first half of the function
+ * deals with entering the guest, and anything from __kvm_vcpu_return
+ * to the end of the function deals with reentering the host.
+ * On the enter path, only x0 (vcpu pointer) must be preserved until
+ * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception
+ * code) must both be preserved until the epilogue.
+ * In both cases, x2 points to the CPU context we're saving/restoring from/to.
+ */
+ENTRY(__kvm_vcpu_run)
+ kern_hyp_va x0
+ msr tpidr_el2, x0 // Save the vcpu register
+
+ // Host context
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+
+ save_host_regs
+ bl __save_fpsimd
+ bl __save_sysregs
+
+ compute_debug_state 1f
+ bl __save_debug
+1:
+ activate_traps
+ activate_vm
+
+ restore_vgic_state
+ restore_timer_state
+
+ // Guest context
+ add x2, x0, #VCPU_CONTEXT
+
+ bl __restore_sysregs
+ bl __restore_fpsimd
+
+ skip_debug_state x3, 1f
+ bl __restore_debug
+1:
+ restore_guest_32bit_state
+ restore_guest_regs
+
+ // That's it, no more messing around.
+ eret
+
+__kvm_vcpu_return:
+ // Assume x0 is the vcpu pointer, x1 the return code
+ // Guest's x0-x3 are on the stack
+
+ // Guest context
+ add x2, x0, #VCPU_CONTEXT
+
+ save_guest_regs
+ bl __save_fpsimd
+ bl __save_sysregs
+
+ skip_debug_state x3, 1f
+ bl __save_debug
+1:
+ save_guest_32bit_state
+
+ save_timer_state
+ save_vgic_state
+
+ deactivate_traps
+ deactivate_vm
+
+ // Host context
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+
+ bl __restore_sysregs
+ bl __restore_fpsimd
+
+ skip_debug_state x3, 1f
+ // Clear the dirty flag for the next run, as all the state has
+ // already been saved. Note that we nuke the whole 64bit word.
+ // If we ever add more flags, we'll have to be more careful...
+ str xzr, [x0, #VCPU_DEBUG_FLAGS]
+ bl __restore_debug
+1:
+ restore_host_regs
+
+ mov x0, x1
+ ret
+END(__kvm_vcpu_run)
+
+// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+ENTRY(__kvm_tlb_flush_vmid_ipa)
+ dsb ishst
+
+ kern_hyp_va x0
+ ldr x2, [x0, #KVM_VTTBR]
+ msr vttbr_el2, x2
+ isb
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ tlbi ipas2e1is, x1
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb ish
+ tlbi vmalle1is
+ dsb ish
+ isb
+
+ msr vttbr_el2, xzr
+ ret
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
+
+ENTRY(__kvm_flush_vm_context)
+ dsb ishst
+ tlbi alle1is
+ ic ialluis
+ dsb ish
+ ret
+ENDPROC(__kvm_flush_vm_context)
+
+	// struct vgic_sr_vectors __vgic_sr_vectors;
+ .align 3
+ENTRY(__vgic_sr_vectors)
+ .skip VGIC_SR_VECTOR_SZ
+ENDPROC(__vgic_sr_vectors)
+
+__kvm_hyp_panic:
+ // Guess the context by looking at VTTBR:
+ // If zero, then we're already a host.
+	// Otherwise restore a minimal host context before panicking.
+ mrs x0, vttbr_el2
+ cbz x0, 1f
+
+ mrs x0, tpidr_el2
+
+ deactivate_traps
+ deactivate_vm
+
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+
+ bl __restore_sysregs
+
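+	// Translate the hyp VA of the panic string into a kernel VA
+	// (x0 - HYP_PAGE_OFFSET + PAGE_OFFSET) so panic() can print it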
+1: adr x0, __hyp_panic_str
+ adr x1, 2f
+ ldp x2, x3, [x1]
+ sub x0, x0, x2
+ add x0, x0, x3
+ mrs x1, spsr_el2
+ mrs x2, elr_el2
+ mrs x3, esr_el2
+ mrs x4, far_el2
+ mrs x5, hpfar_el2
+ mrs x6, par_el1
+ mrs x7, tpidr_el2
+
+ mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+ PSR_MODE_EL1h)
+ msr spsr_el2, lr
+ ldr lr, =panic
+ msr elr_el2, lr
+ eret
+
+ .align 3
+2: .quad HYP_PAGE_OFFSET
+ .quad PAGE_OFFSET
+ENDPROC(__kvm_hyp_panic)
+
+__hyp_panic_str:
+ .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+
+ .align 2
+
+/*
+ * u64 kvm_call_hyp(void *hypfn, ...);
+ *
+ * This is not really a variadic function in the classic C way and care must
+ * be taken when calling this to ensure parameters are passed in registers
+ * only, since the stack will change between the caller and the callee.
+ *
+ * Call the function with the first argument containing a pointer to the
+ * function you wish to call in Hyp mode, and subsequent arguments will be
+ * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
+ * function pointer can be passed). The function being called must be mapped
+ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). The return
+ * value is passed back in x0.
+ *
+ * A function pointer with a value of 0 has a special meaning, and is
+ * used to implement __hyp_get_vectors in the same way as in
+ * arch/arm64/kernel/hyp_stub.S.
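+ *
+ * A minimal usage sketch (illustration only, mirroring the host side
+ * in arch/arm/kvm/arm.c):
+ *
+ *	u64 exit_code = kvm_call_hyp(__kvm_vcpu_run, vcpu);
+ *
+ * The HVC below traps to EL2, where el1_sync shuffles the arguments
+ * down (x1 -> x0, ...) and branches to the hyp VA of the function.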
+ */
+ENTRY(kvm_call_hyp)
+ hvc #0
+ ret
+ENDPROC(kvm_call_hyp)
+
+.macro invalid_vector label, target
+ .align 2
+\label:
+ b \target
+ENDPROC(\label)
+.endm
+
+ /* None of these should ever happen */
+ invalid_vector el2t_sync_invalid, __kvm_hyp_panic
+ invalid_vector el2t_irq_invalid, __kvm_hyp_panic
+ invalid_vector el2t_fiq_invalid, __kvm_hyp_panic
+ invalid_vector el2t_error_invalid, __kvm_hyp_panic
+ invalid_vector el2h_sync_invalid, __kvm_hyp_panic
+ invalid_vector el2h_irq_invalid, __kvm_hyp_panic
+ invalid_vector el2h_fiq_invalid, __kvm_hyp_panic
+ invalid_vector el2h_error_invalid, __kvm_hyp_panic
+ invalid_vector el1_sync_invalid, __kvm_hyp_panic
+ invalid_vector el1_irq_invalid, __kvm_hyp_panic
+ invalid_vector el1_fiq_invalid, __kvm_hyp_panic
+ invalid_vector el1_error_invalid, __kvm_hyp_panic
+
+el1_sync: // Guest trapped into EL2
+ push x0, x1
+ push x2, x3
+
+ mrs x1, esr_el2
+ lsr x2, x1, #ESR_EL2_EC_SHIFT
+
+ cmp x2, #ESR_EL2_EC_HVC64
+ b.ne el1_trap
+
+ mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest
+ cbnz x3, el1_trap // called HVC
+
+ /* Here, we're pretty sure the host called HVC. */
+ pop x2, x3
+ pop x0, x1
+
+ /* Check for __hyp_get_vectors */
+ cbnz x0, 1f
+ mrs x0, vbar_el2
+ b 2f
+
+1: push lr, xzr
+
+ /*
+ * Compute the function address in EL2, and shuffle the parameters.
+ */
+ kern_hyp_va x0
+ mov lr, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ blr lr
+
+ pop lr, xzr
+2: eret
+
+el1_trap:
+ /*
+ * x1: ESR
+ * x2: ESR_EC
+ */
+ cmp x2, #ESR_EL2_EC_DABT
+ mov x0, #ESR_EL2_EC_IABT
+ ccmp x2, x0, #4, ne
+ b.ne 1f // Not an abort we care about
+
+ /* This is an abort. Check for permission fault */
+ and x2, x1, #ESR_EL2_FSC_TYPE
+ cmp x2, #FSC_PERM
+ b.ne 1f // Not a permission fault
+
+ /*
+ * Check for Stage-1 page table walk, which is guaranteed
+ * to give a valid HPFAR_EL2.
+ */
+ tbnz x1, #7, 1f // S1PTW is set
+
+ /* Preserve PAR_EL1 */
+ mrs x3, par_el1
+ push x3, xzr
+
+ /*
+ * Permission fault, HPFAR_EL2 is invalid.
+ * Resolve the IPA the hard way using the guest VA.
+ * Stage-1 translation already validated the memory access rights.
+ * As such, we can use the EL1 translation regime, and don't have
+ * to distinguish between EL0 and EL1 access.
+ */
+ mrs x2, far_el2
+ at s1e1r, x2
+ isb
+
+ /* Read result */
+ mrs x3, par_el1
+ pop x0, xzr // Restore PAR_EL1 from the stack
+ msr par_el1, x0
+ tbnz x3, #0, 3f // Bail out if we failed the translation
+ ubfx x3, x3, #12, #36 // Extract IPA
+ lsl x3, x3, #4 // and present it like HPFAR
+ b 2f
+
+1: mrs x3, hpfar_el2
+ mrs x2, far_el2
+
+2: mrs x0, tpidr_el2
+ str w1, [x0, #VCPU_ESR_EL2]
+ str x2, [x0, #VCPU_FAR_EL2]
+ str x3, [x0, #VCPU_HPFAR_EL2]
+
+ mov x1, #ARM_EXCEPTION_TRAP
+ b __kvm_vcpu_return
+
+ /*
+ * Translation failed. Just return to the guest and
+ * let it fault again. Another CPU is probably playing
+ * behind our back.
+ */
+3: pop x2, x3
+ pop x0, x1
+
+ eret
+
+el1_irq:
+ push x0, x1
+ push x2, x3
+ mrs x0, tpidr_el2
+ mov x1, #ARM_EXCEPTION_IRQ
+ b __kvm_vcpu_return
+
+ .ltorg
+
+ .align 11
+
+ENTRY(__kvm_hyp_vector)
+ ventry el2t_sync_invalid // Synchronous EL2t
+ ventry el2t_irq_invalid // IRQ EL2t
+ ventry el2t_fiq_invalid // FIQ EL2t
+ ventry el2t_error_invalid // Error EL2t
+
+ ventry el2h_sync_invalid // Synchronous EL2h
+ ventry el2h_irq_invalid // IRQ EL2h
+ ventry el2h_fiq_invalid // FIQ EL2h
+ ventry el2h_error_invalid // Error EL2h
+
+ ventry el1_sync // Synchronous 64-bit EL1
+ ventry el1_irq // IRQ 64-bit EL1
+ ventry el1_fiq_invalid // FIQ 64-bit EL1
+ ventry el1_error_invalid // Error 64-bit EL1
+
+ ventry el1_sync // Synchronous 32-bit EL1
+ ventry el1_irq // IRQ 32-bit EL1
+ ventry el1_fiq_invalid // FIQ 32-bit EL1
+ ventry el1_error_invalid // Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)
+
+ .popsection
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
new file mode 100644
index 000000000000..81a02a8762b0
--- /dev/null
+++ b/arch/arm64/kvm/inject_fault.c
@@ -0,0 +1,203 @@
+/*
+ * Fault injection for both 32 and 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/esr.h>
+
+#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
+ PSR_I_BIT | PSR_D_BIT)
+#define EL1_EXCEPT_SYNC_OFFSET 0x200
+
+static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+ unsigned long cpsr;
+ unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
+ bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT);
+ u32 return_offset = (is_thumb) ? 4 : 0;
+ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+
+ cpsr = mode | COMPAT_PSR_I_BIT;
+
+ if (sctlr & (1 << 30))
+ cpsr |= COMPAT_PSR_T_BIT;
+ if (sctlr & (1 << 25))
+ cpsr |= COMPAT_PSR_E_BIT;
+
+ *vcpu_cpsr(vcpu) = cpsr;
+
+ /* Note: These now point to the banked copies */
+ *vcpu_spsr(vcpu) = new_spsr_value;
+ *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+
+ /* Branch to exception vector */
+ if (sctlr & (1 << 13))
+ vect_offset += 0xffff0000;
+ else /* always have security exceptions */
+ vect_offset += vcpu_cp15(vcpu, c12_VBAR);
+
+ *vcpu_pc(vcpu) = vect_offset;
+}
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+ prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4);
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException()
+ * pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
+ unsigned long addr)
+{
+ u32 vect_offset;
+ u32 *far, *fsr;
+ bool is_lpae;
+
+ if (is_pabt) {
+ vect_offset = 12;
+ far = &vcpu_cp15(vcpu, c6_IFAR);
+ fsr = &vcpu_cp15(vcpu, c5_IFSR);
+ } else { /* !iabt */
+ vect_offset = 16;
+ far = &vcpu_cp15(vcpu, c6_DFAR);
+ fsr = &vcpu_cp15(vcpu, c5_DFSR);
+ }
+
+ prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset);
+
+ *far = addr;
+
+ /* Give the guest an IMPLEMENTATION DEFINED exception */
+ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
+ if (is_lpae)
+ *fsr = 1 << 9 | 0x34;
+ else
+ *fsr = 0x14;
+}
+
+static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
+{
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ bool is_aarch32;
+ u32 esr = 0;
+
+ is_aarch32 = vcpu_mode_is_32bit(vcpu);
+
+ *vcpu_spsr(vcpu) = cpsr;
+ *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+ *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+ vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+
+ /*
+ * Build an {i,d}abort, depending on the level and the
+ * instruction set. Report an external synchronous abort.
+ */
+ if (kvm_vcpu_trap_il_is32bit(vcpu))
+ esr |= ESR_EL1_IL;
+
+ /*
+ * Here, the guest runs in AArch64 mode when in EL1. If we get
+ * an AArch32 fault, it means we managed to trap an EL0 fault.
+ */
+ if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+ esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT);
+ else
+ esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT);
+
+ if (!is_iabt)
+ esr |= ESR_EL1_EC_DABT_EL0;
+
+ vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT;
+}
+
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT);
+
+ *vcpu_spsr(vcpu) = cpsr;
+ *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+ *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+ /*
+ * Build an unknown exception, depending on the instruction
+ * set.
+ */
+ if (kvm_vcpu_trap_il_is32bit(vcpu))
+ esr |= ESR_EL1_IL;
+
+ vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the data abort
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ inject_abt32(vcpu, false, addr);
+	else
+ inject_abt64(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the prefetch abort
+ * @addr: The address to report in the IFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ inject_abt32(vcpu, true, addr);
+	else
+ inject_abt64(vcpu, true, addr);
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined instruction into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ inject_undef32(vcpu);
+	else
+ inject_undef64(vcpu);
+}
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
new file mode 100644
index 000000000000..bbc6ae32e4af
--- /dev/null
+++ b/arch/arm64/kvm/regmap.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/emulate.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/ptrace.h>
+
+#define VCPU_NR_MODES 6
+#define REG_OFFSET(_reg) \
+ (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
+
+#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
+
+static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
+ /* USR Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14),
+ REG_OFFSET(pc)
+ },
+
+ /* FIQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+ REG_OFFSET(compat_r8_fiq), /* r8 */
+ REG_OFFSET(compat_r9_fiq), /* r9 */
+ REG_OFFSET(compat_r10_fiq), /* r10 */
+ REG_OFFSET(compat_r11_fiq), /* r11 */
+ REG_OFFSET(compat_r12_fiq), /* r12 */
+ REG_OFFSET(compat_sp_fiq), /* r13 */
+ REG_OFFSET(compat_lr_fiq), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* IRQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_irq), /* r13 */
+ REG_OFFSET(compat_lr_irq), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* SVC Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_svc), /* r13 */
+ REG_OFFSET(compat_lr_svc), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* ABT Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_abt), /* r13 */
+ REG_OFFSET(compat_lr_abt), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* UND Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_und), /* r13 */
+ REG_OFFSET(compat_lr_und), /* r14 */
+ REG_OFFSET(pc)
+ },
+};
+
+/*
+ * Return a pointer to the register number valid in the current mode of
+ * the virtual CPU.
+ */
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+ unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+ unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+
+ switch (mode) {
+ case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC:
+ mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
+ break;
+
+ case COMPAT_PSR_MODE_ABT:
+ mode = 4;
+ break;
+
+ case COMPAT_PSR_MODE_UND:
+ mode = 5;
+ break;
+
+ case COMPAT_PSR_MODE_SYS:
+ mode = 0; /* SYS maps to USR */
+ break;
+
+ default:
+ BUG();
+ }
+
+ return reg_array + vcpu_reg_offsets[mode][reg_num];
+}
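+
+/*
+ * For example, with the vcpu in ABT mode, a request for r13 resolves
+ * via vcpu_reg_offsets[4][13] to the banked compat_sp_abt slot, while
+ * r0 still maps to the shared usr register.
+ */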
+
+/*
+ * Return the SPSR for the current mode of the virtual CPU.
+ */
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+{
+ unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+ switch (mode) {
+ case COMPAT_PSR_MODE_SVC:
+ mode = KVM_SPSR_SVC;
+ break;
+ case COMPAT_PSR_MODE_ABT:
+ mode = KVM_SPSR_ABT;
+ break;
+ case COMPAT_PSR_MODE_UND:
+ mode = KVM_SPSR_UND;
+ break;
+ case COMPAT_PSR_MODE_IRQ:
+ mode = KVM_SPSR_IRQ;
+ break;
+ case COMPAT_PSR_MODE_FIQ:
+ mode = KVM_SPSR_FIQ;
+ break;
+ default:
+ BUG();
+ }
+
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
new file mode 100644
index 000000000000..70a7816535cd
--- /dev/null
+++ b/arch/arm64/kvm/reset.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/reset.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include <kvm/arm_arch_timer.h>
+
+#include <asm/cputype.h>
+#include <asm/ptrace.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_coproc.h>
+
+/*
+ * ARMv8 Reset Values
+ */
+static const struct kvm_regs default_regs_reset = {
+ .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
+ PSR_F_BIT | PSR_D_BIT),
+};
+
+static const struct kvm_regs default_regs_reset32 = {
+ .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT |
+ COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
+};
+
+static const struct kvm_irq_level default_vtimer_irq = {
+ .irq = 27,
+ .level = 1,
+};
+
+static bool cpu_has_32bit_el1(void)
+{
+ u64 pfr0;
+
+ pfr0 = read_cpuid(ID_AA64PFR0_EL1);
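+	/* EL1 field, bits [7:4]: a value of 0x2 flags AArch32 support */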
+ return !!(pfr0 & 0x20);
+}
+
+int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_ARM_EL1_32BIT:
+ r = cpu_has_32bit_el1();
+ break;
+ default:
+ r = 0;
+ }
+
+ return r;
+}
+
+/**
+ * kvm_reset_vcpu - sets core registers and sys_regs to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on
+ * the virtual CPU struct to their architecturally defined reset
+ * values.
+ */
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+ const struct kvm_irq_level *cpu_vtimer_irq;
+ const struct kvm_regs *cpu_reset;
+
+ switch (vcpu->arch.target) {
+ default:
+ if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+ if (!cpu_has_32bit_el1())
+ return -EINVAL;
+ cpu_reset = &default_regs_reset32;
+ vcpu->arch.hcr_el2 &= ~HCR_RW;
+ } else {
+ cpu_reset = &default_regs_reset;
+ }
+
+ cpu_vtimer_irq = &default_vtimer_irq;
+ break;
+ }
+
+ /* Reset core registers */
+ memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
+
+ /* Reset system registers */
+ kvm_reset_sys_regs(vcpu);
+
+ /* Reset timer */
+ kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
+ return 0;
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
new file mode 100644
index 000000000000..4cc3b719208e
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.c
@@ -0,0 +1,1528 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/debug-monitors.h>
+#include <trace/events/kvm.h>
+
+#include "sys_regs.h"
+
+/*
+ * All of this file is extremely similar to the ARM coproc.c, but the
+ * types are different. My gut feeling is that it should be pretty
+ * easy to merge, but that would be an ABI breakage -- again. VFP
+ * would also need to be abstracted.
+ *
+ * For AArch32, we only take care of what is being trapped. Anything
+ * that has to do with init and userspace access has to go via the
+ * 64bit interface.
+ */
+
+/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
+static u32 cache_levels;
+
+/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
+#define CSSELR_MAX 12
+
+/* Which cache CCSIDR represents depends on CSSELR value. */
+static u32 get_ccsidr(u32 csselr)
+{
+ u32 ccsidr;
+
+	/* Make sure no one else changes CSSELR during this! */
+ local_irq_disable();
+ /* Put value into CSSELR */
+ asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+ isb();
+ /* Read result out of CCSIDR */
+ asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
+ local_irq_enable();
+
+ return ccsidr;
+}
+
+static void do_dc_cisw(u32 val)
+{
+ asm volatile("dc cisw, %x0" : : "r" (val));
+ dsb(ish);
+}
+
+static void do_dc_csw(u32 val)
+{
+ asm volatile("dc csw, %x0" : : "r" (val));
+ dsb(ish);
+}
+
+/* See note at ARM ARM B1.14.4 */
+static bool access_dcsw(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ unsigned long val;
+ int cpu;
+
+ if (!p->is_write)
+ return read_from_write_only(vcpu, p);
+
+ cpu = get_cpu();
+
+ cpumask_setall(&vcpu->arch.require_dcache_flush);
+ cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
+
+ /* If we were already preempted, take the long way around */
+ if (cpu != vcpu->arch.last_pcpu) {
+ flush_cache_all();
+ goto done;
+ }
+
+ val = *vcpu_reg(vcpu, p->Rt);
+
+ switch (p->CRm) {
+ case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
+ case 14: /* DCCISW */
+ do_dc_cisw(val);
+ break;
+
+ case 10: /* DCCSW */
+ do_dc_csw(val);
+ break;
+ }
+
+done:
+ put_cpu();
+
+ return true;
+}
+
+/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ unsigned long val;
+
+ BUG_ON(!p->is_write);
+
+ val = *vcpu_reg(vcpu, p->Rt);
+ if (!p->is_aarch32) {
+ vcpu_sys_reg(vcpu, r->reg) = val;
+ } else {
+ if (!p->is_32bit)
+ vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
+ vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+ }
+
+ return true;
+}
+
+/*
+ * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ */
+static bool access_sctlr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ access_vm_reg(vcpu, p, r);
+
+ if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
+ vcpu->arch.hcr_el2 &= ~HCR_TVM;
+ stage2_flush_vm(vcpu->kvm);
+ }
+
+ return true;
+}
+
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+ else
+ return read_zero(vcpu, p);
+}
+
+static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = (1 << 3);
+ return true;
+ }
+}
+
+static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ u32 val;
+ asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
+ *vcpu_reg(vcpu, p->Rt) = val;
+ return true;
+ }
+}
+
+/*
+ * We want to avoid world-switching all the DBG registers all the
+ * time:
+ *
+ * - If we've touched any debug register, it is likely that we're
+ * going to touch more of them. It then makes sense to disable the
+ * traps and start doing the save/restore dance
+ * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
+ * then mandatory to save/restore the registers, as the guest
+ * depends on them.
+ *
+ * For this, we use a DIRTY bit, indicating the guest has modified the
+ * debug registers, used as follows:
+ *
+ * On guest entry:
+ * - If the dirty bit is set (because we're coming back from trapping),
+ * disable the traps, save host registers, restore guest registers.
+ * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
+ * set the dirty bit, disable the traps, save host registers,
+ * restore guest registers.
+ * - Otherwise, enable the traps
+ *
+ * On guest exit:
+ * - If the dirty bit is set, save guest registers, restore host
+ * registers and clear the dirty bit. This ensures that the host can
+ * now use the debug registers.
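+ *
+ * A worked example of the round trip: the guest writes DBGBVR0_EL1;
+ * trap_debug_regs() below saves the value and sets KVM_ARM64_DEBUG_DIRTY;
+ * the next world switch sees the dirty bit, leaves debug untrapped and
+ * does the full save/restore; on exit it saves the guest debug state
+ * and clears the bit again.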
+ */
+static bool trap_debug_regs(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+ }
+
+ return true;
+}
+
+static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 amair;
+
+ asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
+ vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
+}
+
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ /*
+ * Simply map the vcpu_id into the Aff0 field of the MPIDR.
+ */
+ vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
+}
+
+/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
+#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
+ /* DBGBVRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \
+ trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \
+ /* DBGBCRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \
+ trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \
+ /* DBGWVRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \
+ trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \
+ /* DBGWCRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
+ trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+
+/*
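+/*
+ * For instance, DBG_BCR_BVR_WCR_WVR_EL1(0) expands to four descriptors,
+ * the first being DBGBVR0_EL1 at Op0=2, Op1=0, CRn=0, CRm=0, Op2=4, all
+ * routed through trap_debug_regs and reset to 0.
+ */
+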
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters. Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ *
+ * Debug handling: We do trap most, if not all debug related system
+ * registers. The implementation is good enough to ensure that a guest
+ * can use these with minimal performance degradation. The drawback is
+ * that we don't implement any of the external debug interface, nor
+ * the OSlock protocol. This should be revisited if we ever encounter a
+ * more demanding guest...
+ */
+static const struct sys_reg_desc sys_reg_descs[] = {
+ /* DC ISW */
+ { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010),
+ access_dcsw },
+ /* DC CSW */
+ { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010),
+ access_dcsw },
+ /* DC CISW */
+ { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
+ access_dcsw },
+
+ DBG_BCR_BVR_WCR_WVR_EL1(0),
+ DBG_BCR_BVR_WCR_WVR_EL1(1),
+ /* MDCCINT_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+ trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+ /* MDSCR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+ trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+ DBG_BCR_BVR_WCR_WVR_EL1(2),
+ DBG_BCR_BVR_WCR_WVR_EL1(3),
+ DBG_BCR_BVR_WCR_WVR_EL1(4),
+ DBG_BCR_BVR_WCR_WVR_EL1(5),
+ DBG_BCR_BVR_WCR_WVR_EL1(6),
+ DBG_BCR_BVR_WCR_WVR_EL1(7),
+ DBG_BCR_BVR_WCR_WVR_EL1(8),
+ DBG_BCR_BVR_WCR_WVR_EL1(9),
+ DBG_BCR_BVR_WCR_WVR_EL1(10),
+ DBG_BCR_BVR_WCR_WVR_EL1(11),
+ DBG_BCR_BVR_WCR_WVR_EL1(12),
+ DBG_BCR_BVR_WCR_WVR_EL1(13),
+ DBG_BCR_BVR_WCR_WVR_EL1(14),
+ DBG_BCR_BVR_WCR_WVR_EL1(15),
+
+ /* MDRAR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+ trap_raz_wi },
+ /* OSLAR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
+ trap_raz_wi },
+ /* OSLSR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
+ trap_oslsr_el1 },
+ /* OSDLR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
+ trap_raz_wi },
+ /* DBGPRCR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
+ trap_raz_wi },
+ /* DBGCLAIMSET_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
+ trap_raz_wi },
+ /* DBGCLAIMCLR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
+ trap_raz_wi },
+ /* DBGAUTHSTATUS_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
+ trap_dbgauthstatus_el1 },
+
+ /* TEECR32_EL1 */
+ { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+ NULL, reset_val, TEECR32_EL1, 0 },
+ /* TEEHBR32_EL1 */
+ { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
+ NULL, reset_val, TEEHBR32_EL1, 0 },
+
+ /* MDCCSR_EL1 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
+ trap_raz_wi },
+ /* DBGDTR_EL0 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
+ trap_raz_wi },
+ /* DBGDTR[TR]X_EL0 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
+ trap_raz_wi },
+
+ /* DBGVCR32_EL2 */
+ { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
+ NULL, reset_val, DBGVCR32_EL2, 0 },
+
+ /* MPIDR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
+ NULL, reset_mpidr, MPIDR_EL1 },
+ /* SCTLR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+ access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
+ /* CPACR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
+ NULL, reset_val, CPACR_EL1, 0 },
+ /* TTBR0_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
+ access_vm_reg, reset_unknown, TTBR0_EL1 },
+ /* TTBR1_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
+ access_vm_reg, reset_unknown, TTBR1_EL1 },
+ /* TCR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
+ access_vm_reg, reset_val, TCR_EL1, 0 },
+
+ /* AFSR0_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
+ access_vm_reg, reset_unknown, AFSR0_EL1 },
+ /* AFSR1_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
+ access_vm_reg, reset_unknown, AFSR1_EL1 },
+ /* ESR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
+ access_vm_reg, reset_unknown, ESR_EL1 },
+ /* FAR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
+ access_vm_reg, reset_unknown, FAR_EL1 },
+ /* PAR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
+ NULL, reset_unknown, PAR_EL1 },
+
+ /* PMINTENSET_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
+ trap_raz_wi },
+ /* PMINTENCLR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
+ trap_raz_wi },
+
+ /* MAIR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
+ access_vm_reg, reset_unknown, MAIR_EL1 },
+ /* AMAIR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
+ access_vm_reg, reset_amair_el1, AMAIR_EL1 },
+
+ /* VBAR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
+ NULL, reset_val, VBAR_EL1, 0 },
+ /* CONTEXTIDR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
+ access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
+ /* TPIDR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
+ NULL, reset_unknown, TPIDR_EL1 },
+
+ /* CNTKCTL_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000),
+ NULL, reset_val, CNTKCTL_EL1, 0},
+
+ /* CSSELR_EL1 */
+ { Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+ NULL, reset_unknown, CSSELR_EL1 },
+
+ /* PMCR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
+ trap_raz_wi },
+ /* PMCNTENSET_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
+ trap_raz_wi },
+ /* PMCNTENCLR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
+ trap_raz_wi },
+ /* PMOVSCLR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
+ trap_raz_wi },
+ /* PMSWINC_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
+ trap_raz_wi },
+ /* PMSELR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
+ trap_raz_wi },
+ /* PMCEID0_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
+ trap_raz_wi },
+ /* PMCEID1_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
+ trap_raz_wi },
+ /* PMCCNTR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
+ trap_raz_wi },
+ /* PMXEVTYPER_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
+ trap_raz_wi },
+ /* PMXEVCNTR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
+ trap_raz_wi },
+ /* PMUSERENR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
+ trap_raz_wi },
+ /* PMOVSSET_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
+ trap_raz_wi },
+
+ /* TPIDR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
+ NULL, reset_unknown, TPIDR_EL0 },
+ /* TPIDRRO_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
+ NULL, reset_unknown, TPIDRRO_EL0 },
+
+ /* DACR32_EL2 */
+ { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
+ NULL, reset_unknown, DACR32_EL2 },
+ /* IFSR32_EL2 */
+ { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001),
+ NULL, reset_unknown, IFSR32_EL2 },
+ /* FPEXC32_EL2 */
+ { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000),
+ NULL, reset_val, FPEXC32_EL2, 0x70 },
+};
+
+static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
+ u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
+ u32 el3 = !!((pfr >> 12) & 0xf);
+
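+		/* Fake a DBGIDR from the 64bit ID registers: WRPs, BRPs and
+		 * CTX_CMPs come from ID_AA64DFR0, the version is 6 (ARMv8
+		 * debug), and SE/NSUHD reflect EL3 presence. */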
+ *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
+ (((dfr >> 12) & 0xf) << 24) |
+ (((dfr >> 28) & 0xf) << 20) |
+ (6 << 16) | (el3 << 14) | (el3 << 12));
+ return true;
+ }
+}
+
+static bool trap_debug32(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+ }
+
+ return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n) \
+ /* DBGBVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \
+ NULL, (cp14_DBGBVR0 + (n) * 2) }, \
+ /* DBGBCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \
+ NULL, (cp14_DBGBCR0 + (n) * 2) }, \
+ /* DBGWVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \
+ NULL, (cp14_DBGWVR0 + (n) * 2) }, \
+ /* DBGWCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \
+ NULL, (cp14_DBGWCR0 + (n) * 2) }
+
+#define DBGBXVR(n) \
+ { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \
+ NULL, cp14_DBGBXVR0 + n * 2 }
+
+/*
+ * Trapped cp14 registers. We generally ignore most of the external
+ * debug registers, on the principle that they don't really make sense
+ * to a guest. Revisit this one day, should this principle change.
+ */
+static const struct sys_reg_desc cp14_regs[] = {
+ /* DBGIDR */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+ /* DBGDTRRXext */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
+
+ DBG_BCR_BVR_WCR_WVR(0),
+ /* DBGDSCRint */
+ { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(1),
+ /* DBGDCCINT */
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 },
+ /* DBGDSCRext */
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 },
+ DBG_BCR_BVR_WCR_WVR(2),
+ /* DBGDTR[RT]Xint */
+ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
+ /* DBGDTR[RT]Xext */
+ { Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(3),
+ DBG_BCR_BVR_WCR_WVR(4),
+ DBG_BCR_BVR_WCR_WVR(5),
+ /* DBGWFAR */
+ { Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
+ /* DBGOSECCR */
+ { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(6),
+ /* DBGVCR */
+ { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 },
+ DBG_BCR_BVR_WCR_WVR(7),
+ DBG_BCR_BVR_WCR_WVR(8),
+ DBG_BCR_BVR_WCR_WVR(9),
+ DBG_BCR_BVR_WCR_WVR(10),
+ DBG_BCR_BVR_WCR_WVR(11),
+ DBG_BCR_BVR_WCR_WVR(12),
+ DBG_BCR_BVR_WCR_WVR(13),
+ DBG_BCR_BVR_WCR_WVR(14),
+ DBG_BCR_BVR_WCR_WVR(15),
+
+ /* DBGDRAR (32bit) */
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },
+
+ DBGBXVR(0),
+ /* DBGOSLAR */
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+ DBGBXVR(1),
+ /* DBGOSLSR */
+ { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+ DBGBXVR(2),
+ DBGBXVR(3),
+ /* DBGOSDLR */
+ { Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
+ DBGBXVR(4),
+ /* DBGPRCR */
+ { Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
+ DBGBXVR(5),
+ DBGBXVR(6),
+ DBGBXVR(7),
+ DBGBXVR(8),
+ DBGBXVR(9),
+ DBGBXVR(10),
+ DBGBXVR(11),
+ DBGBXVR(12),
+ DBGBXVR(13),
+ DBGBXVR(14),
+ DBGBXVR(15),
+
+ /* DBGDSAR (32bit) */
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },
+
+ /* DBGDEVID2 */
+ { Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
+ /* DBGDEVID1 */
+ { Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
+ /* DBGDEVID */
+ { Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
+ /* DBGCLAIMSET */
+ { Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
+ /* DBGCLAIMCLR */
+ { Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
+ /* DBGAUTHSTATUS */
+ { Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
+};
+
+/* Trapped cp14 64bit registers */
+static const struct sys_reg_desc cp14_64_regs[] = {
+ /* DBGDRAR (64bit) */
+ { Op1( 0), CRm( 1), .access = trap_raz_wi },
+
+ /* DBGDSAR (64bit) */
+ { Op1( 0), CRm( 2), .access = trap_raz_wi },
+};
+
+/*
+ * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
+ * depending on the way they are accessed (as a 32bit or a 64bit
+ * register).
+ */
+static const struct sys_reg_desc cp15_regs[] = {
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
+ { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
+ { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
+ { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+
+ /*
+ * DC{C,I,CI}SW operations:
+ */
+ { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
+ { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
+ { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+
+ /* PMU */
+ { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
+
+ { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
+ { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
+ { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
+ { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+ { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+};
+
+static const struct sys_reg_desc cp15_64_regs[] = {
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+};
+
+/* Target specific emulation tables */
+static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+ struct kvm_sys_reg_target_table *table)
+{
+ target_tables[target] = table;
+}
+
+/* Get specific register table for this target. */
+static const struct sys_reg_desc *get_target_table(unsigned target,
+ bool mode_is_64,
+ size_t *num)
+{
+ struct kvm_sys_reg_target_table *table;
+
+ table = target_tables[target];
+ if (mode_is_64) {
+ *num = table->table64.num;
+ return table->table64.table;
+ } else {
+ *num = table->table32.num;
+ return table->table32.table;
+ }
+}
+
+static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num)
+{
+ unsigned int i;
+
+ for (i = 0; i < num; i++) {
+ const struct sys_reg_desc *r = &table[i];
+
+ if (params->Op0 != r->Op0)
+ continue;
+ if (params->Op1 != r->Op1)
+ continue;
+ if (params->CRn != r->CRn)
+ continue;
+ if (params->CRm != r->CRm)
+ continue;
+ if (params->Op2 != r->Op2)
+ continue;
+
+ return r;
+ }
+ return NULL;
+}
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+/*
+ * emulate_cp -- tries to match a sys_reg access in a handling table, and
+ * call the corresponding trap handler.
+ *
+ * @params: pointer to the descriptor of the access
+ * @table: array of trap descriptors
+ * @num: size of the trap descriptor array
+ *
+ * Return 0 if the access has been handled, and -1 if not.
+ */
+static int emulate_cp(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params,
+ const struct sys_reg_desc *table,
+ size_t num)
+{
+ const struct sys_reg_desc *r;
+
+ if (!table)
+ return -1; /* Not handled */
+
+ r = find_reg(params, table, num);
+
+ if (r) {
+ /*
+ * Not having an accessor means that we have
+ * configured a trap that we don't know how to
+ * handle. This certainly qualifies as a gross bug
+ * that should be fixed right away.
+ */
+ BUG_ON(!r->access);
+
+ if (likely(r->access(vcpu, params, r))) {
+ /* Skip instruction, since it was emulated */
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ }
+
+ /* Handled */
+ return 0;
+ }
+
+ /* Not handled */
+ return -1;
+}
+
+static void unhandled_cp_access(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params)
+{
+ u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+ int cp;
+
+	switch (hsr_ec) {
+ case ESR_EL2_EC_CP15_32:
+ case ESR_EL2_EC_CP15_64:
+ cp = 15;
+ break;
+ case ESR_EL2_EC_CP14_MR:
+ case ESR_EL2_EC_CP14_64:
+ cp = 14;
+ break;
+ default:
+ WARN_ON((cp = -1));
+ }
+
+ kvm_err("Unsupported guest CP%d access at: %08lx\n",
+ cp, *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ kvm_inject_undefined(vcpu);
+}
+
+/**
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
+ * @vcpu: The VCPU pointer
+ * @global: The global (non target-specific) trap descriptor table
+ * @nr_global: The size of the global table
+ * @target_specific: The target-specific trap descriptor table
+ * @nr_specific: The size of the target-specific table
+ */
+static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *global,
+ size_t nr_global,
+ const struct sys_reg_desc *target_specific,
+ size_t nr_specific)
+{
+ struct sys_reg_params params;
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ int Rt2 = (hsr >> 10) & 0xf;
+
+ params.is_aarch32 = true;
+ params.is_32bit = false;
+ params.CRm = (hsr >> 1) & 0xf;
+ params.Rt = (hsr >> 5) & 0xf;
+ params.is_write = ((hsr & 1) == 0);
+
+ params.Op0 = 0;
+ params.Op1 = (hsr >> 16) & 0xf;
+ params.Op2 = 0;
+ params.CRn = 0;
+
+ /*
+ * Massive hack here. Store Rt2 in the top 32bits so we only
+ * have one register to deal with. As we use the same trap
+ * backends between AArch32 and AArch64, we get away with it.
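+ *
+ * E.g. an mcrr write with Rt=0x2 (low word) and Rt2=0x1 (high word)
+ * is folded into the single 64bit value 0x0000000100000002.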
+ */
+ if (params.is_write) {
+ u64 val = *vcpu_reg(vcpu, params.Rt);
+ val &= 0xffffffff;
+ val |= *vcpu_reg(vcpu, Rt2) << 32;
+ *vcpu_reg(vcpu, params.Rt) = val;
+ }
+
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+ goto out;
+ if (!emulate_cp(vcpu, &params, global, nr_global))
+ goto out;
+
+ unhandled_cp_access(vcpu, &params);
+
+out:
+ /* Do the opposite hack for the read side */
+ if (!params.is_write) {
+ u64 val = *vcpu_reg(vcpu, params.Rt);
+ val >>= 32;
+ *vcpu_reg(vcpu, Rt2) = val;
+ }
+
+ return 1;
+}
+
+/**
+ * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
+ * @vcpu: The VCPU pointer
+ * @global: The global (non target-specific) trap descriptor table
+ * @nr_global: The size of the global table
+ * @target_specific: The target-specific trap descriptor table
+ * @nr_specific: The size of the target-specific table
+ */
+static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *global,
+ size_t nr_global,
+ const struct sys_reg_desc *target_specific,
+ size_t nr_specific)
+{
+ struct sys_reg_params params;
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ params.is_aarch32 = true;
+ params.is_32bit = true;
+ params.CRm = (hsr >> 1) & 0xf;
+ params.Rt = (hsr >> 5) & 0xf;
+ params.is_write = ((hsr & 1) == 0);
+ params.CRn = (hsr >> 10) & 0xf;
+ params.Op0 = 0;
+ params.Op1 = (hsr >> 14) & 0x7;
+ params.Op2 = (hsr >> 17) & 0x7;
+
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+ return 1;
+ if (!emulate_cp(vcpu, &params, global, nr_global))
+ return 1;
+
+ unhandled_cp_access(vcpu, &params);
+ return 1;
+}
+
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ const struct sys_reg_desc *target_specific;
+ size_t num;
+
+ target_specific = get_target_table(vcpu->arch.target, false, &num);
+ return kvm_handle_cp_64(vcpu,
+ cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
+ target_specific, num);
+}
+
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ const struct sys_reg_desc *target_specific;
+ size_t num;
+
+ target_specific = get_target_table(vcpu->arch.target, false, &num);
+ return kvm_handle_cp_32(vcpu,
+ cp15_regs, ARRAY_SIZE(cp15_regs),
+ target_specific, num);
+}
+
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return kvm_handle_cp_64(vcpu,
+ cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
+ NULL, 0);
+}
+
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return kvm_handle_cp_32(vcpu,
+ cp14_regs, ARRAY_SIZE(cp14_regs),
+ NULL, 0);
+}
+
+static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params)
+{
+ size_t num;
+ const struct sys_reg_desc *table, *r;
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+
+ /* Search target-specific then generic table. */
+ r = find_reg(params, table, num);
+ if (!r)
+ r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+ if (likely(r)) {
+ /*
+ * Not having an accessor means that we have
+ * configured a trap that we don't know how to
+ * handle. This certainly qualifies as a gross bug
+ * that should be fixed right away.
+ */
+ BUG_ON(!r->access);
+
+ if (likely(r->access(vcpu, params, r))) {
+ /* Skip instruction, since it was emulated */
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ return 1;
+ }
+ /* If access function fails, it should complain. */
+ } else {
+ kvm_err("Unsupported guest sys_reg access at: %lx\n",
+ *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ }
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *table, size_t num)
+{
+ unsigned long i;
+
+ for (i = 0; i < num; i++)
+ if (table[i].reset)
+ table[i].reset(vcpu, &table[i]);
+}
+
+/**
+ * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ */
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ struct sys_reg_params params;
+ unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+
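+	/* ESR_EL2 ISS layout for sys_reg traps, as decoded below:
+	 * Op0[21:20], Op2[19:17], Op1[16:14], CRn[13:10], Rt[9:5],
+	 * CRm[4:1], direction[0] (0 = write/MSR, 1 = read/MRS). */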
+ params.is_aarch32 = false;
+ params.is_32bit = false;
+ params.Op0 = (esr >> 20) & 3;
+ params.Op1 = (esr >> 14) & 0x7;
+ params.CRn = (esr >> 10) & 0xf;
+ params.CRm = (esr >> 1) & 0xf;
+ params.Op2 = (esr >> 17) & 0x7;
+ params.Rt = (esr >> 5) & 0x1f;
+ params.is_write = !(esr & 1);
+
+ return emulate_sys_reg(vcpu, &params);
+}
+
+/******************************************************************************
+ * Userspace API
+ *****************************************************************************/
+
+static bool index_to_params(u64 id, struct sys_reg_params *params)
+{
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U64:
+		/* Any unused index bits mean it's not valid. */
+ if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
+ | KVM_REG_ARM_COPROC_MASK
+ | KVM_REG_ARM64_SYSREG_OP0_MASK
+ | KVM_REG_ARM64_SYSREG_OP1_MASK
+ | KVM_REG_ARM64_SYSREG_CRN_MASK
+ | KVM_REG_ARM64_SYSREG_CRM_MASK
+ | KVM_REG_ARM64_SYSREG_OP2_MASK))
+ return false;
+ params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP0_SHIFT);
+ params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP1_SHIFT);
+ params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK)
+ >> KVM_REG_ARM64_SYSREG_CRN_SHIFT);
+ params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK)
+ >> KVM_REG_ARM64_SYSREG_CRM_SHIFT);
+ params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP2_SHIFT);
+ return true;
+ default:
+ return false;
+ }
+}
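+
+/*
+ * For illustration (field shifts assumed from the uapi header): the
+ * index packs the encoding as Op0<<14 | Op1<<11 | CRn<<7 | CRm<<3 | Op2
+ * below the KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG
+ * prefix, so the MPIDR_EL1 descriptor (Op0=3, Op1=0, CRn=0, CRm=0,
+ * Op2=5) round-trips through here unchanged.
+ */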
+
+/* Decode an index value, and find the sys_reg_desc entry. */
+static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
+ u64 id)
+{
+ size_t num;
+ const struct sys_reg_desc *table, *r;
+ struct sys_reg_params params;
+
+ /* We only do sys_reg for now. */
+ if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
+ return NULL;
+
+ if (!index_to_params(id, &params))
+ return NULL;
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+ r = find_reg(&params, table, num);
+ if (!r)
+ r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+ /* Not saved in the sys_reg array? */
+ if (r && !r->reg)
+ r = NULL;
+
+ return r;
+}
+
+/*
+ * These are the invariant sys_reg registers: we let the guest see the
+ * host versions of these, so they're part of the guest state.
+ *
+ * A future CPU may provide a mechanism to present different values to
+ * the guest, or a future kvm may trap them.
+ */
+
+#define FUNCTION_INVARIANT(reg) \
+ static void get_##reg(struct kvm_vcpu *v, \
+ const struct sys_reg_desc *r) \
+ { \
+ u64 val; \
+ \
+ asm volatile("mrs %0, " __stringify(reg) "\n" \
+ : "=r" (val)); \
+ ((struct sys_reg_desc *)r)->val = val; \
+ }
+
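+/*
+ * E.g. FUNCTION_INVARIANT(midr_el1) expands to get_midr_el1(), which
+ * snapshots the host's MIDR_EL1 into the descriptor's ->val.
+ */
+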
+FUNCTION_INVARIANT(midr_el1)
+FUNCTION_INVARIANT(ctr_el0)
+FUNCTION_INVARIANT(revidr_el1)
+FUNCTION_INVARIANT(id_pfr0_el1)
+FUNCTION_INVARIANT(id_pfr1_el1)
+FUNCTION_INVARIANT(id_dfr0_el1)
+FUNCTION_INVARIANT(id_afr0_el1)
+FUNCTION_INVARIANT(id_mmfr0_el1)
+FUNCTION_INVARIANT(id_mmfr1_el1)
+FUNCTION_INVARIANT(id_mmfr2_el1)
+FUNCTION_INVARIANT(id_mmfr3_el1)
+FUNCTION_INVARIANT(id_isar0_el1)
+FUNCTION_INVARIANT(id_isar1_el1)
+FUNCTION_INVARIANT(id_isar2_el1)
+FUNCTION_INVARIANT(id_isar3_el1)
+FUNCTION_INVARIANT(id_isar4_el1)
+FUNCTION_INVARIANT(id_isar5_el1)
+FUNCTION_INVARIANT(clidr_el1)
+FUNCTION_INVARIANT(aidr_el1)
+
+/* ->val is filled in by kvm_sys_reg_table_init() */
+static struct sys_reg_desc invariant_sys_regs[] = {
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000),
+ NULL, get_midr_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110),
+ NULL, get_revidr_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000),
+ NULL, get_id_pfr0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001),
+ NULL, get_id_pfr1_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010),
+ NULL, get_id_dfr0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011),
+ NULL, get_id_afr0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100),
+ NULL, get_id_mmfr0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101),
+ NULL, get_id_mmfr1_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110),
+ NULL, get_id_mmfr2_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111),
+ NULL, get_id_mmfr3_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+ NULL, get_id_isar0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001),
+ NULL, get_id_isar1_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+ NULL, get_id_isar2_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011),
+ NULL, get_id_isar3_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100),
+ NULL, get_id_isar4_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101),
+ NULL, get_id_isar5_el1 },
+ { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001),
+ NULL, get_clidr_el1 },
+ { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111),
+ NULL, get_aidr_el1 },
+ { Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001),
+ NULL, get_ctr_el0 },
+};
+
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
+{
+ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
+{
+ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+ struct sys_reg_params params;
+ const struct sys_reg_desc *r;
+
+ if (!index_to_params(id, &params))
+ return -ENOENT;
+
+ r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+ if (!r)
+ return -ENOENT;
+
+ return reg_to_user(uaddr, &r->val, id);
+}
+
+static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+ struct sys_reg_params params;
+ const struct sys_reg_desc *r;
+ int err;
+ u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+
+ if (!index_to_params(id, &params))
+ return -ENOENT;
+ r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+ if (!r)
+ return -ENOENT;
+
+ err = reg_from_user(&val, uaddr, id);
+ if (err)
+ return err;
+
+ /* This is what we mean by invariant: you can't change it. */
+ if (r->val != val)
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool is_valid_cache(u32 val)
+{
+ u32 level, ctype;
+
+ if (val >= CSSELR_MAX)
+ return false;
+
+ /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
+ level = (val >> 1);
+ ctype = (cache_levels >> (level * 3)) & 7;
+
+ switch (ctype) {
+ case 0: /* No cache */
+ return false;
+ case 1: /* Instruction cache only */
+ return (val & 1);
+ case 2: /* Data cache only */
+ case 4: /* Unified cache */
+ return !(val & 1);
+ case 3: /* Separate instruction and data caches */
+ return true;
+ default: /* Reserved: we can't know instruction or data. */
+ return false;
+ }
+}
+
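+/*
+ * For illustration: CSSELR value 0 selects the level 1 data/unified
+ * cache, 1 the level 1 instruction cache, 2 the level 2 data/unified
+ * cache, and so on; bit 0 is the instruction/data selector that
+ * is_valid_cache() checks against the matching Ctype field.
+ */
+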
+static int demux_c15_get(u64 id, void __user *uaddr)
+{
+ u32 val;
+ u32 __user *uval = uaddr;
+
+ /* Fail if we have unknown bits set. */
+ if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+ | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+ return -ENOENT;
+
+ switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+ case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+ if (KVM_REG_SIZE(id) != 4)
+ return -ENOENT;
+ val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+ >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+ if (!is_valid_cache(val))
+ return -ENOENT;
+
+ return put_user(get_ccsidr(val), uval);
+ default:
+ return -ENOENT;
+ }
+}
+
+static int demux_c15_set(u64 id, void __user *uaddr)
+{
+ u32 val, newval;
+ u32 __user *uval = uaddr;
+
+ /* Fail if we have unknown bits set. */
+ if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+ | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+ return -ENOENT;
+
+ switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+ case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+ if (KVM_REG_SIZE(id) != 4)
+ return -ENOENT;
+ val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+ >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+ if (!is_valid_cache(val))
+ return -ENOENT;
+
+ if (get_user(newval, uval))
+ return -EFAULT;
+
+ /* This is also invariant: you can't change it. */
+ if (newval != get_ccsidr(val))
+ return -EINVAL;
+ return 0;
+ default:
+ return -ENOENT;
+ }
+}
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ const struct sys_reg_desc *r;
+ void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return demux_c15_get(reg->id, uaddr);
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+ return -ENOENT;
+
+ r = index_to_sys_reg_desc(vcpu, reg->id);
+ if (!r)
+ return get_invariant_sys_reg(reg->id, uaddr);
+
+ return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+}
+
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ const struct sys_reg_desc *r;
+ void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return demux_c15_set(reg->id, uaddr);
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+ return -ENOENT;
+
+ r = index_to_sys_reg_desc(vcpu, reg->id);
+ if (!r)
+ return set_invariant_sys_reg(reg->id, uaddr);
+
+ return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+}
+
+static unsigned int num_demux_regs(void)
+{
+ unsigned int i, count = 0;
+
+ for (i = 0; i < CSSELR_MAX; i++)
+ if (is_valid_cache(i))
+ count++;
+
+ return count;
+}
+
+static int write_demux_regids(u64 __user *uindices)
+{
+ u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+ unsigned int i;
+
+ val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
+ for (i = 0; i < CSSELR_MAX; i++) {
+ if (!is_valid_cache(i))
+ continue;
+ if (put_user(val | i, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ return 0;
+}
+
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
+{
+ return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
+ KVM_REG_ARM64_SYSREG |
+ (reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
+ (reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) |
+ (reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) |
+ (reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) |
+ (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT));
+}
+
+static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
+{
+ if (!*uind)
+ return true;
+
+ if (put_user(sys_reg_to_index(reg), *uind))
+ return false;
+
+ (*uind)++;
+ return true;
+}
+
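+/*
+ * Note the NULL-pointer convention: with *uind unset the helper above
+ * writes nothing, so walk_sys_regs(vcpu, NULL) simply counts the
+ * registers that would be reported.
+ */
+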
+/* Assumed ordered tables, see kvm_sys_reg_table_init. */
+static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
+{
+ const struct sys_reg_desc *i1, *i2, *end1, *end2;
+ unsigned int total = 0;
+ size_t num;
+
+ /* We check for duplicates here, to allow arch-specific overrides. */
+ i1 = get_target_table(vcpu->arch.target, true, &num);
+ end1 = i1 + num;
+ i2 = sys_reg_descs;
+ end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
+
+ BUG_ON(i1 == end1 || i2 == end2);
+
+ /* Walk carefully, as both tables may refer to the same register. */
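+	/*
+	 * This is a merge of two sorted lists: when both iterators name
+	 * the same register (cmp == 0), only the target-specific entry
+	 * is emitted and both iterators advance, so each register is
+	 * counted exactly once.
+	 */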
+ while (i1 || i2) {
+ int cmp = cmp_sys_reg(i1, i2);
+ /* target-specific overrides generic entry. */
+ if (cmp <= 0) {
+ /* Ignore registers we trap but don't save. */
+ if (i1->reg) {
+ if (!copy_reg_to_user(i1, &uind))
+ return -EFAULT;
+ total++;
+ }
+ } else {
+ /* Ignore registers we trap but don't save. */
+ if (i2->reg) {
+ if (!copy_reg_to_user(i2, &uind))
+ return -EFAULT;
+ total++;
+ }
+ }
+
+ if (cmp <= 0 && ++i1 == end1)
+ i1 = NULL;
+ if (cmp >= 0 && ++i2 == end2)
+ i2 = NULL;
+ }
+ return total;
+}
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
+{
+ return ARRAY_SIZE(invariant_sys_regs)
+ + num_demux_regs()
+ + walk_sys_regs(vcpu, (u64 __user *)NULL);
+}
+
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ unsigned int i;
+ int err;
+
+	/* First give them all the invariant registers' indices. */
+ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) {
+ if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ err = walk_sys_regs(vcpu, uindices);
+ if (err < 0)
+ return err;
+ uindices += err;
+
+ return write_demux_regids(uindices);
+}
+
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 1; i < n; i++) {
+ if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+ kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+void kvm_sys_reg_table_init(void)
+{
+ unsigned int i;
+ struct sys_reg_desc clidr;
+
+ /* Make sure tables are unique and in order. */
+ BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
+ BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
+ BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
+ BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+ BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
+ BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
+
+ /* We abuse the reset function to overwrite the table itself. */
+ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
+ invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+
+ /*
+ * CLIDR format is awkward, so clean it up. See ARM B4.1.20:
+ *
+ * If software reads the Cache Type fields from Ctype1
+ * upwards, once it has seen a value of 0b000, no caches
+ * exist at further-out levels of the hierarchy. So, for
+ * example, if Ctype3 is the first Cache Type field with a
+ * value of 0b000, the values of Ctype4 to Ctype7 must be
+ * ignored.
+ */
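+	/*
+	 * Worked example (illustrative): Ctype1 = 0b011 (separate I/D),
+	 * Ctype2 = 0b100 (unified) and Ctype3 = 0b000 leave cache_levels
+	 * equal to 0b100011 after the truncation below.
+	 */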
+ get_clidr_el1(NULL, &clidr); /* Ugly... */
+ cache_levels = clidr.val;
+ for (i = 0; i < 7; i++)
+ if (((cache_levels >> (i*3)) & 7) == 0)
+ break;
+ /* Clear all higher bits. */
+ cache_levels &= (1 << (i*3))-1;
+}
+
+/**
+ * kvm_reset_sys_regs - sets system registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
+{
+ size_t num;
+ const struct sys_reg_desc *table;
+
+	/* Catch someone adding a register without putting in a reset entry. */
+ memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs));
+
+ /* Generic chip reset first (so target could override). */
+ reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+ reset_sys_reg_descs(vcpu, table, num);
+
+ for (num = 1; num < NR_SYS_REGS; num++)
+ if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+			panic("Didn't reset vcpu_sys_reg(%zu)", num);
+}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
new file mode 100644
index 000000000000..d411e251412c
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
+#define __ARM64_KVM_SYS_REGS_LOCAL_H__
+
+struct sys_reg_params {
+ u8 Op0;
+ u8 Op1;
+ u8 CRn;
+ u8 CRm;
+ u8 Op2;
+ u8 Rt;
+ bool is_write;
+ bool is_aarch32;
+ bool is_32bit; /* Only valid if is_aarch32 is true */
+};
+
+struct sys_reg_desc {
+ /* MRS/MSR instruction which accesses it. */
+ u8 Op0;
+ u8 Op1;
+ u8 CRn;
+ u8 CRm;
+ u8 Op2;
+
+ /* Trapped access from guest, if non-NULL. */
+ bool (*access)(struct kvm_vcpu *,
+ const struct sys_reg_params *,
+ const struct sys_reg_desc *);
+
+ /* Initialization for vcpu. */
+ void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
+
+ /* Index into sys_reg[], or 0 if we don't need to save it. */
+ int reg;
+
+ /* Value (usually reset value) */
+ u64 val;
+};
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+ /* Look, we even formatted it for you to paste into the table! */
+ kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+}
+
+static inline bool ignore_write(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p)
+{
+ return true;
+}
+
+static inline bool read_zero(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p)
+{
+ *vcpu_reg(vcpu, p->Rt) = 0;
+ return true;
+}
+
+static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params)
+{
+ kvm_debug("sys_reg write to read-only register at: %lx\n",
+ *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ return false;
+}
+
+static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg read from write-only register at: %lx\n",
+ *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ return false;
+}
+
+/* Reset functions */
+static inline void reset_unknown(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ BUG_ON(!r->reg);
+ BUG_ON(r->reg >= NR_SYS_REGS);
+ vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+}
+
+static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ BUG_ON(!r->reg);
+ BUG_ON(r->reg >= NR_SYS_REGS);
+ vcpu_sys_reg(vcpu, r->reg) = r->val;
+}
+
+static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
+ const struct sys_reg_desc *i2)
+{
+ BUG_ON(i1 == i2);
+ if (!i1)
+ return 1;
+ else if (!i2)
+ return -1;
+ if (i1->Op0 != i2->Op0)
+ return i1->Op0 - i2->Op0;
+ if (i1->Op1 != i2->Op1)
+ return i1->Op1 - i2->Op1;
+ if (i1->CRn != i2->CRn)
+ return i1->CRn - i2->CRn;
+ if (i1->CRm != i2->CRm)
+ return i1->CRm - i2->CRm;
+ return i1->Op2 - i2->Op2;
+}
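+
+/*
+ * For example, MPIDR_EL1 (3,0,0,0,5) sorts before SCTLR_EL1 (3,0,1,0,0)
+ * because CRn differs first; walk_sys_regs() and check_sysreg_table()
+ * both rely on this ordering.
+ */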
+
+
+#define Op0(_x) .Op0 = _x
+#define Op1(_x) .Op1 = _x
+#define CRn(_x) .CRn = _x
+#define CRm(_x) .CRm = _x
+#define Op2(_x) .Op2 = _x
+
+#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
new file mode 100644
index 000000000000..475fd2929310
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/coproc_a15.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kvm_host.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <linux/init.h>
+
+#include "sys_regs.h"
+
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1);
+ return true;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 actlr;
+
+ asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr));
+ vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr;
+}
+
+/*
+ * Implementation-specific sys-reg registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc genericv8_sys_regs[] = {
+ /* ACTLR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+ access_actlr, reset_actlr, ACTLR_EL1 },
+};
+
+static const struct sys_reg_desc genericv8_cp15_regs[] = {
+ /* ACTLR */
+ { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+ access_actlr },
+};
+
+static struct kvm_sys_reg_target_table genericv8_target_table = {
+ .table64 = {
+ .table = genericv8_sys_regs,
+ .num = ARRAY_SIZE(genericv8_sys_regs),
+ },
+ .table32 = {
+ .table = genericv8_cp15_regs,
+ .num = ARRAY_SIZE(genericv8_cp15_regs),
+ },
+};
+
+static int __init sys_reg_genericv8_init(void)
+{
+ unsigned int i;
+
+ for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
+ BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
+ &genericv8_sys_regs[i]) >= 0);
+
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
+ &genericv8_target_table);
+
+ return 0;
+}
+late_initcall(sys_reg_genericv8_init);
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
new file mode 100644
index 000000000000..ae211772f991
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+ENTRY(__save_vgic_v2_state)
+ /* Get VGIC VCTRL base into x2 */
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr x2, [x2, #KVM_VGIC_VCTRL]
+ kern_hyp_va x2
+ cbz x2, 2f // disabled
+
+ /* Compute the address of struct vgic_cpu */
+ add x3, x0, #VCPU_VGIC_CPU
+
+ /* Save all interesting registers */
+ ldr w4, [x2, #GICH_HCR]
+ ldr w5, [x2, #GICH_VMCR]
+ ldr w6, [x2, #GICH_MISR]
+ ldr w7, [x2, #GICH_EISR0]
+ ldr w8, [x2, #GICH_EISR1]
+ ldr w9, [x2, #GICH_ELRSR0]
+ ldr w10, [x2, #GICH_ELRSR1]
+ ldr w11, [x2, #GICH_APR]
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+CPU_BE( rev w8, w8 )
+CPU_BE( rev w9, w9 )
+CPU_BE( rev w10, w10 )
+CPU_BE( rev w11, w11 )
+
+ str w4, [x3, #VGIC_V2_CPU_HCR]
+ str w5, [x3, #VGIC_V2_CPU_VMCR]
+ str w6, [x3, #VGIC_V2_CPU_MISR]
+ str w7, [x3, #VGIC_V2_CPU_EISR]
+ str w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
+ str w9, [x3, #VGIC_V2_CPU_ELRSR]
+ str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+ str w11, [x3, #VGIC_V2_CPU_APR]
+
+ /* Clear GICH_HCR */
+ str wzr, [x2, #GICH_HCR]
+
+ /* Save list registers */
+ add x2, x2, #GICH_LR0
+ ldr w4, [x3, #VGIC_CPU_NR_LR]
+ add x3, x3, #VGIC_V2_CPU_LR
+1: ldr w5, [x2], #4
+CPU_BE( rev w5, w5 )
+ str w5, [x3], #4
+ sub w4, w4, #1
+ cbnz w4, 1b
+2:
+ ret
+ENDPROC(__save_vgic_v2_state)
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+ENTRY(__restore_vgic_v2_state)
+ /* Get VGIC VCTRL base into x2 */
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr x2, [x2, #KVM_VGIC_VCTRL]
+ kern_hyp_va x2
+ cbz x2, 2f // disabled
+
+ /* Compute the address of struct vgic_cpu */
+ add x3, x0, #VCPU_VGIC_CPU
+
+ /* We only restore a minimal set of registers */
+ ldr w4, [x3, #VGIC_V2_CPU_HCR]
+ ldr w5, [x3, #VGIC_V2_CPU_VMCR]
+ ldr w6, [x3, #VGIC_V2_CPU_APR]
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+
+ str w4, [x2, #GICH_HCR]
+ str w5, [x2, #GICH_VMCR]
+ str w6, [x2, #GICH_APR]
+
+ /* Restore list registers */
+ add x2, x2, #GICH_LR0
+ ldr w4, [x3, #VGIC_CPU_NR_LR]
+ add x3, x3, #VGIC_V2_CPU_LR
+1: ldr w5, [x3], #4
+CPU_BE( rev w5, w5 )
+ str w5, [x2], #4
+ sub w4, w4, #1
+ cbnz w4, 1b
+2:
+ ret
+ENDPROC(__restore_vgic_v2_state)
+
+ .popsection
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
new file mode 100644
index 000000000000..d16046999e06
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+/*
+ * We store LRs in reverse order to let the CPU deal with streaming
+ * access. Use this macro to make it look saner...
+ */
+#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8)
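+// e.g. LR_OFFSET(15) == VGIC_V3_CPU_LR and LR_OFFSET(0) == VGIC_V3_CPU_LR + 120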
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro save_vgic_v3_state
+ // Compute the address of struct vgic_cpu
+ add x3, x0, #VCPU_VGIC_CPU
+
+ // Make sure stores to the GIC via the memory mapped interface
+ // are now visible to the system register interface
+ dsb st
+
+ // Save all interesting registers
+ mrs_s x4, ICH_HCR_EL2
+ mrs_s x5, ICH_VMCR_EL2
+ mrs_s x6, ICH_MISR_EL2
+ mrs_s x7, ICH_EISR_EL2
+ mrs_s x8, ICH_ELSR_EL2
+
+ str w4, [x3, #VGIC_V3_CPU_HCR]
+ str w5, [x3, #VGIC_V3_CPU_VMCR]
+ str w6, [x3, #VGIC_V3_CPU_MISR]
+ str w7, [x3, #VGIC_V3_CPU_EISR]
+ str w8, [x3, #VGIC_V3_CPU_ELRSR]
+
+ msr_s ICH_HCR_EL2, xzr
+
+ mrs_s x21, ICH_VTR_EL2
+ mvn w22, w21
+ ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4
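+	// Illustrative arithmetic: ListRegs (ICH_VTR_EL2[4:0]) is the LR
+	// count minus one. With 4 LRs, ListRegs = 3 and w23 = (15-3)*4 =
+	// 48, so the computed branch below skips the twelve 4-byte
+	// accessors for the unimplemented ICH_LR15..ICH_LR4.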
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ mrs_s x20, ICH_LR15_EL2
+ mrs_s x19, ICH_LR14_EL2
+ mrs_s x18, ICH_LR13_EL2
+ mrs_s x17, ICH_LR12_EL2
+ mrs_s x16, ICH_LR11_EL2
+ mrs_s x15, ICH_LR10_EL2
+ mrs_s x14, ICH_LR9_EL2
+ mrs_s x13, ICH_LR8_EL2
+ mrs_s x12, ICH_LR7_EL2
+ mrs_s x11, ICH_LR6_EL2
+ mrs_s x10, ICH_LR5_EL2
+ mrs_s x9, ICH_LR4_EL2
+ mrs_s x8, ICH_LR3_EL2
+ mrs_s x7, ICH_LR2_EL2
+ mrs_s x6, ICH_LR1_EL2
+ mrs_s x5, ICH_LR0_EL2
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ str x20, [x3, #LR_OFFSET(15)]
+ str x19, [x3, #LR_OFFSET(14)]
+ str x18, [x3, #LR_OFFSET(13)]
+ str x17, [x3, #LR_OFFSET(12)]
+ str x16, [x3, #LR_OFFSET(11)]
+ str x15, [x3, #LR_OFFSET(10)]
+ str x14, [x3, #LR_OFFSET(9)]
+ str x13, [x3, #LR_OFFSET(8)]
+ str x12, [x3, #LR_OFFSET(7)]
+ str x11, [x3, #LR_OFFSET(6)]
+ str x10, [x3, #LR_OFFSET(5)]
+ str x9, [x3, #LR_OFFSET(4)]
+ str x8, [x3, #LR_OFFSET(3)]
+ str x7, [x3, #LR_OFFSET(2)]
+ str x6, [x3, #LR_OFFSET(1)]
+ str x5, [x3, #LR_OFFSET(0)]
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ mrs_s x20, ICH_AP0R3_EL2
+ str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+ mrs_s x19, ICH_AP0R2_EL2
+ str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+6: mrs_s x18, ICH_AP0R1_EL2
+ str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+5: mrs_s x17, ICH_AP0R0_EL2
+ str w17, [x3, #VGIC_V3_CPU_AP0R]
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ mrs_s x20, ICH_AP1R3_EL2
+ str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+ mrs_s x19, ICH_AP1R2_EL2
+ str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+6: mrs_s x18, ICH_AP1R1_EL2
+ str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+5: mrs_s x17, ICH_AP1R0_EL2
+ str w17, [x3, #VGIC_V3_CPU_AP1R]
+
+ // Restore SRE_EL1 access and re-enable SRE at EL1.
+ mrs_s x5, ICC_SRE_EL2
+ orr x5, x5, #ICC_SRE_EL2_ENABLE
+ msr_s ICC_SRE_EL2, x5
+ isb
+ mov x5, #1
+ msr_s ICC_SRE_EL1, x5
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro restore_vgic_v3_state
+ // Disable SRE_EL1 access. Necessary, otherwise
+ // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
+ msr_s ICC_SRE_EL1, xzr
+ isb
+
+ // Compute the address of struct vgic_cpu
+ add x3, x0, #VCPU_VGIC_CPU
+
+ // Restore all interesting registers
+ ldr w4, [x3, #VGIC_V3_CPU_HCR]
+ ldr w5, [x3, #VGIC_V3_CPU_VMCR]
+
+ msr_s ICH_HCR_EL2, x4
+ msr_s ICH_VMCR_EL2, x5
+
+ mrs_s x21, ICH_VTR_EL2
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+ msr_s ICH_AP1R3_EL2, x20
+ ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+ msr_s ICH_AP1R2_EL2, x19
+6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+ msr_s ICH_AP1R1_EL2, x18
+5: ldr w17, [x3, #VGIC_V3_CPU_AP1R]
+ msr_s ICH_AP1R0_EL2, x17
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+ msr_s ICH_AP0R3_EL2, x20
+ ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+ msr_s ICH_AP0R2_EL2, x19
+6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+ msr_s ICH_AP0R1_EL2, x18
+5: ldr w17, [x3, #VGIC_V3_CPU_AP0R]
+ msr_s ICH_AP0R0_EL2, x17
+
+ mvn w22, w21
+ ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ ldr x20, [x3, #LR_OFFSET(15)]
+ ldr x19, [x3, #LR_OFFSET(14)]
+ ldr x18, [x3, #LR_OFFSET(13)]
+ ldr x17, [x3, #LR_OFFSET(12)]
+ ldr x16, [x3, #LR_OFFSET(11)]
+ ldr x15, [x3, #LR_OFFSET(10)]
+ ldr x14, [x3, #LR_OFFSET(9)]
+ ldr x13, [x3, #LR_OFFSET(8)]
+ ldr x12, [x3, #LR_OFFSET(7)]
+ ldr x11, [x3, #LR_OFFSET(6)]
+ ldr x10, [x3, #LR_OFFSET(5)]
+ ldr x9, [x3, #LR_OFFSET(4)]
+ ldr x8, [x3, #LR_OFFSET(3)]
+ ldr x7, [x3, #LR_OFFSET(2)]
+ ldr x6, [x3, #LR_OFFSET(1)]
+ ldr x5, [x3, #LR_OFFSET(0)]
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ msr_s ICH_LR15_EL2, x20
+ msr_s ICH_LR14_EL2, x19
+ msr_s ICH_LR13_EL2, x18
+ msr_s ICH_LR12_EL2, x17
+ msr_s ICH_LR11_EL2, x16
+ msr_s ICH_LR10_EL2, x15
+ msr_s ICH_LR9_EL2, x14
+ msr_s ICH_LR8_EL2, x13
+ msr_s ICH_LR7_EL2, x12
+ msr_s ICH_LR6_EL2, x11
+ msr_s ICH_LR5_EL2, x10
+ msr_s ICH_LR4_EL2, x9
+ msr_s ICH_LR3_EL2, x8
+ msr_s ICH_LR2_EL2, x7
+ msr_s ICH_LR1_EL2, x6
+ msr_s ICH_LR0_EL2, x5
+
+ // Ensure that the above will have reached the
+	// (re)distributors. This ensures the guest will read
+ // the correct values from the memory-mapped interface.
+ isb
+ dsb sy
+
+ // Prevent the guest from touching the GIC system registers
+ mrs_s x5, ICC_SRE_EL2
+ and x5, x5, #~ICC_SRE_EL2_ENABLE
+ msr_s ICC_SRE_EL2, x5
+.endm
+
+ENTRY(__save_vgic_v3_state)
+ save_vgic_v3_state
+ ret
+ENDPROC(__save_vgic_v3_state)
+
+ENTRY(__restore_vgic_v3_state)
+ restore_vgic_v3_state
+ ret
+ENDPROC(__restore_vgic_v3_state)
+
+ENTRY(__vgic_v3_get_ich_vtr_el2)
+ mrs_s x0, ICH_VTR_EL2
+ ret
+ENDPROC(__vgic_v3_get_ich_vtr_el2)
+
+ .popsection
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 59acc0ef0462..328ce1a99daa 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,6 +1,4 @@
-lib-y := bitops.o delay.o \
- strncpy_from_user.o strnlen_user.o clear_user.o \
- copy_from_user.o copy_to_user.o copy_in_user.o \
- copy_page.o clear_page.o \
- memchr.o memcpy.o memmove.o memset.o \
+lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
+ copy_to_user.o copy_in_user.o copy_page.o \
+ clear_page.o memchr.o memcpy.o memmove.o memset.o \
strchr.o strrchr.o
diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S
index e5db797790d3..7dac371cc9a2 100644
--- a/arch/arm64/lib/bitops.S
+++ b/arch/arm64/lib/bitops.S
@@ -46,11 +46,12 @@ ENTRY( \name )
mov x2, #1
add x1, x1, x0, lsr #3 // Get word offset
lsl x4, x2, x3 // Create mask
-1: ldaxr x2, [x1]
+1: ldxr x2, [x1]
lsr x0, x2, x3 // Save old value of bit
\instr x2, x2, x4 // toggle bit
stlxr w5, x2, [x1]
cbnz w5, 1b
+ dmb ish
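+	// The exclusive load above intentionally lacks acquire semantics;
+	// the store-release plus this dmb ish provide the full barrier
+	// that value-returning atomic bitops require.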
and x0, x0, #1
3: ret
ENDPROC(\name )
diff --git a/arch/arm64/lib/strncpy_from_user.S b/arch/arm64/lib/strncpy_from_user.S
deleted file mode 100644
index 56e448a831a0..000000000000
--- a/arch/arm64/lib/strncpy_from_user.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Based on arch/arm/lib/strncpy_from_user.S
- *
- * Copyright (C) 1995-2000 Russell King
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
- .text
- .align 5
-
-/*
- * Copy a string from user space to kernel space.
- * x0 = dst, x1 = src, x2 = byte length
- * returns the number of characters copied (strlen of copied string),
- * -EFAULT on exception, or "len" if we fill the whole buffer
- */
-ENTRY(__strncpy_from_user)
- mov x4, x1
-1: subs x2, x2, #1
- bmi 2f
-USER(9f, ldrb w3, [x1], #1 )
- strb w3, [x0], #1
- cbnz w3, 1b
- sub x1, x1, #1 // take NUL character out of count
-2: sub x0, x1, x4
- ret
-ENDPROC(__strncpy_from_user)
-
- .section .fixup,"ax"
- .align 0
-9: strb wzr, [x0] // null terminate
- mov x0, #-EFAULT
- ret
- .previous
diff --git a/arch/arm64/lib/strnlen_user.S b/arch/arm64/lib/strnlen_user.S
deleted file mode 100644
index 7f7b176a5646..000000000000
--- a/arch/arm64/lib/strnlen_user.S
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Based on arch/arm/lib/strnlen_user.S
- *
- * Copyright (C) 1995-2000 Russell King
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
- .text
- .align 5
-
-/* Prototype: unsigned long __strnlen_user(const char *str, long n)
- * Purpose : get length of a string in user memory
- * Params : str - address of string in user memory
- * Returns : length of string *including terminator*
- * or zero on exception, or n if too long
- */
-ENTRY(__strnlen_user)
- mov x2, x0
-1: subs x1, x1, #1
- b.mi 2f
-USER(9f, ldrb w3, [x0], #1 )
- cbnz w3, 1b
-2: sub x0, x0, x2
- ret
-ENDPROC(__strnlen_user)
-
- .section .fixup,"ax"
- .align 0
-9: mov x0, #0
- ret
- .previous
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 3140a2abcdc2..3ecb56c624d3 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -1,4 +1,5 @@
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
- context.o tlb.o proc.o
+ context.o proc.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 48a386094fa3..fda756875fa6 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -30,7 +30,7 @@
*
* Corrupted registers: x0-x7, x9-x11
*/
-ENTRY(__flush_dcache_all)
+__flush_dcache_all:
dsb sy // ensure ordering with previous memory accesses
mrs x0, clidr_el1 // read clidr
and x3, x0, #0x7000000 // extract loc from clidr
@@ -146,7 +146,7 @@ ENDPROC(flush_icache_range)
ENDPROC(__flush_cache_user_range)
/*
- * __flush_kern_dcache_page(kaddr)
+ * __flush_dcache_area(kaddr, size)
*
* Ensure that the data held in the page kaddr is written back to the
* page in question.
@@ -166,3 +166,97 @@ ENTRY(__flush_dcache_area)
dsb sy
ret
ENDPROC(__flush_dcache_area)
+
+/*
+ * __inval_cache_range(start, end)
+ * - start - start address of region
+ * - end - end address of region
+ */
+ENTRY(__inval_cache_range)
+ /* FALLTHROUGH */
+
+/*
+ * __dma_inv_range(start, end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+__dma_inv_range:
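+	// Cache lines that straddle the start or end of the buffer may
+	// hold live adjacent data, so they are cleaned and invalidated
+	// (dc civac) instead of just invalidated.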
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ tst x1, x3 // end cache line aligned?
+ bic x1, x1, x3
+ b.eq 1f
+ dc civac, x1 // clean & invalidate D / U line
+1: tst x0, x3 // start cache line aligned?
+ bic x0, x0, x3
+ b.eq 2f
+ dc civac, x0 // clean & invalidate D / U line
+ b 3f
+2: dc ivac, x0 // invalidate D / U line
+3: add x0, x0, x2
+ cmp x0, x1
+ b.lo 2b
+ dsb sy
+ ret
+ENDPROC(__inval_cache_range)
+ENDPROC(__dma_inv_range)
+
+/*
+ * __dma_clean_range(start, end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+__dma_clean_range:
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x0, x0, x3
+1: dc cvac, x0 // clean D / U line
+ add x0, x0, x2
+ cmp x0, x1
+ b.lo 1b
+ dsb sy
+ ret
+ENDPROC(__dma_clean_range)
+
+/*
+ * __dma_flush_range(start, end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(__dma_flush_range)
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x0, x0, x3
+1: dc civac, x0 // clean & invalidate D / U line
+ add x0, x0, x2
+ cmp x0, x1
+ b.lo 1b
+ dsb sy
+ ret
+ENDPROC(__dma_flush_range)
+
+/*
+ * __dma_map_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(__dma_map_area)
+ add x1, x1, x0
+ cmp w2, #DMA_FROM_DEVICE
+ b.eq __dma_inv_range
+ b __dma_clean_range
+ENDPROC(__dma_map_area)
+
+/*
+ * __dma_unmap_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(__dma_unmap_area)
+ add x1, x1, x0
+ cmp w2, #DMA_TO_DEVICE
+ b.ne __dma_inv_range
+ ret
+ENDPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 9aecbace4128..13bbc3be6f5a 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -27,8 +27,10 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
copy_page(kto, kfrom);
__flush_dcache_area(kto, PAGE_SIZE);
}
+EXPORT_SYMBOL_GPL(__cpu_copy_user_page);
void __cpu_clear_user_page(void *kaddr, unsigned long vaddr)
{
clear_page(kaddr);
}
+EXPORT_SYMBOL_GPL(__cpu_clear_user_page);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4bd7579ec9e6..f39a55d58918 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -21,34 +21,277 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>
+#include <linux/amba/bus.h>
#include <asm/cacheflush.h>
struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
-static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
+static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
+ bool coherent)
{
- if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+ if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+ return pgprot_writecombine(prot);
+ else if (!coherent)
+ return pgprot_dmacoherent(prot);
+ return prot;
+}
+
+static void *__dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
+{
+ if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
- flags |= GFP_DMA32;
- return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+ flags |= GFP_DMA;
+ if (IS_ENABLED(CONFIG_DMA_CMA)) {
+ struct page *page;
+
+ size = PAGE_ALIGN(size);
+ page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
+ get_order(size));
+ if (!page)
+ return NULL;
+
+ *dma_handle = phys_to_dma(dev, page_to_phys(page));
+ return page_address(page);
+ } else {
+ return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+ }
+}
+
+static void __dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ if (dev == NULL) {
+ WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
+ return;
+ }
+
+ if (IS_ENABLED(CONFIG_DMA_CMA)) {
+ phys_addr_t paddr = dma_to_phys(dev, dma_handle);
+
+ dma_release_from_contiguous(dev,
+ phys_to_page(paddr),
+ size >> PAGE_SHIFT);
+ } else {
+ swiotlb_free_coherent(dev, size, vaddr, dma_handle);
+ }
+}
+
+static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
+{
+ struct page *page, **map;
+ void *ptr, *coherent_ptr;
+ int order, i;
+
+ size = PAGE_ALIGN(size);
+ order = get_order(size);
+
+ ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
+ if (!ptr)
+ goto no_mem;
+ map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
+ if (!map)
+ goto no_map;
+
+ /* remove any dirty cache lines on the kernel alias */
+ __dma_flush_range(ptr, ptr + size);
+
+ /* create a coherent mapping */
+ page = virt_to_page(ptr);
+ for (i = 0; i < (size >> PAGE_SHIFT); i++)
+ map[i] = page + i;
+ coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
+ __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false));
+ kfree(map);
+ if (!coherent_ptr)
+ goto no_map;
+
+ return coherent_ptr;
+
+no_map:
+ __dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
+no_mem:
+ *dma_handle = ~0;
+ return NULL;
+}
+
+static void __dma_free_noncoherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+
+ vunmap(vaddr);
+ __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
+}
+
+static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ dma_addr_t dev_addr;
+
+ dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+
+ return dev_addr;
+}
+
+
+static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+ swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nelems, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i, ret;
+
+ ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+ for_each_sg(sgl, sg, ret, i)
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+ sg->length, dir);
+
+ return ret;
+}
+
+static void __swiotlb_unmap_sg_attrs(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nelems, i)
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+ sg->length, dir);
+ swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}
-static void arm64_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
+static void __swiotlb_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir)
{
- swiotlb_free_coherent(dev, size, vaddr, dma_handle);
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+ swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}
-static struct dma_map_ops arm64_swiotlb_dma_ops = {
- .alloc = arm64_swiotlb_alloc_coherent,
- .free = arm64_swiotlb_free_coherent,
+static void __swiotlb_sync_single_for_device(struct device *dev,
+ dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir)
+{
+ swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+}
+
+static void __swiotlb_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nelems, i)
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+ sg->length, dir);
+ swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
+static void __swiotlb_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+ for_each_sg(sgl, sg, nelems, i)
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+ sg->length, dir);
+}
+
+/* vma->vm_page_prot must be set appropriately before calling this function */
+static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+ int ret = -ENXIO;
+ unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
+ PAGE_SHIFT;
+ unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
+ unsigned long off = vma->vm_pgoff;
+
+ if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+ return ret;
+
+ if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ pfn + off,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ }
+
+ return ret;
+}
+
+static int __swiotlb_mmap_noncoherent(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false);
+ return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+static int __swiotlb_mmap_coherent(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ /* Just use whatever page_prot attributes were specified */
+ return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+struct dma_map_ops noncoherent_swiotlb_dma_ops = {
+ .alloc = __dma_alloc_noncoherent,
+ .free = __dma_free_noncoherent,
+ .mmap = __swiotlb_mmap_noncoherent,
+ .map_page = __swiotlb_map_page,
+ .unmap_page = __swiotlb_unmap_page,
+ .map_sg = __swiotlb_map_sg_attrs,
+ .unmap_sg = __swiotlb_unmap_sg_attrs,
+ .sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
+ .sync_single_for_device = __swiotlb_sync_single_for_device,
+ .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
+ .sync_sg_for_device = __swiotlb_sync_sg_for_device,
+ .dma_supported = swiotlb_dma_supported,
+ .mapping_error = swiotlb_dma_mapping_error,
+};
+EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops);
+
+struct dma_map_ops coherent_swiotlb_dma_ops = {
+ .alloc = __dma_alloc_coherent,
+ .free = __dma_free_coherent,
+ .mmap = __swiotlb_mmap_coherent,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
.map_sg = swiotlb_map_sg_attrs,
@@ -60,12 +303,47 @@ static struct dma_map_ops arm64_swiotlb_dma_ops = {
.dma_supported = swiotlb_dma_supported,
.mapping_error = swiotlb_dma_mapping_error,
};
+EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
+
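+/*
+ * Devices whose DT node carries "dma-coherent" are switched to the
+ * coherent ops; everything else keeps the default noncoherent ops,
+ * which maintain the caches explicitly around each transfer.
+ */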
+static int dma_bus_notifier(struct notifier_block *nb,
+ unsigned long event, void *_dev)
+{
+ struct device *dev = _dev;
+
+ if (event != BUS_NOTIFY_ADD_DEVICE)
+ return NOTIFY_DONE;
+
+ if (of_property_read_bool(dev->of_node, "dma-coherent"))
+ set_dma_ops(dev, &coherent_swiotlb_dma_ops);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block platform_bus_nb = {
+ .notifier_call = dma_bus_notifier,
+};
+
+static struct notifier_block amba_bus_nb = {
+ .notifier_call = dma_bus_notifier,
+};
+
+extern int swiotlb_late_init_with_default_size(size_t default_size);
-void __init arm64_swiotlb_init(void)
+static int __init swiotlb_late_init(void)
{
- dma_ops = &arm64_swiotlb_dma_ops;
- swiotlb_init(1);
+ size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT);
+
+ /*
+ * These must be registered before of_platform_populate().
+ */
+ bus_register_notifier(&platform_bus_type, &platform_bus_nb);
+ bus_register_notifier(&amba_bustype, &amba_bus_nb);
+
+ dma_ops = &noncoherent_swiotlb_dma_ops;
+
+ return swiotlb_late_init_with_default_size(swiotlb_size);
}
+arch_initcall(swiotlb_late_init);
#define PREALLOC_DMA_DEBUG_ENTRIES 4096
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f51d669c8ebd..df4f2fd187c3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -130,7 +130,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
force_sig_info(sig, &si, tsk);
}
-void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->active_mm;
@@ -359,17 +359,6 @@ static int __kprobes do_translation_fault(unsigned long addr,
}
/*
- * Some section permission faults need to be handled gracefully. They can
- * happen due to a __{get,put}_user during an oops.
- */
-static int do_sect_fault(unsigned long addr, unsigned int esr,
- struct pt_regs *regs)
-{
- do_bad_area(addr, esr, regs);
- return 0;
-}
-
-/*
* This abort handler always returns "fault".
*/
static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
@@ -392,12 +381,12 @@ static struct fault_info {
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
{ do_bad, SIGBUS, 0, "reserved access flag fault" },
- { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
- { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
+ { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
+ { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
{ do_bad, SIGBUS, 0, "reserved permission fault" },
- { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
- { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
+ { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
+ { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_bad, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "asynchronous external abort" },
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 7c716634a671..0d64089d28b5 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -70,11 +70,6 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
#endif
}
-void __flush_dcache_page(struct page *page)
-{
- __flush_dcache_area(page_address(page), PAGE_SIZE);
-}
-
void __sync_icache_dcache(pte_t pte, unsigned long addr)
{
struct page *page = pte_page(pte);
@@ -84,7 +79,8 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
return;
if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
- __flush_dcache_page(page);
+ __flush_dcache_area(page_address(page),
+ PAGE_SIZE << compound_order(page));
__flush_icache_all();
} else if (icache_is_aivivt()) {
__flush_icache_all();
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
new file mode 100644
index 000000000000..2fc8258bab2d
--- /dev/null
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -0,0 +1,70 @@
+/*
+ * arch/arm64/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/mm/hugetlbpage.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+ return 0;
+}
+#endif
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+ int write)
+{
+ return ERR_PTR(-EINVAL);
+}
+
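+/*
+ * A block (huge) descriptor has the table bit clear while a table
+ * descriptor has it set, so testing PMD_TABLE_BIT/PUD_TABLE_BIT below
+ * distinguishes a block mapping from a next-level table pointer.
+ */
+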
+int pmd_huge(pmd_t pmd)
+{
+ return !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
+
+int pud_huge(pud_t pud)
+{
+ return !(pud_val(pud) & PUD_TABLE_BIT);
+}
+
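+/*
+ * Parses the "hugepagesz=" boot parameter: with a 4K granule, e.g.
+ * "hugepagesz=2M" selects PMD-sized and "hugepagesz=1G" PUD-sized
+ * huge pages.
+ */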
+static __init int setup_hugepagesz(char *opt)
+{
+ unsigned long ps = memparse(opt, &opt);
+ if (ps == PMD_SIZE) {
+ hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+ } else if (ps == PUD_SIZE) {
+ hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+ } else {
+ pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
+ return 0;
+ }
+ return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f497ca77925a..81bdd29df3a4 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,8 @@
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of_fdt.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
#include <asm/prom.h>
#include <asm/sections.h>
@@ -44,8 +46,7 @@ static unsigned long phys_initrd_size __initdata = 0;
phys_addr_t memstart_addr __read_mostly = 0;
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
phys_initrd_start = start;
phys_initrd_size = end - start;
@@ -67,22 +68,22 @@ static int __init early_initrd(char *p)
}
early_param("initrd", early_initrd);
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
-
static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
struct memblock_region *reg;
unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
- unsigned long max_dma32 = min;
+ unsigned long max_dma = min;
memset(zone_size, 0, sizeof(zone_size));
-#ifdef CONFIG_ZONE_DMA32
/* 4GB maximum for 32-bit only capable devices */
- max_dma32 = max(min, min(max, MAX_DMA32_PFN));
- zone_size[ZONE_DMA32] = max_dma32 - min;
-#endif
- zone_size[ZONE_NORMAL] = max - max_dma32;
+ if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+ unsigned long max_dma_phys =
+ (unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1);
+ max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT));
+ zone_size[ZONE_DMA] = max_dma - min;
+ }
+ zone_size[ZONE_NORMAL] = max - max_dma;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -92,15 +93,15 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (start >= max)
continue;
-#ifdef CONFIG_ZONE_DMA32
- if (start < max_dma32) {
- unsigned long dma_end = min(end, max_dma32);
- zhole_size[ZONE_DMA32] -= dma_end - start;
+
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+ unsigned long dma_end = min(end, max_dma);
+ zhole_size[ZONE_DMA] -= dma_end - start;
}
-#endif
- if (end > max_dma32) {
+
+ if (end > max_dma) {
unsigned long normal_end = min(end, max);
- unsigned long normal_start = max(start, max_dma32);
+ unsigned long normal_start = max(start, max_dma);
zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
}
}
@@ -135,7 +136,10 @@ void __init arm64_memblock_init(void)
{
u64 *reserve_map, base, size;
- /* Register the kernel text, kernel data and initrd with memblock */
+ /*
+ * Register the kernel text, kernel data, initrd, and initial
+ * pagetables with memblock.
+ */
memblock_reserve(__pa(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
if (phys_initrd_size) {
@@ -147,13 +151,6 @@ void __init arm64_memblock_init(void)
}
#endif
- /*
- * Reserve the page tables. These are already in use,
- * and can only be in node 0.
- */
- memblock_reserve(__pa(swapper_pg_dir), SWAPPER_DIR_SIZE);
- memblock_reserve(__pa(idmap_pg_dir), IDMAP_DIR_SIZE);
-
/* Reserve the dtb region */
memblock_reserve(virt_to_phys(initial_boot_params),
be32_to_cpu(initial_boot_params->totalsize));
@@ -173,6 +170,9 @@ void __init arm64_memblock_init(void)
memblock_reserve(base, size);
}
+ early_init_fdt_scan_reserved_mem();
+ dma_contiguous_reserve(0);
+
memblock_allow_resize();
memblock_dump_all();
}
@@ -283,8 +283,6 @@ void __init mem_init(void)
unsigned long reserved_pages, free_pages;
struct memblock_region *reg;
- arm64_swiotlb_init();
-
max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
#ifndef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 1725cd6db37a..00d315ae1de9 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -25,6 +25,10 @@
#include <linux/vmalloc.h>
#include <linux/io.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
pgprot_t prot, void *caller)
{
@@ -82,3 +86,95 @@ void __iounmap(volatile void __iomem *io_addr)
vunmap(addr);
}
EXPORT_SYMBOL(__iounmap);
+
+void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
+{
+ /* For normal memory we already have a cacheable mapping. */
+ if (pfn_valid(__phys_to_pfn(phys_addr)))
+ return (void __iomem *)__phys_to_virt(phys_addr);
+
+ return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+
+#ifndef CONFIG_ARM64_64K_PAGES
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#endif
+
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+
+ pgd = pgd_offset_k(addr);
+ BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+
+ pud = pud_offset(pgd, addr);
+ BUG_ON(pud_none(*pud) || pud_bad(*pud));
+
+ return pmd_offset(pud, addr);
+}
+
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
+{
+ pmd_t *pmd = early_ioremap_pmd(addr);
+
+ BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+ return pte_offset_kernel(pmd, addr);
+}
+
+void __init early_ioremap_init(void)
+{
+ pmd_t *pmd;
+
+ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+#ifndef CONFIG_ARM64_64K_PAGES
+ /* need to populate pmd for 4k pagesize only */
+ pmd_populate_kernel(&init_mm, pmd, bm_pte);
+#endif
+ /*
+ * The boot-ioremap range spans multiple pmds, for which
+ * we are not prepared:
+ */
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+ if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+ WARN_ON(1);
+ pr_warn("pmd %p != %p\n",
+ pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
+ pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ pr_warn("FIX_BTMAP_BEGIN: %d\n",
+ FIX_BTMAP_BEGIN);
+ }
+
+ early_ioremap_setup();
+}
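
The BUILD_BUG_ON above pins down the invariant that makes the single bm_pte table sufficient: FIX_BTMAP_BEGIN and FIX_BTMAP_END must fall in the same pmd. A sketch of that check, assuming 4K pages (PMD_SHIFT = 21) and made-up fixmap addresses:

    #include <stdint.h>
    #include <stdio.h>

    #define PMD_SHIFT 21

    int main(void)
    {
            uint64_t btmap_begin = 0xffffffbffe9fe000ULL;  /* assumed */
            uint64_t btmap_end   = 0xffffffbffe9c0000ULL;  /* assumed */

            if ((btmap_begin >> PMD_SHIFT) == (btmap_end >> PMD_SHIFT))
                    puts("FIX_BTMAP range fits one pmd: a single pte table suffices");
            else
                    puts("range straddles a pmd: early_ioremap would need two tables");
            return 0;
    }
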
+
+void __init __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
+{
+ unsigned long addr = __fix_to_virt(idx);
+ pte_t *pte;
+
+ if (idx >= __end_of_fixed_addresses) {
+ BUG();
+ return;
+ }
+
+ pte = early_ioremap_pte(addr);
+
+ if (pgprot_val(flags))
+ set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+ else {
+ pte_clear(&init_mm, addr, pte);
+ flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
+ }
+}
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index 916701e6d040..d519f4f50c8c 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,3 +1,2 @@
-extern void __flush_dcache_page(struct page *page);
extern void __init bootmem_init(void);
extern void __init arm64_swiotlb_init(void);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ba7477efad5c..a2155ca6921c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -43,11 +43,6 @@
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);
-pgprot_t pgprot_default;
-EXPORT_SYMBOL(pgprot_default);
-
-static pmdval_t prot_sect_kernel;
-
struct cachepolicy {
const char policy[16];
u64 mair;
@@ -122,33 +117,6 @@ static int __init early_cachepolicy(char *p)
}
early_param("cachepolicy", early_cachepolicy);
-/*
- * Adjust the PMD section entries according to the CPU in use.
- */
-static void __init init_mem_pgprot(void)
-{
- pteval_t default_pgprot;
- int i;
-
- default_pgprot = PTE_ATTRINDX(MT_NORMAL);
- prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | PMD_ATTRINDX(MT_NORMAL);
-
-#ifdef CONFIG_SMP
- /*
- * Mark memory with the "shared" attribute for SMP systems
- */
- default_pgprot |= PTE_SHARED;
- prot_sect_kernel |= PMD_SECT_S;
-#endif
-
- for (i = 0; i < 16; i++) {
- unsigned long v = pgprot_val(protection_map[i]);
- protection_map[i] = __pgprot(v | default_pgprot);
- }
-
- pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot);
-}
-
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -168,7 +136,8 @@ static void __init *early_alloc(unsigned long sz)
}
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
- unsigned long end, unsigned long pfn)
+ unsigned long end, unsigned long pfn,
+ pgprot_t prot)
{
pte_t *pte;
@@ -180,16 +149,28 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_offset_kernel(pmd, addr);
do {
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ set_pte(pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
}
static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys)
+ unsigned long end, phys_addr_t phys,
+ int map_io)
{
pmd_t *pmd;
unsigned long next;
+ pmdval_t prot_sect;
+ pgprot_t prot_pte;
+
+ if (map_io) {
+ prot_sect = PMD_TYPE_SECT | PMD_SECT_AF |
+ PMD_ATTRINDX(MT_DEVICE_nGnRE);
+ prot_pte = __pgprot(PROT_DEVICE_nGnRE);
+ } else {
+ prot_sect = PROT_SECT_NORMAL_EXEC;
+ prot_pte = PAGE_KERNEL_EXEC;
+ }
/*
* Check for initial section mappings in the pgd/pud and remove them.
@@ -205,7 +186,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0) {
pmd_t old_pmd = *pmd;
- set_pmd(pmd, __pmd(phys | prot_sect_kernel));
+ set_pmd(pmd, __pmd(phys | prot_sect));
/*
* Check for previous table entries created during
* boot (__create_page_tables) and flush them.
@@ -213,21 +194,23 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
if (!pmd_none(old_pmd))
flush_tlb_all();
} else {
- alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys));
+ alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
+ prot_pte);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
}
static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
- unsigned long end, unsigned long phys)
+ unsigned long end, unsigned long phys,
+ int map_io)
{
pud_t *pud = pud_offset(pgd, addr);
unsigned long next;
do {
next = pud_addr_end(addr, end);
- alloc_init_pmd(pud, addr, next, phys);
+ alloc_init_pmd(pud, addr, next, phys, map_io);
phys += next - addr;
} while (pud++, addr = next, addr != end);
}
@@ -236,74 +219,60 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
* Create the page directory entries and any necessary page tables for the
* mapping specified by 'md'.
*/
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
- phys_addr_t size)
+static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ int map_io)
{
unsigned long addr, length, end, next;
- pgd_t *pgd;
-
- if (virt < VMALLOC_START) {
- pr_warning("BUG: not creating mapping for 0x%016llx at 0x%016lx - outside kernel range\n",
- phys, virt);
- return;
- }
addr = virt & PAGE_MASK;
length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
- pgd = pgd_offset_k(addr);
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys);
+ alloc_init_pud(pgd, addr, next, phys, map_io);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
-#ifdef CONFIG_EARLY_PRINTK
-/*
- * Create an early I/O mapping using the pgd/pmd entries already populated
- * in head.S as this function is called too early to allocate any memory. The
- * mapping size is 2MB with 4KB pages or 64KB with 64KB pages.
- */
-void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
+static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+ phys_addr_t size)
{
- unsigned long size, mask;
- bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /*
- * No early pte entries with !ARM64_64K_PAGES configuration, so using
- * sections (pmd).
- */
- size = page64k ? PAGE_SIZE : SECTION_SIZE;
- mask = ~(size - 1);
-
- pgd = pgd_offset_k(virt);
- pud = pud_offset(pgd, virt);
- if (pud_none(*pud))
- return NULL;
- pmd = pmd_offset(pud, virt);
-
- if (page64k) {
- if (pmd_none(*pmd))
- return NULL;
- pte = pte_offset_kernel(pmd, virt);
- set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
- } else {
- set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
+ if (virt < VMALLOC_START) {
+ pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+ &phys, virt);
+ return;
}
+ __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
+}
- return (void __iomem *)((virt & mask) + (phys & ~mask));
+void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
+{
+ if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
+ pr_warn("BUG: not creating id mapping for %pa\n", &addr);
+ return;
+ }
+ __create_mapping(&idmap_pg_dir[pgd_index(addr)],
+ addr, addr, size, map_io);
}
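
The bounds check in create_id_mapping() reflects the fixed size of idmap_pg_dir. A sketch of the arithmetic, assuming 4K pages and 39-bit VAs (PGDIR_SHIFT = 30, 512 pgd entries, so only physical addresses below 512GB are identity-mappable):

    #include <stdint.h>
    #include <stdio.h>

    #define PGDIR_SHIFT  30
    #define PTRS_PER_PGD 512

    static int id_mappable(uint64_t addr)
    {
            /* Same test as above: does the pgd index fit the idmap table? */
            return (addr >> PGDIR_SHIFT) < PTRS_PER_PGD;
    }

    int main(void)
    {
            printf("0x2_0000_0000:  %s\n", id_mappable(0x200000000ULL) ? "ok" : "rejected");
            printf("0x80_0000_0000: %s\n", id_mappable(0x8000000000ULL) ? "ok" : "rejected");
            return 0;
    }
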
-#endif
static void __init map_mem(void)
{
struct memblock_region *reg;
+ phys_addr_t limit;
+
+ /*
+ * Temporarily limit the memblock range. We need to do this as
+ * create_mapping requires puds, pmds and ptes to be allocated from
+ * memory addressable from the initial direct kernel mapping.
+ *
+ * The initial direct kernel mapping, located at swapper_pg_dir,
+ * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be
+ * aligned to 2MB as per Documentation/arm64/booting.txt).
+ */
+ limit = PHYS_OFFSET + PGDIR_SIZE;
+ memblock_set_current_limit(limit);
/* map all the memory banks */
for_each_memblock(memory, reg) {
@@ -313,8 +282,27 @@ static void __init map_mem(void)
if (start >= end)
break;
+#ifndef CONFIG_ARM64_64K_PAGES
+ /*
+ * For the first memory bank align the start address and
+ * current memblock limit to prevent create_mapping() from
+ * allocating pte page tables from unmapped memory.
+ * When 64K pages are enabled, the pte page table for the
+ * first PGDIR_SIZE is already present in swapper_pg_dir.
+ */
+ if (start < limit)
+ start = ALIGN(start, PMD_SIZE);
+ if (end < limit) {
+ limit = end & PMD_MASK;
+ memblock_set_current_limit(limit);
+ }
+#endif
+
create_mapping(start, __phys_to_virt(start), end - start);
}
+
+ /* Limit no longer required. */
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}
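
A small sketch of the rounding used above for the first bank, assuming 4K pages (PMD_SIZE = 2MB): the mapped start rounds up so no pte table is needed below the limit, and the allocation limit rounds down to a fully mapped boundary. Addresses are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define PMD_SIZE    0x200000ULL
    #define PMD_MASK    (~(PMD_SIZE - 1))
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            uint64_t start = 0x80080000, end = 0x80500000;

            printf("mapped start rounds up to     0x%llx\n",
                   (unsigned long long)ALIGN(start, PMD_SIZE));
            printf("memblock limit rounds down to 0x%llx\n",
                   (unsigned long long)(end & PMD_MASK));
            return 0;
    }
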
/*
@@ -325,13 +313,6 @@ void __init paging_init(void)
{
void *zero_page;
- /*
- * Maximum PGDIR_SIZE addressable via the initial direct kernel
- * mapping in swapper_pg_dir.
- */
- memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
-
- init_mem_pgprot();
map_mem();
/*
@@ -390,6 +371,9 @@ int kern_addr_valid(unsigned long addr)
if (pmd_none(*pmd))
return 0;
+ if (pmd_sect(*pmd))
+ return pfn_valid(pmd_pfn(*pmd));
+
pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte))
return 0;
@@ -430,7 +414,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
if (!p)
return -ENOMEM;
- set_pmd(pmd, __pmd(__pa(p) | prot_sect_kernel));
+ set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
} else
vmemmap_verify((pte_t *)pmd, node, addr, next);
} while (addr = next, addr != end);
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 7083cdada657..62c6101df260 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -32,17 +32,10 @@
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *new_pgd;
-
if (PGD_SIZE == PAGE_SIZE)
- new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+ return (pgd_t *)get_zeroed_page(GFP_KERNEL);
else
- new_pgd = kzalloc(PGD_SIZE, GFP_KERNEL);
-
- if (!new_pgd)
- return NULL;
-
- return new_pgd;
+ return kzalloc(PGD_SIZE, GFP_KERNEL);
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 8957b822010b..005d29e2977d 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -38,8 +38,7 @@
*/
.macro dcache_line_size, reg, tmp
mrs \tmp, ctr_el0 // read CTR
- lsr \tmp, \tmp, #16
- and \tmp, \tmp, #0xf // cache line size encoding
+ ubfm \tmp, \tmp, #16, #19 // cache line size encoding
mov \reg, #4 // bytes per word
lsl \reg, \reg, \tmp // actual cache line size
.endm
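
In C terms, the rewritten macro extracts CTR_EL0.DminLine (bits [19:16], the log2 of the word count of the smallest D-cache line) and scales it to bytes, exactly as the ubfm/mov/lsl sequence does. A sketch with a plausible CTR_EL0 value:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned int dcache_line_size(uint64_t ctr_el0)
    {
            unsigned int dminline = (ctr_el0 >> 16) & 0xf; /* ubfm #16, #19 */
            return 4u << dminline;  /* bytes per word << log2(words per line) */
    }

    int main(void)
    {
            /* 0x84448004 is a plausible CTR_EL0; DminLine = 4 -> 64 bytes. */
            printf("D-cache line size: %u bytes\n", dcache_line_size(0x84448004ULL));
            return 0;
    }
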
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index f84fcf71f129..e085ee6ef4e2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -80,8 +80,77 @@ ENTRY(cpu_do_idle)
ret
ENDPROC(cpu_do_idle)
+#ifdef CONFIG_ARM64_CPU_SUSPEND
+/**
+ * cpu_do_suspend - save CPU registers context
+ *
+ * x0: virtual address of context pointer
+ */
+ENTRY(cpu_do_suspend)
+ mrs x2, tpidr_el0
+ mrs x3, tpidrro_el0
+ mrs x4, contextidr_el1
+ mrs x5, mair_el1
+ mrs x6, cpacr_el1
+ mrs x7, ttbr1_el1
+ mrs x8, tcr_el1
+ mrs x9, vbar_el1
+ mrs x10, mdscr_el1
+ mrs x11, oslsr_el1
+ mrs x12, sctlr_el1
+ stp x2, x3, [x0]
+ stp x4, x5, [x0, #16]
+ stp x6, x7, [x0, #32]
+ stp x8, x9, [x0, #48]
+ stp x10, x11, [x0, #64]
+ str x12, [x0, #80]
+ ret
+ENDPROC(cpu_do_suspend)
+
+/**
+ * cpu_do_resume - restore CPU register context
+ *
+ * x0: Physical address of context pointer
+ * x1: ttbr0_el1 to be restored
+ *
+ * Returns:
+ * sctlr_el1 value in x0
+ */
+ENTRY(cpu_do_resume)
+ /*
+ * Invalidate local tlb entries before turning on MMU
+ */
+ tlbi vmalle1
+ ldp x2, x3, [x0]
+ ldp x4, x5, [x0, #16]
+ ldp x6, x7, [x0, #32]
+ ldp x8, x9, [x0, #48]
+ ldp x10, x11, [x0, #64]
+ ldr x12, [x0, #80]
+ msr tpidr_el0, x2
+ msr tpidrro_el0, x3
+ msr contextidr_el1, x4
+ msr mair_el1, x5
+ msr cpacr_el1, x6
+ msr ttbr0_el1, x1
+ msr ttbr1_el1, x7
+ msr tcr_el1, x8
+ msr vbar_el1, x9
+ msr mdscr_el1, x10
+ /*
+ * Restore oslsr_el1 by writing oslar_el1
+ */
+ ubfx x11, x11, #1, #1
+ msr oslar_el1, x11
+ mov x0, x12
+ dsb nsh // Make sure local tlb invalidation completed
+ isb
+ ret
+ENDPROC(cpu_do_resume)
+#endif
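
The save/restore pairs above define an 11-slot, 88-byte context record (five stp/ldp pairs plus a final str/ldr). Expressed as a C layout for reference; the struct name is hypothetical:

    #include <stdint.h>

    struct cpu_suspend_ctx_regs {
            uint64_t tpidr_el0;      /* [x0, #0]  */
            uint64_t tpidrro_el0;    /* [x0, #8]  */
            uint64_t contextidr_el1; /* [x0, #16] */
            uint64_t mair_el1;       /* [x0, #24] */
            uint64_t cpacr_el1;      /* [x0, #32] */
            uint64_t ttbr1_el1;      /* [x0, #40] */
            uint64_t tcr_el1;        /* [x0, #48] */
            uint64_t vbar_el1;       /* [x0, #56] */
            uint64_t mdscr_el1;      /* [x0, #64] */
            uint64_t oslsr_el1;      /* [x0, #72] */
            uint64_t sctlr_el1;      /* [x0, #80] */
    };
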
+
/*
- * cpu_switch_mm(pgd_phys, tsk)
+ * cpu_do_switch_mm(pgd_phys, tsk)
*
* Set the translation table base pointer to be pgd_phys.
*
@@ -104,19 +173,13 @@ ENDPROC(cpu_do_switch_mm)
* value of the SCTLR_EL1 register.
*/
ENTRY(__cpu_setup)
- /*
- * Preserve the link register across the function call.
- */
- mov x28, lr
- bl __flush_dcache_all
- mov lr, x28
ic iallu // I+BTB cache invalidate
+ tlbi vmalle1is // invalidate I + D TLBs
dsb sy
mov x0, #3 << 20
msr cpacr_el1, x0 // Enable FP/ASIMD
msr mdscr_el1, xzr // Reset mdscr_el1
- tlbi vmalle1is // invalidate I + D TLBs
/*
* Memory region attributes for LPAE:
*
@@ -146,8 +209,14 @@ ENTRY(__cpu_setup)
* Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
* both user and kernel.
*/
- ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \
- TCR_ASID16 | (1 << 31)
+ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \
+ TCR_ASID16 | TCR_TBI0 | (1 << 31)
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+ * TCR_EL1.
+ */
+ mrs x9, ID_AA64MMFR0_EL1
+ bfi x10, x9, #32, #3
#ifdef CONFIG_ARM64_64K_PAGES
orr x10, x10, TCR_TG0_64K
orr x10, x10, TCR_TG1_64K
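
The mrs/bfi pair replaces the hardcoded TCR_IPS_40BIT: it copies the low three bits of ID_AA64MMFR0_EL1.PARange into TCR_EL1.IPS (bits [34:32]), so the advertised intermediate physical size tracks what the CPU actually implements. A sketch of the bit manipulation, with an illustrative PARange:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t set_ips(uint64_t tcr, uint64_t mmfr0)
    {
            uint64_t parange = mmfr0 & 0x7; /* source bits [2:0] */

            tcr &= ~(0x7ULL << 32);         /* bfi x10, x9, #32, #3 */
            tcr |= parange << 32;
            return tcr;
    }

    int main(void)
    {
            /* PARange = 2 means a 40-bit PA on this hypothetical CPU. */
            printf("TCR_EL1 = 0x%llx\n", (unsigned long long)set_ips(0, 2));
            return 0;
    }
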
@@ -162,9 +231,9 @@ ENDPROC(__cpu_setup)
* CE0 XWHW CZ ME TEEA S
* .... .IEE .... NEAI TE.I ..AD DEN0 ACAM
* 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved
- * .... .100 .... 01.1 11.1 ..01 0001 1101 < software settings
+ * .... .1.. .... 01.1 11.1 ..01 0001 1101 < software settings
*/
.type crval, #object
crval:
- .word 0x030802e2 // clear
+ .word 0x000802e2 // clear
.word 0x0405d11d // set
diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S
deleted file mode 100644
index 8ae80a18e8ec..000000000000
--- a/arch/arm64/mm/tlb.S
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Based on arch/arm/mm/tlb.S
- *
- * Copyright (C) 1997-2002 Russell King
- * Copyright (C) 2012 ARM Ltd.
- * Written by Catalin Marinas <catalin.marinas@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-#include "proc-macros.S"
-
-/*
- * __cpu_flush_user_tlb_range(start, end, vma)
- *
- * Invalidate a range of TLB entries in the specified address space.
- *
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
- * - vma - vma_struct describing address range
- */
-ENTRY(__cpu_flush_user_tlb_range)
- vma_vm_mm x3, x2 // get vma->vm_mm
- mmid x3, x3 // get vm_mm->context.id
- dsb sy
- lsr x0, x0, #12 // align address
- lsr x1, x1, #12
- bfi x0, x3, #48, #16 // start VA and ASID
- bfi x1, x3, #48, #16 // end VA and ASID
-1: tlbi vae1is, x0 // TLB invalidate by address and ASID
- add x0, x0, #1
- cmp x0, x1
- b.lo 1b
- dsb sy
- ret
-ENDPROC(__cpu_flush_user_tlb_range)
-
-/*
- * __cpu_flush_kern_tlb_range(start,end)
- *
- * Invalidate a range of kernel TLB entries.
- *
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
- */
-ENTRY(__cpu_flush_kern_tlb_range)
- dsb sy
- lsr x0, x0, #12 // align address
- lsr x1, x1, #12
-1: tlbi vaae1is, x0 // TLB invalidate by address
- add x0, x0, #1
- cmp x0, x1
- b.lo 1b
- dsb sy
- isb
- ret
-ENDPROC(__cpu_flush_kern_tlb_range)
diff --git a/arch/blackfin/include/asm/ftrace.h b/arch/blackfin/include/asm/ftrace.h
index 8a029505d7b7..2f1c3c2657ad 100644
--- a/arch/blackfin/include/asm/ftrace.h
+++ b/arch/blackfin/include/asm/ftrace.h
@@ -66,16 +66,7 @@ extern inline void *return_address(unsigned int level)
#endif /* CONFIG_FRAME_POINTER */
-#define HAVE_ARCH_CALLER_ADDR
-
-/* inline function or macro may lead to unexpected result */
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_address(n) return_address(n)
#endif /* __ASSEMBLY__ */
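
This is part of the series making CALLER_ADDRx generic: an arch now overrides a single hook and the common ftrace header expands the rest, roughly as follows (abridged sketch of the generic header, not this file):

    #ifndef ftrace_return_address0
    # define ftrace_return_address0 __builtin_return_address(0)
    #endif

    /* Archs that can't use __builtin_return_address(n > 0) override this. */
    #ifndef ftrace_return_address
    # define ftrace_return_address(n) 0UL
    #endif

    #define CALLER_ADDR0 ((unsigned long)ftrace_return_address0)
    #define CALLER_ADDR1 ((unsigned long)ftrace_return_address(1))
    /* ... */
    #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
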
diff --git a/arch/c6x/kernel/devicetree.c b/arch/c6x/kernel/devicetree.c
index bdb56f09d0ac..287d0e64dfba 100644
--- a/arch/c6x/kernel/devicetree.c
+++ b/arch/c6x/kernel/devicetree.c
@@ -33,8 +33,7 @@ void __init early_init_devtree(void *params)
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 989dd3fe8de1..cf03097176b1 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -238,9 +238,6 @@ struct kvm_vm_data {
#define KVM_NR_PAGE_SIZES 1
#define KVM_PAGES_PER_HPAGE(x) 1
-struct kvm;
-struct kvm_vcpu;
-
struct kvm_mmio_req {
uint64_t addr; /* physical address */
uint64_t size; /* size in bytes */
@@ -599,6 +596,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu);
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+
#endif /* __ASSEMBLY__*/
#endif
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index f034563aeae5..ff0968042829 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -44,10 +44,15 @@
#define EFI_DEBUG 0
+static __initdata unsigned long palo_phys;
+
+static __initdata efi_config_table_type_t arch_tables[] = {
+ {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys},
+ {NULL_GUID, NULL, 0},
+};
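
efi_config_init() consumes arch_tables by scanning the firmware's configuration tables and storing each matching table's address through the registered pointer, here filling palo_phys. A simplified userspace sketch of that matching (types and GUID bytes made up):

    #include <stdio.h>
    #include <string.h>

    typedef struct { unsigned char b[16]; } efi_guid_t;
    struct wanted { efi_guid_t guid; const char *name; unsigned long *ptr; };
    struct fw_tbl { efi_guid_t guid; unsigned long table; };

    static unsigned long palo_phys;
    static const efi_guid_t palo_guid = { { 0xde, 0xad } }; /* placeholder */

    int main(void)
    {
            struct wanted want[] = { { palo_guid, "PALO", &palo_phys } };
            struct fw_tbl fw[]   = { { palo_guid, 0x7f000000UL } };

            for (size_t i = 0; i < sizeof(fw) / sizeof(fw[0]); i++)
                    for (size_t j = 0; j < sizeof(want) / sizeof(want[0]); j++)
                            if (!memcmp(&fw[i].guid, &want[j].guid, sizeof(efi_guid_t))) {
                                    *want[j].ptr = fw[i].table;
                                    printf(" %s=0x%lx\n", want[j].name, fw[i].table);
                            }
            return 0;
    }
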
+
extern efi_status_t efi_call_phys (void *, ...);
-struct efi efi;
-EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
@@ -423,9 +428,9 @@ static u8 __init palo_checksum(u8 *buffer, u32 length)
* Parse and handle PALO table which is published at:
* http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
*/
-static void __init handle_palo(unsigned long palo_phys)
+static void __init handle_palo(unsigned long phys_addr)
{
- struct palo_table *palo = __va(palo_phys);
+ struct palo_table *palo = __va(phys_addr);
u8 checksum;
if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
@@ -467,12 +472,13 @@ void __init
efi_init (void)
{
void *efi_map_start, *efi_map_end;
- efi_config_table_t *config_tables;
efi_char16_t *c16;
u64 efi_desc_size;
char *cp, vendor[100] = "unknown";
int i;
- unsigned long palo_phys;
+
+ set_bit(EFI_BOOT, &efi.flags);
+ set_bit(EFI_64BIT, &efi.flags);
/*
* It's too early to be able to use the standard kernel command line
@@ -514,8 +520,6 @@ efi_init (void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff);
- config_tables = __va(efi.systab->tables);
-
/* Show what we know for posterity */
c16 = __va(efi.systab->fw_vendor);
if (c16) {
@@ -528,43 +532,12 @@ efi_init (void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- efi.mps = EFI_INVALID_TABLE_ADDR;
- efi.acpi = EFI_INVALID_TABLE_ADDR;
- efi.acpi20 = EFI_INVALID_TABLE_ADDR;
- efi.smbios = EFI_INVALID_TABLE_ADDR;
- efi.sal_systab = EFI_INVALID_TABLE_ADDR;
- efi.boot_info = EFI_INVALID_TABLE_ADDR;
- efi.hcdp = EFI_INVALID_TABLE_ADDR;
- efi.uga = EFI_INVALID_TABLE_ADDR;
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
palo_phys = EFI_INVALID_TABLE_ADDR;
- for (i = 0; i < (int) efi.systab->nr_tables; i++) {
- if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
- efi.mps = config_tables[i].table;
- printk(" MPS=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
- efi.acpi20 = config_tables[i].table;
- printk(" ACPI 2.0=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
- efi.acpi = config_tables[i].table;
- printk(" ACPI=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
- efi.smbios = config_tables[i].table;
- printk(" SMBIOS=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
- efi.sal_systab = config_tables[i].table;
- printk(" SALsystab=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
- efi.hcdp = config_tables[i].table;
- printk(" HCDP=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid,
- PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
- palo_phys = config_tables[i].table;
- printk(" PALO=0x%lx", config_tables[i].table);
- }
- }
- printk("\n");
+ if (efi_config_init(arch_tables) != 0)
+ return;
if (palo_phys != EFI_INVALID_TABLE_ADDR)
handle_palo(palo_phys);
@@ -689,6 +662,8 @@ efi_enter_virtual_mode (void)
return;
}
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
/*
* Now that EFI is in virtual mode, we call the EFI functions more
* efficiently:
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 990b86420cc6..3d50ea955c4c 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -25,6 +25,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select KVM_APIC_ARCHITECTURE
select KVM_MMIO
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 1a4053789d01..18e45ec49bbf 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file)
ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+KVM := ../../../virt/kvm
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
- coalesced_mmio.o irq_comm.o)
+common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
+common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
endif
kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5b2dc0d10c8f..c9aa236dc29b 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
static DEFINE_SPINLOCK(vp_lock);
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
long status;
long tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
{
long status;
@@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn)
*(int *)rtn = 0;
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -702,7 +702,7 @@ again:
out:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (r > 0) {
- kvm_resched(vcpu);
+ cond_resched();
idx = srcu_read_lock(&vcpu->kvm->srcu);
goto again;
}
@@ -1363,10 +1363,6 @@ static void kvm_release_vm_pages(struct kvm *kvm)
}
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
@@ -1375,10 +1371,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_release_vm_pages(kvm);
}
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
-{
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
if (cpu != vcpu->cpu) {
@@ -1467,7 +1459,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kfree(vcpu->arch.apic);
}
-
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1550,12 +1541,8 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
@@ -1591,14 +1578,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return 0;
}
-void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
- enum kvm_mr_change change)
-{
- return;
-}
-
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
kvm_flush_remote_tlbs(kvm);
@@ -1847,10 +1826,6 @@ int kvm_arch_hardware_setup(void)
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{
return __apic_accept_irq(vcpu, irq->vector);
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
index d05b8455c44c..bdc48111f0df 100644
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -419,10 +419,9 @@ void free_initrd_mem(unsigned long start, unsigned long end)
#endif
#ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
- pr_err("%s(%lx, %lx)\n",
+ pr_err("%s(%llx, %llx)\n",
__func__, start, end);
}
#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 0a2c68f9f9b0..c9c766fe6321 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -52,13 +52,13 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
}
#ifdef CONFIG_EARLY_PRINTK
-static char *stdout;
+static const char *stdout;
static int __init early_init_dt_scan_chosen_serial(unsigned long node,
const char *uname, int depth, void *data)
{
- unsigned long l;
- char *p;
+ int l;
+ const char *p;
pr_debug("%s: depth: %d, uname: %s\n", __func__, depth, uname);
@@ -89,7 +89,7 @@ static int __init early_init_dt_scan_chosen_serial(unsigned long node,
(strncmp(p, "xlnx,opb-uartlite", 17) == 0) ||
(strncmp(p, "xlnx,axi-uartlite", 17) == 0) ||
(strncmp(p, "xlnx,mdm", 8) == 0)) {
- unsigned int *addrp;
+ const unsigned int *addrp;
*(u32 *)data = UARTLITE;
@@ -136,8 +136,7 @@ void __init early_init_devtree(void *params)
}
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4d6fa0bf1305..5e3f4b0f18c8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -71,11 +71,6 @@
#define CAUSEB_DC 27
#define CAUSEF_DC (_ULCAST_(1) << 27)
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-struct kvm_interrupt;
-
extern atomic_t kvm_mips_instance;
extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn);
extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn);
@@ -659,5 +654,16 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size);
extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
#endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index 5712bb532245..32b87882ac87 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -58,8 +58,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
}
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index 2c7b3ade8ec0..5cd48572450e 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -196,16 +196,21 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
return -ENOIOCTLCMD;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9b973e0af9cb..d340d53c345b 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -74,6 +74,7 @@
*/
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL_GPL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
/*
* Not static inline because used by IP27 special magic initialization code
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 5869e3fa5dd3..150215a91711 100644
--- a/arch/openrisc/kernel/prom.c
+++ b/arch/openrisc/kernel/prom.c
@@ -96,8 +96,7 @@ void __init early_init_devtree(void *params)
}
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h
index 72c0fafaa039..544ed8ef87eb 100644
--- a/arch/parisc/include/asm/ftrace.h
+++ b/arch/parisc/include/asm/ftrace.h
@@ -24,15 +24,7 @@ extern void return_to_handler(void);
extern unsigned long return_address(unsigned int);
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 return_address(1)
-#define CALLER_ADDR2 return_address(2)
-#define CALLER_ADDR3 return_address(3)
-#define CALLER_ADDR4 return_address(4)
-#define CALLER_ADDR5 return_address(5)
-#define CALLER_ADDR6 return_address(6)
+#define ftrace_return_address(n) return_address(n)
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index af326cde7cb6..f391f3fbde8b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,7 +53,6 @@
#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
@@ -81,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
/* Physical Address Mask - allowed range of real mode RAM access */
#define KVM_PAM 0x0fffffffffffffffULL
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-
struct lppaca;
struct slb_shadow;
struct dtl_entry;
@@ -628,4 +623,12 @@ struct kvm_vcpu_arch {
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_exit(void) {}
+
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe03d77..e2dd05c81bc6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -143,9 +143,11 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
extern void kvm_release_hpt(struct kvmppc_linear_info *li);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
-extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+extern int kvmppc_core_create_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
unsigned long npages);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index bc2da154f68b..ac204e022922 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -43,9 +43,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
extern void kdump_move_device_tree(void);
-/* CPU OF node matching */
-struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
-
/* cache lookup */
struct device_node *of_find_next_cache_node(struct device_node *np);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 2230fd0ca3e4..7213d930918d 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -55,9 +55,9 @@ int crash_mem_ranges;
int __init early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data)
{
- __be32 *sections;
+ const __be32 *sections;
int i, num_sections;
- unsigned long size;
+ int size;
const int *token;
if (depth != 1 || strcmp(uname, "rtas") != 0)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b6f7a99cce2..fe0c17dcfbd7 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -162,7 +162,7 @@ static struct ibm_pa_feature {
{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
};
-static void __init scan_features(unsigned long node, unsigned char *ftrs,
+static void __init scan_features(unsigned long node, const unsigned char *ftrs,
unsigned long tablelen,
struct ibm_pa_feature *fp,
unsigned long ft_size)
@@ -201,8 +201,8 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs,
static void __init check_cpu_pa_features(unsigned long node)
{
- unsigned char *pa_ftrs;
- unsigned long tablelen;
+ const unsigned char *pa_ftrs;
+ int tablelen;
pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
if (pa_ftrs == NULL)
@@ -215,7 +215,7 @@ static void __init check_cpu_pa_features(unsigned long node)
#ifdef CONFIG_PPC_STD_MMU_64
static void __init check_cpu_slb_size(unsigned long node)
{
- u32 *slb_size_ptr;
+ const __be32 *slb_size_ptr;
slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
if (slb_size_ptr != NULL) {
@@ -256,7 +256,7 @@ static struct feature_property {
static inline void identical_pvr_fixup(unsigned long node)
{
unsigned int pvr;
- char *model = of_get_flat_dt_prop(node, "model", NULL);
+ const char *model = of_get_flat_dt_prop(node, "model", NULL);
/*
* Since 440GR(x)/440EP(x) processors have the same pvr,
@@ -294,11 +294,11 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth,
void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- const u32 *prop;
- const u32 *intserv;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ const __be32 *intserv;
int i, nthreads;
- unsigned long len;
+ int len;
int found = -1;
int found_thread = 0;
@@ -389,7 +389,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
int depth, void *data)
{
- unsigned long *lprop;
+ const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
/* Use common scan routine to determine if this is the chosen node */
if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
@@ -440,8 +440,9 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
*/
static int __init early_init_dt_scan_drconf_memory(unsigned long node)
{
- __be32 *dm, *ls, *usm;
- unsigned long l, n, flags;
+ const __be32 *dm, *ls, *usm;
+ int l;
+ unsigned long n, flags;
u64 base, size, memblock_size;
unsigned int is_kexec_kdump = 0, rngs;
@@ -550,8 +551,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
}
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
@@ -827,49 +827,10 @@ static int __init prom_reconfig_setup(void)
__initcall(prom_reconfig_setup);
#endif
-/* Find the device node for a given logical cpu number, also returns the cpu
- * local thread number (index in ibm,interrupt-server#s) if relevant and
- * asked for (non NULL)
- */
-struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
- int hardid;
- struct device_node *np;
-
- hardid = get_hard_smp_processor_id(cpu);
-
- for_each_node_by_type(np, "cpu") {
- const u32 *intserv;
- unsigned int plen, t;
-
- /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist
- * fallback to "reg" property and assume no threads
- */
- intserv = of_get_property(np, "ibm,ppc-interrupt-server#s",
- &plen);
- if (intserv == NULL) {
- const u32 *reg = of_get_property(np, "reg", NULL);
- if (reg == NULL)
- continue;
- if (*reg == hardid) {
- if (thread)
- *thread = 0;
- return np;
- }
- } else {
- plen /= sizeof(u32);
- for (t = 0; t < plen; t++) {
- if (hardid == intserv[t]) {
- if (thread)
- *thread = t;
- return np;
- }
- }
- }
- }
- return NULL;
+ return (int)phys_id == get_hard_smp_processor_id(cpu);
}
-EXPORT_SYMBOL(of_get_cpu_node);
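
With the open-coded walk gone, the generic of_get_cpu_node() iterates the "cpu" nodes and defers the hardware-ID comparison to this one-line hook. A sketch of that division of labour (hardware IDs invented for illustration):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Pretend hardware IDs, indexed by logical cpu (assumed mapping). */
    static const uint64_t hard_id[] = { 0x100, 0x101, 0x200, 0x201 };

    static bool arch_match_cpu_phys_id(int cpu, uint64_t phys_id)
    {
            return phys_id == hard_id[cpu];
    }

    /* Stand-in for the generic walk over "cpu" nodes' reg values. */
    static int find_cpu_node(uint64_t reg_value)
    {
            for (int cpu = 0; cpu < 4; cpu++)
                    if (arch_match_cpu_phys_id(cpu, reg_value))
                            return cpu;
            return -1;
    }

    int main(void)
    {
            printf("reg 0x200 -> logical cpu %d\n", find_cpu_node(0x200));
            return 0;
    }
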
#if defined(CONFIG_DEBUG_FS) && defined(DEBUG)
static struct debugfs_blob_wrapper flat_dt_blob;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 52add6f3e201..2d6f5a8e19e2 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1135,7 +1135,7 @@ void __init rtas_initialize(void)
int __init early_init_dt_scan_rtas(unsigned long node,
const char *uname, int depth, void *data)
{
- u32 *basep, *entryp, *sizep;
+ const u32 *basep, *entryp, *sizep;
if (depth != 1 || strcmp(uname, "rtas") != 0)
return 0;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f862579..60019a6fd6bb 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -155,6 +155,7 @@ config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
help
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f4d46c..008cd856c5b5 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -5,9 +5,10 @@
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+KVM := ../../../virt/kvm
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
- eventfd.o)
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
+ $(KVM)/eventfd.o
CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_mmu.o := -I.
@@ -53,7 +54,7 @@ kvm-e500mc-objs := \
kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
- ../../../virt/kvm/coalesced_mmio.o \
+ $(KVM)/coalesced_mmio.o \
fpu.o \
book3s_paired_singles.o \
book3s_pr.o \
@@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
kvm-book3s_64-module-objs := \
- ../../../virt/kvm/kvm_main.o \
- ../../../virt/kvm/eventfd.o \
+ $(KVM)/kvm_main.o \
+ $(KVM)/eventfd.o \
powerpc.o \
emulate.o \
book3s.o \
@@ -111,7 +112,7 @@ kvm-book3s_32-objs := \
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 102ad8a255f3..a54c59db7bd8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1258,7 +1258,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
kvm_guest_exit();
preempt_enable();
- kvm_resched(vcpu);
+ cond_resched();
spin_lock(&vc->lock);
now = get_tb();
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1a1b51189773..0a91f47e264b 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1592,12 +1592,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 2861ae9eaae6..b58d61039015 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1822,8 +1822,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return 0;
}
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
- struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
@@ -1835,7 +1834,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
e->irqchip.pin = ue->u.irqchip.pin;
if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
goto out;
- rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6316ee336e88..ea4cfdc991da 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -246,24 +246,16 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
return r;
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
void kvm_arch_check_processor_compat(void *rtn)
{
*(int *)rtn = kvmppc_core_check_processor_compat();
@@ -296,11 +288,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -409,15 +397,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- kvmppc_core_free_memslot(free, dont);
+ kvmppc_core_free_memslot(kvm, free, dont);
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
- return kvmppc_core_create_memslot(slot, npages);
+ return kvmppc_core_create_memslot(kvm, slot, npages);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -436,10 +425,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
kvmppc_core_commit_memory_region(kvm, mem, old);
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
@@ -1125,7 +1110,3 @@ int kvm_arch_init(void *opaque)
{
return 0;
}
-
-void kvm_arch_exit(void)
-{
-}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index e303a6d74e3a..929e3e675868 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -250,9 +250,9 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- u32 *prop;
- unsigned long size = 0;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ int size = 0;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -306,9 +306,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
const char *uname, int depth,
void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- u32 *prop;
- unsigned long size = 0;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ int size = 0;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -389,9 +389,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
const char *uname, int depth,
void *data) {
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- unsigned long *addr_prop;
- u32 *page_count_prop;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be64 *addr_prop;
+ const __be32 *page_count_prop;
unsigned int expected_pages;
long unsigned int phys_addr;
long unsigned int block_size;
@@ -533,8 +533,8 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
const char *uname, int depth,
void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- u32 *prop;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 18c104820198..6e19b0ad5d26 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -199,8 +199,8 @@ static void __init efika_setup_arch(void)
static int __init efika_probe(void)
{
- char *model = of_get_flat_dt_prop(of_get_flat_dt_root(),
- "model", NULL);
+ const char *model = of_get_flat_dt_prop(of_get_flat_dt_root(),
+ "model", NULL);
if (model == NULL)
return 0;
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index c665d7de6c99..7044fd36197b 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -574,8 +574,8 @@ chrp_init2(void)
static int __init chrp_probe(void)
{
- char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
- "device_type", NULL);
+ const char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
+ "device_type", NULL);
if (dtype == NULL)
return 0;
if (strcmp(dtype, "chrp"))
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 628c564ceadb..06620ea33a8e 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -35,8 +35,8 @@ static unsigned int opal_irq_count;
int __init early_init_dt_scan_opal(unsigned long node,
const char *uname, int depth, void *data)
{
- const void *basep, *entryp;
- unsigned long basesz, entrysz;
+ const void *basep, *entryp, *sizep;
+ int basesz, entrysz, runtimesz;
if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
return 0;
@@ -50,10 +50,10 @@ int __init early_init_dt_scan_opal(unsigned long node,
opal.base = of_read_number(basep, basesz/4);
opal.entry = of_read_number(entryp, entrysz/4);
- pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%ld)\n",
+ pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n",
opal.base, basep, basesz);
- pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%ld)\n",
- opal.entry, entryp, entrysz);
+ pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
+ opal.size, sizep, runtimesz);
powerpc_firmware_features |= FW_FEATURE_OPAL;
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
@@ -105,7 +105,7 @@ int opal_get_chars(uint32_t vtermno, char *buf, int count)
opal_poll_events(&evt);
if ((evt & OPAL_EVENT_CONSOLE_INPUT) == 0)
return 0;
- len = count;
+ len = cpu_to_be64(count);
rc = opal_console_read(vtermno, &len, buf);
if (rc == OPAL_SUCCESS)
- return len;
+ return be64_to_cpu(len);
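
The cpu_to_be64() added above matters because OPAL consumes the length by reference as a big-endian 64-bit value (hence the matching be64_to_cpu() on the way back). A sketch of the swap on an assumed little-endian host; on big-endian the conversion is a no-op:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t cpu_to_be64(uint64_t x)
    {
            return __builtin_bswap64(x); /* LE host assumed; no-op on BE */
    }

    int main(void)
    {
            uint64_t count = 16;

            printf("in-memory raw:   0x%016llx\n", (unsigned long long)count);
            printf("what OPAL reads: 0x%016llx\n",
                   (unsigned long long)cpu_to_be64(count));
            return 0;
    }
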
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 54b998f2750d..995cc0457c76 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -646,7 +646,7 @@ static int __init pseries_probe_fw_features(unsigned long node,
void *data)
{
const char *prop;
- unsigned long len;
+ int len;
static int hypertas_found;
static int vec5_found;
@@ -679,7 +679,7 @@ static int __init pseries_probe_fw_features(unsigned long node,
static int __init pSeries_probe(void)
{
unsigned long root = of_get_flat_dt_root();
- char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);
+ const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);
if (dtype == NULL)
return 0;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 16bd5d169cdb..99971dfc6b9a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
#ifndef ASM_KVM_HOST_H
#define ASM_KVM_HOST_H
+
+#include <linux/types.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
+#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <asm/debug.h>
#include <asm/cpu.h>
@@ -266,4 +269,18 @@ struct kvm_arch{
};
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_check_processor_compat(void *rtn) {}
+static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) {}
+
#endif
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 8fe9d65a4585..40b4c6470f88 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,7 +6,8 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 698fb826e149..412fbc5dc688 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -86,16 +86,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
static unsigned long long *facilities;
/* Section: not file related */
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
/* every s390 is virtualization enabled ;-) */
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
@@ -105,19 +101,11 @@ void kvm_arch_hardware_unsetup(void)
{
}
-void kvm_arch_check_processor_compat(void *rtn)
-{
-}
-
int kvm_arch_init(void *opaque)
{
return 0;
}
-void kvm_arch_exit(void)
-{
-}
-
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
@@ -127,7 +115,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -289,10 +277,6 @@ static void kvm_free_vcpus(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
@@ -320,11 +304,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- /* Nothing todo */
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
save_fp_regs(&vcpu->arch.host_fpregs);
@@ -971,12 +950,8 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
@@ -1026,15 +1001,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
return;
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
-
static int __init kvm_s390_init(void)
{
int ret;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index eba15f18fd38..a4dfc0bd05db 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 13e9966464c2..e79fb6ebaa42 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -40,15 +40,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
/* arch/sh/kernel/return_address.c */
extern void *return_address(unsigned int);
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_address(n) return_address(n)
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4e5b80d883c8..3115eae96ad8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -208,6 +208,12 @@ config ARCH_HIBERNATION_POSSIBLE
config ARCH_SUSPEND_POSSIBLE
def_bool y
+config ARCH_WANT_HUGE_PMD_SHARE
+ def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+ def_bool y
+
config ZONE_DMA32
bool
default X86_64
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 1308beed7abe..17177e80d466 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -878,14 +878,15 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
struct efi_info *efi;
efi_loaded_image_t *image;
void *options;
- u32 load_options_size;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
- unsigned long cmdline;
+ char *cmdline_ptr;
u16 *s2;
u8 *s1;
int i;
+ unsigned long ramdisk_addr;
+ unsigned long ramdisk_size;
sys_table = _table;
@@ -896,13 +897,14 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->handle_protocol,
handle, &proto, (void *)&image);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+ efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
return NULL;
}
- status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+ status = efi_low_alloc(sys_table, 0x4000, 1,
+ (unsigned long *)&boot_params);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc lowmem for boot params\n");
+ efi_printk(sys_table, "Failed to alloc lowmem for boot params\n");
return NULL;
}
@@ -927,40 +929,10 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
- options = image->load_options;
- load_options_size = image->load_options_size / 2; /* ASCII */
- cmdline = 0;
- s2 = (u16 *)options;
-
- if (s2) {
- while (*s2 && *s2 != '\n' && options_size < load_options_size) {
- s2++;
- options_size++;
- }
-
- if (options_size) {
- if (options_size > hdr->cmdline_size)
- options_size = hdr->cmdline_size;
-
- options_size++; /* NUL termination */
-
- status = low_alloc(options_size, 1, &cmdline);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for cmdline\n");
- goto fail;
- }
-
- s1 = (u8 *)(unsigned long)cmdline;
- s2 = (u16 *)options;
-
- for (i = 0; i < options_size - 1; i++)
- *s1++ = *s2++;
-
- *s1 = '\0';
- }
- }
-
- hdr->cmd_line_ptr = cmdline;
+ cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
+ if (!cmdline_ptr)
+ goto fail;
+ hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
hdr->ramdisk_image = 0;
hdr->ramdisk_size = 0;
@@ -970,16 +942,20 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
memset(sdt, 0, sizeof(*sdt));
- status = handle_ramdisks(image, hdr);
+ status = handle_cmdline_files(sys_table, image,
+ (char *)(unsigned long)hdr->cmd_line_ptr,
+ "initrd=", hdr->initrd_addr_max,
+ &ramdisk_addr, &ramdisk_size);
if (status != EFI_SUCCESS)
goto fail2;
+ hdr->ramdisk_image = ramdisk_addr;
+ hdr->ramdisk_size = ramdisk_size;
return boot_params;
fail2:
- if (options_size)
- low_free(options_size, hdr->cmd_line_ptr);
+ efi_free(sys_table, options_size, hdr->cmd_line_ptr);
fail:
- low_free(0x4000, (unsigned long)boot_params);
+ efi_free(sys_table, 0x4000, (unsigned long)boot_params);
return NULL;
}
@@ -1002,7 +978,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
again:
size += sizeof(*mem_map) * 2;
_size = size;
- status = low_alloc(size, 1, (unsigned long *)&mem_map);
+ status = efi_low_alloc(sys_table, size, 1, (unsigned long *)&mem_map);
if (status != EFI_SUCCESS)
return status;
@@ -1010,7 +986,7 @@ get_map:
status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
mem_map, &key, &desc_size, &desc_version);
if (status == EFI_BUFFER_TOO_SMALL) {
- low_free(_size, (unsigned long)mem_map);
+ efi_free(sys_table, _size, (unsigned long)mem_map);
goto again;
}
@@ -1112,44 +1088,10 @@ get_map:
return EFI_SUCCESS;
free_mem_map:
- low_free(_size, (unsigned long)mem_map);
+ efi_free(sys_table, _size, (unsigned long)mem_map);
return status;
}
-static efi_status_t relocate_kernel(struct setup_header *hdr)
-{
- unsigned long start, nr_pages;
- efi_status_t status;
-
- /*
- * The EFI firmware loader could have placed the kernel image
- * anywhere in memory, but the kernel has various restrictions
- * on the max physical address it can run at. Attempt to move
- * the kernel to boot_params.pref_address, or as low as
- * possible.
- */
- start = hdr->pref_address;
- nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
-
- status = efi_call_phys4(sys_table->boottime->allocate_pages,
- EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
- nr_pages, &start);
- if (status != EFI_SUCCESS) {
- status = low_alloc(hdr->init_size, hdr->kernel_alignment,
- &start);
- if (status != EFI_SUCCESS)
- efi_printk("Failed to alloc mem for kernel\n");
- }
-
- if (status == EFI_SUCCESS)
- memcpy((void *)start, (void *)(unsigned long)hdr->code32_start,
- hdr->init_size);
-
- hdr->pref_address = hdr->code32_start;
- hdr->code32_start = (__u32)start;
-
- return status;
-}
/*
* On success we return a pointer to a boot_params structure, and NULL
@@ -1178,14 +1120,15 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
EFI_LOADER_DATA, sizeof(*gdt),
(void **)&gdt);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for gdt structure\n");
+ efi_printk(sys_table, "Failed to alloc mem for gdt structure\n");
goto fail;
}
gdt->size = 0x800;
- status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
+ status = efi_low_alloc(sys_table, gdt->size, 8,
+ (unsigned long *)&gdt->address);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for gdt\n");
+ efi_printk(sys_table, "Failed to alloc mem for gdt\n");
goto fail;
}
@@ -1193,7 +1136,7 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
EFI_LOADER_DATA, sizeof(*idt),
(void **)&idt);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for idt structure\n");
+ efi_printk(sys_table, "Failed to alloc mem for idt structure\n");
goto fail;
}
@@ -1205,10 +1148,16 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
* address, relocate it.
*/
if (hdr->pref_address != hdr->code32_start) {
- status = relocate_kernel(hdr);
-
+ unsigned long bzimage_addr = hdr->code32_start;
+ status = efi_relocate_kernel(sys_table, &bzimage_addr,
+ hdr->init_size, hdr->init_size,
+ hdr->pref_address,
+ hdr->kernel_alignment);
if (status != EFI_SUCCESS)
goto fail;
+
+ hdr->pref_address = hdr->code32_start;
+ hdr->code32_start = bzimage_addr;
}
status = exit_boot(boot_params, handle);
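
Throughout eboot.c the pattern is the same: open-coded helpers become calls into the shared EFI stub, each taking sys_table as an explicit first argument. A condensed sketch of the resulting call sequence, with signatures taken from the hunks above and error handling trimmed:

    unsigned long addr;
    efi_status_t status;

    /* allocate low memory, report failures through the stub printer */
    status = efi_low_alloc(sys_table, size, 1, &addr);
    if (status != EFI_SUCCESS)
            efi_printk(sys_table, "Failed to alloc lowmem\n");

    /* ... use the buffer, then hand it back ... */
    efi_free(sys_table, size, addr);
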
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index e5b0a8f91c5f..cc78e2da7af9 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -10,8 +10,6 @@
#define SEG_GRANULARITY_4KB (1 << 0)
#define DESC_TYPE_CODE_DATA (1 << 0)
-
-#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
#define EFI_READ_CHUNK_SIZE (1024 * 1024)
#define EFI_CONSOLE_OUT_DEVICE_GUID \
@@ -40,6 +38,24 @@ struct efi_graphics_output_mode_info {
u32 pixels_per_scan_line;
} __packed;
+struct efi_graphics_output_protocol_mode_32 {
+ u32 max_mode;
+ u32 mode;
+ u32 info;
+ u32 size_of_info;
+ u64 frame_buffer_base;
+ u32 frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_mode_64 {
+ u32 max_mode;
+ u32 mode;
+ u64 info;
+ u64 size_of_info;
+ u64 frame_buffer_base;
+ u64 frame_buffer_size;
+} __packed;
+
struct efi_graphics_output_protocol_mode {
u32 max_mode;
u32 mode;
@@ -49,6 +65,20 @@ struct efi_graphics_output_protocol_mode {
unsigned long frame_buffer_size;
} __packed;
+struct efi_graphics_output_protocol_32 {
+ u32 query_mode;
+ u32 set_mode;
+ u32 blt;
+ u32 mode;
+};
+
+struct efi_graphics_output_protocol_64 {
+ u64 query_mode;
+ u64 set_mode;
+ u64 blt;
+ u64 mode;
+};
+
struct efi_graphics_output_protocol {
void *query_mode;
unsigned long set_mode;
@@ -56,16 +86,22 @@ struct efi_graphics_output_protocol {
struct efi_graphics_output_protocol_mode *mode;
};
+struct efi_uga_draw_protocol_32 {
+ u32 get_mode;
+ u32 set_mode;
+ u32 blt;
+};
+
+struct efi_uga_draw_protocol_64 {
+ u64 get_mode;
+ u64 set_mode;
+ u64 blt;
+};
+
struct efi_uga_draw_protocol {
void *get_mode;
void *set_mode;
void *blt;
};
-struct efi_simple_text_output_protocol {
- void *reset;
- void *output_string;
- void *test_string;
-};
-
#endif /* BOOT_COMPRESSED_EBOOT_H */
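
The new _32/_64 struct pairs describe the firmware ABI with fixed-width fields, so a kernel of either word size can parse tables produced by 32- or 64-bit firmware; the natural-width variants remain for same-width use. A sketch of the selection this enables (the helper name and the efi_enabled(EFI_64BIT) test are assumptions about the caller):

    static u64 gop_frame_buffer_base(void *mode)
    {
            if (efi_enabled(EFI_64BIT)) {
                    struct efi_graphics_output_protocol_mode_64 *m = mode;
                    return m->frame_buffer_base;
            } else {
                    struct efi_graphics_output_protocol_mode_32 *m = mode;
                    return m->frame_buffer_base;
            }
    }
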
diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h
index c09241659971..b4b38bacb404 100644
--- a/arch/x86/include/asm/dma-contiguous.h
+++ b/arch/x86/include/asm/dma-contiguous.h
@@ -4,7 +4,6 @@
#ifdef __KERNEL__
#include <linux/types.h>
-#include <asm-generic/dma-contiguous.h>
static inline void
dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { }
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 60c89f30c727..7a76dc284166 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -94,7 +94,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
#endif /* CONFIG_X86_32 */
extern int add_efi_memmap;
-extern unsigned long x86_efi_facility;
+extern struct efi_scratch efi_scratch;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4c481e751e8e..b15a6b5fa341 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+ /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+ (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
#define SELECTOR_TI_MASK (1 << 2)
#define SELECTOR_RPL_MASK 0x03
@@ -96,10 +103,6 @@
#define ASYNC_PF_PER_VCPU 64
-struct kvm_vcpu;
-struct kvm;
-struct kvm_async_pf;
-
enum kvm_reg {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
@@ -632,8 +635,8 @@ struct msr_data {
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
- int (*hardware_enable)(void *dummy);
- void (*hardware_disable)(void *dummy);
+ int (*hardware_enable)(void);
+ void (*hardware_disable)(void);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
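
gfn_to_index() converts a guest frame number into an index within a memslot's per-level array of large-page ranges. A worked example as a comment, assuming KVM_HPAGE_GFN_SHIFT(2) == 9 (the 2 MiB level):

    /*
     * gfn_to_index(0x12345, 0x12000, 2)
     *   == (0x12345 >> 9) - (0x12000 >> 9)
     *   == 0x91 - 0x90
     *   == 1
     * i.e. the gfn lies in the slot's second 2 MiB region.
     */
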
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5d9a3033b3d7..d3a87780c70b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
__u32 padding[3];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
/* for KVM_SET_CPUID2 */
struct kvm_cpuid2 {
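
The flag values themselves are unchanged; BIT(n) merely spells them through the kernel's bit helper:

    /* BIT(n) is (1UL << (n)) per <linux/bitops.h>:
     *   BIT(0) == 1, BIT(1) == 2, BIT(2) == 4
     * Caveat (editorial note): userspace including this UAPI header must
     * have a BIT() definition in scope, since <linux/bitops.h> is
     * kernel-internal. */
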
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index b1581527a236..2fbad6b9f23c 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -52,8 +52,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
}
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 91964c663f0d..a3627ade4b15 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -908,11 +908,11 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL32", 4)) {
- set_bit(EFI_BOOT, &x86_efi_facility);
+ set_bit(EFI_BOOT, &efi.flags);
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL64", 4)) {
- set_bit(EFI_BOOT, &x86_efi_facility);
- set_bit(EFI_64BIT, &x86_efi_facility);
+ set_bit(EFI_BOOT, &efi.flags);
+ set_bit(EFI_64BIT, &efi.flags);
}
if (efi_enabled(EFI_BOOT))
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..bdccfb62aa0d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
select MMU_NOTIFIER
select ANON_INODES
select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
@@ -38,6 +39,7 @@ config KVM
select PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select KVM_VFIO
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d609e1d84048..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -5,12 +5,13 @@ CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
-kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
- coalesced_mmio.o irq_comm.o eventfd.o \
- irqchip.o)
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \
- assigned-dev.o iommu.o)
-kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
+KVM := ../../../virt/kvm
+
+kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o
+kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o cpuid.o pmu.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a20ecb5b6cbf..89d288237b9c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -187,8 +187,14 @@ static bool supported_xcr0_bit(unsigned bit)
#define F(x) bit(X86_FEATURE_##x)
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index, int *nent, int maxnent)
+static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
+ u32 func, u32 index, int *nent, int maxnent)
+{
+ return 0;
+}
+
+static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ u32 index, int *nent, int maxnent)
{
int r;
unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -480,6 +486,15 @@ out:
return r;
}
+static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
+ u32 idx, int *nent, int maxnent, unsigned int type)
+{
+ if (type == KVM_GET_EMULATED_CPUID)
+ return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+
+ return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+}
+
#undef F
struct kvm_cpuid_param {
@@ -494,8 +509,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
}
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
+ __u32 num_entries, unsigned int ioctl_type)
+{
+ int i;
+
+ if (ioctl_type != KVM_GET_EMULATED_CPUID)
+ return false;
+
+ /*
+ * We want to make sure that ->padding is being passed clean from
+ * userspace in case we want to use it for something in the future.
+ *
+ * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
+ * have to content ourselves with checking only the emulated side. /me
+ * sheds a tear.
+ */
+ for (i = 0; i < num_entries; i++) {
+ if (entries[i].padding[0] ||
+ entries[i].padding[1] ||
+ entries[i].padding[2])
+ return true;
+ }
+ return false;
+}
+
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries,
+ unsigned int type)
{
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG, i;
@@ -512,6 +553,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
goto out;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+
+ if (sanity_check_entries(entries, cpuid->nent, type))
+ return -EINVAL;
+
r = -ENOMEM;
cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
if (!cpuid_entries)
@@ -525,7 +570,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
continue;
r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
- &nent, cpuid->nent);
+ &nent, cpuid->nent, type);
if (r)
goto out_free;
@@ -536,7 +581,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
limit = cpuid_entries[nent - 1].eax;
for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
- &nent, cpuid->nent);
+ &nent, cpuid->nent, type);
if (r)
goto out_free;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b7fd07984888..f1e4895174b2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,9 @@
void kvm_update_cpuid(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries);
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries,
+ unsigned int type);
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
struct kvm_cpuid_entry __user *entries);
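
With the dispatch on the ioctl number in place, userspace exercises the new call exactly like KVM_GET_SUPPORTED_CPUID. A hedged userspace sketch (the 64-entry buffer and error handling are illustrative):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            struct {
                    struct kvm_cpuid2 hdr;
                    struct kvm_cpuid_entry2 ent[64];
            } cpuid = { .hdr = { .nent = 64 } };
            int kvm_fd = open("/dev/kvm", O_RDWR);

            /* each entry's padding[] must be zero, per sanity_check_entries() */
            if (kvm_fd < 0 || ioctl(kvm_fd, KVM_GET_EMULATED_CPUID, &cpuid) < 0)
                    perror("KVM_GET_EMULATED_CPUID");
            return 0;
    }
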
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e14b1f8667bb..0cc34f594c2a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3241,7 +3241,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
- return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+ return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
}
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
@@ -4229,7 +4229,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
if (nr_to_scan == 0)
goto out;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@@ -4265,7 +4265,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
break;
}
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
out:
return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7e6090e13237..a6a6370c2010 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -69,6 +69,7 @@ struct guest_walker {
pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+ bool pte_writable[PT_MAX_FULL_LEVELS];
unsigned pt_access;
unsigned pte_access;
gfn_t gfn;
@@ -130,6 +131,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
if (pte == orig_pte)
continue;
+ /*
+ * If the slot is read-only, simply do not process the accessed
+ * and dirty bits. This is the correct thing to do if the slot
+ * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+ * are only supported if the accessed and dirty bits are already
+ * set in the ROM (so that MMIO writes are never needed).
+ *
+ * Note that NPT does not allow this at all and faults, since
+ * it always wants nested page table entries for the guest
+ * page tables to be writable. And EPT works but will simply
+ * overwrite the read-only memory to set the accessed and dirty
+ * bits.
+ */
+ if (unlikely(!walker->pte_writable[level - 1]))
+ continue;
+
ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
if (ret)
return ret;
@@ -204,7 +221,8 @@ retry_walk:
goto error;
real_gfn = gpa_to_gfn(real_gfn);
- host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+ host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+ &walker->pte_writable[walker->level - 1]);
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8bf40a243d75..03f7d03c92a2 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -606,7 +606,7 @@ static int has_svm(void)
return 1;
}
-static void svm_hardware_disable(void *garbage)
+static void svm_hardware_disable(void)
{
/* Make sure we clean up behind us */
if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -617,7 +617,7 @@ static void svm_hardware_disable(void *garbage)
amd_pmu_disable_virt();
}
-static int svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void)
{
struct svm_cpu_data *sd;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51139ff34917..a3476bedd201 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2569,7 +2569,7 @@ static void kvm_cpu_vmxon(u64 addr)
: "memory", "cc");
}
-static int hardware_enable(void *garbage)
+static int hardware_enable(void)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2633,7 +2633,7 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
}
-static void hardware_disable(void *garbage)
+static void hardware_disable(void)
{
if (vmm_exclusive) {
vmclear_local_loaded_vmcss();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 684f46dc87de..dd894d4859a8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -247,7 +247,7 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
{
unsigned int cpu = smp_processor_id();
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -2550,7 +2550,7 @@ out:
return r;
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -2560,6 +2560,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_EXT_EMUL_CPUID:
case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
@@ -2669,15 +2670,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_GET_SUPPORTED_CPUID: {
+ case KVM_GET_SUPPORTED_CPUID:
+ case KVM_GET_EMULATED_CPUID: {
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
- r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
- cpuid_arg->entries);
+
+ r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
+ ioctl);
if (r)
goto out;
@@ -5153,7 +5156,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
@@ -5163,7 +5166,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
send_ipi = 1;
}
}
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@@ -5310,12 +5313,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
atomic_set(&kvm_guest_has_master_clock, 0);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -5958,7 +5961,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
if (need_resched()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- kvm_resched(vcpu);
+ cond_resched();
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
}
}
@@ -6612,7 +6615,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
kvm_rip_write(vcpu, 0);
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
@@ -6623,7 +6626,7 @@ int kvm_arch_hardware_enable(void *garbage)
bool stable, backwards_tsc = false;
kvm_shared_msr_cpu_online();
- ret = kvm_x86_ops->hardware_enable(garbage);
+ ret = kvm_x86_ops->hardware_enable();
if (ret != 0)
return ret;
@@ -6703,10 +6706,10 @@ int kvm_arch_hardware_enable(void *garbage)
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
{
- kvm_x86_ops->hardware_disable(garbage);
- drop_user_return_notifiers(garbage);
+ kvm_x86_ops->hardware_disable();
+ drop_user_return_notifiers();
}
int kvm_arch_hardware_setup(void)
@@ -6818,6 +6821,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
static_key_slow_dec(&kvm_no_apic_vcpu);
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (type)
@@ -6909,7 +6916,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
int i;
@@ -6930,7 +6937,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
}
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
int i;
@@ -6988,6 +6996,10 @@ out_free:
return -ENOMEM;
}
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -7126,7 +7138,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
int r;
if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
- is_error_page(work->page))
+ work->wakeup_all)
return;
r = kvm_mmu_reload(vcpu);
@@ -7236,7 +7248,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct x86_exception fault;
trace_kvm_async_pf_ready(work->arch.token, work->gva);
- if (is_error_page(work->page))
+ if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index ae1aa71d0115..7e73e8c69096 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -16,169 +16,6 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
-static unsigned long page_table_shareable(struct vm_area_struct *svma,
- struct vm_area_struct *vma,
- unsigned long addr, pgoff_t idx)
-{
- unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
- svma->vm_start;
- unsigned long sbase = saddr & PUD_MASK;
- unsigned long s_end = sbase + PUD_SIZE;
-
- /* Allow segments to share if only one is marked locked */
- unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
- unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
-
- /*
- * match the virtual addresses, permission and the alignment of the
- * page table page.
- */
- if (pmd_index(addr) != pmd_index(saddr) ||
- vm_flags != svm_flags ||
- sbase < svma->vm_start || svma->vm_end < s_end)
- return 0;
-
- return saddr;
-}
-
-static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
-{
- unsigned long base = addr & PUD_MASK;
- unsigned long end = base + PUD_SIZE;
-
- /*
- * check on proper vm_flags and page table alignment
- */
- if (vma->vm_flags & VM_MAYSHARE &&
- vma->vm_start <= base && end <= vma->vm_end)
- return 1;
- return 0;
-}
-
-/*
- * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
- * and returns the corresponding pte. While this is not necessary for the
- * !shared pmd case because we can allocate the pmd later as well, it makes the
- * code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
- * racing tasks could either miss the sharing (see huge_pte_offset) or select a
- * bad pmd for sharing.
- */
-static pte_t *
-huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
-{
- struct vm_area_struct *vma = find_vma(mm, addr);
- struct address_space *mapping = vma->vm_file->f_mapping;
- pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
- vma->vm_pgoff;
- struct vm_area_struct *svma;
- unsigned long saddr;
- pte_t *spte = NULL;
- pte_t *pte;
-
- if (!vma_shareable(vma, addr))
- return (pte_t *)pmd_alloc(mm, pud, addr);
-
- mutex_lock(&mapping->i_mmap_mutex);
- vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
- if (svma == vma)
- continue;
-
- saddr = page_table_shareable(svma, vma, addr, idx);
- if (saddr) {
- spte = huge_pte_offset(svma->vm_mm, saddr);
- if (spte) {
- get_page(virt_to_page(spte));
- break;
- }
- }
- }
-
- if (!spte)
- goto out;
-
- spin_lock(&mm->page_table_lock);
- if (pud_none(*pud))
- pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
- else
- put_page(virt_to_page(spte));
- spin_unlock(&mm->page_table_lock);
-out:
- pte = (pte_t *)pmd_alloc(mm, pud, addr);
- mutex_unlock(&mapping->i_mmap_mutex);
- return pte;
-}
-
-/*
- * unmap huge page backed by shared pte.
- *
- * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
- * indicated by page_count > 1, unmap is achieved by clearing pud and
- * decrementing the ref count. If count == 1, the pte page is not shared.
- *
- * called with vma->vm_mm->page_table_lock held.
- *
- * returns: 1 successfully unmapped a shared pte page
- * 0 the underlying pte page is not shared, or it is the last user
- */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
- pgd_t *pgd = pgd_offset(mm, *addr);
- pud_t *pud = pud_offset(pgd, *addr);
-
- BUG_ON(page_count(virt_to_page(ptep)) == 0);
- if (page_count(virt_to_page(ptep)) == 1)
- return 0;
-
- pud_clear(pud);
- put_page(virt_to_page(ptep));
- *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
- return 1;
-}
-
-pte_t *huge_pte_alloc(struct mm_struct *mm,
- unsigned long addr, unsigned long sz)
-{
- pgd_t *pgd;
- pud_t *pud;
- pte_t *pte = NULL;
-
- pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
- if (pud) {
- if (sz == PUD_SIZE) {
- pte = (pte_t *)pud;
- } else {
- BUG_ON(sz != PMD_SIZE);
- if (pud_none(*pud))
- pte = huge_pmd_share(mm, addr, pud);
- else
- pte = (pte_t *)pmd_alloc(mm, pud, addr);
- }
- }
- BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
-
- return pte;
-}
-
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd = NULL;
-
- pgd = pgd_offset(mm, addr);
- if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (pud_present(*pud)) {
- if (pud_large(*pud))
- return (pte_t *)pud;
- pmd = pmd_offset(pud, addr);
- }
- }
- return (pte_t *) pmd;
-}
-
#if 0 /* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -240,30 +77,6 @@ int pud_huge(pud_t pud)
return !!(pud_val(pud) & _PAGE_PSE);
}
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
- struct page *page;
-
- page = pte_page(*(pte_t *)pmd);
- if (page)
- page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
- return page;
-}
-
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int write)
-{
- struct page *page;
-
- page = pte_page(*(pte_t *)pud);
- if (page)
- page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
- return page;
-}
-
#endif
/* x86_64 also uses this file */
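
All of the pmd-sharing and page-table-walking code deleted above moves to generic mm/hugetlb.c, enabled for x86 by the ARCH_WANT_HUGE_PMD_SHARE and ARCH_WANT_GENERAL_HUGETLB symbols added in the Kconfig hunk earlier. A sketch of the generic shape, assuming the guard used in mm/hugetlb.c (the body mirrors the removed x86 walk, with pud_huge() in place of pud_large()):

    #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
    pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
    {
            pgd_t *pgd = pgd_offset(mm, addr);
            pud_t *pud;
            pmd_t *pmd = NULL;

            if (pgd_present(*pgd)) {
                    pud = pud_offset(pgd, addr);
                    if (pud_present(*pud)) {
                            if (pud_huge(*pud))
                                    return (pte_t *)pud;
                            pmd = pmd_offset(pud, addr);
                    }
            }
            return (pte_t *)pmd;
    }
    #endif
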
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 6033be9ff81a..816e940b3998 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -60,34 +60,19 @@
static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
-struct efi __read_mostly efi = {
- .mps = EFI_INVALID_TABLE_ADDR,
- .acpi = EFI_INVALID_TABLE_ADDR,
- .acpi20 = EFI_INVALID_TABLE_ADDR,
- .smbios = EFI_INVALID_TABLE_ADDR,
- .sal_systab = EFI_INVALID_TABLE_ADDR,
- .boot_info = EFI_INVALID_TABLE_ADDR,
- .hcdp = EFI_INVALID_TABLE_ADDR,
- .uga = EFI_INVALID_TABLE_ADDR,
- .uv_systab = EFI_INVALID_TABLE_ADDR,
-};
-EXPORT_SYMBOL(efi);
-
struct efi_memory_map memmap;
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
-unsigned long x86_efi_facility;
+static __initdata efi_config_table_type_t arch_tables[] = {
+#ifdef CONFIG_X86_UV
+ {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
+#endif
+ {NULL_GUID, NULL, 0},
+};
-/*
- * Returns 1 if 'facility' is enabled, 0 otherwise.
- */
-int efi_enabled(int facility)
-{
- return test_bit(facility, &x86_efi_facility) != 0;
-}
-EXPORT_SYMBOL(efi_enabled);
+u64 efi_setup; /* efi setup_data physical address */
static bool __initdata disable_runtime = false;
static int __init setup_noefi(char *arg)
@@ -397,6 +382,8 @@ int __init efi_memblock_x86_reserve_range(void)
memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+ efi.memmap = &memmap;
+
return 0;
}
@@ -454,7 +441,7 @@ void __init efi_reserve_boot_services(void)
void __init efi_unmap_memmap(void)
{
- clear_bit(EFI_MEMMAP, &x86_efi_facility);
+ clear_bit(EFI_MEMMAP, &efi.flags);
if (memmap.map) {
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
@@ -576,80 +563,6 @@ static int __init efi_systab_init(void *phys)
return 0;
}
-static int __init efi_config_init(u64 tables, int nr_tables)
-{
- void *config_tables, *tablep;
- int i, sz;
-
- if (efi_enabled(EFI_64BIT))
- sz = sizeof(efi_config_table_64_t);
- else
- sz = sizeof(efi_config_table_32_t);
-
- /*
- * Let's see what config tables the firmware passed to us.
- */
- config_tables = early_ioremap(tables, nr_tables * sz);
- if (config_tables == NULL) {
- pr_err("Could not map Configuration table!\n");
- return -ENOMEM;
- }
-
- tablep = config_tables;
- pr_info("");
- for (i = 0; i < efi.systab->nr_tables; i++) {
- efi_guid_t guid;
- unsigned long table;
-
- if (efi_enabled(EFI_64BIT)) {
- u64 table64;
- guid = ((efi_config_table_64_t *)tablep)->guid;
- table64 = ((efi_config_table_64_t *)tablep)->table;
- table = table64;
-#ifdef CONFIG_X86_32
- if (table64 >> 32) {
- pr_cont("\n");
- pr_err("Table located above 4GB, disabling EFI.\n");
- early_iounmap(config_tables,
- efi.systab->nr_tables * sz);
- return -EINVAL;
- }
-#endif
- } else {
- guid = ((efi_config_table_32_t *)tablep)->guid;
- table = ((efi_config_table_32_t *)tablep)->table;
- }
- if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
- efi.mps = table;
- pr_cont(" MPS=0x%lx ", table);
- } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
- efi.acpi20 = table;
- pr_cont(" ACPI 2.0=0x%lx ", table);
- } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
- efi.acpi = table;
- pr_cont(" ACPI=0x%lx ", table);
- } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
- efi.smbios = table;
- pr_cont(" SMBIOS=0x%lx ", table);
-#ifdef CONFIG_X86_UV
- } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
- efi.uv_systab = table;
- pr_cont(" UVsystab=0x%lx ", table);
-#endif
- } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
- efi.hcdp = table;
- pr_cont(" HCDP=0x%lx ", table);
- } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
- efi.uga = table;
- pr_cont(" UGA=0x%lx ", table);
- }
- tablep += sz;
- }
- pr_cont("\n");
- early_iounmap(config_tables, efi.systab->nr_tables * sz);
- return 0;
-}
-
static int __init efi_runtime_init(void)
{
efi_runtime_services_t *runtime;
@@ -725,7 +638,11 @@ void __init efi_init(void)
if (efi_systab_init(efi_phys.systab))
return;
- set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
+ efi.config_table = (unsigned long)efi.systab->tables;
+ efi.fw_vendor = (unsigned long)efi.systab->fw_vendor;
+ efi.runtime = (unsigned long)efi.systab->runtime;
/*
* Show what we know for posterity
@@ -743,10 +660,10 @@ void __init efi_init(void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
+ if (efi_config_init(arch_tables))
return;
- set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
+ set_bit(EFI_CONFIG_TABLES, &efi.flags);
/*
* Note: We currently don't support runtime services on an EFI
@@ -758,13 +675,13 @@ void __init efi_init(void)
else {
if (disable_runtime || efi_runtime_init())
return;
- set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
}
if (efi_memmap_init())
return;
- set_bit(EFI_MEMMAP, &x86_efi_facility);
+ set_bit(EFI_MEMMAP, &efi.flags);
#if EFI_DEBUG
print_efi_memmap();
@@ -807,34 +724,6 @@ static void __init runtime_code_page_mkexec(void)
}
}
-/*
- * We can't ioremap data in EFI boot services RAM, because we've already mapped
- * it as RAM. So, look it up in the existing EFI memory map instead. Only
- * callable after efi_enter_virtual_mode and before efi_free_boot_services.
- */
-void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
-{
- void *p;
- if (WARN_ON(!memmap.map))
- return NULL;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
- u64 size = md->num_pages << EFI_PAGE_SHIFT;
- u64 end = md->phys_addr + size;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
- if (!md->virt_addr)
- continue;
- if (phys_addr >= md->phys_addr && phys_addr < end) {
- phys_addr += md->virt_addr - md->phys_addr;
- return (__force void __iomem *)(unsigned long)phys_addr;
- }
- }
- return NULL;
-}
-
void efi_memory_uc(u64 addr, unsigned long size)
{
unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
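
With the facility bits folded into efi.flags, the x86-private efi_enabled() removed above can give way to a generic test; a sketch of the implied generic helper (its exact home, presumably <linux/efi.h>, is an assumption):

    static inline int efi_enabled(int feature)
    {
            return test_bit(feature, &efi.flags) != 0;
    }
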
diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h
deleted file mode 100644
index 36dc7a684397..000000000000
--- a/arch/xtensa/include/asm/ftrace.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * arch/xtensa/include/asm/ftrace.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2013 Tensilica Inc.
- */
-#ifndef _XTENSA_FTRACE_H
-#define _XTENSA_FTRACE_H
-
-#include <asm/processor.h>
-
-#define HAVE_ARCH_CALLER_ADDR
-#define CALLER_ADDR0 ({ unsigned long a0, a1; \
- __asm__ __volatile__ ( \
- "mov %0, a0\n" \
- "mov %1, a1\n" \
- : "=r"(a0), "=r"(a1) : : ); \
- MAKE_PC_FROM_RA(a0, a1); })
-#ifdef CONFIG_FRAME_POINTER
-extern unsigned long return_address(unsigned level);
-#define CALLER_ADDR1 return_address(1)
-#define CALLER_ADDR2 return_address(2)
-#define CALLER_ADDR3 return_address(3)
-#else
-#define CALLER_ADDR1 (0)
-#define CALLER_ADDR2 (0)
-#define CALLER_ADDR3 (0)
-#endif
-
-#endif /* _XTENSA_FTRACE_H */
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 14c6c3a6f04b..ea9afb6904d7 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -170,8 +170,7 @@ static int __init parse_tag_fdt(const bp_tag_t *tag)
__tagtable(BP_TAG_FDT, parse_tag_fdt);
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (void *)__va(start);
initrd_end = (void *)__va(end);
@@ -223,6 +222,43 @@ static int __init parse_bootparam(const bp_tag_t* tag)
}
#ifdef CONFIG_OF
+bool __initdata dt_memory_scan = false;
+
+#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
+unsigned long xtensa_kio_paddr = XCHAL_KIO_DEFAULT_PADDR;
+EXPORT_SYMBOL(xtensa_kio_paddr);
+
+static int __init xtensa_dt_io_area(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ const __be32 *ranges;
+ int len;
+
+ if (depth > 1)
+ return 0;
+
+ if (!of_flat_dt_is_compatible(node, "simple-bus"))
+ return 0;
+
+ ranges = of_get_flat_dt_prop(node, "ranges", &len);
+ if (!ranges)
+ return 1;
+ if (len == 0)
+ return 1;
+
+ xtensa_kio_paddr = of_read_ulong(ranges+1, 1);
+ /* round down to nearest 256MB boundary */
+ xtensa_kio_paddr &= 0xf0000000;
+
+ return 1;
+}
+#else
+static int __init xtensa_dt_io_area(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ return 1;
+}
+#endif
void __init early_init_dt_add_memory_arch(u64 base, u64 size)
{
diff --git a/block/blk-core.c b/block/blk-core.c
index 5a750b18172e..cba537b18d36 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3191,7 +3191,8 @@ int __init blk_dev_init(void)
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
- WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ WQ_MEM_RECLAIM | WQ_HIGHPRI |
+ WQ_POWER_EFFICIENT, 0);
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 9c4bb8266bc8..4464c823cff2 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -144,7 +144,8 @@ void put_io_context(struct io_context *ioc)
if (atomic_long_dec_and_test(&ioc->refcount)) {
spin_lock_irqsave(&ioc->lock, flags);
if (!hlist_empty(&ioc->icq_list))
- schedule_work(&ioc->release_work);
+ queue_work(system_power_efficient_wq,
+ &ioc->release_work);
else
free_ioc = true;
spin_unlock_irqrestore(&ioc->lock, flags);
diff --git a/block/genhd.c b/block/genhd.c
index e670148c3773..4700248b32e9 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1491,9 +1491,11 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
intv = disk_events_poll_jiffies(disk);
set_timer_slack(&ev->dwork.timer, intv / 4);
if (check_now)
- queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, 0);
else if (intv)
- queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, intv);
out_unlock:
spin_unlock_irqrestore(&ev->lock, flags);
}
@@ -1536,7 +1538,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
spin_lock_irq(&ev->lock);
ev->clearing |= mask;
if (!ev->block)
- mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
+ mod_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, 0);
spin_unlock_irq(&ev->lock);
}
@@ -1629,7 +1632,8 @@ static void disk_check_events(struct disk_events *ev,
intv = disk_events_poll_jiffies(disk);
if (!ev->block && intv)
- queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, intv);
spin_unlock_irq(&ev->lock);
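
These block-layer sites opt in to the power-efficient workqueues; when the workqueue.power_efficient knob is off, the flag is a no-op and behaviour is unchanged. The same opt-in works for a driver-owned queue, mirroring the blk-core hunk (the my_* names are hypothetical):

    struct workqueue_struct *my_wq;

    my_wq = alloc_workqueue("my_driver",
                            WQ_MEM_RECLAIM | WQ_POWER_EFFICIENT, 0);
    if (!my_wq)
            return -ENOMEM;
    queue_work(my_wq, &my_work);
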
diff --git a/crypto/Kconfig b/crypto/Kconfig
index bf8148e74e73..a1eba1845367 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -179,6 +179,10 @@ config CRYPTO_ABLK_HELPER_X86
depends on X86
select CRYPTO_CRYPTD
+config CRYPTO_ABLK_HELPER
+ tristate
+ select CRYPTO_CRYPTD
+
config CRYPTO_GLUE_HELPER_X86
tristate
depends on X86
diff --git a/crypto/Makefile b/crypto/Makefile
index a8e9b0fefbe9..5d0b869b173f 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -101,3 +101,4 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
obj-$(CONFIG_XOR_BLOCKS) += xor.o
obj-$(CONFIG_ASYNC_CORE) += async_tx/
obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
+obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
new file mode 100644
index 000000000000..62568b1fc885
--- /dev/null
+++ b/crypto/ablk_helper.c
@@ -0,0 +1,150 @@
+/*
+ * Shared async block cipher helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Based on aesni-intel_glue.c by:
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <crypto/ablk_helper.h>
+#include <asm/simd.h>
+
+int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+ struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+ int err;
+
+ crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+ & CRYPTO_TFM_REQ_MASK);
+ err = crypto_ablkcipher_setkey(child, key, key_len);
+ crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+ & CRYPTO_TFM_RES_MASK);
+ return err;
+}
+EXPORT_SYMBOL_GPL(ablk_set_key);
+
+int __ablk_encrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+ struct blkcipher_desc desc;
+
+ desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+ desc.info = req->info;
+ desc.flags = 0;
+
+ return crypto_blkcipher_crt(desc.tfm)->encrypt(
+ &desc, req->dst, req->src, req->nbytes);
+}
+EXPORT_SYMBOL_GPL(__ablk_encrypt);
+
+int ablk_encrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (!may_use_simd()) {
+ struct ablkcipher_request *cryptd_req =
+ ablkcipher_request_ctx(req);
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+ return crypto_ablkcipher_encrypt(cryptd_req);
+ } else {
+ return __ablk_encrypt(req);
+ }
+}
+EXPORT_SYMBOL_GPL(ablk_encrypt);
+
+int ablk_decrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (!may_use_simd()) {
+ struct ablkcipher_request *cryptd_req =
+ ablkcipher_request_ctx(req);
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+ return crypto_ablkcipher_decrypt(cryptd_req);
+ } else {
+ struct blkcipher_desc desc;
+
+ desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+ desc.info = req->info;
+ desc.flags = 0;
+
+ return crypto_blkcipher_crt(desc.tfm)->decrypt(
+ &desc, req->dst, req->src, req->nbytes);
+ }
+}
+EXPORT_SYMBOL_GPL(ablk_decrypt);
+
+void ablk_exit(struct crypto_tfm *tfm)
+{
+ struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+EXPORT_SYMBOL_GPL(ablk_exit);
+
+int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
+{
+ struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+
+ ctx->cryptd_tfm = cryptd_tfm;
+ tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+ crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ablk_init_common);
+
+int ablk_init(struct crypto_tfm *tfm)
+{
+ char drv_name[CRYPTO_MAX_ALG_NAME];
+
+ snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+ crypto_tfm_alg_driver_name(tfm));
+
+ return ablk_init_common(tfm, drv_name);
+}
+EXPORT_SYMBOL_GPL(ablk_init);
+
+MODULE_LICENSE("GPL");
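
A SIMD cipher glue module then points its algorithm at these helpers; a compressed sketch modeled on the aesni-intel glue this file was extracted from (the my_* names and the priority are hypothetical):

    static struct crypto_alg my_ablk_alg = {
            .cra_name        = "cbc(aes)",
            .cra_driver_name = "cbc-aes-mysimd",
            .cra_priority    = 400,
            .cra_flags       = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
            .cra_blocksize   = AES_BLOCK_SIZE,
            .cra_ctxsize     = sizeof(struct async_helper_ctx),
            .cra_type        = &crypto_ablkcipher_type,
            .cra_module      = THIS_MODULE,
            .cra_init        = ablk_init,   /* looks up "__driver-cbc-aes-mysimd" */
            .cra_exit        = ablk_exit,
            .cra_u = {
                    .ablkcipher = {
                            .min_keysize = AES_MIN_KEY_SIZE,
                            .max_keysize = AES_MAX_KEY_SIZE,
                            .ivsize      = AES_BLOCK_SIZE,
                            .setkey      = ablk_set_key,
                            .encrypt     = ablk_encrypt,
                            .decrypt     = ablk_decrypt,
                    },
            },
    };
    /* registered with crypto_register_alg(&my_ablk_alg); */
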
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index a79e7e9ab86e..0122bec38564 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -70,14 +70,12 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len)
return max(start, end_page);
}
-static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm,
- struct blkcipher_walk *walk,
+static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk,
unsigned int bsize)
{
u8 *addr;
- unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
- addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+ addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
addr = blkcipher_get_spot(addr, bsize);
scatterwalk_copychunks(addr, &walk->out, bsize, 1);
return bsize;
@@ -105,7 +103,6 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk,
int blkcipher_walk_done(struct blkcipher_desc *desc,
struct blkcipher_walk *walk, int err)
{
- struct crypto_blkcipher *tfm = desc->tfm;
unsigned int nbytes = 0;
if (likely(err >= 0)) {
@@ -117,7 +114,7 @@ int blkcipher_walk_done(struct blkcipher_desc *desc,
err = -EINVAL;
goto err;
} else
- n = blkcipher_done_slow(tfm, walk, n);
+ n = blkcipher_done_slow(walk, n);
nbytes = walk->total - n;
err = 0;
@@ -136,7 +133,7 @@ err:
}
if (walk->iv != desc->info)
- memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm));
+ memcpy(desc->info, walk->iv, walk->ivsize);
if (walk->buffer != walk->page)
kfree(walk->buffer);
if (walk->page)
@@ -226,22 +223,20 @@ static inline int blkcipher_next_fast(struct blkcipher_desc *desc,
static int blkcipher_walk_next(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
- struct crypto_blkcipher *tfm = desc->tfm;
- unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
unsigned int bsize;
unsigned int n;
int err;
n = walk->total;
- if (unlikely(n < crypto_blkcipher_blocksize(tfm))) {
+ if (unlikely(n < walk->cipher_blocksize)) {
desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
return blkcipher_walk_done(desc, walk, -EINVAL);
}
walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
BLKCIPHER_WALK_DIFF);
- if (!scatterwalk_aligned(&walk->in, alignmask) ||
- !scatterwalk_aligned(&walk->out, alignmask)) {
+ if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
+ !scatterwalk_aligned(&walk->out, walk->alignmask)) {
walk->flags |= BLKCIPHER_WALK_COPY;
if (!walk->page) {
walk->page = (void *)__get_free_page(GFP_ATOMIC);
@@ -250,12 +245,12 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
}
}
- bsize = min(walk->blocksize, n);
+ bsize = min(walk->walk_blocksize, n);
n = scatterwalk_clamp(&walk->in, n);
n = scatterwalk_clamp(&walk->out, n);
if (unlikely(n < bsize)) {
- err = blkcipher_next_slow(desc, walk, bsize, alignmask);
+ err = blkcipher_next_slow(desc, walk, bsize, walk->alignmask);
goto set_phys_lowmem;
}
@@ -277,28 +272,26 @@ set_phys_lowmem:
return err;
}
-static inline int blkcipher_copy_iv(struct blkcipher_walk *walk,
- struct crypto_blkcipher *tfm,
- unsigned int alignmask)
+static inline int blkcipher_copy_iv(struct blkcipher_walk *walk)
{
- unsigned bs = walk->blocksize;
- unsigned int ivsize = crypto_blkcipher_ivsize(tfm);
- unsigned aligned_bs = ALIGN(bs, alignmask + 1);
- unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) -
- (alignmask + 1);
+ unsigned bs = walk->walk_blocksize;
+ unsigned aligned_bs = ALIGN(bs, walk->alignmask + 1);
+ unsigned int size = aligned_bs * 2 +
+ walk->ivsize + max(aligned_bs, walk->ivsize) -
+ (walk->alignmask + 1);
u8 *iv;
- size += alignmask & ~(crypto_tfm_ctx_alignment() - 1);
+ size += walk->alignmask & ~(crypto_tfm_ctx_alignment() - 1);
walk->buffer = kmalloc(size, GFP_ATOMIC);
if (!walk->buffer)
return -ENOMEM;
- iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+ iv = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
iv = blkcipher_get_spot(iv, bs) + aligned_bs;
iv = blkcipher_get_spot(iv, bs) + aligned_bs;
- iv = blkcipher_get_spot(iv, ivsize);
+ iv = blkcipher_get_spot(iv, walk->ivsize);
- walk->iv = memcpy(iv, walk->iv, ivsize);
+ walk->iv = memcpy(iv, walk->iv, walk->ivsize);
return 0;
}
@@ -306,7 +299,10 @@ int blkcipher_walk_virt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
walk->flags &= ~BLKCIPHER_WALK_PHYS;
- walk->blocksize = crypto_blkcipher_blocksize(desc->tfm);
+ walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+ walk->cipher_blocksize = walk->walk_blocksize;
+ walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+ walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
return blkcipher_walk_first(desc, walk);
}
EXPORT_SYMBOL_GPL(blkcipher_walk_virt);
@@ -315,7 +311,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
walk->flags |= BLKCIPHER_WALK_PHYS;
- walk->blocksize = crypto_blkcipher_blocksize(desc->tfm);
+ walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+ walk->cipher_blocksize = walk->walk_blocksize;
+ walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+ walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
return blkcipher_walk_first(desc, walk);
}
EXPORT_SYMBOL_GPL(blkcipher_walk_phys);
@@ -323,9 +322,6 @@ EXPORT_SYMBOL_GPL(blkcipher_walk_phys);
static int blkcipher_walk_first(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
- struct crypto_blkcipher *tfm = desc->tfm;
- unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
-
if (WARN_ON_ONCE(in_irq()))
return -EDEADLK;
@@ -335,8 +331,8 @@ static int blkcipher_walk_first(struct blkcipher_desc *desc,
walk->buffer = NULL;
walk->iv = desc->info;
- if (unlikely(((unsigned long)walk->iv & alignmask))) {
- int err = blkcipher_copy_iv(walk, tfm, alignmask);
+ if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
+ int err = blkcipher_copy_iv(walk);
if (err)
return err;
}
@@ -353,11 +349,28 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
unsigned int blocksize)
{
walk->flags &= ~BLKCIPHER_WALK_PHYS;
- walk->blocksize = blocksize;
+ walk->walk_blocksize = blocksize;
+ walk->cipher_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+ walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+ walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
return blkcipher_walk_first(desc, walk);
}
EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block);
+int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk,
+ struct crypto_aead *tfm,
+ unsigned int blocksize)
+{
+ walk->flags &= ~BLKCIPHER_WALK_PHYS;
+ walk->walk_blocksize = blocksize;
+ walk->cipher_blocksize = crypto_aead_blocksize(tfm);
+ walk->ivsize = crypto_aead_ivsize(tfm);
+ walk->alignmask = crypto_aead_alignmask(tfm);
+ return blkcipher_walk_first(desc, walk);
+}
+EXPORT_SYMBOL_GPL(blkcipher_aead_walk_virt_block);
+
static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 9953a42809ec..d27feb5460f3 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -166,4 +166,6 @@ source "drivers/ipack/Kconfig"
source "drivers/reset/Kconfig"
+source "drivers/gator/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 130abc1dfd65..092a62e79688 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -152,3 +152,5 @@ obj-$(CONFIG_IIO) += iio/
obj-$(CONFIG_VME_BUS) += vme/
obj-$(CONFIG_IPACK_BUS) += ipack/
obj-$(CONFIG_NTB) += ntb/
+
+obj-$(CONFIG_GATOR) += gator/
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 07abd9d76f7f..4fe77b887457 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -187,6 +187,14 @@ config GENERIC_CPU_DEVICES
bool
default n
+config HAVE_CPU_AUTOPROBE
+ def_bool ARCH_HAS_CPU_AUTOPROBE
+
+config GENERIC_CPU_AUTOPROBE
+ bool
+ depends on !ARCH_HAS_CPU_AUTOPROBE
+ select HAVE_CPU_AUTOPROBE
+
config SOC_BUS
bool
@@ -202,11 +210,9 @@ config DMA_SHARED_BUFFER
APIs extension; the file's descriptor can then be passed on to other
driver.
-config CMA
- bool "Contiguous Memory Allocator"
- depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK
- select MIGRATION
- select MEMORY_ISOLATION
+config DMA_CMA
+ bool "DMA Contiguous Memory Allocator"
+ depends on HAVE_DMA_CONTIGUOUS && CMA
help
This enables the Contiguous Memory Allocator which allows drivers
to allocate big physically-contiguous blocks of memory for use with
@@ -215,17 +221,7 @@ config CMA
For more information see <include/linux/dma-contiguous.h>.
If unsure, say "n".
-if CMA
-
-config CMA_DEBUG
- bool "CMA debug messages (DEVELOPMENT)"
- depends on DEBUG_KERNEL
- help
- Turns on debug messages in CMA. This produces KERN_DEBUG
- messages for every CMA call as well as various messages while
- processing calls such as dma_alloc_from_contiguous().
- This option does not affect warning and error messages.
-
+if DMA_CMA
comment "Default contiguous memory area size:"
config CMA_SIZE_MBYTES
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 4e22ce3ed73d..5d93bb519753 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -6,7 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \
attribute_container.o transport_class.o \
topology.o
obj-$(CONFIG_DEVTMPFS) += devtmpfs.o
-obj-$(CONFIG_CMA) += dma-contiguous.o
+obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
obj-y += power/
obj-$(CONFIG_HAS_DMA) += dma-mapping.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 3d48fc887ef4..607efe6b7dc8 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -13,6 +13,9 @@
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/percpu.h>
+#include <linux/acpi.h>
+#include <linux/of.h>
+#include <linux/cpufeature.h>
#include "base.h"
@@ -260,6 +263,45 @@ static void cpu_device_release(struct device *dev)
*/
}
+#ifdef CONFIG_HAVE_CPU_AUTOPROBE
+#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+static ssize_t print_cpu_modalias(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t n;
+ u32 i;
+
+ n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:",
+ CPU_FEATURE_TYPEVAL);
+
+ for (i = 0; i < MAX_CPU_FEATURES; i++)
+ if (cpu_have_feature(i)) {
+ if (PAGE_SIZE < n + sizeof(",XXXX\n")) {
+ WARN(1, "CPU features overflow page\n");
+ break;
+ }
+ n += sprintf(&buf[n], ",%04X", i);
+ }
+ buf[n++] = '\n';
+ return n;
+}
+#else
+#define print_cpu_modalias arch_print_cpu_modalias
+#endif
+
+static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (buf) {
+ print_cpu_modalias(NULL, NULL, buf);
+ add_uevent_var(env, "MODALIAS=%s", buf);
+ kfree(buf);
+ }
+ return 0;
+}
+#endif
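+
+/*
+ * Illustrative sketch only: with hypothetical features 1 and 5 set and
+ * CPU_FEATURE_TYPEVAL describing the CPU type, the loop above emits a
+ * uevent of the form
+ *
+ *	MODALIAS=cpu:type:<type>:feature:,0001,0005
+ *
+ * which module aliases built from CPU feature tables can match against.
+ */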
+
/*
* register_cpu - Setup a sysfs device for a CPU.
* @cpu - cpu->hotpluggable field set to 1 will generate a control file in
@@ -277,8 +319,8 @@ int __cpuinit register_cpu(struct cpu *cpu, int num)
cpu->dev.id = num;
cpu->dev.bus = &cpu_subsys;
cpu->dev.release = cpu_device_release;
-#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
- cpu->dev.bus->uevent = arch_cpu_uevent;
+#ifdef CONFIG_HAVE_CPU_AUTOPROBE
+ cpu->dev.bus->uevent = cpu_uevent;
#endif
error = device_register(&cpu->dev);
if (!error && cpu->hotpluggable)
@@ -307,8 +349,8 @@ struct device *get_cpu_device(unsigned cpu)
}
EXPORT_SYMBOL_GPL(get_cpu_device);
-#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
-static DEVICE_ATTR(modalias, 0444, arch_print_cpu_modalias, NULL);
+#ifdef CONFIG_HAVE_CPU_AUTOPROBE
+static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL);
#endif
static struct attribute *cpu_root_attrs[] = {
@@ -321,7 +363,7 @@ static struct attribute *cpu_root_attrs[] = {
&cpu_attrs[2].attr.attr,
&dev_attr_kernel_max.attr,
&dev_attr_offline.attr,
-#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
+#ifdef CONFIG_HAVE_CPU_AUTOPROBE
&dev_attr_modalias.attr,
#endif
NULL
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 507379e7b763..545c4de412c3 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -91,7 +91,8 @@ static __always_inline struct devres * alloc_dr(dr_release_t release,
if (unlikely(!dr))
return NULL;
- memset(dr, 0, tot_size);
+ memset(dr, 0, offsetof(struct devres, data));
+
INIT_LIST_HEAD(&dr->node.entry);
dr->node.release = release;
return dr;
@@ -110,7 +111,7 @@ void * __devres_alloc(dr_release_t release, size_t size, gfp_t gfp,
{
struct devres *dr;
- dr = alloc_dr(release, size, gfp);
+ dr = alloc_dr(release, size, gfp | __GFP_ZERO);
if (unlikely(!dr))
return NULL;
set_node_dbginfo(&dr->node, name, size);
@@ -135,7 +136,7 @@ void * devres_alloc(dr_release_t release, size_t size, gfp_t gfp)
{
struct devres *dr;
- dr = alloc_dr(release, size, gfp);
+ dr = alloc_dr(release, size, gfp | __GFP_ZERO);
if (unlikely(!dr))
return NULL;
return dr->data;
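+
+/*
+ * Illustrative sketch of the zeroing split above, using a simplified
+ * layout: the header is cleared explicitly up to offsetof(..., data),
+ * while the payload is cleared by the allocator only when the caller
+ * passes __GFP_ZERO. Names here are hypothetical.
+ */
+struct fake_devres {
+	void *release;
+	char data[];
+};
+
+static void *fake_alloc_dr(size_t size, gfp_t gfp)
+{
+	struct fake_devres *dr = kmalloc(sizeof(*dr) + size, gfp);
+
+	if (!dr)
+		return NULL;
+	/* header only; dr->data is zeroed iff gfp includes __GFP_ZERO */
+	memset(dr, 0, offsetof(struct fake_devres, data));
+	return dr->data;
+}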
@@ -745,58 +746,62 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
EXPORT_SYMBOL_GPL(devm_remove_action);
/*
- * Managed kzalloc/kfree
+ * Managed kmalloc/kfree
*/
-static void devm_kzalloc_release(struct device *dev, void *res)
+static void devm_kmalloc_release(struct device *dev, void *res)
{
/* noop */
}
-static int devm_kzalloc_match(struct device *dev, void *res, void *data)
+static int devm_kmalloc_match(struct device *dev, void *res, void *data)
{
return res == data;
}
/**
- * devm_kzalloc - Resource-managed kzalloc
+ * devm_kmalloc - Resource-managed kmalloc
* @dev: Device to allocate memory for
* @size: Allocation size
* @gfp: Allocation gfp flags
*
- * Managed kzalloc. Memory allocated with this function is
+ * Managed kmalloc. Memory allocated with this function is
* automatically freed on driver detach. Like all other devres
* resources, guaranteed alignment is unsigned long long.
*
* RETURNS:
* Pointer to allocated memory on success, NULL on failure.
*/
-void * devm_kzalloc(struct device *dev, size_t size, gfp_t gfp)
+void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
{
struct devres *dr;
/* use raw alloc_dr for kmalloc caller tracing */
- dr = alloc_dr(devm_kzalloc_release, size, gfp);
+ dr = alloc_dr(devm_kmalloc_release, size, gfp);
if (unlikely(!dr))
return NULL;
+	/*
+	 * The devres debug name stays "devm_kzalloc_release" for
+	 * historical reasons: the initial implementation only supported
+	 * kzalloc.
+	 */
set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
devres_add(dev, dr->data);
return dr->data;
}
-EXPORT_SYMBOL_GPL(devm_kzalloc);
+EXPORT_SYMBOL_GPL(devm_kmalloc);
/**
* devm_kfree - Resource-managed kfree
* @dev: Device this memory belongs to
* @p: Memory to free
*
- * Free memory allocated with devm_kzalloc().
+ * Free memory allocated with devm_kmalloc().
*/
void devm_kfree(struct device *dev, void *p)
{
int rc;
- rc = devres_destroy(dev, devm_kzalloc_release, devm_kzalloc_match, p);
+ rc = devres_destroy(dev, devm_kmalloc_release, devm_kmalloc_match, p);
WARN_ON(rc);
}
EXPORT_SYMBOL_GPL(devm_kfree);
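+
+/*
+ * Illustrative usage sketch: "foo" names are hypothetical, and
+ * devm_kzalloc() is assumed to remain available as a zeroing wrapper
+ * around devm_kmalloc().
+ */
+struct foo_ctx {
+	void __iomem *regs;	/* hypothetical per-device state */
+};
+
+static int foo_probe(struct platform_device *pdev)
+{
+	struct foo_ctx *ctx;
+
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	/* ctx is freed automatically on driver detach */
+	return 0;
+}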
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 0ca54421ce97..99802d6f3c60 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -96,7 +96,7 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
#endif
/**
- * dma_contiguous_reserve() - reserve area for contiguous memory handling
+ * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
* @limit: End address of the reserved memory (optional, 0 for any).
*
* This function reserves memory from early allocator. It should be
@@ -124,22 +124,29 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
#endif
}
- if (selected_size) {
+ if (selected_size && !dma_contiguous_default_area) {
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);
- dma_declare_contiguous(NULL, selected_size, 0, limit);
+ dma_contiguous_reserve_area(selected_size, 0, limit,
+ &dma_contiguous_default_area);
}
};
static DEFINE_MUTEX(cma_mutex);
-static __init int cma_activate_area(unsigned long base_pfn, unsigned long count)
+static int __init cma_activate_area(struct cma *cma)
{
- unsigned long pfn = base_pfn;
- unsigned i = count >> pageblock_order;
+ int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
+ unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+ unsigned i = cma->count >> pageblock_order;
struct zone *zone;
+ cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+ if (!cma->bitmap)
+ return -ENOMEM;
+
WARN_ON_ONCE(!pfn_valid(pfn));
zone = page_zone(pfn_to_page(pfn));
@@ -153,92 +160,53 @@ static __init int cma_activate_area(unsigned long base_pfn, unsigned long count)
}
init_cma_reserved_pageblock(pfn_to_page(base_pfn));
} while (--i);
- return 0;
-}
-
-static __init struct cma *cma_create_area(unsigned long base_pfn,
- unsigned long count)
-{
- int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
- struct cma *cma;
- int ret = -ENOMEM;
-
- pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count);
-
- cma = kmalloc(sizeof *cma, GFP_KERNEL);
- if (!cma)
- return ERR_PTR(-ENOMEM);
-
- cma->base_pfn = base_pfn;
- cma->count = count;
- cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!cma->bitmap)
- goto no_mem;
-
- ret = cma_activate_area(base_pfn, count);
- if (ret)
- goto error;
-
- pr_debug("%s: returned %p\n", __func__, (void *)cma);
- return cma;
-
-error:
- kfree(cma->bitmap);
-no_mem:
- kfree(cma);
- return ERR_PTR(ret);
+ return 0;
}
-static struct cma_reserved {
- phys_addr_t start;
- unsigned long size;
- struct device *dev;
-} cma_reserved[MAX_CMA_AREAS] __initdata;
-static unsigned cma_reserved_count __initdata;
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
static int __init cma_init_reserved_areas(void)
{
- struct cma_reserved *r = cma_reserved;
- unsigned i = cma_reserved_count;
-
- pr_debug("%s()\n", __func__);
+ int i;
- for (; i; --i, ++r) {
- struct cma *cma;
- cma = cma_create_area(PFN_DOWN(r->start),
- r->size >> PAGE_SHIFT);
- if (!IS_ERR(cma))
- dev_set_cma_area(r->dev, cma);
+ for (i = 0; i < cma_area_count; i++) {
+ int ret = cma_activate_area(&cma_areas[i]);
+ if (ret)
+ return ret;
}
+
return 0;
}
core_initcall(cma_init_reserved_areas);
/**
- * dma_declare_contiguous() - reserve area for contiguous memory handling
- * for particular device
- * @dev: Pointer to device structure.
- * @size: Size of the reserved memory.
- * @base: Start address of the reserved memory (optional, 0 for any).
+ * dma_contiguous_reserve_area() - reserve custom contiguous area
+ * @size: Size of the reserved area (in bytes),
+ * @base: Base address of the reserved area optional, use 0 for any
* @limit: End address of the reserved memory (optional, 0 for any).
+ * @res_cma: Pointer to store the created cma region.
*
- * This function reserves memory for specified device. It should be
- * called by board specific code when early allocator (memblock or bootmem)
- * is still activate.
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows to create custom reserved areas for specific
+ * devices.
*/
-int __init dma_declare_contiguous(struct device *dev, phys_addr_t size,
- phys_addr_t base, phys_addr_t limit)
+int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
+ phys_addr_t limit, struct cma **res_cma)
{
- struct cma_reserved *r = &cma_reserved[cma_reserved_count];
+ struct cma *cma = &cma_areas[cma_area_count];
phys_addr_t alignment;
+ int ret = 0;
pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
(unsigned long)size, (unsigned long)base,
(unsigned long)limit);
/* Sanity checks */
- if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) {
+ if (cma_area_count == ARRAY_SIZE(cma_areas)) {
pr_err("Not enough slots for CMA reserved regions!\n");
return -ENOSPC;
}
@@ -256,7 +224,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size,
if (base) {
if (memblock_is_region_reserved(base, size) ||
memblock_reserve(base, size) < 0) {
- base = -EBUSY;
+ ret = -EBUSY;
goto err;
}
} else {
@@ -266,7 +234,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size,
*/
phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
if (!addr) {
- base = -ENOMEM;
+ ret = -ENOMEM;
goto err;
} else {
base = addr;
@@ -277,10 +245,11 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size,
* Each reserved area must be initialised later, when more kernel
* subsystems (like slab allocator) are available.
*/
- r->start = base;
- r->size = size;
- r->dev = dev;
- cma_reserved_count++;
+ cma->base_pfn = PFN_DOWN(base);
+ cma->count = size >> PAGE_SHIFT;
+ *res_cma = cma;
+ cma_area_count++;
+
pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
(unsigned long)base);
@@ -289,7 +258,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size,
return 0;
err:
pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
- return base;
+ return ret;
}
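+
+/*
+ * Illustrative call-flow sketch for arch code; the size, limit and
+ * "foo_cma" pointer below are hypothetical.
+ */
+static struct cma *foo_cma;
+
+static void __init foo_reserve_cma(void)
+{
+	/* 16 MiB, any base address, anywhere below 4 GiB */
+	if (dma_contiguous_reserve_area(SZ_16M, 0, SZ_4G, &foo_cma))
+		pr_warn("foo: CMA reservation failed\n");
+}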
/**
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 5a9b6569dd74..220ec3a3ca75 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -28,6 +28,8 @@
#include <linux/sched.h>
#include <linux/async.h>
#include <linux/suspend.h>
+#include <trace/events/power.h>
+#include <linux/cpufreq.h>
#include <linux/cpuidle.h>
#include "../base.h"
#include "power.h"
@@ -713,6 +715,8 @@ void dpm_resume(pm_message_t state)
mutex_unlock(&dpm_list_mtx);
async_synchronize_full();
dpm_show_time(starttime, state, NULL);
+
+ cpufreq_resume();
}
/**
@@ -1177,6 +1181,8 @@ int dpm_suspend(pm_message_t state)
might_sleep();
+ cpufreq_suspend();
+
mutex_lock(&dpm_list_mtx);
pm_transition = state;
async_error = 0;
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index b05ecab915c4..5286e2d333b0 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -26,4 +26,11 @@ config OMAP_INTERCONNECT
help
Driver to enable OMAP interconnect error handling driver.
+
+config ARM_CCI
+ bool "ARM CCI driver support"
+ depends on ARM
+ help
+ Driver supporting the CCI cache coherent interconnect for ARM
+ platforms.
endmenu
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index 3c7b53c12091..670cea443802 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_OMAP_OCP2SCP) += omap-ocp2scp.o
# Interconnect bus driver for OMAP SoCs.
obj-$(CONFIG_OMAP_INTERCONNECT) += omap_l3_smx.o omap_l3_noc.o
+# CCI cache coherent interconnect for ARM platforms
+obj-$(CONFIG_ARM_CCI) += arm-cci.o
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
new file mode 100644
index 000000000000..d9d954eb0fa0
--- /dev/null
+++ b/drivers/bus/arm-cci.c
@@ -0,0 +1,947 @@
+/*
+ * CCI cache coherent interconnect driver
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/arm-cci.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+
+#include <asm/cacheflush.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+#include <asm/smp_plat.h>
+
+#define DRIVER_NAME "CCI"
+
+#define CCI_PORT_CTRL 0x0
+#define CCI_CTRL_STATUS 0xc
+
+#define CCI_ENABLE_SNOOP_REQ 0x1
+#define CCI_ENABLE_DVM_REQ 0x2
+#define CCI_ENABLE_REQ (CCI_ENABLE_SNOOP_REQ | CCI_ENABLE_DVM_REQ)
+
+struct cci_nb_ports {
+ unsigned int nb_ace;
+ unsigned int nb_ace_lite;
+};
+
+enum cci_ace_port_type {
+ ACE_INVALID_PORT = 0x0,
+ ACE_PORT,
+ ACE_LITE_PORT,
+};
+
+struct cci_ace_port {
+ void __iomem *base;
+ unsigned long phys;
+ enum cci_ace_port_type type;
+ struct device_node *dn;
+};
+
+static struct cci_ace_port *ports;
+static unsigned int nb_cci_ports;
+
+static void __iomem *cci_ctrl_base;
+static unsigned long cci_ctrl_phys;
+
+#ifdef CONFIG_HW_PERF_EVENTS
+
+static void __iomem *cci_pmu_base;
+
+#define CCI400_PMCR 0x0100
+
+#define CCI400_PMU_CYCLE_CNTR_BASE 0x0000
+#define CCI400_PMU_CNTR_BASE(idx) (CCI400_PMU_CYCLE_CNTR_BASE + (idx) * 0x1000)
+
+#define CCI400_PMCR_CEN 0x00000001
+#define CCI400_PMCR_RST 0x00000002
+#define CCI400_PMCR_CCR 0x00000004
+#define CCI400_PMCR_CCD 0x00000008
+#define CCI400_PMCR_EX 0x00000010
+#define CCI400_PMCR_DP 0x00000020
+#define CCI400_PMCR_NCNT_MASK 0x0000F800
+#define CCI400_PMCR_NCNT_SHIFT 11
+
+#define CCI400_PMU_EVT_SEL 0x000
+#define CCI400_PMU_CNTR 0x004
+#define CCI400_PMU_CNTR_CTRL 0x008
+#define CCI400_PMU_OVERFLOW 0x00C
+
+#define CCI400_PMU_OVERFLOW_FLAG 1
+
+enum cci400_perf_events {
+ CCI400_PMU_CYCLES = 0xFF
+};
+
+#define CCI400_PMU_EVENT_MASK 0xff
+#define CCI400_PMU_EVENT_SOURCE(event) ((event >> 5) & 0x7)
+#define CCI400_PMU_EVENT_CODE(event) (event & 0x1f)
+
+#define CCI400_PMU_EVENT_SOURCE_S0 0
+#define CCI400_PMU_EVENT_SOURCE_S4 4
+#define CCI400_PMU_EVENT_SOURCE_M0 5
+#define CCI400_PMU_EVENT_SOURCE_M2 7
+
+#define CCI400_PMU_EVENT_SLAVE_MIN 0x0
+#define CCI400_PMU_EVENT_SLAVE_MAX 0x13
+
+#define CCI400_PMU_EVENT_MASTER_MIN 0x14
+#define CCI400_PMU_EVENT_MASTER_MAX 0x1A
+
+#define CCI400_PMU_MAX_HW_EVENTS 5 /* CCI PMU has 4 counters + 1 cycle counter */
+
+#define CCI400_PMU_CYCLE_COUNTER_IDX 0
+#define CCI400_PMU_COUNTER0_IDX 1
+#define CCI400_PMU_COUNTER_LAST(cci_pmu) (CCI400_PMU_CYCLE_COUNTER_IDX + cci_pmu->num_events - 1)
+
+
+static struct perf_event *events[CCI400_PMU_MAX_HW_EVENTS];
+static unsigned long used_mask[BITS_TO_LONGS(CCI400_PMU_MAX_HW_EVENTS)];
+static struct pmu_hw_events cci_hw_events = {
+ .events = events,
+ .used_mask = used_mask,
+};
+
+static int cci_pmu_validate_hw_event(u8 hw_event)
+{
+ u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event);
+ u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event);
+
+ if (ev_source <= CCI400_PMU_EVENT_SOURCE_S4 &&
+ ev_code <= CCI400_PMU_EVENT_SLAVE_MAX)
+ return hw_event;
+ else if (CCI400_PMU_EVENT_SOURCE_M0 <= ev_source &&
+ ev_source <= CCI400_PMU_EVENT_SOURCE_M2 &&
+ CCI400_PMU_EVENT_MASTER_MIN <= ev_code &&
+ ev_code <= CCI400_PMU_EVENT_MASTER_MAX)
+ return hw_event;
+
+ return -EINVAL;
+}
+
+static inline int cci_pmu_counter_is_valid(struct arm_pmu *cci_pmu, int idx)
+{
+ return CCI400_PMU_CYCLE_COUNTER_IDX <= idx &&
+ idx <= CCI400_PMU_COUNTER_LAST(cci_pmu);
+}
+
+static inline u32 cci_pmu_read_register(int idx, unsigned int offset)
+{
+ return readl_relaxed(cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset);
+}
+
+static inline void cci_pmu_write_register(u32 value, int idx, unsigned int offset)
+{
+ return writel_relaxed(value, cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset);
+}
+
+static inline void cci_pmu_disable_counter(int idx)
+{
+ cci_pmu_write_register(0, idx, CCI400_PMU_CNTR_CTRL);
+}
+
+static inline void cci_pmu_enable_counter(int idx)
+{
+ cci_pmu_write_register(1, idx, CCI400_PMU_CNTR_CTRL);
+}
+
+static inline void cci_pmu_select_event(int idx, unsigned long event)
+{
+ event &= CCI400_PMU_EVENT_MASK;
+ cci_pmu_write_register(event, idx, CCI400_PMU_EVT_SEL);
+}
+
+static u32 cci_pmu_get_max_counters(void)
+{
+ u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI400_PMCR) &
+ CCI400_PMCR_NCNT_MASK) >> CCI400_PMCR_NCNT_SHIFT;
+
+ /* add 1 for cycle counter */
+ return n_cnts + 1;
+}
+
+static struct pmu_hw_events *cci_pmu_get_hw_events(void)
+{
+ return &cci_hw_events;
+}
+
+static int cci_pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event)
+{
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hw_event = &event->hw;
+ unsigned long cci_event = hw_event->config_base & CCI400_PMU_EVENT_MASK;
+ int idx;
+
+ if (cci_event == CCI400_PMU_CYCLES) {
+ if (test_and_set_bit(CCI400_PMU_CYCLE_COUNTER_IDX, hw->used_mask))
+ return -EAGAIN;
+
+ return CCI400_PMU_CYCLE_COUNTER_IDX;
+ }
+
+ for (idx = CCI400_PMU_COUNTER0_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); ++idx) {
+ if (!test_and_set_bit(idx, hw->used_mask))
+ return idx;
+ }
+
+ /* No counters available */
+ return -EAGAIN;
+}
+
+static int cci_pmu_map_event(struct perf_event *event)
+{
+ int mapping;
+ u8 config = event->attr.config & CCI400_PMU_EVENT_MASK;
+
+ if (event->attr.type < PERF_TYPE_MAX)
+ return -ENOENT;
+
+ /* 0xff is used to represent CCI Cycles */
+ if (config == 0xff)
+ mapping = config;
+ else
+ mapping = cci_pmu_validate_hw_event(config);
+
+ return mapping;
+}
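+
+/*
+ * Illustrative sketch of the encoding validated above: bits [7:5]
+ * select the source interface, bits [4:0] the event code. The values
+ * in the example are hypothetical.
+ */
+static inline u8 cci400_make_event(u8 source, u8 code)
+{
+	return ((source & 0x7) << 5) | (code & 0x1f);
+}
+/* e.g. cci400_make_event(3, 0x2) == 0x62: a valid slave interface event */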
+
+static int cci_pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
+{
+ int irq, err, i = 0;
+ struct platform_device *pmu_device = cci_pmu->plat_device;
+
+ if (unlikely(!pmu_device))
+ return -ENODEV;
+
+	/*
+	 * CCI exports 6 interrupts - 1 nERRORIRQ + 5 nEVNTCNTOVERFLOW (PMU).
+	 * nERRORIRQ will be handled by secure firmware on TC2, so we
+	 * assume that all CCI interrupts listed in the Linux device
+	 * tree are PMU interrupts.
+	 *
+	 * The following code should then be able to handle different
+	 * routing of the CCI PMU interrupts.
+	 */
+ while ((irq = platform_get_irq(pmu_device, i)) > 0) {
+ err = request_irq(irq, handler, 0, "arm-cci-pmu", cci_pmu);
+ if (err) {
+ dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
+ irq);
+ return err;
+ }
+ i++;
+ }
+
+ return 0;
+}
+
+static irqreturn_t cci_pmu_handle_irq(int irq_num, void *dev)
+{
+ struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct perf_sample_data data;
+ struct pt_regs *regs;
+ int idx;
+
+ regs = get_irq_regs();
+
+	/*
+	 * Iterate over counters and update the corresponding perf events.
+	 * This should work regardless of whether we have per-counter
+	 * overflow interrupt or a combined overflow interrupt.
+	 */
+ for (idx = CCI400_PMU_CYCLE_COUNTER_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); idx++) {
+ struct perf_event *event = events->events[idx];
+ struct hw_perf_event *hw_counter;
+
+ if (!event)
+ continue;
+
+ hw_counter = &event->hw;
+
+ /* Did this counter overflow? */
+ if (!(cci_pmu_read_register(idx, CCI400_PMU_OVERFLOW) & CCI400_PMU_OVERFLOW_FLAG))
+ continue;
+ cci_pmu_write_register(CCI400_PMU_OVERFLOW_FLAG, idx, CCI400_PMU_OVERFLOW);
+
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hw_counter->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cci_pmu->disable(event);
+ }
+
+ irq_work_run();
+ return IRQ_HANDLED;
+}
+
+static void cci_pmu_free_irq(struct arm_pmu *cci_pmu)
+{
+ int irq, i = 0;
+ struct platform_device *pmu_device = cci_pmu->plat_device;
+
+ while ((irq = platform_get_irq(pmu_device, i)) > 0) {
+ free_irq(irq, cci_pmu);
+ i++;
+ }
+}
+
+static void cci_pmu_enable_event(struct perf_event *event)
+{
+ unsigned long flags;
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct hw_perf_event *hw_counter = &event->hw;
+ int idx = hw_counter->idx;
+
+ if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+ dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Configure the event to count, unless you are counting cycles */
+ if (idx != CCI400_PMU_CYCLE_COUNTER_IDX)
+ cci_pmu_select_event(idx, hw_counter->config_base);
+
+ cci_pmu_enable_counter(idx);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void cci_pmu_disable_event(struct perf_event *event)
+{
+ unsigned long flags;
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct hw_perf_event *hw_counter = &event->hw;
+ int idx = hw_counter->idx;
+
+ if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+ dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ cci_pmu_disable_counter(idx);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void cci_pmu_start(struct arm_pmu *cci_pmu)
+{
+ u32 val;
+ unsigned long flags;
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Enable all the PMU counters. */
+ val = readl(cci_ctrl_base + CCI400_PMCR) | CCI400_PMCR_CEN;
+ writel(val, cci_ctrl_base + CCI400_PMCR);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void cci_pmu_stop(struct arm_pmu *cci_pmu)
+{
+ u32 val;
+ unsigned long flags;
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Disable all the PMU counters. */
+ val = readl(cci_ctrl_base + CCI400_PMCR) & ~CCI400_PMCR_CEN;
+ writel(val, cci_ctrl_base + CCI400_PMCR);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static u32 cci_pmu_read_counter(struct perf_event *event)
+{
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hw_counter = &event->hw;
+ int idx = hw_counter->idx;
+ u32 value;
+
+ if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+ dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+ return 0;
+ }
+ value = cci_pmu_read_register(idx, CCI400_PMU_CNTR);
+
+ return value;
+}
+
+static void cci_pmu_write_counter(struct perf_event *event, u32 value)
+{
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hw_counter = &event->hw;
+ int idx = hw_counter->idx;
+
+ if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx)))
+ dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+ else
+ cci_pmu_write_register(value, idx, CCI400_PMU_CNTR);
+}
+
+static struct arm_pmu cci_pmu = {
+ .name = DRIVER_NAME,
+ .max_period = (1LLU << 32) - 1,
+ .get_hw_events = cci_pmu_get_hw_events,
+ .get_event_idx = cci_pmu_get_event_idx,
+ .map_event = cci_pmu_map_event,
+ .request_irq = cci_pmu_request_irq,
+ .handle_irq = cci_pmu_handle_irq,
+ .free_irq = cci_pmu_free_irq,
+ .enable = cci_pmu_enable_event,
+ .disable = cci_pmu_disable_event,
+ .start = cci_pmu_start,
+ .stop = cci_pmu_stop,
+ .read_counter = cci_pmu_read_counter,
+ .write_counter = cci_pmu_write_counter,
+};
+
+static int cci_pmu_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ cci_pmu_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(cci_pmu_base))
+ return PTR_ERR(cci_pmu_base);
+
+ cci_pmu.plat_device = pdev;
+ cci_pmu.num_events = cci_pmu_get_max_counters();
+ raw_spin_lock_init(&cci_hw_events.pmu_lock);
+ cpumask_setall(&cci_pmu.valid_cpus);
+
+ return armpmu_register(&cci_pmu, -1);
+}
+
+static const struct of_device_id arm_cci_pmu_matches[] = {
+ {.compatible = "arm,cci-400-pmu"},
+ {},
+};
+
+static struct platform_driver cci_pmu_platform_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = arm_cci_pmu_matches,
+ },
+ .probe = cci_pmu_probe,
+};
+
+static int __init cci_pmu_init(void)
+{
+ if (platform_driver_register(&cci_pmu_platform_driver))
+ WARN(1, "unable to register CCI platform driver\n");
+ return 0;
+}
+
+#else
+
+static int __init cci_pmu_init(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
+struct cpu_port {
+ u64 mpidr;
+ u32 port;
+};
+
+/*
+ * Use the port MSB as valid flag, shift can be made dynamic
+ * by computing number of bits required for port indexes.
+ * Code disabling CCI cpu ports runs with D-cache invalidated
+ * and SCTLR bit clear so data accesses must be kept to a minimum
+ * to improve performance; for now shift is left static to
+ * avoid one more data access while disabling the CCI port.
+ */
+#define PORT_VALID_SHIFT 31
+#define PORT_VALID (0x1 << PORT_VALID_SHIFT)
+
+static inline void init_cpu_port(struct cpu_port *port, u32 index, u64 mpidr)
+{
+ port->port = PORT_VALID | index;
+ port->mpidr = mpidr;
+}
+
+static inline bool cpu_port_is_valid(struct cpu_port *port)
+{
+ return !!(port->port & PORT_VALID);
+}
+
+static inline bool cpu_port_match(struct cpu_port *port, u64 mpidr)
+{
+ return port->mpidr == (mpidr & MPIDR_HWID_BITMASK);
+}
+
+static struct cpu_port cpu_port[NR_CPUS];
+
+/**
+ * __cci_ace_get_port - Function to retrieve the port index connected to
+ * a cpu or device.
+ *
+ * @dn: device node of the device to look-up
+ * @type: port type
+ *
+ * Return value:
+ * - CCI port index if success
+ * - -ENODEV if failure
+ */
+static int __cci_ace_get_port(struct device_node *dn, int type)
+{
+ int i;
+ bool ace_match;
+ struct device_node *cci_portn;
+
+ cci_portn = of_parse_phandle(dn, "cci-control-port", 0);
+ for (i = 0; i < nb_cci_ports; i++) {
+ ace_match = ports[i].type == type;
+ if (ace_match && cci_portn == ports[i].dn)
+ return i;
+ }
+ return -ENODEV;
+}
+
+int cci_ace_get_port(struct device_node *dn)
+{
+ return __cci_ace_get_port(dn, ACE_LITE_PORT);
+}
+EXPORT_SYMBOL_GPL(cci_ace_get_port);
+
+static void __init cci_ace_init_ports(void)
+{
+ int port, ac, cpu;
+ u64 hwid;
+ const u32 *cell;
+ struct device_node *cpun, *cpus;
+
+ cpus = of_find_node_by_path("/cpus");
+ if (WARN(!cpus, "Missing cpus node, bailing out\n"))
+ return;
+
+ if (WARN_ON(of_property_read_u32(cpus, "#address-cells", &ac)))
+ ac = of_n_addr_cells(cpus);
+
+ /*
+ * Port index look-up speeds up the function disabling ports by CPU,
+ * since the logical to port index mapping is done once and does
+ * not change after system boot.
+ * The stashed index array is initialized for all possible CPUs
+ * at probe time.
+ */
+ for_each_child_of_node(cpus, cpun) {
+ if (of_node_cmp(cpun->type, "cpu"))
+ continue;
+ cell = of_get_property(cpun, "reg", NULL);
+ if (WARN(!cell, "%s: missing reg property\n", cpun->full_name))
+ continue;
+
+ hwid = of_read_number(cell, ac);
+ cpu = get_logical_index(hwid & MPIDR_HWID_BITMASK);
+
+ if (cpu < 0 || !cpu_possible(cpu))
+ continue;
+ port = __cci_ace_get_port(cpun, ACE_PORT);
+ if (port < 0)
+ continue;
+
+ init_cpu_port(&cpu_port[cpu], port, cpu_logical_map(cpu));
+ }
+
+ for_each_possible_cpu(cpu) {
+ WARN(!cpu_port_is_valid(&cpu_port[cpu]),
+ "CPU %u does not have an associated CCI port\n",
+ cpu);
+ }
+}
+/*
+ * Functions to enable/disable a CCI interconnect slave port
+ *
+ * They are called by low-level power management code to disable slave
+ * interfaces snoops and DVM broadcast.
+ * Since they may execute with cache data allocation disabled, after the
+ * caches have been cleaned and invalidated, the functions provide no
+ * explicit locking: normal cacheable kernel locks based on ldrex/strex
+ * may not work while the D-cache is disabled.
+ * Locking has to be provided by BSP implementations to ensure proper
+ * operations.
+ */
+
+/**
+ * cci_port_control() - function to control a CCI port
+ *
+ * @port: index of the port to setup
+ * @enable: if true enables the port, if false disables it
+ */
+static void notrace cci_port_control(unsigned int port, bool enable)
+{
+ void __iomem *base = ports[port].base;
+
+ writel_relaxed(enable ? CCI_ENABLE_REQ : 0, base + CCI_PORT_CTRL);
+ /*
+ * This function is called from power down procedures
+ * and must not execute any instruction that might
+ * cause the processor to be put in a quiescent state
+ * (eg wfi). Hence, cpu_relax() can not be added to this
+ * read loop to optimize power, since it might hide possibly
+ * disruptive operations.
+ */
+ while (readl_relaxed(cci_ctrl_base + CCI_CTRL_STATUS) & 0x1)
+ ;
+}
+
+/**
+ * cci_disable_port_by_cpu() - function to disable a CCI port by CPU
+ * reference
+ *
+ * @mpidr: mpidr of the CPU whose CCI port should be disabled
+ *
+ * Disabling a CCI port for a CPU implies disabling the CCI port
+ * controlling that CPU cluster. Code disabling CPU CCI ports
+ * must make sure that the CPU running the code is the last active CPU
+ * in the cluster ie all other CPUs are quiescent in a low power state.
+ *
+ * Return:
+ * 0 on success
+ * -ENODEV on port look-up failure
+ */
+int notrace cci_disable_port_by_cpu(u64 mpidr)
+{
+ int cpu;
+ bool is_valid;
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+ is_valid = cpu_port_is_valid(&cpu_port[cpu]);
+ if (is_valid && cpu_port_match(&cpu_port[cpu], mpidr)) {
+ cci_port_control(cpu_port[cpu].port, false);
+ return 0;
+ }
+ }
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(cci_disable_port_by_cpu);
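+
+/*
+ * Illustrative usage sketch: a cluster power-down path (an MCPM
+ * backend, say) disabling snoops for the last CPU standing. The
+ * function name is hypothetical; caches must already be clean and
+ * disabled at this point, as described above.
+ */
+static void foo_cluster_powerdown(void)
+{
+	if (cci_disable_port_by_cpu(read_cpuid_mpidr()))
+		pr_err("foo: failed to disable CCI port\n");
+}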
+
+/**
+ * cci_enable_port_for_self() - enable a CCI port for calling CPU
+ *
+ * Enabling a CCI port for the calling CPU implies enabling the CCI
+ * port controlling that CPU's cluster. Caller must make sure that the
+ * CPU running the code is the first active CPU in the cluster and all
+ * other CPUs are quiescent in a low power state or waiting for this CPU
+ * to complete the CCI initialization.
+ *
+ * Because this is called when the MMU is still off and with no stack,
+ * the code must be position independent and ideally rely on callee
+ * clobbered registers only. To achieve this we must code this function
+ * entirely in assembler.
+ *
+ * On success this returns with the proper CCI port enabled. In case of
+ * any failure this never returns as the inability to enable the CCI is
+ * fatal and there is no possible recovery at this stage.
+ */
+asmlinkage void __naked cci_enable_port_for_self(void)
+{
+ asm volatile ("\n"
+
+" mrc p15, 0, r0, c0, c0, 5 @ get MPIDR value \n"
+" and r0, r0, #"__stringify(MPIDR_HWID_BITMASK)" \n"
+" adr r1, 5f \n"
+" ldr r2, [r1] \n"
+" add r1, r1, r2 @ &cpu_port \n"
+" add ip, r1, %[sizeof_cpu_port] \n"
+
+ /* Loop over the cpu_port array looking for a matching MPIDR */
+"1: ldr r2, [r1, %[offsetof_cpu_port_mpidr_lsb]] \n"
+" cmp r2, r0 @ compare MPIDR \n"
+" bne 2f \n"
+
+ /* Found a match, now test port validity */
+" ldr r3, [r1, %[offsetof_cpu_port_port]] \n"
+" tst r3, #"__stringify(PORT_VALID)" \n"
+" bne 3f \n"
+
+ /* no match, loop with the next cpu_port entry */
+"2: add r1, r1, %[sizeof_struct_cpu_port] \n"
+" cmp r1, ip @ done? \n"
+" blo 1b \n"
+
+ /* CCI port not found -- cheaply try to stall this CPU */
+"cci_port_not_found: \n"
+" wfi \n"
+" wfe \n"
+" b cci_port_not_found \n"
+
+ /* Use matched port index to look up the corresponding ports entry */
+"3: bic r3, r3, #"__stringify(PORT_VALID)" \n"
+" adr r0, 6f \n"
+" ldmia r0, {r1, r2} \n"
+" sub r1, r1, r0 @ virt - phys \n"
+" ldr r0, [r0, r2] @ *(&ports) \n"
+" mov r2, %[sizeof_struct_ace_port] \n"
+" mla r0, r2, r3, r0 @ &ports[index] \n"
+" sub r0, r0, r1 @ virt_to_phys() \n"
+
+ /* Enable the CCI port */
+" ldr r0, [r0, %[offsetof_port_phys]] \n"
+" mov r3, %[cci_enable_req]\n"
+" str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n"
+
+ /* poll the status reg for completion */
+" adr r1, 7f \n"
+" ldr r0, [r1] \n"
+" ldr r0, [r0, r1] @ cci_ctrl_base \n"
+"4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n"
+" tst r1, %[cci_control_status_bits] \n"
+" bne 4b \n"
+
+" mov r0, #0 \n"
+" bx lr \n"
+
+" .align 2 \n"
+"5: .word cpu_port - . \n"
+"6: .word . \n"
+" .word ports - 6b \n"
+"7: .word cci_ctrl_phys - . \n"
+ : :
+ [sizeof_cpu_port] "i" (sizeof(cpu_port)),
+ [cci_enable_req] "i" cpu_to_le32(CCI_ENABLE_REQ),
+ [cci_control_status_bits] "i" cpu_to_le32(1),
+#ifndef __ARMEB__
+ [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)),
+#else
+ [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)+4),
+#endif
+ [offsetof_cpu_port_port] "i" (offsetof(struct cpu_port, port)),
+ [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)),
+ [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)),
+ [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) );
+
+ unreachable();
+}
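+
+/*
+ * Rough C equivalent of the assembler above, for illustration only:
+ * the real routine must run with the MMU off, without a stack, and on
+ * physical addresses (the asm uses ports[].phys rather than the
+ * virtual base used below), hence the hand-written position-independent
+ * assembler.
+ */
+#if 0
+static void cci_enable_port_for_self_c(void)
+{
+	u32 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct cpu_port *p = &cpu_port[cpu];
+
+		if (!(p->port & PORT_VALID) || p->mpidr != mpidr)
+			continue;
+		/* enable the matching port, then poll status until quiet */
+		writel_relaxed(CCI_ENABLE_REQ,
+			       ports[p->port & ~PORT_VALID].base + CCI_PORT_CTRL);
+		while (readl_relaxed(cci_ctrl_base + CCI_CTRL_STATUS) & 0x1)
+			;
+		return;
+	}
+	for (;;)
+		wfe();	/* no port found: stall, as the asm does */
+}
+#endif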
+
+/**
+ * __cci_control_port_by_device() - function to control a CCI port by device
+ * reference
+ *
+ * @dn: device node pointer of the device whose CCI port should be
+ * controlled
+ * @enable: if true enables the port, if false disables it
+ *
+ * Return:
+ * 0 on success
+ * -ENODEV on port look-up failure
+ */
+int notrace __cci_control_port_by_device(struct device_node *dn, bool enable)
+{
+ int port;
+
+ if (!dn)
+ return -ENODEV;
+
+ port = __cci_ace_get_port(dn, ACE_LITE_PORT);
+ if (WARN_ONCE(port < 0, "node %s ACE lite port look-up failure\n",
+ dn->full_name))
+ return -ENODEV;
+ cci_port_control(port, enable);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__cci_control_port_by_device);
+
+/**
+ * __cci_control_port_by_index() - function to control a CCI port by port index
+ *
+ * @port: port index previously retrieved with cci_ace_get_port()
+ * @enable: if true enables the port, if false disables it
+ *
+ * Return:
+ * 0 on success
+ * -ENODEV on port index out of range
+ * -EPERM if operation carried out on an ACE PORT
+ */
+int notrace __cci_control_port_by_index(u32 port, bool enable)
+{
+ if (port >= nb_cci_ports || ports[port].type == ACE_INVALID_PORT)
+ return -ENODEV;
+ /*
+	 * CCI control for ports connected to CPUs is extremely fragile
+	 * and must be made to go through a specific and controlled
+	 * interface (ie cci_disable_port_by_cpu()); control by general
+	 * purpose indexing is therefore disabled for ACE ports.
+ */
+ if (ports[port].type == ACE_PORT)
+ return -EPERM;
+
+ cci_port_control(port, enable);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__cci_control_port_by_index);
+
+static const struct cci_nb_ports cci400_ports = {
+ .nb_ace = 2,
+ .nb_ace_lite = 3
+};
+
+static const struct of_device_id arm_cci_matches[] = {
+ {.compatible = "arm,cci-400", .data = &cci400_ports },
+ {},
+};
+
+static const struct of_device_id arm_cci_ctrl_if_matches[] = {
+ {.compatible = "arm,cci-400-ctrl-if", },
+ {},
+};
+
+static int __init cci_probe(void)
+{
+ struct cci_nb_ports const *cci_config;
+ int ret, i, nb_ace = 0, nb_ace_lite = 0;
+ struct device_node *np, *cp;
+ struct resource res;
+ const char *match_str;
+ bool is_ace;
+
+ np = of_find_matching_node(NULL, arm_cci_matches);
+ if (!np)
+ return -ENODEV;
+
+ cci_config = of_match_node(arm_cci_matches, np)->data;
+ if (!cci_config)
+ return -ENODEV;
+
+ nb_cci_ports = cci_config->nb_ace + cci_config->nb_ace_lite;
+
+	ports = kcalloc(nb_cci_ports, sizeof(*ports), GFP_KERNEL);
+ if (!ports)
+ return -ENOMEM;
+
+ ret = of_address_to_resource(np, 0, &res);
+ if (!ret) {
+ cci_ctrl_base = ioremap(res.start, resource_size(&res));
+ cci_ctrl_phys = res.start;
+ }
+ if (ret || !cci_ctrl_base) {
+ WARN(1, "unable to ioremap CCI ctrl\n");
+ ret = -ENXIO;
+ goto memalloc_err;
+ }
+
+ for_each_child_of_node(np, cp) {
+ if (!of_match_node(arm_cci_ctrl_if_matches, cp))
+ continue;
+
+ i = nb_ace + nb_ace_lite;
+
+ if (i >= nb_cci_ports)
+ break;
+
+ if (of_property_read_string(cp, "interface-type",
+ &match_str)) {
+ WARN(1, "node %s missing interface-type property\n",
+ cp->full_name);
+ continue;
+ }
+ is_ace = strcmp(match_str, "ace") == 0;
+ if (!is_ace && strcmp(match_str, "ace-lite")) {
+ WARN(1, "node %s containing invalid interface-type property, skipping it\n",
+ cp->full_name);
+ continue;
+ }
+
+ ret = of_address_to_resource(cp, 0, &res);
+ if (!ret) {
+ ports[i].base = ioremap(res.start, resource_size(&res));
+ ports[i].phys = res.start;
+ }
+ if (ret || !ports[i].base) {
+ WARN(1, "unable to ioremap CCI port %d\n", i);
+ continue;
+ }
+
+ if (is_ace) {
+ if (WARN_ON(nb_ace >= cci_config->nb_ace))
+ continue;
+ ports[i].type = ACE_PORT;
+ ++nb_ace;
+ } else {
+ if (WARN_ON(nb_ace_lite >= cci_config->nb_ace_lite))
+ continue;
+ ports[i].type = ACE_LITE_PORT;
+ ++nb_ace_lite;
+ }
+ ports[i].dn = cp;
+ }
+
+ /* initialize a stashed array of ACE ports to speed-up look-up */
+ cci_ace_init_ports();
+
+ /*
+ * Multi-cluster systems may need this data when non-coherent, during
+ * cluster power-up/power-down. Make sure it reaches main memory.
+ */
+ sync_cache_w(&cci_ctrl_base);
+ sync_cache_w(&cci_ctrl_phys);
+ sync_cache_w(&ports);
+ sync_cache_w(&cpu_port);
+ __sync_cache_range_w(ports, sizeof(*ports) * nb_cci_ports);
+ pr_info("ARM CCI driver probed\n");
+ return 0;
+
+memalloc_err:
+
+ kfree(ports);
+ return ret;
+}
+
+static int cci_init_status = -EAGAIN;
+static DEFINE_MUTEX(cci_probing);
+
+static int __init cci_init(void)
+{
+ if (cci_init_status != -EAGAIN)
+ return cci_init_status;
+
+ mutex_lock(&cci_probing);
+ if (cci_init_status == -EAGAIN)
+ cci_init_status = cci_probe();
+ mutex_unlock(&cci_probing);
+ return cci_init_status;
+}
+
+/*
+ * To sort out early init call ordering, a helper function is provided
+ * to check whether the CCI driver has been initialized. If it has not,
+ * the helper calls the init function that probes the driver and updates
+ * the cached return value.
+ */
+bool __init cci_probed(void)
+{
+ return cci_init() == 0;
+}
+EXPORT_SYMBOL_GPL(cci_probed);
+
+early_initcall(cci_init);
+core_initcall(cci_pmu_init);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ARM CCI support");
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 0357ac44638b..d0d9b2124752 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -42,7 +42,7 @@ config COMMON_CLK_WM831X
config COMMON_CLK_VERSATILE
bool "Clock driver for ARM Reference designs"
- depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS
+ depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS || ARM64
---help---
Supports clocking on ARM Reference designs:
- Integrator/AP and Integrator/CP
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 1144e8c7579d..fccac4a41386 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2084,6 +2084,12 @@ struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec)
return clk;
}
+int of_clk_get_parent_count(struct device_node *np)
+{
+ return of_count_phandle_with_args(np, "clocks", "#clock-cells");
+}
+EXPORT_SYMBOL_GPL(of_clk_get_parent_count);
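+
+/*
+ * Illustrative usage sketch: sizing a mux clock's parent array from the
+ * DT "clocks" property. Names are hypothetical.
+ */
+static const char **foo_get_parent_names(struct device_node *np, int *num)
+{
+	const char **parents;
+	int i, count = of_clk_get_parent_count(np);
+
+	if (count < 1)
+		return NULL;
+	parents = kcalloc(count, sizeof(*parents), GFP_KERNEL);
+	if (!parents)
+		return NULL;
+	for (i = 0; i < count; i++)
+		parents[i] = of_clk_get_parent_name(np, i);
+	*num = count;
+	return parents;
+}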
+
const char *of_clk_get_parent_name(struct device_node *np, int index)
{
struct of_phandle_args clkspec;
diff --git a/drivers/clk/versatile/Makefile b/drivers/clk/versatile/Makefile
index c16ca787170a..6e76bf87ca87 100644
--- a/drivers/clk/versatile/Makefile
+++ b/drivers/clk/versatile/Makefile
@@ -4,4 +4,4 @@ obj-$(CONFIG_ARCH_INTEGRATOR) += clk-integrator.o
obj-$(CONFIG_INTEGRATOR_IMPD1) += clk-impd1.o
obj-$(CONFIG_ARCH_REALVIEW) += clk-realview.o
obj-$(CONFIG_ARCH_VEXPRESS) += clk-vexpress.o clk-sp810.o
-obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o
+obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o clk-vexpress-spc.o
diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c
index 8b8798bb93f3..a535c7bf8574 100644
--- a/drivers/clk/versatile/clk-vexpress-osc.c
+++ b/drivers/clk/versatile/clk-vexpress-osc.c
@@ -107,7 +107,7 @@ void __init vexpress_osc_of_setup(struct device_node *node)
osc->func = vexpress_config_func_get_by_node(node);
if (!osc->func) {
pr_err("Failed to obtain config func for node '%s'!\n",
- node->name);
+ node->full_name);
goto error;
}
@@ -119,7 +119,7 @@ void __init vexpress_osc_of_setup(struct device_node *node)
of_property_read_string(node, "clock-output-names", &init.name);
if (!init.name)
- init.name = node->name;
+ init.name = node->full_name;
init.ops = &vexpress_osc_ops;
init.flags = CLK_IS_ROOT;
diff --git a/drivers/clk/versatile/clk-vexpress-spc.c b/drivers/clk/versatile/clk-vexpress-spc.c
new file mode 100644
index 000000000000..bb566e244b0c
--- /dev/null
+++ b/drivers/clk/versatile/clk-vexpress-spc.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2012 ARM Limited
+ * Copyright (C) 2012 Linaro
+ *
+ * Author: Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+/* SPC clock programming interface for Vexpress cpus */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vexpress.h>
+
+struct clk_spc {
+ struct clk_hw hw;
+ spinlock_t *lock;
+ int cluster;
+};
+
+#define to_clk_spc(spc) container_of(spc, struct clk_spc, hw)
+
+static unsigned long spc_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct clk_spc *spc = to_clk_spc(hw);
+ u32 freq;
+
+ if (vexpress_spc_get_performance(spc->cluster, &freq)) {
+		pr_err("%s: failed to get performance\n", __func__);
+		return -EIO;
+ }
+
+ return freq * 1000;
+}
+
+static long spc_round_rate(struct clk_hw *hw, unsigned long drate,
+ unsigned long *parent_rate)
+{
+ return drate;
+}
+
+static int spc_set_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long parent_rate)
+{
+ struct clk_spc *spc = to_clk_spc(hw);
+
+ return vexpress_spc_set_performance(spc->cluster, rate / 1000);
+}
+
+static struct clk_ops clk_spc_ops = {
+ .recalc_rate = spc_recalc_rate,
+ .round_rate = spc_round_rate,
+ .set_rate = spc_set_rate,
+};
+
+struct clk *vexpress_clk_register_spc(const char *name, int cluster_id)
+{
+ struct clk_init_data init;
+ struct clk_spc *spc;
+ struct clk *clk;
+
+ if (!name) {
+ pr_err("Invalid name passed");
+ return ERR_PTR(-EINVAL);
+ }
+
+ spc = kzalloc(sizeof(*spc), GFP_KERNEL);
+ if (!spc) {
+ pr_err("could not allocate spc clk\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spc->hw.init = &init;
+ spc->cluster = cluster_id;
+
+ init.name = name;
+ init.ops = &clk_spc_ops;
+ init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE;
+ init.num_parents = 0;
+
+ clk = clk_register(NULL, &spc->hw);
+	if (!IS_ERR(clk))
+		return clk;
+
+	pr_err("clk register failed\n");
+	kfree(spc);
+
+	/* propagate the ERR_PTR so callers using IS_ERR() see the error */
+	return clk;
+}
+
+#if defined(CONFIG_OF)
+void __init vexpress_clk_of_register_spc(void)
+{
+ char name[14] = "cpu-cluster.X";
+ struct device_node *node = NULL;
+ struct clk *clk;
+ const u32 *val;
+ int cluster_id = 0, len;
+
+ if (!of_find_compatible_node(NULL, NULL, "arm,vexpress-spc")) {
+ pr_debug("%s: No SPC found, Exiting!!\n", __func__);
+ return;
+ }
+
+ while ((node = of_find_node_by_name(node, "cluster"))) {
+ val = of_get_property(node, "reg", &len);
+ if (val && len == 4)
+ cluster_id = be32_to_cpup(val);
+
+ name[12] = cluster_id + '0';
+ clk = vexpress_clk_register_spc(name, cluster_id);
+ if (IS_ERR(clk))
+ return;
+
+ pr_debug("Registered clock '%s'\n", name);
+ clk_register_clkdev(clk, NULL, name);
+ }
+}
+CLK_OF_DECLARE(spc, "arm,vexpress-spc", vexpress_clk_of_register_spc);
+#endif
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index f151c6cf27c3..593485b02ee5 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -67,6 +67,21 @@ config ARM_ARCH_TIMER
bool
select CLKSRC_OF if OF
+config ARM_ARCH_TIMER_EVTSTREAM
+ bool "Support for ARM architected timer event stream generation"
+ default y if ARM_ARCH_TIMER
+ help
+ This option enables support for event stream generation based on
+ the ARM architected timer. It is used for waking up CPUs executing
+ the wfe instruction at a frequency represented as a power-of-2
+ divisor of the clock rate.
+ The main use of the event stream is wfe-based timeouts of userspace
+	  locking implementations. It might also be useful for imposing a timeout
+	  on wfe to safeguard against any programming errors in case an expected
+ event is not generated.
+ This must be disabled for hardware validation purposes to detect any
+ hardware anomalies of missing events.
+
config CLKSRC_METAG_GENERIC
def_bool y if METAG
help
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 053d846ab5b1..67bbbd8ae507 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -13,16 +13,43 @@
#include <linux/device.h>
#include <linux/smp.h>
#include <linux/cpu.h>
+#include <linux/cpu_pm.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/of_irq.h>
+#include <linux/of_address.h>
#include <linux/io.h>
+#include <linux/slab.h>
#include <asm/arch_timer.h>
#include <asm/virt.h>
#include <clocksource/arm_arch_timer.h>
+#define CNTTIDR 0x08
+#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4))
+
+#define CNTVCT_LO 0x08
+#define CNTVCT_HI 0x0c
+#define CNTFRQ 0x10
+#define CNTP_TVAL 0x28
+#define CNTP_CTL 0x2c
+#define CNTV_TVAL 0x38
+#define CNTV_CTL 0x3c
+
+#define ARCH_CP15_TIMER BIT(0)
+#define ARCH_MEM_TIMER BIT(1)
+static unsigned arch_timers_present __initdata;
+
+static void __iomem *arch_counter_base;
+
+struct arch_timer {
+ void __iomem *base;
+ struct clock_event_device evt;
+};
+
+#define to_arch_timer(e) container_of(e, struct arch_timer, evt)
+
static u32 arch_timer_rate;
enum ppi_nr {
@@ -38,19 +65,82 @@ static int arch_timer_ppi[MAX_TIMER_PPI];
static struct clock_event_device __percpu *arch_timer_evt;
static bool arch_timer_use_virtual = true;
+static bool arch_timer_mem_use_virtual;
/*
* Architected system timer support.
*/
-static inline irqreturn_t timer_handler(const int access,
+static __always_inline
+void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
+ struct clock_event_device *clk)
+{
+ if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
+ struct arch_timer *timer = to_arch_timer(clk);
+ switch (reg) {
+ case ARCH_TIMER_REG_CTRL:
+ writel_relaxed(val, timer->base + CNTP_CTL);
+ break;
+ case ARCH_TIMER_REG_TVAL:
+ writel_relaxed(val, timer->base + CNTP_TVAL);
+ break;
+ }
+ } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
+ struct arch_timer *timer = to_arch_timer(clk);
+ switch (reg) {
+ case ARCH_TIMER_REG_CTRL:
+ writel_relaxed(val, timer->base + CNTV_CTL);
+ break;
+ case ARCH_TIMER_REG_TVAL:
+ writel_relaxed(val, timer->base + CNTV_TVAL);
+ break;
+ }
+ } else {
+ arch_timer_reg_write_cp15(access, reg, val);
+ }
+}
+
+static __always_inline
+u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
+ struct clock_event_device *clk)
+{
+ u32 val;
+
+ if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
+ struct arch_timer *timer = to_arch_timer(clk);
+ switch (reg) {
+ case ARCH_TIMER_REG_CTRL:
+ val = readl_relaxed(timer->base + CNTP_CTL);
+ break;
+ case ARCH_TIMER_REG_TVAL:
+ val = readl_relaxed(timer->base + CNTP_TVAL);
+ break;
+ }
+ } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
+ struct arch_timer *timer = to_arch_timer(clk);
+ switch (reg) {
+ case ARCH_TIMER_REG_CTRL:
+ val = readl_relaxed(timer->base + CNTV_CTL);
+ break;
+ case ARCH_TIMER_REG_TVAL:
+ val = readl_relaxed(timer->base + CNTV_TVAL);
+ break;
+ }
+ } else {
+ val = arch_timer_reg_read_cp15(access, reg);
+ }
+
+ return val;
+}
+
+static __always_inline irqreturn_t timer_handler(const int access,
struct clock_event_device *evt)
{
unsigned long ctrl;
- ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL);
+ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt);
if (ctrl & ARCH_TIMER_CTRL_IT_STAT) {
ctrl |= ARCH_TIMER_CTRL_IT_MASK;
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl);
+ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt);
evt->event_handler(evt);
return IRQ_HANDLED;
}
@@ -72,15 +162,30 @@ static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id)
return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt);
}
-static inline void timer_set_mode(const int access, int mode)
+static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = dev_id;
+
+ return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt);
+}
+
+static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = dev_id;
+
+ return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt);
+}
+
+static __always_inline void timer_set_mode(const int access, int mode,
+ struct clock_event_device *clk)
{
unsigned long ctrl;
switch (mode) {
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
- ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL);
+ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl);
+ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
break;
default:
break;
@@ -90,60 +195,121 @@ static inline void timer_set_mode(const int access, int mode)
static void arch_timer_set_mode_virt(enum clock_event_mode mode,
struct clock_event_device *clk)
{
- timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode);
+ timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode, clk);
}
static void arch_timer_set_mode_phys(enum clock_event_mode mode,
struct clock_event_device *clk)
{
- timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode);
+ timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode, clk);
+}
+
+static void arch_timer_set_mode_virt_mem(enum clock_event_mode mode,
+ struct clock_event_device *clk)
+{
+ timer_set_mode(ARCH_TIMER_MEM_VIRT_ACCESS, mode, clk);
+}
+
+static void arch_timer_set_mode_phys_mem(enum clock_event_mode mode,
+ struct clock_event_device *clk)
+{
+ timer_set_mode(ARCH_TIMER_MEM_PHYS_ACCESS, mode, clk);
}
-static inline void set_next_event(const int access, unsigned long evt)
+static __always_inline void set_next_event(const int access, unsigned long evt,
+ struct clock_event_device *clk)
{
unsigned long ctrl;
- ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL);
+ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
ctrl |= ARCH_TIMER_CTRL_ENABLE;
ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
- arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt);
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl);
+ arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk);
+ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
}
static int arch_timer_set_next_event_virt(unsigned long evt,
- struct clock_event_device *unused)
+ struct clock_event_device *clk)
{
- set_next_event(ARCH_TIMER_VIRT_ACCESS, evt);
+ set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk);
return 0;
}
static int arch_timer_set_next_event_phys(unsigned long evt,
- struct clock_event_device *unused)
+ struct clock_event_device *clk)
{
- set_next_event(ARCH_TIMER_PHYS_ACCESS, evt);
+ set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk);
return 0;
}
-static int __cpuinit arch_timer_setup(struct clock_event_device *clk)
+static void arch_timer_configure_evtstream(void)
{
- clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP;
- clk->name = "arch_sys_timer";
- clk->rating = 450;
- if (arch_timer_use_virtual) {
- clk->irq = arch_timer_ppi[VIRT_PPI];
- clk->set_mode = arch_timer_set_mode_virt;
- clk->set_next_event = arch_timer_set_next_event_virt;
+ int evt_stream_div, pos;
+
+ /* Find the closest power of two to the divisor */
+ evt_stream_div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ;
+ pos = fls(evt_stream_div);
+ if (pos > 1 && !(evt_stream_div & (1 << (pos - 2))))
+ pos--;
+ /* enable event stream */
+ arch_timer_evtstrm_enable(min(pos, 15));
+}
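+
+/*
+ * Worked example of the computation above, assuming a 24 MHz counter
+ * and an ARCH_TIMER_EVT_STREAM_FREQ of 10000 (10 kHz):
+ *
+ *	evt_stream_div = 24000000 / 10000 = 2400
+ *	fls(2400) = 12; bit 10 (1 << (12 - 2)) is clear in 2400, so pos
+ *	drops to 11, and 2^11 = 2048 is the power of two closest to 2400
+ *
+ * arch_timer_evtstrm_enable(min(pos, 15)) is then called with pos = 11.
+ */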
+
+static int arch_timer_set_next_event_virt_mem(unsigned long evt,
+ struct clock_event_device *clk)
+{
+ set_next_event(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk);
+ return 0;
+}
+
+static int arch_timer_set_next_event_phys_mem(unsigned long evt,
+ struct clock_event_device *clk)
+{
+ set_next_event(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk);
+ return 0;
+}
+
+static void __cpuinit __arch_timer_setup(unsigned type,
+ struct clock_event_device *clk)
+{
+ clk->features = CLOCK_EVT_FEAT_ONESHOT;
+
+ if (type == ARCH_CP15_TIMER) {
+ clk->features |= CLOCK_EVT_FEAT_C3STOP;
+ clk->name = "arch_sys_timer";
+ clk->rating = 450;
+ clk->cpumask = cpumask_of(smp_processor_id());
+ if (arch_timer_use_virtual) {
+ clk->irq = arch_timer_ppi[VIRT_PPI];
+ clk->set_mode = arch_timer_set_mode_virt;
+ clk->set_next_event = arch_timer_set_next_event_virt;
+ } else {
+ clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
+ clk->set_mode = arch_timer_set_mode_phys;
+ clk->set_next_event = arch_timer_set_next_event_phys;
+ }
} else {
- clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
- clk->set_mode = arch_timer_set_mode_phys;
- clk->set_next_event = arch_timer_set_next_event_phys;
+ clk->name = "arch_mem_timer";
+ clk->rating = 400;
+ clk->cpumask = cpu_all_mask;
+ if (arch_timer_mem_use_virtual) {
+ clk->set_mode = arch_timer_set_mode_virt_mem;
+ clk->set_next_event =
+ arch_timer_set_next_event_virt_mem;
+ } else {
+ clk->set_mode = arch_timer_set_mode_phys_mem;
+ clk->set_next_event =
+ arch_timer_set_next_event_phys_mem;
+ }
}
- clk->cpumask = cpumask_of(smp_processor_id());
+ clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, clk);
- clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, NULL);
+ clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff);
+}
- clockevents_config_and_register(clk, arch_timer_rate,
- 0xf, 0x7fffffff);
+static int __cpuinit arch_timer_setup(struct clock_event_device *clk)
+{
+ __arch_timer_setup(ARCH_CP15_TIMER, clk);
if (arch_timer_use_virtual)
enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0);
@@ -154,31 +320,47 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk)
}
arch_counter_set_user_access();
+ if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM))
+ arch_timer_configure_evtstream();
return 0;
}
-static int arch_timer_available(void)
+static void
+arch_timer_detect_rate(void __iomem *cntbase, struct device_node *np)
{
- u32 freq;
-
- if (arch_timer_rate == 0) {
- freq = arch_timer_get_cntfrq();
-
- /* Check the timer frequency. */
- if (freq == 0) {
- pr_warn("Architected timer frequency not available\n");
- return -EINVAL;
- }
+ /* Who has more than one independent system counter? */
+ if (arch_timer_rate)
+ return;
- arch_timer_rate = freq;
+ /* Try to determine the frequency from the device tree or CNTFRQ */
+ if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) {
+ if (cntbase)
+ arch_timer_rate = readl_relaxed(cntbase + CNTFRQ);
+ else
+ arch_timer_rate = arch_timer_get_cntfrq();
}
- pr_info_once("Architected local timer running at %lu.%02luMHz (%s).\n",
+ /* Check the timer frequency. */
+ if (arch_timer_rate == 0)
+ pr_warn("Architected timer frequency not available\n");
+}
+
+static void arch_timer_banner(unsigned type)
+{
+ pr_info("Architected %s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n",
+ type & ARCH_CP15_TIMER ? "cp15" : "",
+ type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? " and " : "",
+ type & ARCH_MEM_TIMER ? "mmio" : "",
(unsigned long)arch_timer_rate / 1000000,
(unsigned long)(arch_timer_rate / 10000) % 100,
- arch_timer_use_virtual ? "virt" : "phys");
- return 0;
+ type & ARCH_CP15_TIMER ?
+ arch_timer_use_virtual ? "virt" : "phys" :
+ "",
+ type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "",
+ type & ARCH_MEM_TIMER ?
+ arch_timer_mem_use_virtual ? "virt" : "phys" :
+ "");
}
u32 arch_timer_get_rate(void)
@@ -186,11 +368,27 @@ u32 arch_timer_get_rate(void)
return arch_timer_rate;
}
-u64 arch_timer_read_counter(void)
+static u64 arch_counter_get_cntvct_mem(void)
{
- return arch_counter_get_cntvct();
+ u32 vct_lo, vct_hi, tmp_hi;
+
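+ /*
+ * The 64-bit counter is read as two 32-bit halves; re-read the
+ * high word until it is stable so a carry from the low word
+ * cannot produce a torn value.
+ */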
+ do {
+ vct_hi = readl_relaxed(arch_counter_base + CNTVCT_HI);
+ vct_lo = readl_relaxed(arch_counter_base + CNTVCT_LO);
+ tmp_hi = readl_relaxed(arch_counter_base + CNTVCT_HI);
+ } while (vct_hi != tmp_hi);
+
+ return ((u64) vct_hi << 32) | vct_lo;
}
+/*
+ * Default to cp15 based access because arm64 uses this function for
+ * sched_clock() before DT is probed and the cp15 method is guaranteed
+ * to exist on arm64. arm doesn't use this before DT is probed so even
+ * if we don't have the cp15 accessors we won't have a problem.
+ */
+u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct;
+
static cycle_t arch_counter_read(struct clocksource *cs)
{
return arch_counter_get_cntvct();
@@ -221,6 +419,23 @@ struct timecounter *arch_timer_get_timecounter(void)
return &timecounter;
}
+static void __init arch_counter_register(unsigned type)
+{
+ u64 start_count;
+
+ /* Register the CP15 based counter if we have one */
+ if (type & ARCH_CP15_TIMER)
+ arch_timer_read_counter = arch_counter_get_cntvct;
+ else
+ arch_timer_read_counter = arch_counter_get_cntvct_mem;
+
+ start_count = arch_timer_read_counter();
+ clocksource_register_hz(&clocksource_counter, arch_timer_rate);
+ cyclecounter.mult = clocksource_counter.mult;
+ cyclecounter.shift = clocksource_counter.shift;
+ timecounter_init(&timecounter, &cyclecounter, start_count);
+}
+
static void __cpuinit arch_timer_stop(struct clock_event_device *clk)
{
pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n",
@@ -260,27 +475,44 @@ static struct notifier_block arch_timer_cpu_nb __cpuinitdata = {
.notifier_call = arch_timer_cpu_notify,
};
+#ifdef CONFIG_CPU_PM
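+/*
+ * CNTKCTL (the timer user-access control register) is lost across a
+ * core power-down, so save it on CPU_PM_ENTER and restore it on exit.
+ */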
+static unsigned int saved_cntkctl;
+static int arch_timer_cpu_pm_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ if (action == CPU_PM_ENTER)
+ saved_cntkctl = arch_timer_get_cntkctl();
+ else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT)
+ arch_timer_set_cntkctl(saved_cntkctl);
+ return NOTIFY_OK;
+}
+
+static struct notifier_block arch_timer_cpu_pm_notifier = {
+ .notifier_call = arch_timer_cpu_pm_notify,
+};
+
+static int __init arch_timer_cpu_pm_init(void)
+{
+ return cpu_pm_register_notifier(&arch_timer_cpu_pm_notifier);
+}
+#else
+static int __init arch_timer_cpu_pm_init(void)
+{
+ return 0;
+}
+#endif
+
static int __init arch_timer_register(void)
{
int err;
int ppi;
- err = arch_timer_available();
- if (err)
- goto out;
-
arch_timer_evt = alloc_percpu(struct clock_event_device);
if (!arch_timer_evt) {
err = -ENOMEM;
goto out;
}
- clocksource_register_hz(&clocksource_counter, arch_timer_rate);
- cyclecounter.mult = clocksource_counter.mult;
- cyclecounter.shift = clocksource_counter.shift;
- timecounter_init(&timecounter, &cyclecounter,
- arch_counter_get_cntvct());
-
if (arch_timer_use_virtual) {
ppi = arch_timer_ppi[VIRT_PPI];
err = request_percpu_irq(ppi, arch_timer_handler_virt,
@@ -309,11 +541,17 @@ static int __init arch_timer_register(void)
if (err)
goto out_free_irq;
+ err = arch_timer_cpu_pm_init();
+ if (err)
+ goto out_unreg_notify;
+
/* Immediately configure the timer on the boot CPU */
arch_timer_setup(this_cpu_ptr(arch_timer_evt));
return 0;
+out_unreg_notify:
+ unregister_cpu_notifier(&arch_timer_cpu_nb);
out_free_irq:
if (arch_timer_use_virtual)
free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt);
@@ -331,24 +569,77 @@ out:
return err;
}
+static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq)
+{
+ int ret;
+ irq_handler_t func;
+ struct arch_timer *t;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return -ENOMEM;
+
+ t->base = base;
+ t->evt.irq = irq;
+ __arch_timer_setup(ARCH_MEM_TIMER, &t->evt);
+
+ if (arch_timer_mem_use_virtual)
+ func = arch_timer_handler_virt_mem;
+ else
+ func = arch_timer_handler_phys_mem;
+
+ ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &t->evt);
+ if (ret) {
+ pr_err("arch_timer: Failed to request mem timer irq\n");
+ kfree(t);
+ }
+
+ return ret;
+}
+
+static const struct of_device_id arch_timer_of_match[] __initconst = {
+ { .compatible = "arm,armv7-timer", },
+ { .compatible = "arm,armv8-timer", },
+ {},
+};
+
+static const struct of_device_id arch_timer_mem_of_match[] __initconst = {
+ { .compatible = "arm,armv7-timer-mem", },
+ {},
+};
+
+static void __init arch_timer_common_init(void)
+{
+ unsigned mask = ARCH_CP15_TIMER | ARCH_MEM_TIMER;
+
+ /* Wait until both nodes are probed if we have two timers */
+ if ((arch_timers_present & mask) != mask) {
+ if (of_find_matching_node(NULL, arch_timer_mem_of_match) &&
+ !(arch_timers_present & ARCH_MEM_TIMER))
+ return;
+ if (of_find_matching_node(NULL, arch_timer_of_match) &&
+ !(arch_timers_present & ARCH_CP15_TIMER))
+ return;
+ }
+
+ arch_timer_banner(arch_timers_present);
+ arch_counter_register(arch_timers_present);
+ arch_timer_arch_init();
+}
+
static void __init arch_timer_init(struct device_node *np)
{
- u32 freq;
int i;
- if (arch_timer_get_rate()) {
+ if (arch_timers_present & ARCH_CP15_TIMER) {
pr_warn("arch_timer: multiple nodes in dt, skipping\n");
return;
}
- /* Try to determine the frequency from the device tree or CNTFRQ */
- if (!of_property_read_u32(np, "clock-frequency", &freq))
- arch_timer_rate = freq;
-
+ arch_timers_present |= ARCH_CP15_TIMER;
for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
-
- of_node_put(np);
+ arch_timer_detect_rate(NULL, np);
/*
* If HYP mode is available, we know that the physical timer
@@ -369,7 +660,73 @@ static void __init arch_timer_init(struct device_node *np)
}
arch_timer_register();
- arch_timer_arch_init();
+ arch_timer_common_init();
}
CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_init);
CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_init);
+
+static void __init arch_timer_mem_init(struct device_node *np)
+{
+ struct device_node *frame, *best_frame = NULL;
+ void __iomem *cntctlbase, *base;
+ unsigned int irq;
+ u32 cnttidr;
+
+ arch_timers_present |= ARCH_MEM_TIMER;
+ cntctlbase = of_iomap(np, 0);
+ if (!cntctlbase) {
+ pr_err("arch_timer: Can't find CNTCTLBase\n");
+ return;
+ }
+
+ cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
+ iounmap(cntctlbase);
+
+ /*
+ * Try to find a virtual capable frame. Otherwise fall back to a
+ * physical capable frame.
+ */
+ for_each_available_child_of_node(np, frame) {
+ u32 n;
+
+ if (of_property_read_u32(frame, "frame-number", &n)) {
+ pr_err("arch_timer: Missing frame-number\n");
+ of_node_put(best_frame);
+ of_node_put(frame);
+ return;
+ }
+
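+ /* CNTTIDR advertises, per frame, whether a virtual timer exists */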
+ if (cnttidr & CNTTIDR_VIRT(n)) {
+ of_node_put(best_frame);
+ best_frame = frame;
+ arch_timer_mem_use_virtual = true;
+ break;
+ }
+ of_node_put(best_frame);
+ best_frame = of_node_get(frame);
+ }
+
+ base = arch_counter_base = of_iomap(best_frame, 0);
+ if (!base) {
+ pr_err("arch_timer: Can't map frame's registers\n");
+ of_node_put(best_frame);
+ return;
+ }
+
+ if (arch_timer_mem_use_virtual)
+ irq = irq_of_parse_and_map(best_frame, 1);
+ else
+ irq = irq_of_parse_and_map(best_frame, 0);
+ of_node_put(best_frame);
+ if (!irq) {
+ pr_err("arch_timer: Frame missing %s irq",
+ arch_timer_mem_use_virtual ? "virt" : "phys");
+ return;
+ }
+
+ arch_timer_detect_rate(base, np);
+ arch_timer_mem_register(base, irq);
+ arch_timer_common_init();
+}
+CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
+ arch_timer_mem_init);
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 534fcb825153..a9c1324843eb 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -201,7 +201,7 @@ source "drivers/cpufreq/Kconfig.x86"
endmenu
menu "ARM CPU frequency scaling drivers"
-depends on ARM
+depends on ARM || ARM64
source "drivers/cpufreq/Kconfig.arm"
endmenu
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 6e57543fe0b9..22727792350b 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -4,7 +4,8 @@
config ARM_BIG_LITTLE_CPUFREQ
tristate "Generic ARM big LITTLE CPUfreq driver"
- depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK
+ depends on (ARM_CPU_TOPOLOGY && BIG_LITTLE) || (ARM64 && SMP)
+ depends on PM_OPP && HAVE_CLK
help
This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
@@ -15,6 +16,14 @@ config ARM_DT_BL_CPUFREQ
This enables probing via DT for Generic CPUfreq driver for ARM
big.LITTLE platform. This gets frequency tables from DT.
+config ARM_VEXPRESS_BL_CPUFREQ
+ tristate "ARM Vexpress big LITTLE CPUfreq driver"
+ select ARM_BIG_LITTLE_CPUFREQ
+ depends on VEXPRESS_SPC
+ help
+ This enables the CPUfreq driver for the ARM Vexpress big.LITTLE platform.
+ If in doubt, say N.
+
config ARM_EXYNOS_CPUFREQ
bool "SAMSUNG EXYNOS SoCs"
depends on ARCH_EXYNOS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 3b95322fec5e..505c62bceb9d 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o
obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o
# big LITTLE per platform glues. Keep DT_BL_CPUFREQ as the last entry in all big
# LITTLE drivers, so that it is probed last.
+obj-$(CONFIG_ARM_VEXPRESS_BL_CPUFREQ) += vexpress_big_little.o
obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o
obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 5d7f53fcd6f5..eaee6e222207 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -24,27 +24,148 @@
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
+#include <linux/mutex.h>
#include <linux/of_platform.h>
#include <linux/opp.h>
#include <linux/slab.h>
#include <linux/topology.h>
#include <linux/types.h>
+#include <asm/bL_switcher.h>
#include "arm_big_little.h"
-/* Currently we support only two clusters */
-#define MAX_CLUSTERS 2
+#ifdef CONFIG_BL_SWITCHER
+bool bL_switching_enabled;
+#endif
+
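+/*
+ * With the switcher enabled both clusters are presented as one virtual
+ * cluster on an A15 frequency scale: A7 rates are shown at half their
+ * physical value (VIRT_FREQ) and doubled back before being programmed
+ * (ACTUAL_FREQ).
+ */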
+#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq)
+#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
static struct cpufreq_arm_bL_ops *arm_bL_ops;
static struct clk *clk[MAX_CLUSTERS];
-static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS];
-static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)};
+static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
+static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0),
+ ATOMIC_INIT(0)};
+
+static unsigned int clk_big_min; /* (Big) clock frequencies */
+static unsigned int clk_little_max; /* Maximum clock frequency (Little) */
+
+static DEFINE_PER_CPU(unsigned int, physical_cluster);
+static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
+
+static struct mutex cluster_lock[MAX_CLUSTERS];
+
+static unsigned int find_cluster_maxfreq(int cluster)
+{
+ int j;
+ u32 max_freq = 0, cpu_freq;
+
+ for_each_online_cpu(j) {
+ cpu_freq = per_cpu(cpu_last_req_freq, j);
+
+ if ((cluster == per_cpu(physical_cluster, j)) &&
+ (max_freq < cpu_freq))
+ max_freq = cpu_freq;
+ }
-static unsigned int bL_cpufreq_get(unsigned int cpu)
+ pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster,
+ max_freq);
+
+ return max_freq;
+}
+
+static unsigned int clk_get_cpu_rate(unsigned int cpu)
{
- u32 cur_cluster = cpu_to_cluster(cpu);
+ u32 cur_cluster = per_cpu(physical_cluster, cpu);
+ u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
+
+ /* For switcher we use virtual A15 clock rates */
+ if (is_bL_switching_enabled())
+ rate = VIRT_FREQ(cur_cluster, rate);
+
+ pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu,
+ cur_cluster, rate);
- return clk_get_rate(clk[cur_cluster]) / 1000;
+ return rate;
+}
+
+static unsigned int bL_cpufreq_get_rate(unsigned int cpu)
+{
+ if (is_bL_switching_enabled()) {
+ pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq,
+ cpu));
+
+ return per_cpu(cpu_last_req_freq, cpu);
+ } else {
+ return clk_get_cpu_rate(cpu);
+ }
+}
+
+static unsigned int
+bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
+{
+ u32 new_rate, prev_rate;
+ int ret;
+ bool bLs = is_bL_switching_enabled();
+
+ mutex_lock(&cluster_lock[new_cluster]);
+
+ if (bLs) {
+ prev_rate = per_cpu(cpu_last_req_freq, cpu);
+ per_cpu(cpu_last_req_freq, cpu) = rate;
+ per_cpu(physical_cluster, cpu) = new_cluster;
+
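+ /*
+ * The physical cluster clocks every CPU on it, so run it at the
+ * highest rate currently requested by any of those CPUs.
+ */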
+ new_rate = find_cluster_maxfreq(new_cluster);
+ new_rate = ACTUAL_FREQ(new_cluster, new_rate);
+ } else {
+ new_rate = rate;
+ }
+
+ pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n",
+ __func__, cpu, old_cluster, new_cluster, new_rate);
+
+ ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+ if (WARN_ON(ret)) {
+ pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
+ new_cluster);
+ if (bLs) {
+ per_cpu(cpu_last_req_freq, cpu) = prev_rate;
+ per_cpu(physical_cluster, cpu) = old_cluster;
+ }
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ return ret;
+ }
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ /* Recalc freq for old cluster when switching clusters */
+ if (old_cluster != new_cluster) {
+ pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n",
+ __func__, cpu, old_cluster, new_cluster);
+
+ /* Switch cluster */
+ bL_switch_request(cpu, new_cluster);
+
+ mutex_lock(&cluster_lock[old_cluster]);
+
+ /* Set freq of old cluster if there are cpus left on it */
+ new_rate = find_cluster_maxfreq(old_cluster);
+ new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+
+ if (new_rate) {
+ pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
+ __func__, old_cluster, new_rate);
+
+ if (clk_set_rate(clk[old_cluster], new_rate * 1000))
+ pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
+ __func__, ret, old_cluster);
+ }
+ mutex_unlock(&cluster_lock[old_cluster]);
+ }
+
+ return 0;
}
/* Validate policy frequency range */
@@ -60,12 +181,14 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
struct cpufreq_freqs freqs;
- u32 cpu = policy->cpu, freq_tab_idx, cur_cluster;
+ u32 cpu = policy->cpu, freq_tab_idx, cur_cluster, new_cluster,
+ actual_cluster;
int ret = 0;
- cur_cluster = cpu_to_cluster(policy->cpu);
+ cur_cluster = cpu_to_cluster(cpu);
+ new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
- freqs.old = bL_cpufreq_get(policy->cpu);
+ freqs.old = bL_cpufreq_get_rate(cpu);
/* Determine valid target frequency using freq_table */
cpufreq_frequency_table_target(policy, freq_table[cur_cluster],
@@ -79,13 +202,21 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
if (freqs.old == freqs.new)
return 0;
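+ /*
+ * A target below the big cluster's minimum, or above the little
+ * cluster's maximum, can only be met by switching the CPU to the
+ * other physical cluster.
+ */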
+ if (is_bL_switching_enabled()) {
+ if ((actual_cluster == A15_CLUSTER) &&
+ (freqs.new < clk_big_min)) {
+ new_cluster = A7_CLUSTER;
+ } else if ((actual_cluster == A7_CLUSTER) &&
+ (freqs.new > clk_little_max)) {
+ new_cluster = A15_CLUSTER;
+ }
+ }
+
cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
- ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000);
- if (ret) {
- pr_err("clk_set_rate failed: %d\n", ret);
+ ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs.new);
+ if (ret)
return ret;
- }
policy->cur = freqs.new;
@@ -94,7 +225,73 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
return ret;
}
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static inline u32 get_table_count(struct cpufreq_frequency_table *table)
+{
+ int count;
+
+ for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
+ ;
+
+ return count;
+}
+
+/* get the minimum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_min(struct cpufreq_frequency_table *table)
+{
+ int i;
+ uint32_t min_freq = ~0;
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++)
+ if (table[i].frequency < min_freq)
+ min_freq = table[i].frequency;
+ return min_freq;
+}
+
+/* get the maximum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_max(struct cpufreq_frequency_table *table)
+{
+ int i;
+ uint32_t max_freq = 0;
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++)
+ if (table[i].frequency > max_freq)
+ max_freq = table[i].frequency;
+ return max_freq;
+}
+
+static int merge_cluster_tables(void)
+{
+ int i, j, k = 0, count = 1;
+ struct cpufreq_frequency_table *table;
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ count += get_table_count(freq_table[i]);
+
+ table = kzalloc(sizeof(*table) * count, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ freq_table[MAX_CLUSTERS] = table;
+
+ /* Add in reverse order to get freqs in increasing order */
+ for (i = MAX_CLUSTERS - 1; i >= 0; i--) {
+ for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END;
+ j++) {
+ table[k].frequency = VIRT_FREQ(i,
+ freq_table[i][j].frequency);
+ pr_debug("%s: index: %d, freq: %d\n", __func__, k,
+ table[k].frequency);
+ k++;
+ }
+ }
+
+ table[k].index = k;
+ table[k].frequency = CPUFREQ_TABLE_END;
+
+ pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k);
+
+ return 0;
+}
+
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
@@ -105,10 +302,35 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
}
}
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+ u32 cluster = cpu_to_cluster(cpu_dev->id);
+ int i;
+
+ if (cluster < MAX_CLUSTERS)
+ return _put_cluster_clk_and_freq_table(cpu_dev);
+
+ if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS]))
+ return;
+
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return;
+ }
+
+ _put_cluster_clk_and_freq_table(cdev);
+ }
+
+ /* free virtual table */
+ kfree(freq_table[MAX_CLUSTERS]);
+}
+
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
- char name[14] = "cpu-cluster.";
+ char name[14] = "cpu-cluster.X";
int ret;
if (atomic_inc_return(&cluster_usage[cluster]) != 1)
@@ -149,6 +371,62 @@ atomic_dec:
return ret;
}
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+ u32 cluster = cpu_to_cluster(cpu_dev->id);
+ int i, ret;
+
+ if (cluster < MAX_CLUSTERS)
+ return _get_cluster_clk_and_freq_table(cpu_dev);
+
+ if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1)
+ return 0;
+
+ /*
+ * Get data for all clusters and fill virtual cluster with a merge of
+ * both
+ */
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return -ENODEV;
+ }
+
+ ret = _get_cluster_clk_and_freq_table(cdev);
+ if (ret)
+ goto put_clusters;
+ }
+
+ ret = merge_cluster_tables();
+ if (ret)
+ goto put_clusters;
+
+ /* Assuming 2 clusters, set clk_big_min and clk_little_max */
+ clk_big_min = get_table_min(freq_table[0]);
+ clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1]));
+
+ pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n",
+ __func__, cluster, clk_big_min, clk_little_max);
+
+ return 0;
+
+put_clusters:
+ while (i--) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return -ENODEV;
+ }
+
+ _put_cluster_clk_and_freq_table(cdev);
+ }
+
+ atomic_dec(&cluster_usage[MAX_CLUSTERS]);
+
+ return ret;
+}
+
/* Per-CPU initialization */
static int bL_cpufreq_init(struct cpufreq_policy *policy)
{
@@ -177,37 +455,33 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu);
+ if (cur_cluster < MAX_CLUSTERS) {
+ int cpu;
+
+ cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+
+ for_each_cpu(cpu, policy->cpus)
+ per_cpu(physical_cluster, cpu) = cur_cluster;
+ } else {
+ /* Assumption: during init, we are always running on A15 */
+ per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
+ }
+
if (arm_bL_ops->get_transition_latency)
policy->cpuinfo.transition_latency =
arm_bL_ops->get_transition_latency(cpu_dev);
else
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- policy->cur = bL_cpufreq_get(policy->cpu);
+ policy->cur = clk_get_cpu_rate(policy->cpu);
- cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+ if (is_bL_switching_enabled())
+ per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur;
dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu);
return 0;
}
-static int bL_cpufreq_exit(struct cpufreq_policy *policy)
-{
- struct device *cpu_dev;
-
- cpu_dev = get_cpu_device(policy->cpu);
- if (!cpu_dev) {
- pr_err("%s: failed to get cpu%d device\n", __func__,
- policy->cpu);
- return -ENODEV;
- }
-
- put_cluster_clk_and_freq_table(cpu_dev);
- dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
-
- return 0;
-}
-
/* Export freq_table to sysfs */
static struct freq_attr *bL_cpufreq_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
@@ -219,16 +493,47 @@ static struct cpufreq_driver bL_cpufreq_driver = {
.flags = CPUFREQ_STICKY,
.verify = bL_cpufreq_verify_policy,
.target = bL_cpufreq_set_target,
- .get = bL_cpufreq_get,
+ .get = bL_cpufreq_get_rate,
.init = bL_cpufreq_init,
- .exit = bL_cpufreq_exit,
.have_governor_per_policy = true,
.attr = bL_cpufreq_attr,
};
+static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
+ pr_debug("%s: action: %ld\n", __func__, action);
+
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_unregister_driver(&bL_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ set_switching_enabled(true);
+ cpufreq_register_driver(&bL_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_DISABLE:
+ set_switching_enabled(false);
+ cpufreq_register_driver(&bL_cpufreq_driver);
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block bL_switcher_notifier = {
+ .notifier_call = bL_cpufreq_switcher_notifier,
+};
+
int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
{
- int ret;
+ int ret, i;
if (arm_bL_ops) {
pr_debug("%s: Already registered: %s, exiting\n", __func__,
@@ -243,16 +548,29 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
arm_bL_ops = ops;
+ ret = bL_switcher_get_enabled();
+ set_switching_enabled(ret);
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ mutex_init(&cluster_lock[i]);
+
ret = cpufreq_register_driver(&bL_cpufreq_driver);
if (ret) {
pr_info("%s: Failed registering platform driver: %s, err: %d\n",
__func__, ops->name, ret);
arm_bL_ops = NULL;
} else {
- pr_info("%s: Registered platform driver: %s\n", __func__,
- ops->name);
+ ret = bL_switcher_register_notifier(&bL_switcher_notifier);
+ if (ret) {
+ cpufreq_unregister_driver(&bL_cpufreq_driver);
+ arm_bL_ops = NULL;
+ } else {
+ pr_info("%s: Registered platform driver: %s\n",
+ __func__, ops->name);
+ }
}
+ bL_switcher_put_enabled();
return ret;
}
EXPORT_SYMBOL_GPL(bL_cpufreq_register);
@@ -265,9 +583,31 @@ void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops)
return;
}
+ bL_switcher_get_enabled();
+ bL_switcher_unregister_notifier(&bL_switcher_notifier);
cpufreq_unregister_driver(&bL_cpufreq_driver);
+ bL_switcher_put_enabled();
pr_info("%s: Un-registered platform driver: %s\n", __func__,
arm_bL_ops->name);
+
+ /* For saving table get/put on every cpu in/out */
+ if (is_bL_switching_enabled()) {
+ put_cluster_clk_and_freq_table(get_cpu_device(0));
+ } else {
+ int i;
+
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n",
+ __func__, i);
+ return;
+ }
+
+ put_cluster_clk_and_freq_table(cdev);
+ }
+ }
+
arm_bL_ops = NULL;
}
EXPORT_SYMBOL_GPL(bL_cpufreq_unregister);
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
index 79b2ce17884d..4f5a03d3aef6 100644
--- a/drivers/cpufreq/arm_big_little.h
+++ b/drivers/cpufreq/arm_big_little.h
@@ -23,6 +23,20 @@
#include <linux/device.h>
#include <linux/types.h>
+/* Currently we support only two clusters */
+#define A15_CLUSTER 0
+#define A7_CLUSTER 1
+#define MAX_CLUSTERS 2
+
+#ifdef CONFIG_BL_SWITCHER
+extern bool bL_switching_enabled;
+#define is_bL_switching_enabled() bL_switching_enabled
+#define set_switching_enabled(x) (bL_switching_enabled = (x))
+#else
+#define is_bL_switching_enabled() false
+#define set_switching_enabled(x) do { } while (0)
+#endif
+
struct cpufreq_arm_bL_ops {
char name[CPUFREQ_NAME_LEN];
int (*get_transition_latency)(struct device *cpu_dev);
@@ -36,7 +50,8 @@ struct cpufreq_arm_bL_ops {
static inline int cpu_to_cluster(int cpu)
{
- return topology_physical_package_id(cpu);
+ return is_bL_switching_enabled() ? MAX_CLUSTERS :
+ topology_physical_package_id(cpu);
}
int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 66f6cf5064ec..ce94c323fa13 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -31,6 +31,8 @@
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>
+#include <linux/suspend.h>
+#include <linux/tick.h>
#include <trace/events/power.h>
@@ -48,6 +50,14 @@ static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
static DEFINE_RWLOCK(cpufreq_driver_lock);
static DEFINE_MUTEX(cpufreq_governor_lock);
+/* Flag to suspend/resume CPUFreq governors */
+static bool cpufreq_suspended;
+
+static inline bool has_target(void)
+{
+ return cpufreq_driver->target;
+}
+
/*
* cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
* all cpufreq/hotplug/workqueue/etc related lock issues.
@@ -1275,82 +1285,89 @@ static struct subsys_interface cpufreq_interface = {
/**
- * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
+ * cpufreq_suspend() - Suspend CPUFreq governors
*
- * This function is only executed for the boot processor. The other CPUs
- * have been put offline by means of CPU hotplug.
+ * Called during system-wide suspend/hibernate cycles to suspend governors,
+ * as some platforms can't change frequency after this point in the suspend
+ * cycle: the devices (i2c, regulators, etc.) used for changing frequency
+ * are themselves suspended shortly afterwards.
*/
-static int cpufreq_bp_suspend(void)
+void cpufreq_suspend(void)
{
- int ret = 0;
+ struct cpufreq_policy *policy;
+ int cpu;
- int cpu = smp_processor_id();
- struct cpufreq_policy *cpu_policy;
+ if (!cpufreq_driver)
+ return;
- pr_debug("suspending cpu %u\n", cpu);
+ if (!has_target())
+ return;
- /* If there's no policy for the boot CPU, we have nothing to do. */
- cpu_policy = cpufreq_cpu_get(cpu);
- if (!cpu_policy)
- return 0;
+ pr_debug("%s: Suspending Governors\n", __func__);
- if (cpufreq_driver->suspend) {
- ret = cpufreq_driver->suspend(cpu_policy);
- if (ret)
- printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
- "step on CPU %u\n", cpu_policy->cpu);
+ for_each_possible_cpu(cpu) {
+ if (!cpu_online(cpu))
+ continue;
+
+ policy = cpufreq_cpu_get(cpu);
+
+ if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
+ pr_err("%s: Failed to stop governor for policy: %p\n",
+ __func__, policy);
+ else if (cpufreq_driver->suspend
+ && cpufreq_driver->suspend(policy))
+ pr_err("%s: Failed to suspend driver: %p\n", __func__,
+ policy);
}
- cpufreq_cpu_put(cpu_policy);
- return ret;
+ cpufreq_suspended = true;
}
/**
- * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
+ * cpufreq_resume() - Resume CPUFreq governors
*
- * 1.) resume CPUfreq hardware support (cpufreq_driver->resume())
- * 2.) schedule call cpufreq_update_policy() ASAP as interrupts are
- * restored. It will verify that the current freq is in sync with
- * what we believe it to be. This is a bit later than when it
- * should be, but nonethteless it's better than calling
- * cpufreq_driver->get() here which might re-enable interrupts...
- *
- * This function is only executed for the boot CPU. The other CPUs have not
- * been turned on yet.
+ * Called during system-wide suspend/hibernate cycles to resume governors that
+ * were suspended by cpufreq_suspend().
*/
-static void cpufreq_bp_resume(void)
+void cpufreq_resume(void)
{
- int ret = 0;
-
- int cpu = smp_processor_id();
- struct cpufreq_policy *cpu_policy;
+ struct cpufreq_policy *policy;
+ int cpu;
- pr_debug("resuming cpu %u\n", cpu);
+ if (!cpufreq_driver)
+ return;
- /* If there's no policy for the boot CPU, we have nothing to do. */
- cpu_policy = cpufreq_cpu_get(cpu);
- if (!cpu_policy)
+ if (!has_target())
return;
- if (cpufreq_driver->resume) {
- ret = cpufreq_driver->resume(cpu_policy);
- if (ret) {
- printk(KERN_ERR "cpufreq: resume failed in ->resume "
- "step on CPU %u\n", cpu_policy->cpu);
- goto fail;
- }
- }
+ pr_debug("%s: Resuming Governors\n", __func__);
- schedule_work(&cpu_policy->update);
+ cpufreq_suspended = false;
-fail:
- cpufreq_cpu_put(cpu_policy);
-}
+ for_each_possible_cpu(cpu) {
+ if (!cpu_online(cpu))
+ continue;
-static struct syscore_ops cpufreq_syscore_ops = {
- .suspend = cpufreq_bp_suspend,
- .resume = cpufreq_bp_resume,
-};
+ policy = cpufreq_cpu_get(cpu);
+
+ if (__cpufreq_governor(policy, CPUFREQ_GOV_START)
+ || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
+ pr_err("%s: Failed to start governor for policy: %p\n",
+ __func__, policy);
+ else if (cpufreq_driver->resume
+ && cpufreq_driver->resume(policy))
+ pr_err("%s: Failed to resume driver: %p\n", __func__,
+ policy);
+
+ /*
+ * Schedule a call to cpufreq_update_policy() for the boot CPU, i.e.
+ * the last policy in the list. It will verify that the current
+ * frequency is in sync with what we believe it to be.
+ */
+ if (cpu == 0)
+ schedule_work(&policy->update);
+ }
+}
/**
* cpufreq_get_current_driver - return current driver's name
@@ -1544,6 +1561,10 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
struct cpufreq_governor *gov = NULL;
#endif
+ /* Don't start any governor operations if we are entering suspend */
+ if (cpufreq_suspended)
+ return 0;
+
if (policy->governor->max_transition_latency &&
policy->cpuinfo.transition_latency >
policy->governor->max_transition_latency) {
@@ -2001,7 +2022,6 @@ static int __init cpufreq_core_init(void)
cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
BUG_ON(!cpufreq_global_kobject);
- register_syscore_ops(&cpufreq_syscore_ops);
return 0;
}
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 7fb600239059..9823037f194b 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -21,6 +21,9 @@
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <asm/cputime.h>
+#ifdef CONFIG_BL_SWITCHER
+#include <asm/bL_switcher.h>
+#endif
static spinlock_t cpufreq_stats_lock;
@@ -378,7 +381,7 @@ static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_stat_notifier_trans
};
-static int __init cpufreq_stats_init(void)
+static int cpufreq_stats_setup(void)
{
int ret;
unsigned int cpu;
@@ -406,7 +409,8 @@ static int __init cpufreq_stats_init(void)
return 0;
}
-static void __exit cpufreq_stats_exit(void)
+
+static void cpufreq_stats_cleanup(void)
{
unsigned int cpu;
@@ -421,6 +425,54 @@ static void __exit cpufreq_stats_exit(void)
}
}
+#ifdef CONFIG_BL_SWITCHER
+static int cpufreq_stats_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
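+ /*
+ * The frequency tables change shape when the switcher is toggled
+ * (per-cluster vs. merged virtual table), so tear down and rebuild
+ * the stats around the transition.
+ */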
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_stats_cleanup();
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ case BL_NOTIFY_POST_DISABLE:
+ cpufreq_stats_setup();
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block switcher_notifier = {
+ .notifier_call = cpufreq_stats_switcher_notifier,
+};
+#endif
+
+static int __init cpufreq_stats_init(void)
+{
+ int ret;
+ spin_lock_init(&cpufreq_stats_lock);
+
+ ret = cpufreq_stats_setup();
+#ifdef CONFIG_BL_SWITCHER
+ if (!ret)
+ bL_switcher_register_notifier(&switcher_notifier);
+#endif
+ return ret;
+}
+
+static void __exit cpufreq_stats_exit(void)
+{
+#ifdef CONFIG_BL_SWITCHER
+ bL_switcher_unregister_notifier(&switcher_notifier);
+#endif
+ cpufreq_stats_cleanup();
+}
+
MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats "
"through sysfs filesystem");
diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
index 8c4771da1b89..9216c2020ec1 100644
--- a/drivers/cpufreq/highbank-cpufreq.c
+++ b/drivers/cpufreq/highbank-cpufreq.c
@@ -19,7 +19,7 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/of.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
#include <linux/platform_device.h>
#define HB_CPUFREQ_CHANGE_NOTE 0x80000001
diff --git a/drivers/cpufreq/vexpress_big_little.c b/drivers/cpufreq/vexpress_big_little.c
new file mode 100644
index 000000000000..1abb883c051b
--- /dev/null
+++ b/drivers/cpufreq/vexpress_big_little.c
@@ -0,0 +1,86 @@
+/*
+ * Vexpress big.LITTLE CPUFreq Interface driver
+ *
+ * It provides the necessary ops to the arm_big_little cpufreq driver and gets
+ * frequency information from the device tree. The freq table in DT must be in kHz.
+ *
+ * Copyright (C) 2013 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpufreq.h>
+#include <linux/export.h>
+#include <linux/opp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/vexpress.h>
+#include "arm_big_little.h"
+
+static int vexpress_init_opp_table(struct device *cpu_dev)
+{
+ int i = -1, count, cluster = cpu_to_cluster(cpu_dev->id);
+ u32 *table;
+ int ret;
+
+ count = vexpress_spc_get_freq_table(cluster, &table);
+ if (!table || !count) {
+ pr_err("SPC controller returned invalid freq table");
+ return -EINVAL;
+ }
+
+ while (++i < count) {
+ /* FIXME: Voltage value */
+ ret = opp_add(cpu_dev, table[i] * 1000, 900000);
+ if (ret) {
+ dev_warn(cpu_dev, "%s: Failed to add OPP %d, err: %d\n",
+ __func__, table[i] * 1000, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int vexpress_get_transition_latency(struct device *cpu_dev)
+{
+ /* 1 ms */
+ return 1000000;
+}
+
+static struct cpufreq_arm_bL_ops vexpress_bL_ops = {
+ .name = "vexpress-bL",
+ .get_transition_latency = vexpress_get_transition_latency,
+ .init_opp_table = vexpress_init_opp_table,
+};
+
+static int vexpress_bL_init(void)
+{
+ if (!vexpress_spc_check_loaded()) {
+ pr_info("%s: No SPC found\n", __func__);
+ return -ENOENT;
+ }
+
+ return bL_cpufreq_register(&vexpress_bL_ops);
+}
+module_init(vexpress_bL_init);
+
+static void vexpress_bL_exit(void)
+{
+ return bL_cpufreq_unregister(&vexpress_bL_ops);
+}
+module_exit(vexpress_bL_exit);
+
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
+MODULE_DESCRIPTION("ARM Vexpress big LITTLE cpufreq driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index c4cc27e5c8a5..842d7ba83101 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -31,6 +31,15 @@ config CPU_IDLE_GOV_MENU
config ARCH_NEEDS_CPU_IDLE_COUPLED
def_bool n
+config OF_IDLE_STATES
+ bool "Idle states DT support"
+ depends on ARM || ARM64
+ default n
+ help
+ Allows the CPU idle framework to initialize CPU idle drivers'
+ state data from DT-provided nodes compliant with the idle states
+ device tree bindings.
+
if CPU_IDLE
config CPU_IDLE_CALXEDA
@@ -39,4 +48,9 @@ config CPU_IDLE_CALXEDA
help
Select this to enable cpuidle on Calxeda processors.
+menu "ARM64 CPU Idle Drivers"
+depends on ARM64
+source "drivers/cpuidle/Kconfig.arm64"
+endmenu
+
endif
diff --git a/drivers/cpuidle/Kconfig.arm64 b/drivers/cpuidle/Kconfig.arm64
new file mode 100644
index 000000000000..b83612c67e6d
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.arm64
@@ -0,0 +1,13 @@
+#
+# ARM64 CPU Idle drivers
+#
+
+config ARM64_CPUIDLE
+ bool "Generic ARM64 CPU idle Driver"
+ select OF_IDLE_STATES
+ help
+ Select this to enable the generic cpuidle driver for ARMv8.
+ It provides a generic idle driver whose idle states are configured
+ at run-time through DT nodes. The CPUidle suspend backend is
+ initialized by the device tree parsing code on matching the entry
+ method to the respective CPU operations.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 0d8bd55e776f..2d97bcfecd00 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -4,6 +4,12 @@
obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
+obj-$(CONFIG_BIG_LITTLE) += arm_big_little.o
+obj-$(CONFIG_OF_IDLE_STATES) += of_idle_states.o
obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o
obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o
+
+###############################################################################
+# ARM64 drivers
+obj-$(CONFIG_ARM64_CPUIDLE) += cpuidle-arm64.o
diff --git a/drivers/cpuidle/arm_big_little.c b/drivers/cpuidle/arm_big_little.c
new file mode 100644
index 000000000000..e5378896a8cb
--- /dev/null
+++ b/drivers/cpuidle/arm_big_little.c
@@ -0,0 +1,183 @@
+/*
+ * big.LITTLE CPU idle driver.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/arm-cci.h>
+#include <linux/bitmap.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/clockchips.h>
+#include <linux/debugfs.h>
+#include <linux/hrtimer.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/tick.h>
+#include <linux/vexpress.h>
+#include <asm/mcpm.h>
+#include <asm/cpuidle.h>
+#include <asm/cputype.h>
+#include <asm/idmap.h>
+#include <asm/proc-fns.h>
+#include <asm/suspend.h>
+#include <linux/of.h>
+
+static int bl_cpuidle_simple_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ ktime_t time_start, time_end;
+ s64 diff;
+
+ time_start = ktime_get();
+
+ cpu_do_idle();
+
+ time_end = ktime_get();
+
+ local_irq_enable();
+
+ diff = ktime_to_us(ktime_sub(time_end, time_start));
+ if (diff > INT_MAX)
+ diff = INT_MAX;
+
+ dev->last_residency = (int) diff;
+
+ return index;
+}
+
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx);
+
+static struct cpuidle_state bl_cpuidle_set[] __initdata = {
+ [0] = {
+ .enter = bl_cpuidle_simple_enter,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .power_usage = UINT_MAX,
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .name = "WFI",
+ .desc = "ARM WFI",
+ },
+ [1] = {
+ .enter = bl_enter_powerdown,
+ .exit_latency = 300,
+ .target_residency = 1000,
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .name = "C1",
+ .desc = "ARM power down",
+ },
+};
+
+struct cpuidle_driver bl_idle_driver = {
+ .name = "bl_idle",
+ .owner = THIS_MODULE,
+ .safe_state_index = 0
+};
+
+static DEFINE_PER_CPU(struct cpuidle_device, bl_idle_dev);
+
+static int notrace bl_powerdown_finisher(unsigned long arg)
+{
+ unsigned int mpidr = read_cpuid_mpidr();
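+ /* MPIDR affinity level 0 is the CPU, level 1 the cluster */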
+ unsigned int cluster = (mpidr >> 8) & 0xf;
+ unsigned int cpu = mpidr & 0xf;
+
+ mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+ mcpm_cpu_suspend(0); /* 0 should be replaced with better value here */
+ return 1;
+}
+
+/*
+ * bl_enter_powerdown - Programs CPU to enter the specified state
+ * @dev: cpuidle device
+ * @drv: The target state to be programmed
+ * @idx: state index
+ *
+ * Called from the CPUidle framework to program the device to the
+ * specified target state selected by the governor.
+ */
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ struct timespec ts_preidle, ts_postidle, ts_idle;
+ int ret;
+
+ /* Used to keep track of the total time in idle */
+ getnstimeofday(&ts_preidle);
+
+ BUG_ON(!irqs_disabled());
+
+ cpu_pm_enter();
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+
+ ret = cpu_suspend((unsigned long) dev, bl_powerdown_finisher);
+ if (ret)
+ BUG();
+
+ mcpm_cpu_powered_up();
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+
+ cpu_pm_exit();
+
+ getnstimeofday(&ts_postidle);
+ local_irq_enable();
+ ts_idle = timespec_sub(ts_postidle, ts_preidle);
+
+ dev->last_residency = ts_idle.tv_nsec / NSEC_PER_USEC +
+ ts_idle.tv_sec * USEC_PER_SEC;
+ return idx;
+}
+
+/*
+ * bl_idle_init
+ *
+ * Registers the bl specific cpuidle driver with the cpuidle
+ * framework with the valid set of states.
+ */
+int __init bl_idle_init(void)
+{
+ struct cpuidle_device *dev;
+ int i, cpu_id;
+ struct cpuidle_driver *drv = &bl_idle_driver;
+
+ if (!of_find_compatible_node(NULL, NULL, "arm,generic")) {
+ pr_info("%s: No compatible node found\n", __func__);
+ return -ENODEV;
+ }
+
+ drv->state_count = (sizeof(bl_cpuidle_set) /
+ sizeof(struct cpuidle_state));
+
+ for (i = 0; i < drv->state_count; i++) {
+ memcpy(&drv->states[i], &bl_cpuidle_set[i],
+ sizeof(struct cpuidle_state));
+ }
+
+ cpuidle_register_driver(drv);
+
+ for_each_cpu(cpu_id, cpu_online_mask) {
+ pr_err("CPUidle for CPU%d registered\n", cpu_id);
+ dev = &per_cpu(bl_idle_dev, cpu_id);
+ dev->cpu = cpu_id;
+
+ dev->state_count = drv->state_count;
+
+ if (cpuidle_register_device(dev)) {
+ printk(KERN_ERR "%s: Cpuidle register device failed\n",
+ __func__);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+device_initcall(bl_idle_init);
diff --git a/drivers/cpuidle/cpuidle-arm64.c b/drivers/cpuidle/cpuidle-arm64.c
new file mode 100644
index 000000000000..2cfde6ce3086
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-arm64.c
@@ -0,0 +1,159 @@
+/*
+ * ARM64 generic CPU idle driver.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include <asm/psci.h>
+#include <asm/suspend.h>
+
+#include "of_idle_states.h"
+
+typedef int (*suspend_init_fn)(struct cpuidle_driver *,
+ struct device_node *[]);
+
+struct cpu_suspend_ops {
+ const char *id;
+ suspend_init_fn init_fn;
+};
+
+static const struct cpu_suspend_ops suspend_operations[] __initconst = {
+ {"arm,psci", psci_dt_register_idle_states},
+ {}
+};
+
+static __init const struct cpu_suspend_ops *get_suspend_ops(const char *str)
+{
+ int i;
+
+ if (!str)
+ return NULL;
+
+ for (i = 0; suspend_operations[i].id; i++)
+ if (!strcmp(suspend_operations[i].id, str))
+ return &suspend_operations[i];
+
+ return NULL;
+}
+
+/*
+ * arm_enter_idle_state - Programs CPU to enter the specified state
+ *
+ * @dev: cpuidle device
+ * @drv: cpuidle driver
+ * @idx: state index
+ *
+ * Called from the CPUidle framework to program the device to the
+ * specified target state selected by the governor.
+ */
+static int arm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ int ret;
+
+ if (!idx) {
+ cpu_do_idle();
+ return idx;
+ }
+
+ cpu_pm_enter();
+ /*
+ * Pass idle state index to cpu_suspend which in turn will call
+ * the CPU ops suspend protocol with idle index as a parameter.
+ *
+ * Some states would not require context to be saved and flushed
+ * to DRAM, so calling cpu_suspend would not be strictly necessary.
+ * When power domains specifications for ARM CPUs are finalized then
+ * this code can be optimized to prevent saving registers if not
+ * needed.
+ */
+ ret = cpu_suspend(idx);
+
+ cpu_pm_exit();
+
+ return ret ? -1 : idx;
+}
+
+struct cpuidle_driver arm64_idle_driver = {
+ .name = "arm64_idle",
+ .owner = THIS_MODULE,
+};
+
+static struct device_node *state_nodes[CPUIDLE_STATE_MAX] __initdata;
+
+/*
+ * arm64_idle_init
+ *
+ * Registers the arm64 specific cpuidle driver with the cpuidle
+ * framework. It relies on core code to parse the idle states
+ * and initialize them using driver data structures accordingly.
+ */
+static int __init arm64_idle_init(void)
+{
+ int i, ret;
+ const char *entry_method;
+ struct device_node *idle_states_node;
+ const struct cpu_suspend_ops *suspend_init;
+ struct cpuidle_driver *drv = &arm64_idle_driver;
+
+ idle_states_node = of_find_node_by_path("/cpus/idle-states");
+ if (!idle_states_node)
+ return -ENOENT;
+
+ if (of_property_read_string(idle_states_node, "entry-method",
+ &entry_method)) {
+ pr_warn(" * %s missing entry-method property\n",
+ idle_states_node->full_name);
+ of_node_put(idle_states_node);
+ return -EOPNOTSUPP;
+ }
+
+ suspend_init = get_suspend_ops(entry_method);
+ if (!suspend_init) {
+ pr_warn("Missing suspend initializer\n");
+ of_node_put(idle_states_node);
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * State at index 0 is standby wfi and considered standard
+ * on all ARM platforms. If in some platforms simple wfi
+ * can't be used as "state 0", DT bindings must be implemented
+ * to work around this issue and allow installing a special
+ * handler for idle state index 0.
+ */
+ drv->states[0].exit_latency = 1;
+ drv->states[0].target_residency = 1;
+ drv->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
+ strncpy(drv->states[0].name, "ARM WFI", CPUIDLE_NAME_LEN);
+ strncpy(drv->states[0].desc, "ARM WFI", CPUIDLE_DESC_LEN);
+
+ drv->cpumask = (struct cpumask *) cpu_possible_mask;
+ /*
+ * Start at index 1, request idle state nodes to be filled
+ */
+ ret = of_init_idle_driver(drv, state_nodes, 1, true);
+ if (ret)
+ return ret;
+
+ if (suspend_init->init_fn(drv, state_nodes))
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < drv->state_count; i++)
+ drv->states[i].enter = arm_enter_idle_state;
+
+ return cpuidle_register(drv, NULL);
+}
+device_initcall(arm64_idle_init);
diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c
index 223379169cb0..0e6e408c0a63 100644
--- a/drivers/cpuidle/cpuidle-calxeda.c
+++ b/drivers/cpuidle/cpuidle-calxeda.c
@@ -37,20 +37,6 @@
extern void highbank_set_cpu_jump(int cpu, void *jump_addr);
extern void *scu_base_addr;
-static inline unsigned int get_auxcr(void)
-{
- unsigned int val;
- asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val) : : "cc");
- return val;
-}
-
-static inline void set_auxcr(unsigned int val)
-{
- asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR"
- : : "r" (val) : "cc");
- isb();
-}
-
static noinline void calxeda_idle_restore(void)
{
set_cr(get_cr() | CR_C);
diff --git a/drivers/cpuidle/of_idle_states.c b/drivers/cpuidle/of_idle_states.c
new file mode 100644
index 000000000000..eceb1b4c4657
--- /dev/null
+++ b/drivers/cpuidle/of_idle_states.c
@@ -0,0 +1,274 @@
+/*
+ * OF idle states parsing code.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include "of_idle_states.h"
+
+struct state_elem {
+ struct list_head list;
+ struct device_node *node;
+ int val;
+};
+
+static struct list_head head __initdata = LIST_HEAD_INIT(head);
+
+static bool __init state_cpu_valid(struct device_node *state_node,
+ struct device_node *cpu_node)
+{
+ int i = 0;
+ struct device_node *cpu_state;
+
+ while ((cpu_state = of_parse_phandle(cpu_node,
+ "cpu-idle-states", i++))) {
+ if (cpu_state && state_node == cpu_state) {
+ of_node_put(cpu_state);
+ return true;
+ }
+ of_node_put(cpu_state);
+ }
+ return false;
+}
+
+static bool __init state_cpus_valid(const cpumask_t *cpus,
+ struct device_node *state_node)
+{
+ int cpu;
+ struct device_node *cpu_node;
+
+ /*
+ * Check if state is valid on driver cpumask cpus
+ */
+ for_each_cpu(cpu, cpus) {
+ cpu_node = of_get_cpu_node(cpu, NULL);
+
+ if (!cpu_node) {
+ pr_err("Missing device node for CPU %d\n", cpu);
+ return false;
+ }
+
+ if (!state_cpu_valid(state_node, cpu_node))
+ return false;
+ }
+
+ return true;
+}
+
+static int __init state_cmp(void *priv, struct list_head *a,
+ struct list_head *b)
+{
+ struct state_elem *ela, *elb;
+
+ ela = container_of(a, struct state_elem, list);
+ elb = container_of(b, struct state_elem, list);
+
+ return ela->val - elb->val;
+}
+
+static int __init add_state_node(cpumask_t *cpumask,
+ struct device_node *state_node)
+{
+ struct state_elem *el;
+ u32 val;
+
+ pr_debug(" * %s...\n", state_node->full_name);
+
+ if (!state_cpus_valid(cpumask, state_node))
+ return -EINVAL;
+ /*
+ * Parse just the value required to sort the states.
+ */
+ if (of_property_read_u32(state_node, "min-residency-us",
+ &val)) {
+ pr_debug(" * %s missing min-residency-us property\n",
+ state_node->full_name);
+ return -EINVAL;
+ }
+
+ el = kmalloc(sizeof(*el), GFP_KERNEL);
+ if (!el) {
+ pr_err("%s failed to allocate memory\n", __func__);
+ return -ENOMEM;
+ }
+
+ el->node = state_node;
+ el->val = val;
+ list_add_tail(&el->list, &head);
+
+ return 0;
+}
+
+static void __init init_state_node(struct cpuidle_driver *drv,
+ struct device_node *state_node,
+ int *cnt)
+{
+ struct cpuidle_state *idle_state;
+
+ pr_debug(" * %s...\n", state_node->full_name);
+
+ idle_state = &drv->states[*cnt];
+
+ if (of_property_read_u32(state_node, "exit-latency-us",
+ &idle_state->exit_latency)) {
+ pr_debug(" * %s missing exit-latency-us property\n",
+ state_node->full_name);
+ return;
+ }
+
+ if (of_property_read_u32(state_node, "min-residency-us",
+ &idle_state->target_residency)) {
+ pr_debug(" * %s missing min-residency-us property\n",
+ state_node->full_name);
+ return;
+ }
+ /*
+ * It is unknown to the idle driver if and when the tick_device
+ * loses context when the CPU enters the idle states. To solve
+ * this issue the tick device must be linked to a power domain
+ * so that the idle driver can check on which states the device
+ * loses its context. Current code takes the conservative choice
+ * of defining the idle state as one where the tick device always
+ * loses its context. On platforms where tick device never loses
+ * its context (ie it is not a C3STOP device) this turns into
+ * a nop. On platforms where the tick device does lose context in some
+ * states, this code can be optimized, when power domain specifications
+ * for ARM CPUs are finalized.
+ */
+ idle_state->flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP;
+
+ strncpy(idle_state->name, state_node->name, CPUIDLE_NAME_LEN);
+ strncpy(idle_state->desc, state_node->name, CPUIDLE_NAME_LEN);
+
+ (*cnt)++;
+}
+
+static int __init init_idle_states(struct cpuidle_driver *drv,
+ struct device_node *state_nodes[],
+ unsigned int start_idx, bool init_nodes)
+{
+ struct state_elem *el;
+ struct list_head *curr, *tmp;
+ unsigned int cnt = start_idx;
+
+ list_for_each_entry(el, &head, list) {
+ /*
+ * Check if the init function has to fill the
+ * state_nodes array on behalf of the CPUidle driver.
+ */
+ if (init_nodes)
+ state_nodes[cnt] = el->node;
+ /*
+ * cnt is updated on return if a state was added.
+ */
+ init_state_node(drv, el->node, &cnt);
+
+ if (cnt == CPUIDLE_STATE_MAX) {
+ pr_warn("State index reached static CPU idle state limit\n");
+ break;
+ }
+ }
+
+ drv->state_count = cnt;
+
+ list_for_each_safe(curr, tmp, &head) {
+ list_del(curr);
+ kfree(container_of(curr, struct state_elem, list));
+ }
+
+ /*
+ * If no idle states are detected, return an error and let the idle
+ * driver initialization fail accordingly.
+ */
+ return (cnt > start_idx) ? 0 : -ENODATA;
+}
+
+static void __init add_idle_states(struct cpuidle_driver *drv,
+ struct device_node *idle_states)
+{
+ struct device_node *state_node;
+
+ for_each_child_of_node(idle_states, state_node) {
+ if ((!of_device_is_compatible(state_node, "arm,idle-state"))) {
+ pr_warn(" * %s: children of /cpus/idle-states must be \"arm,idle-state\" compatible\n",
+ state_node->full_name);
+ continue;
+ }
+ /*
+ * If memory allocation fails, better bail out.
+ * Initialized nodes are freed at initialization
+ * completion in of_init_idle_driver().
+ */
+ if ((add_state_node(drv->cpumask, state_node) == -ENOMEM))
+ break;
+ }
+ /*
+ * Sort the states list before initializing the CPUidle driver
+ * states array.
+ */
+ list_sort(NULL, &head, state_cmp);
+}
+
+/*
+ * of_init_idle_driver - Parse the DT idle states and initialize the
+ * idle driver states array
+ *
+ * @drv: Pointer to CPU idle driver to be initialized
+ * @state_nodes: Array of struct device_nodes to be initialized if
+ * init_nodes == true. Must be sized CPUIDLE_STATE_MAX
+ * @start_idx: First idle state index to be initialized
+ * @init_nodes: Boolean to request device nodes initialization
+ *
+ * Returns:
+ * 0 on success
+ * <0 on failure
+ *
+ * On success the states array in the cpuidle driver contains
+ * initialized entries in the states array, starting from index start_idx.
+ * If init_nodes == true, on success the state_nodes array is initialized
+ * with idle state DT node pointers, starting from index start_idx,
+ * in a 1:1 relation with the idle driver states array.
+ */
+int __init of_init_idle_driver(struct cpuidle_driver *drv,
+ struct device_node *state_nodes[],
+ unsigned int start_idx, bool init_nodes)
+{
+ struct device_node *idle_states_node;
+ int ret;
+
+ if (start_idx >= CPUIDLE_STATE_MAX) {
+ pr_warn("State index exceeds static CPU idle driver states array size\n");
+ return -EINVAL;
+ }
+
+ if (WARN(init_nodes && !state_nodes,
+ "Requested nodes stashing in an invalid nodes container\n"))
+ return -EINVAL;
+
+ idle_states_node = of_find_node_by_path("/cpus/idle-states");
+ if (!idle_states_node)
+ return -ENOENT;
+
+ add_idle_states(drv, idle_states_node);
+
+ ret = init_idle_states(drv, state_nodes, start_idx, init_nodes);
+
+ of_node_put(idle_states_node);
+
+ return ret;
+}
diff --git a/drivers/cpuidle/of_idle_states.h b/drivers/cpuidle/of_idle_states.h
new file mode 100644
index 000000000000..049f94ff4428
--- /dev/null
+++ b/drivers/cpuidle/of_idle_states.h
@@ -0,0 +1,8 @@
+#ifndef __OF_IDLE_STATES
+#define __OF_IDLE_STATES
+
+int __init of_init_idle_driver(struct cpuidle_driver *drv,
+ struct device_node *state_nodes[],
+ unsigned int start_idx,
+ bool init_nodes);
+#endif
diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index d0233cd18ffa..5985807e52c9 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -87,7 +87,8 @@ static irqreturn_t adc_jack_irq_thread(int irq, void *_data)
{
struct adc_jack_data *data = _data;
- schedule_delayed_work(&data->handler, data->handling_delay);
+ queue_delayed_work(system_power_efficient_wq,
+ &data->handler, data->handling_delay);
return IRQ_HANDLED;
}
diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c
index 02bec32adde4..f874c30ddbff 100644
--- a/drivers/extcon/extcon-gpio.c
+++ b/drivers/extcon/extcon-gpio.c
@@ -56,7 +56,7 @@ static irqreturn_t gpio_irq_handler(int irq, void *dev_id)
{
struct gpio_extcon_data *extcon_data = dev_id;
- schedule_delayed_work(&extcon_data->work,
+ queue_delayed_work(system_power_efficient_wq, &extcon_data->work,
extcon_data->debounce_jiffies);
return IRQ_HANDLED;
}
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index b0fc7c79dfbb..0d32596ad092 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -36,4 +36,11 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE
backend for pstore by default. This setting can be overridden
using the efivars module's pstore_disable parameter.
+config EFI_PARAMS_FROM_FDT
+ bool
+ help
+ Select this config option from the architecture Kconfig if
+ the EFI runtime support gets system table address, memory
+ map address, and other parameters from the device tree.
+
endmenu
diff --git a/drivers/firmware/efi/arm-stub.c b/drivers/firmware/efi/arm-stub.c
new file mode 100644
index 000000000000..41114ce03b01
--- /dev/null
+++ b/drivers/firmware/efi/arm-stub.c
@@ -0,0 +1,278 @@
+/*
+ * EFI stub implementation that is shared by arm and arm64 architectures.
+ * This should be #included by the EFI stub implementation files.
+ *
+ * Copyright (C) 2013,2014 Linaro Limited
+ * Roy Franz <roy.franz@linaro.org>
+ * Copyright (C) 2013 Red Hat, Inc.
+ * Mark Salter <msalter@redhat.com>
+ *
+ * This file is part of the Linux kernel, and is made available under the
+ * terms of the GNU General Public License version 2.
+ *
+ */
+
+static int __init efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
+{
+ static efi_guid_t const var_guid __initconst = EFI_GLOBAL_VARIABLE_GUID;
+ static efi_char16_t const var_name[] __initconst = {
+ 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 };
+
+ efi_get_variable_t *f_getvar = sys_table_arg->runtime->get_variable;
+ unsigned long size = sizeof(u8);
+ efi_status_t status;
+ u8 val;
+
+ status = f_getvar((efi_char16_t *)var_name, (efi_guid_t *)&var_guid,
+ NULL, &size, &val);
+
+ switch (status) {
+ case EFI_SUCCESS:
+ return val;
+ case EFI_NOT_FOUND:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
+ void *__image, void **__fh)
+{
+ efi_file_io_interface_t *io;
+ efi_loaded_image_t *image = __image;
+ efi_file_handle_t *fh;
+ efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
+ efi_status_t status;
+ void *handle = (void *)(unsigned long)image->device_handle;
+
+ status = sys_table_arg->boottime->handle_protocol(handle,
+ &fs_proto, (void **)&io);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table_arg, "Failed to handle fs_proto\n");
+ return status;
+ }
+
+ status = io->open_volume(io, &fh);
+ if (status != EFI_SUCCESS)
+ efi_printk(sys_table_arg, "Failed to open volume\n");
+
+ *__fh = fh;
+ return status;
+}
+static efi_status_t efi_file_close(void *handle)
+{
+ efi_file_handle_t *fh = handle;
+
+ return fh->close(handle);
+}
+
+static efi_status_t
+efi_file_read(void *handle, unsigned long *size, void *addr)
+{
+ efi_file_handle_t *fh = handle;
+
+ return fh->read(handle, size, addr);
+}
+
+
+static efi_status_t
+efi_file_size(efi_system_table_t *sys_table_arg, void *__fh,
+ efi_char16_t *filename_16, void **handle, u64 *file_sz)
+{
+ efi_file_handle_t *h, *fh = __fh;
+ efi_file_info_t *info;
+ efi_status_t status;
+ efi_guid_t info_guid = EFI_FILE_INFO_ID;
+ unsigned long info_sz;
+
+ status = fh->open(fh, &h, filename_16, EFI_FILE_MODE_READ, (u64)0);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table_arg, "Failed to open file: ");
+ efi_char16_printk(sys_table_arg, filename_16);
+ efi_printk(sys_table_arg, "\n");
+ return status;
+ }
+
+ *handle = h;
+
+ info_sz = 0;
+ status = h->get_info(h, &info_guid, &info_sz, NULL);
+ if (status != EFI_BUFFER_TOO_SMALL) {
+ efi_printk(sys_table_arg, "Failed to get file info size\n");
+ return status;
+ }
+
+grow:
+ status = sys_table_arg->boottime->allocate_pool(EFI_LOADER_DATA,
+ info_sz, (void **)&info);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table_arg, "Failed to alloc mem for file info\n");
+ return status;
+ }
+
+ status = h->get_info(h, &info_guid, &info_sz,
+ info);
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ sys_table_arg->boottime->free_pool(info);
+ goto grow;
+ }
+
+ *file_sz = info->file_size;
+ sys_table_arg->boottime->free_pool(info);
+
+ if (status != EFI_SUCCESS)
+ efi_printk(sys_table_arg, "Failed to get initrd info\n");
+
+ return status;
+}
+
+
+
+static void efi_char16_printk(efi_system_table_t *sys_table_arg,
+ efi_char16_t *str)
+{
+ struct efi_simple_text_output_protocol *out;
+
+ out = (struct efi_simple_text_output_protocol *)sys_table_arg->con_out;
+ out->output_string(out, str);
+}
+
+
+/*
+ * This function handles the architecture-specific differences between arm and
+ * arm64 regarding where the kernel image must be loaded and any memory that
+ * must be reserved. On failure it is required to free all
+ * allocations it has made.
+ */
+static efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
+ unsigned long *image_addr,
+ unsigned long *image_size,
+ unsigned long *reserve_addr,
+ unsigned long *reserve_size,
+ unsigned long dram_base,
+ efi_loaded_image_t *image);
+/*
+ * EFI entry point for the arm/arm64 EFI stubs. This is the entrypoint
+ * that is described in the PE/COFF header. Most of the code is the same
+ * for both architectures, with the arch-specific code provided in the
+ * handle_kernel_image() function.
+ */
+unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table,
+ unsigned long *image_addr)
+{
+ efi_loaded_image_t *image;
+ efi_status_t status;
+ unsigned long image_size = 0;
+ unsigned long dram_base;
+ /* address and size pairs for memory management */
+ unsigned long initrd_addr;
+ u64 initrd_size = 0;
+ unsigned long fdt_addr = 0; /* Original DTB */
+ u64 fdt_size = 0; /* We don't get size from configuration table */
+ char *cmdline_ptr = NULL;
+ int cmdline_size = 0;
+ unsigned long new_fdt_addr;
+ efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID;
+ unsigned long reserve_addr = 0;
+ unsigned long reserve_size = 0;
+
+ /* Check if we were booted by the EFI firmware */
+ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ goto fail;
+
+ pr_efi(sys_table, "Booting Linux Kernel...\n");
+
+ /*
+ * Get a handle to the loaded image protocol. This is used to get
+ * information about the running image, such as size and the command
+ * line.
+ */
+ status = sys_table->boottime->handle_protocol(handle,
+ &loaded_image_proto, (void *)&image);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table, "Failed to get loaded image protocol\n");
+ goto fail;
+ }
+
+ dram_base = get_dram_base(sys_table);
+ if (dram_base == EFI_ERROR) {
+ pr_efi_err(sys_table, "Failed to find DRAM base\n");
+ goto fail;
+ }
+ status = handle_kernel_image(sys_table, image_addr, &image_size,
+ &reserve_addr,
+ &reserve_size,
+ dram_base, image);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table, "Failed to relocate kernel\n");
+ goto fail;
+ }
+
+ /*
+ * Get the command line from EFI, using the LOADED_IMAGE
+ * protocol. We are going to copy the command line into the
+ * device tree, so this can be allocated anywhere.
+ */
+ cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size);
+ if (!cmdline_ptr) {
+ pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n");
+ goto fail_free_image;
+ }
+
+ /*
+ * Unauthenticated device tree data is a security hazard, so
+ * ignore 'dtb=' unless UEFI Secure Boot is disabled.
+ */
+ if (efi_secureboot_enabled(sys_table)) {
+ pr_efi(sys_table, "UEFI Secure Boot is enabled.\n");
+ } else {
+ status = handle_cmdline_files(sys_table, image, cmdline_ptr,
+ "dtb=",
+ ~0UL, (unsigned long *)&fdt_addr,
+ (unsigned long *)&fdt_size);
+
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table, "Failed to load device tree!\n");
+ goto fail_free_cmdline;
+ }
+ }
+ if (!fdt_addr)
+ /* Look for a device tree configuration table entry. */
+ fdt_addr = (uintptr_t)get_fdt(sys_table);
+
+ status = handle_cmdline_files(sys_table, image, cmdline_ptr,
+ "initrd=", dram_base + SZ_512M,
+ (unsigned long *)&initrd_addr,
+ (unsigned long *)&initrd_size);
+ if (status != EFI_SUCCESS)
+ pr_efi_err(sys_table, "Failed initrd from command line!\n");
+
+ new_fdt_addr = fdt_addr;
+ status = allocate_new_fdt_and_exit_boot(sys_table, handle,
+ &new_fdt_addr, dram_base + MAX_FDT_OFFSET,
+ initrd_addr, initrd_size, cmdline_ptr,
+ fdt_addr, fdt_size);
+
+ /*
+ * If all went well, we need to return the FDT address to the
+ * calling function so it can be passed to kernel as part of
+ * the kernel boot protocol.
+ */
+ if (status == EFI_SUCCESS)
+ return new_fdt_addr;
+
+ pr_efi_err(sys_table, "Failed to update FDT and exit boot services\n");
+
+ efi_free(sys_table, initrd_size, initrd_addr);
+ efi_free(sys_table, fdt_size, fdt_addr);
+
+fail_free_cmdline:
+ efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
+
+fail_free_image:
+ efi_free(sys_table, image_size, *image_addr);
+ efi_free(sys_table, reserve_size, reserve_addr);
+fail:
+ return EFI_ERROR;
+}
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 202d2c85ba2e..bf8dd3d5bee7 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
static int efi_pstore_open(struct pstore_info *psi)
{
- efivar_entry_iter_begin();
psi->data = NULL;
return 0;
}
static int efi_pstore_close(struct pstore_info *psi)
{
- efivar_entry_iter_end();
psi->data = NULL;
return 0;
}
@@ -78,19 +76,124 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
__efivar_entry_get(entry, &entry->var.Attributes,
&entry->var.DataSize, entry->var.Data);
size = entry->var.DataSize;
+ memcpy(*cb_data->buf, entry->var.Data,
+ (size_t)min_t(unsigned long, EFIVARS_DATA_SIZE_MAX, size));
- *cb_data->buf = kmalloc(size, GFP_KERNEL);
- if (*cb_data->buf == NULL)
- return -ENOMEM;
- memcpy(*cb_data->buf, entry->var.Data, size);
return size;
}
+/**
+ * efi_pstore_scan_sysfs_enter
+ * @pos: scanning entry
+ * @next: next entry
+ * @head: list head
+ */
+static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
+ struct efivar_entry *next,
+ struct list_head *head)
+{
+ pos->scanning = true;
+ if (&next->list != head)
+ next->scanning = true;
+}
+
+/**
+ * __efi_pstore_scan_sysfs_exit
+ * @entry: deleting entry
+ * @turn_off_scanning: true if the scanning flag should be cleared
+ */
+static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
+ bool turn_off_scanning)
+{
+ if (entry->deleting) {
+ list_del(&entry->list);
+ efivar_entry_iter_end();
+ efivar_unregister(entry);
+ efivar_entry_iter_begin();
+ } else if (turn_off_scanning)
+ entry->scanning = false;
+}
+
+/**
+ * efi_pstore_scan_sysfs_exit
+ * @pos: scanning entry
+ * @next: next entry
+ * @head: list head
+ * @stop: true if scanning is about to stop
+ */
+static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
+ struct efivar_entry *next,
+ struct list_head *head, bool stop)
+{
+ __efi_pstore_scan_sysfs_exit(pos, true);
+ if (stop)
+ __efi_pstore_scan_sysfs_exit(next, &next->list != head);
+}
+
+/**
+ * efi_pstore_sysfs_entry_iter
+ *
+ * @data: function-specific data to pass to callback
+ * @pos: entry to begin iterating from
+ *
+ * You MUST call efivar_entry_iter_begin() before this function, and
+ * efivar_entry_iter_end() afterwards.
+ *
+ * It is possible to begin iteration from an arbitrary entry within
+ * the list by passing @pos. @pos is updated on return to point to
+ * the next entry of the last one passed to efi_pstore_read_func().
+ * To begin iterating from the beginning of the list @pos must be %NULL.
+ */
+static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
+{
+ struct efivar_entry *entry, *n;
+ struct list_head *head = &efivar_sysfs_list;
+ int size = 0;
+
+ if (!*pos) {
+ list_for_each_entry_safe(entry, n, head, list) {
+ efi_pstore_scan_sysfs_enter(entry, n, head);
+
+ size = efi_pstore_read_func(entry, data);
+ efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
+ if (size)
+ break;
+ }
+ *pos = n;
+ return size;
+ }
+
+ list_for_each_entry_safe_from((*pos), n, head, list) {
+ efi_pstore_scan_sysfs_enter((*pos), n, head);
+
+ size = efi_pstore_read_func((*pos), data);
+ efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+ if (size)
+ break;
+ }
+ *pos = n;
+ return size;
+}
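The locking contract in the comment above is easy to miss; a minimal calling sketch follows (this is exactly the pattern efi_pstore_read() below uses):

    /* Iterate all sysfs-visible variables under the iterator lock. */
    efivar_entry_iter_begin();
    size = efi_pstore_sysfs_entry_iter(&data,
                                       (struct efivar_entry **)&psi->data);
    efivar_entry_iter_end();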
+
+/**
+ * efi_pstore_read
+ *
+ * This function returns the size of an NVRAM entry logged via
+ * efi_pstore_write(). The meaning and behavior of efi_pstore/pstore
+ * are as follows:
+ *
+ * size > 0: Got the data of an entry logged via efi_pstore_write()
+ * successfully, and the pstore filesystem will continue reading
+ * subsequent entries.
+ * size == 0: The entry was not logged via efi_pstore_write(), and the
+ * efi_pstore driver will continue reading subsequent entries.
+ * size < 0: Failed to get the data of an entry logged via
+ * efi_pstore_write(), and pstore will stop reading entries.
+ */
static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
int *count, struct timespec *timespec,
char **buf, struct pstore_info *psi)
{
struct pstore_read_data data;
+ ssize_t size;
data.id = id;
data.type = type;
@@ -98,8 +201,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
data.timespec = timespec;
data.buf = buf;
- return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
- (struct efivar_entry **)&psi->data);
+ *data.buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
+ if (!*data.buf)
+ return -ENOMEM;
+
+ efivar_entry_iter_begin();
+ size = efi_pstore_sysfs_entry_iter(&data,
+ (struct efivar_entry **)&psi->data);
+ efivar_entry_iter_end();
+ if (size <= 0)
+ kfree(*data.buf);
+ return size;
}
static int efi_pstore_write(enum pstore_type_id type,
@@ -170,9 +282,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
return 0;
}
+ if (entry->scanning) {
+ /*
+ * Skip deletion because this entry will be deleted
+ * after scanning is completed.
+ */
+ entry->deleting = true;
+ } else
+ list_del(&entry->list);
+
/* found */
__efivar_entry_delete(entry);
- list_del(&entry->list);
return 1;
}
@@ -200,10 +320,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
efivar_entry_iter_begin();
found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
- efivar_entry_iter_end();
- if (found)
+ if (found && !entry->scanning) {
+ efivar_entry_iter_end();
efivar_unregister(entry);
+ } else
+ efivar_entry_iter_end();
return 0;
}
@@ -236,7 +358,11 @@ static __init int efivars_pstore_init(void)
efi_pstore_info.bufsize = 1024;
spin_lock_init(&efi_pstore_info.buf_lock);
- pstore_register(&efi_pstore_info);
+ if (pstore_register(&efi_pstore_info)) {
+ kfree(efi_pstore_info.buf);
+ efi_pstore_info.buf = NULL;
+ efi_pstore_info.bufsize = 0;
+ }
return 0;
}
diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c
new file mode 100644
index 000000000000..eb6d4be9e722
--- /dev/null
+++ b/drivers/firmware/efi/efi-stub-helper.c
@@ -0,0 +1,634 @@
+/*
+ * Helper functions used by the EFI stub on multiple
+ * architectures. This should be #included by the EFI stub
+ * implementation files.
+ *
+ * Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ * This file is part of the Linux kernel, and is made available
+ * under the terms of the GNU General Public License version 2.
+ *
+ */
+#define EFI_READ_CHUNK_SIZE (1024 * 1024)
+
+/* error code which can't be mistaken for valid address */
+#define EFI_ERROR (~0UL)
+
+
+struct file_info {
+ efi_file_handle_t *handle;
+ u64 size;
+};
+
+static void efi_printk(efi_system_table_t *sys_table_arg, char *str)
+{
+ char *s8;
+
+ for (s8 = str; *s8; s8++) {
+ efi_char16_t ch[2] = { 0 };
+
+ ch[0] = *s8;
+ if (*s8 == '\n') {
+ efi_char16_t nl[2] = { '\r', 0 };
+ efi_char16_printk(sys_table_arg, nl);
+ }
+
+ efi_char16_printk(sys_table_arg, ch);
+ }
+}
+
+#define pr_efi(sys_table, msg) efi_printk(sys_table, "EFI stub: "msg)
+#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg)
+
+
+static efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
+ efi_memory_desc_t **map,
+ unsigned long *map_size,
+ unsigned long *desc_size,
+ u32 *desc_ver,
+ unsigned long *key_ptr)
+{
+ efi_memory_desc_t *m = NULL;
+ efi_status_t status;
+ unsigned long key;
+ u32 desc_version;
+
+ *map_size = sizeof(*m) * 32;
+again:
+ /*
+ * Add an additional efi_memory_desc_t because we're doing an
+ * allocation which may be in a new descriptor region.
+ */
+ *map_size += sizeof(*m);
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ *map_size, (void **)&m);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ *desc_size = 0;
+ key = 0;
+ status = efi_call_early(get_memory_map, map_size, m,
+ &key, desc_size, &desc_version);
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ efi_call_early(free_pool, m);
+ goto again;
+ }
+
+ if (status != EFI_SUCCESS)
+ efi_call_early(free_pool, m);
+
+ if (key_ptr && status == EFI_SUCCESS)
+ *key_ptr = key;
+ if (desc_ver && status == EFI_SUCCESS)
+ *desc_ver = desc_version;
+
+fail:
+ *map = m;
+ return status;
+}
+
+
+static unsigned long __init get_dram_base(efi_system_table_t *sys_table_arg)
+{
+ efi_status_t status;
+ unsigned long map_size;
+ unsigned long membase = EFI_ERROR;
+ struct efi_memory_map map;
+ efi_memory_desc_t *md;
+
+ status = efi_get_memory_map(sys_table_arg, (efi_memory_desc_t **)&map.map,
+ &map_size, &map.desc_size, NULL, NULL);
+ if (status != EFI_SUCCESS)
+ return membase;
+
+ map.map_end = map.map + map_size;
+
+ for_each_efi_memory_desc(&map, md)
+ if (md->attribute & EFI_MEMORY_WB)
+ if (membase > md->phys_addr)
+ membase = md->phys_addr;
+
+ efi_call_early(free_pool, map.map);
+
+ return membase;
+}
+
+/*
+ * Allocate at the highest possible address that is not above 'max'.
+ */
+static efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg,
+ unsigned long size, unsigned long align,
+ unsigned long *addr, unsigned long max)
+{
+ unsigned long map_size, desc_size;
+ efi_memory_desc_t *map;
+ efi_status_t status;
+ unsigned long nr_pages;
+ u64 max_addr = 0;
+ int i;
+
+ status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size,
+ NULL, NULL);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ /*
+ * Enforce minimum alignment that EFI requires when requesting
+ * a specific address. We are doing page-based allocations,
+ * so we must be aligned to a page.
+ */
+ if (align < EFI_PAGE_SIZE)
+ align = EFI_PAGE_SIZE;
+
+ nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+again:
+ for (i = 0; i < map_size / desc_size; i++) {
+ efi_memory_desc_t *desc;
+ unsigned long m = (unsigned long)map;
+ u64 start, end;
+
+ desc = (efi_memory_desc_t *)(m + (i * desc_size));
+ if (desc->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (desc->num_pages < nr_pages)
+ continue;
+
+ start = desc->phys_addr;
+ end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+ if ((start + size) > end || (start + size) > max)
+ continue;
+
+ if (end - size > max)
+ end = max;
+
+ if (round_down(end - size, align) < start)
+ continue;
+
+ start = round_down(end - size, align);
+
+ /*
+ * Don't allocate at 0x0. It will confuse code that
+ * checks pointers against NULL.
+ */
+ if (start == 0x0)
+ continue;
+
+ if (start > max_addr)
+ max_addr = start;
+ }
+
+ if (!max_addr)
+ status = EFI_NOT_FOUND;
+ else {
+ status = efi_call_early(allocate_pages,
+ EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+ nr_pages, &max_addr);
+ if (status != EFI_SUCCESS) {
+ max = max_addr;
+ max_addr = 0;
+ goto again;
+ }
+
+ *addr = max_addr;
+ }
+
+ efi_call_early(free_pool, map);
+fail:
+ return status;
+}
+
+/*
+ * Allocate at the lowest possible address.
+ */
+static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
+ unsigned long size, unsigned long align,
+ unsigned long *addr)
+{
+ unsigned long map_size, desc_size;
+ efi_memory_desc_t *map;
+ efi_status_t status;
+ unsigned long nr_pages;
+ int i;
+
+ status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size,
+ NULL, NULL);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ /*
+ * Enforce minimum alignment that EFI requires when requesting
+ * a specific address. We are doing page-based allocations,
+ * so we must be aligned to a page.
+ */
+ if (align < EFI_PAGE_SIZE)
+ align = EFI_PAGE_SIZE;
+
+ nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ for (i = 0; i < map_size / desc_size; i++) {
+ efi_memory_desc_t *desc;
+ unsigned long m = (unsigned long)map;
+ u64 start, end;
+
+ desc = (efi_memory_desc_t *)(m + (i * desc_size));
+
+ if (desc->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (desc->num_pages < nr_pages)
+ continue;
+
+ start = desc->phys_addr;
+ end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
+
+ /*
+ * Don't allocate at 0x0. It will confuse code that
+ * checks pointers against NULL. Skip the first 8
+ * bytes so we start at a nice even number.
+ */
+ if (start == 0x0)
+ start += 8;
+
+ start = round_up(start, align);
+ if ((start + size) > end)
+ continue;
+
+ status = efi_call_early(allocate_pages,
+ EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+ nr_pages, &start);
+ if (status == EFI_SUCCESS) {
+ *addr = start;
+ break;
+ }
+ }
+
+ if (i == map_size / desc_size)
+ status = EFI_NOT_FOUND;
+
+ efi_call_early(free_pool, map);
+fail:
+ return status;
+}
+
+static void efi_free(efi_system_table_t *sys_table_arg, unsigned long size,
+ unsigned long addr)
+{
+ unsigned long nr_pages;
+
+ if (!size)
+ return;
+
+ nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ efi_call_early(free_pages, addr, nr_pages);
+}
+
+
+/*
+ * Check the cmdline for LILO-style file= arguments.
+ *
+ * We only support loading a file from the same filesystem as
+ * the kernel image.
+ */
+static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
+ efi_loaded_image_t *image,
+ char *cmd_line, char *option_string,
+ unsigned long max_addr,
+ unsigned long *load_addr,
+ unsigned long *load_size)
+{
+ struct file_info *files;
+ unsigned long file_addr;
+ u64 file_size_total;
+ efi_file_handle_t *fh = NULL;
+ efi_status_t status;
+ int nr_files;
+ char *str;
+ int i, j, k;
+
+ file_addr = 0;
+ file_size_total = 0;
+
+ str = cmd_line;
+
+ j = 0; /* See close_handles */
+
+ if (!load_addr || !load_size)
+ return EFI_INVALID_PARAMETER;
+
+ *load_addr = 0;
+ *load_size = 0;
+
+ if (!str || !*str)
+ return EFI_SUCCESS;
+
+ for (nr_files = 0; *str; nr_files++) {
+ str = strstr(str, option_string);
+ if (!str)
+ break;
+
+ str += strlen(option_string);
+
+ /* Skip any leading slashes */
+ while (*str == '/' || *str == '\\')
+ str++;
+
+ while (*str && *str != ' ' && *str != '\n')
+ str++;
+ }
+
+ if (!nr_files)
+ return EFI_SUCCESS;
+
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ nr_files * sizeof(*files), (void **)&files);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table_arg, "Failed to alloc mem for file handle list\n");
+ goto fail;
+ }
+
+ str = cmd_line;
+ for (i = 0; i < nr_files; i++) {
+ struct file_info *file;
+ efi_char16_t filename_16[256];
+ efi_char16_t *p;
+
+ str = strstr(str, option_string);
+ if (!str)
+ break;
+
+ str += strlen(option_string);
+
+ file = &files[i];
+ p = filename_16;
+
+ /* Skip any leading slashes */
+ while (*str == '/' || *str == '\\')
+ str++;
+
+ while (*str && *str != ' ' && *str != '\n') {
+ if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16))
+ break;
+
+ if (*str == '/') {
+ *p++ = '\\';
+ str++;
+ } else {
+ *p++ = *str++;
+ }
+ }
+
+ *p = '\0';
+
+ /* Only open the volume once. */
+ if (!i) {
+ status = efi_open_volume(sys_table_arg, image,
+ (void **)&fh);
+ if (status != EFI_SUCCESS)
+ goto free_files;
+ }
+
+ status = efi_file_size(sys_table_arg, fh, filename_16,
+ (void **)&file->handle, &file->size);
+ if (status != EFI_SUCCESS)
+ goto close_handles;
+
+ file_size_total += file->size;
+ }
+
+ if (file_size_total) {
+ unsigned long addr;
+
+ /*
+ * Multiple files need to be at consecutive addresses in memory,
+ * so allocate enough memory for all of them up front.
+ */
+ status = efi_high_alloc(sys_table_arg, file_size_total, 0x1000,
+ &file_addr, max_addr);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table_arg, "Failed to alloc highmem for files\n");
+ goto close_handles;
+ }
+
+ /* We've run out of free low memory. */
+ if (file_addr > max_addr) {
+ pr_efi_err(sys_table_arg, "We've run out of free low memory\n");
+ status = EFI_INVALID_PARAMETER;
+ goto free_file_total;
+ }
+
+ addr = file_addr;
+ for (j = 0; j < nr_files; j++) {
+ unsigned long size;
+
+ size = files[j].size;
+ while (size) {
+ unsigned long chunksize;
+ if (size > EFI_READ_CHUNK_SIZE)
+ chunksize = EFI_READ_CHUNK_SIZE;
+ else
+ chunksize = size;
+
+ status = efi_file_read(files[j].handle,
+ &chunksize,
+ (void *)addr);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table_arg, "Failed to read file\n");
+ goto free_file_total;
+ }
+ addr += chunksize;
+ size -= chunksize;
+ }
+
+ efi_file_close(files[j].handle);
+ }
+
+ }
+
+ efi_call_early(free_pool, files);
+
+ *load_addr = file_addr;
+ *load_size = file_size_total;
+
+ return status;
+
+free_file_total:
+ efi_free(sys_table_arg, file_size_total, file_addr);
+
+close_handles:
+ for (k = j; k < i; k++)
+ efi_file_close(files[k].handle);
+free_files:
+ efi_call_early(free_pool, files);
+fail:
+ *load_addr = 0;
+ *load_size = 0;
+
+ return status;
+}
+/*
+ * Relocate a kernel image, either compressed or uncompressed.
+ * In the ARM64 case, all kernel images are currently
+ * uncompressed, and as such when we relocate it we need to
+ * allocate additional space for the BSS segment. Any low
+ * memory that this function should avoid needs to be
+ * unavailable in the EFI memory map, as if the preferred
+ * address is not available the lowest available address will
+ * be used.
+ */
+static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
+ unsigned long *image_addr,
+ unsigned long image_size,
+ unsigned long alloc_size,
+ unsigned long preferred_addr,
+ unsigned long alignment)
+{
+ unsigned long cur_image_addr;
+ unsigned long new_addr = 0;
+ efi_status_t status;
+ unsigned long nr_pages;
+ efi_physical_addr_t efi_addr = preferred_addr;
+
+ if (!image_addr || !image_size || !alloc_size)
+ return EFI_INVALID_PARAMETER;
+ if (alloc_size < image_size)
+ return EFI_INVALID_PARAMETER;
+
+ cur_image_addr = *image_addr;
+
+ /*
+ * The EFI firmware loader could have placed the kernel image
+ * anywhere in memory, but the kernel has restrictions on the
+ * max physical address it can run at. Some architectures
+ * also have a preferred address, so first try to relocate
+ * to the preferred address. If that fails, allocate as low
+ * as possible while respecting the required alignment.
+ */
+ nr_pages = round_up(alloc_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ status = efi_call_early(allocate_pages,
+ EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
+ nr_pages, &efi_addr);
+ new_addr = efi_addr;
+ /*
+ * If the preferred address allocation failed, allocate as low as
+ * possible.
+ */
+ if (status != EFI_SUCCESS) {
+ status = efi_low_alloc(sys_table_arg, alloc_size, alignment,
+ &new_addr);
+ }
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n");
+ return status;
+ }
+
+ /*
+ * We know source/dest won't overlap since both memory ranges
+ * have been allocated by UEFI, so we can safely use memcpy.
+ */
+ memcpy((void *)new_addr, (void *)cur_image_addr, image_size);
+
+ /* Return the new address of the relocated image. */
+ *image_addr = new_addr;
+
+ return status;
+}
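A hedged caller sketch; the preferred-address arithmetic below is an assumption standing in for the real arm/arm64 handle_kernel_image() implementations:

    /* Sketch: move the image to dram_base + 2 MiB, with 1 MiB of
     * extra allocation for BSS and 2 MiB alignment. */
    status = efi_relocate_kernel(sys_table, image_addr, *image_size,
                                 *image_size + SZ_1M,   /* alloc_size */
                                 dram_base + SZ_2M,     /* preferred address */
                                 SZ_2M);                /* alignment */
    if (status != EFI_SUCCESS)
            return status;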
+
+/*
+ * Get the number of UTF-8 bytes corresponding to a UTF-16 character.
+ * This overestimates for surrogates, but that is okay.
+ */
+static int efi_utf8_bytes(u16 c)
+{
+ return 1 + (c >= 0x80) + (c >= 0x800);
+}
+
+/*
+ * Convert a UTF-16 string, not necessarily null-terminated, to UTF-8.
+ */
+static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
+{
+ unsigned int c;
+
+ while (n--) {
+ c = *src++;
+ if (n && c >= 0xd800 && c <= 0xdbff &&
+ *src >= 0xdc00 && *src <= 0xdfff) {
+ c = 0x10000 + ((c & 0x3ff) << 10) + (*src & 0x3ff);
+ src++;
+ n--;
+ }
+ if (c >= 0xd800 && c <= 0xdfff)
+ c = 0xfffd; /* Unmatched surrogate */
+ if (c < 0x80) {
+ *dst++ = c;
+ continue;
+ }
+ if (c < 0x800) {
+ *dst++ = 0xc0 + (c >> 6);
+ goto t1;
+ }
+ if (c < 0x10000) {
+ *dst++ = 0xe0 + (c >> 12);
+ goto t2;
+ }
+ *dst++ = 0xf0 + (c >> 18);
+ *dst++ = 0x80 + ((c >> 12) & 0x3f);
+ t2:
+ *dst++ = 0x80 + ((c >> 6) & 0x3f);
+ t1:
+ *dst++ = 0x80 + (c & 0x3f);
+ }
+
+ return dst;
+}
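The two helpers pair up: efi_utf8_bytes() sizes the output and efi_utf16_to_utf8() fills it. A small worked sketch with a made-up input string:

    /* 'h' and 'i' take 1 UTF-8 byte each, U+20AC (euro sign) takes 3,
     * so 5 payload bytes plus a NUL terminator: bytes ends up as 6. */
    const u16 src[] = { 'h', 'i', 0x20ac };
    u8 buf[8], *end;
    int i, bytes = 1;                       /* reserve the NUL */

    for (i = 0; i < 3; i++)
            bytes += efi_utf8_bytes(src[i]);

    end = efi_utf16_to_utf8(buf, src, 3);
    *end = '\0';

efi_convert_cmdline() below applies this same two-pass pattern to the UEFI load options.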
+
+/*
+ * Convert the Unicode UEFI command line to ASCII to pass to the kernel.
+ * The size of the memory allocated is returned in *cmd_line_len.
+ * Returns NULL on error.
+ */
+static char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
+ efi_loaded_image_t *image,
+ int *cmd_line_len)
+{
+ const u16 *s2;
+ u8 *s1 = NULL;
+ unsigned long cmdline_addr = 0;
+ int load_options_chars = image->load_options_size / 2; /* UTF-16 */
+ const u16 *options = image->load_options;
+ int options_bytes = 0; /* UTF-8 bytes */
+ int options_chars = 0; /* UTF-16 chars */
+ efi_status_t status;
+ u16 zero = 0;
+
+ if (options) {
+ s2 = options;
+ while (*s2 && *s2 != '\n'
+ && options_chars < load_options_chars) {
+ options_bytes += efi_utf8_bytes(*s2++);
+ options_chars++;
+ }
+ }
+
+ if (!options_chars) {
+ /* No command line options, so return an empty string */
+ options = &zero;
+ }
+
+ options_bytes++; /* NUL termination */
+
+ status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr);
+ if (status != EFI_SUCCESS)
+ return NULL;
+
+ s1 = (u8 *)cmdline_addr;
+ s2 = (const u16 *)options;
+
+ s1 = efi_utf16_to_utf8(s1, s2, options_chars);
+ *s1 = '\0';
+
+ *cmd_line_len = options_bytes;
+ return (char *)cmdline_addr;
+}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 5145fa344ad5..87f2890974a4 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -13,11 +13,29 @@
* This file is released under the GPLv2.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/efi.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/io.h>
+
+struct efi __read_mostly efi = {
+ .mps = EFI_INVALID_TABLE_ADDR,
+ .acpi = EFI_INVALID_TABLE_ADDR,
+ .acpi20 = EFI_INVALID_TABLE_ADDR,
+ .smbios = EFI_INVALID_TABLE_ADDR,
+ .sal_systab = EFI_INVALID_TABLE_ADDR,
+ .boot_info = EFI_INVALID_TABLE_ADDR,
+ .hcdp = EFI_INVALID_TABLE_ADDR,
+ .uga = EFI_INVALID_TABLE_ADDR,
+ .uv_systab = EFI_INVALID_TABLE_ADDR,
+};
+EXPORT_SYMBOL(efi);
static struct kobject *efi_kobj;
static struct kobject *efivars_kobj;
@@ -132,3 +150,212 @@ err_put:
}
subsys_initcall(efisubsys_init);
+
+
+/*
+ * We can't ioremap data in EFI boot services RAM, because we've already mapped
+ * it as RAM. So, look it up in the existing EFI memory map instead. Only
+ * callable after efi_enter_virtual_mode and before efi_free_boot_services.
+ */
+void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
+{
+ struct efi_memory_map *map;
+ void *p;
+ map = efi.memmap;
+ if (!map)
+ return NULL;
+ if (WARN_ON(!map->map))
+ return NULL;
+ for (p = map->map; p < map->map_end; p += map->desc_size) {
+ efi_memory_desc_t *md = p;
+ u64 size = md->num_pages << EFI_PAGE_SHIFT;
+ u64 end = md->phys_addr + size;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+ md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+ continue;
+ if (!md->virt_addr)
+ continue;
+ if (phys_addr >= md->phys_addr && phys_addr < end) {
+ phys_addr += md->virt_addr - md->phys_addr;
+ return (__force void __iomem *)(unsigned long)phys_addr;
+ }
+ }
+ return NULL;
+}
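A hedged usage sketch, assuming the call happens in the window the comment describes (after efi_enter_virtual_mode() and before efi_free_boot_services()):

    /* Find the virtual alias of a table living in boot services
     * memory, e.g. the SMBIOS entry point recorded in struct efi. */
    void __iomem *smbios = efi_lookup_mapped_addr(efi.smbios);

    if (!smbios)
            pr_err("SMBIOS table is not covered by the EFI memory map\n");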
+
+static __initdata efi_config_table_type_t common_tables[] = {
+ {ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
+ {ACPI_TABLE_GUID, "ACPI", &efi.acpi},
+ {HCDP_TABLE_GUID, "HCDP", &efi.hcdp},
+ {MPS_TABLE_GUID, "MPS", &efi.mps},
+ {SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab},
+ {SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios},
+ {UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
+ {NULL_GUID, NULL, NULL},
+};
+
+static __init int match_config_table(efi_guid_t *guid,
+ unsigned long table,
+ efi_config_table_type_t *table_types)
+{
+ u8 str[EFI_VARIABLE_GUID_LEN + 1];
+ int i;
+
+ if (table_types) {
+ efi_guid_unparse(guid, str);
+
+ for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
+ efi_guid_unparse(&table_types[i].guid, str);
+
+ if (!efi_guidcmp(*guid, table_types[i].guid)) {
+ *(table_types[i].ptr) = table;
+ pr_cont(" %s=0x%lx ",
+ table_types[i].name, table);
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int __init efi_config_init(efi_config_table_type_t *arch_tables)
+{
+ void *config_tables, *tablep;
+ int i, sz;
+
+ if (efi_enabled(EFI_64BIT))
+ sz = sizeof(efi_config_table_64_t);
+ else
+ sz = sizeof(efi_config_table_32_t);
+
+ /*
+ * Let's see what config tables the firmware passed to us.
+ */
+ config_tables = early_memremap(efi.systab->tables,
+ efi.systab->nr_tables * sz);
+ if (config_tables == NULL) {
+ pr_err("Could not map Configuration table!\n");
+ return -ENOMEM;
+ }
+
+ tablep = config_tables;
+ pr_info("");
+ for (i = 0; i < efi.systab->nr_tables; i++) {
+ efi_guid_t guid;
+ unsigned long table;
+
+ if (efi_enabled(EFI_64BIT)) {
+ u64 table64;
+ guid = ((efi_config_table_64_t *)tablep)->guid;
+ table64 = ((efi_config_table_64_t *)tablep)->table;
+ table = table64;
+#ifndef CONFIG_64BIT
+ if (table64 >> 32) {
+ pr_cont("\n");
+ pr_err("Table located above 4GB, disabling EFI.\n");
+ early_iounmap(config_tables,
+ efi.systab->nr_tables * sz);
+ return -EINVAL;
+ }
+#endif
+ } else {
+ guid = ((efi_config_table_32_t *)tablep)->guid;
+ table = ((efi_config_table_32_t *)tablep)->table;
+ }
+
+ if (!match_config_table(&guid, table, common_tables))
+ match_config_table(&guid, table, arch_tables);
+
+ tablep += sz;
+ }
+ pr_cont("\n");
+ early_iounmap(config_tables, efi.systab->nr_tables * sz);
+ return 0;
+}
+
+#ifdef CONFIG_EFI_PARAMS_FROM_FDT
+
+#define UEFI_PARAM(name, prop, field) \
+ { \
+ { name }, \
+ { prop }, \
+ offsetof(struct efi_fdt_params, field), \
+ FIELD_SIZEOF(struct efi_fdt_params, field) \
+ }
+
+static __initdata struct {
+ const char name[32];
+ const char propname[32];
+ int offset;
+ int size;
+} dt_params[] = {
+ UEFI_PARAM("System Table", "linux,uefi-system-table", system_table),
+ UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap),
+ UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size),
+ UEFI_PARAM("MemMap Desc. Size", "linux,uefi-mmap-desc-size", desc_size),
+ UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver)
+};
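For readability, the first table entry above expands, roughly, to the following (field layout taken from the UEFI_PARAM() definition; struct efi_fdt_params is assumed to carry a u64 system_table member):

    /* UEFI_PARAM("System Table", "linux,uefi-system-table", system_table)
     * after macro expansion, approximately: */
    {
            { "System Table" },
            { "linux,uefi-system-table" },
            offsetof(struct efi_fdt_params, system_table),
            FIELD_SIZEOF(struct efi_fdt_params, system_table)
    },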
+
+struct param_info {
+ int verbose;
+ int found;
+ void *params;
+};
+
+static int __init fdt_find_uefi_params(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ struct param_info *info = data;
+ const void *prop;
+ void *dest;
+ u64 val;
+ int i, len;
+
+ if (depth != 1 ||
+ (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(dt_params); i++) {
+ prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len);
+ if (!prop)
+ return 0;
+ dest = info->params + dt_params[i].offset;
+ info->found++;
+
+ val = of_read_number(prop, len / sizeof(u32));
+
+ if (dt_params[i].size == sizeof(u32))
+ *(u32 *)dest = val;
+ else
+ *(u64 *)dest = val;
+
+ if (info->verbose)
+ pr_info(" %s: 0x%0*llx\n", dt_params[i].name,
+ dt_params[i].size * 2, val);
+ }
+ return 1;
+}
+
+int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose)
+{
+ struct param_info info;
+ int ret;
+
+ pr_info("Getting EFI parameters from FDT:\n");
+
+ info.verbose = verbose;
+ info.found = 0;
+ info.params = params;
+
+ ret = of_scan_flat_dt(fdt_find_uefi_params, &info);
+ if (!info.found)
+ pr_info("UEFI not found.\n");
+ else if (!ret)
+ pr_err("Can't find '%s' in device tree!\n",
+ dt_params[info.found].name);
+
+ return ret;
+}
+#endif /* CONFIG_EFI_PARAMS_FROM_FDT */
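A minimal caller sketch; the system_table field name is implied by the UEFI_PARAM() table above, and the caller shown here is hypothetical:

    /* Probe the flattened device tree for UEFI boot parameters. */
    struct efi_fdt_params params;

    if (efi_get_fdt_params(&params, 1))    /* 1 == verbose */
            pr_info("UEFI system table at 0x%llx\n",
                    (unsigned long long)params.system_table);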
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 8bd1bb6dbe47..463c56545ae8 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -69,6 +69,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/ucs2_string.h>
+#include <linux/compat.h>
#define EFIVARS_VERSION "0.08"
#define EFIVARS_DATE "2004-May-17"
@@ -86,6 +87,15 @@ static struct kset *efivars_kset;
static struct bin_attribute *efivars_new_var;
static struct bin_attribute *efivars_del_var;
+struct compat_efi_variable {
+ efi_char16_t VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)];
+ efi_guid_t VendorGuid;
+ __u32 DataSize;
+ __u8 Data[1024];
+ __u32 Status;
+ __u32 Attributes;
+} __packed;
+
struct efivar_attribute {
struct attribute attr;
ssize_t (*show) (struct efivar_entry *entry, char *buf);
@@ -189,45 +199,107 @@ efivar_data_read(struct efivar_entry *entry, char *buf)
memcpy(buf, var->Data, var->DataSize);
return var->DataSize;
}
-/*
- * We allow each variable to be edited via rewriting the
- * entire efi variable structure.
- */
-static ssize_t
-efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
-{
- struct efi_variable *new_var, *var = &entry->var;
- int err;
-
- if (count != sizeof(struct efi_variable))
- return -EINVAL;
- new_var = (struct efi_variable *)buf;
+static inline int
+sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor,
+ unsigned long size, u32 attributes, u8 *data)
+{
/*
* If only updating the variable data, then the name
* and guid should remain the same
*/
- if (memcmp(new_var->VariableName, var->VariableName, sizeof(var->VariableName)) ||
- efi_guidcmp(new_var->VendorGuid, var->VendorGuid)) {
+ if (memcmp(name, var->VariableName, sizeof(var->VariableName)) ||
+ efi_guidcmp(vendor, var->VendorGuid)) {
printk(KERN_ERR "efivars: Cannot edit the wrong variable!\n");
return -EINVAL;
}
- if ((new_var->DataSize <= 0) || (new_var->Attributes == 0)){
+ if ((size <= 0) || (attributes == 0)){
printk(KERN_ERR "efivars: DataSize & Attributes must be valid!\n");
return -EINVAL;
}
- if ((new_var->Attributes & ~EFI_VARIABLE_MASK) != 0 ||
- efivar_validate(new_var, new_var->Data, new_var->DataSize) == false) {
+ if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
+ efivar_validate(name, data, size) == false) {
printk(KERN_ERR "efivars: Malformed variable content\n");
return -EINVAL;
}
- memcpy(&entry->var, new_var, count);
+ return 0;
+}
- err = efivar_entry_set(entry, new_var->Attributes,
- new_var->DataSize, new_var->Data, false);
+static inline bool is_compat(void)
+{
+ if (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())
+ return true;
+
+ return false;
+}
+
+static void
+copy_out_compat(struct efi_variable *dst, struct compat_efi_variable *src)
+{
+ memcpy(dst->VariableName, src->VariableName, EFI_VAR_NAME_LEN);
+ memcpy(dst->Data, src->Data, sizeof(src->Data));
+
+ dst->VendorGuid = src->VendorGuid;
+ dst->DataSize = src->DataSize;
+ dst->Attributes = src->Attributes;
+}
+
+/*
+ * We allow each variable to be edited via rewriting the
+ * entire efi variable structure.
+ */
+static ssize_t
+efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
+{
+ struct efi_variable *new_var, *var = &entry->var;
+ efi_char16_t *name;
+ unsigned long size;
+ efi_guid_t vendor;
+ u32 attributes;
+ u8 *data;
+ int err;
+
+ if (is_compat()) {
+ struct compat_efi_variable *compat;
+
+ if (count != sizeof(*compat))
+ return -EINVAL;
+
+ compat = (struct compat_efi_variable *)buf;
+ attributes = compat->Attributes;
+ vendor = compat->VendorGuid;
+ name = compat->VariableName;
+ size = compat->DataSize;
+ data = compat->Data;
+
+ err = sanity_check(var, name, vendor, size, attributes, data);
+ if (err)
+ return err;
+
+ copy_out_compat(&entry->var, compat);
+ } else {
+ if (count != sizeof(struct efi_variable))
+ return -EINVAL;
+
+ new_var = (struct efi_variable *)buf;
+
+ attributes = new_var->Attributes;
+ vendor = new_var->VendorGuid;
+ name = new_var->VariableName;
+ size = new_var->DataSize;
+ data = new_var->Data;
+
+ err = sanity_check(var, name, vendor, size, attributes, data);
+ if (err)
+ return err;
+
+ memcpy(&entry->var, new_var, count);
+ }
+
+ err = efivar_entry_set(entry, attributes, size, data, NULL);
if (err) {
printk(KERN_WARNING "efivars: set_variable() failed: status=%d\n", err);
return -EIO;
@@ -240,6 +312,8 @@ static ssize_t
efivar_show_raw(struct efivar_entry *entry, char *buf)
{
struct efi_variable *var = &entry->var;
+ struct compat_efi_variable *compat;
+ size_t size;
if (!entry || !buf)
return 0;
@@ -249,9 +323,23 @@ efivar_show_raw(struct efivar_entry *entry, char *buf)
&entry->var.DataSize, entry->var.Data))
return -EIO;
- memcpy(buf, var, sizeof(*var));
+ if (is_compat()) {
+ compat = (struct compat_efi_variable *)buf;
- return sizeof(*var);
+ size = sizeof(*compat);
+ memcpy(compat->VariableName, var->VariableName,
+ EFI_VAR_NAME_LEN);
+ memcpy(compat->Data, var->Data, sizeof(compat->Data));
+
+ compat->VendorGuid = var->VendorGuid;
+ compat->DataSize = var->DataSize;
+ compat->Attributes = var->Attributes;
+ } else {
+ size = sizeof(*var);
+ memcpy(buf, var, size);
+ }
+
+ return size;
}
/*
@@ -326,15 +414,39 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
+ struct compat_efi_variable *compat = (struct compat_efi_variable *)buf;
struct efi_variable *new_var = (struct efi_variable *)buf;
struct efivar_entry *new_entry;
+ bool need_compat = is_compat();
+ efi_char16_t *name;
+ unsigned long size;
+ u32 attributes;
+ u8 *data;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if ((new_var->Attributes & ~EFI_VARIABLE_MASK) != 0 ||
- efivar_validate(new_var, new_var->Data, new_var->DataSize) == false) {
+ if (need_compat) {
+ if (count != sizeof(*compat))
+ return -EINVAL;
+
+ attributes = compat->Attributes;
+ name = compat->VariableName;
+ size = compat->DataSize;
+ data = compat->Data;
+ } else {
+ if (count != sizeof(*new_var))
+ return -EINVAL;
+
+ attributes = new_var->Attributes;
+ name = new_var->VariableName;
+ size = new_var->DataSize;
+ data = new_var->Data;
+ }
+
+ if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
+ efivar_validate(name, data, size) == false) {
printk(KERN_ERR "efivars: Malformed variable content\n");
return -EINVAL;
}
@@ -343,10 +455,13 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
if (!new_entry)
return -ENOMEM;
- memcpy(&new_entry->var, new_var, sizeof(*new_var));
+ if (need_compat)
+ copy_out_compat(&new_entry->var, compat);
+ else
+ memcpy(&new_entry->var, new_var, sizeof(*new_var));
- err = efivar_entry_set(new_entry, new_var->Attributes, new_var->DataSize,
- new_var->Data, &efivar_sysfs_list);
+ err = efivar_entry_set(new_entry, attributes, size,
+ data, &efivar_sysfs_list);
if (err) {
if (err == -EEXIST)
err = -EINVAL;
@@ -369,26 +484,47 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
char *buf, loff_t pos, size_t count)
{
struct efi_variable *del_var = (struct efi_variable *)buf;
+ struct compat_efi_variable *compat;
struct efivar_entry *entry;
+ efi_char16_t *name;
+ efi_guid_t vendor;
int err = 0;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
+ if (is_compat()) {
+ if (count != sizeof(*compat))
+ return -EINVAL;
+
+ compat = (struct compat_efi_variable *)buf;
+ name = compat->VariableName;
+ vendor = compat->VendorGuid;
+ } else {
+ if (count != sizeof(*del_var))
+ return -EINVAL;
+
+ name = del_var->VariableName;
+ vendor = del_var->VendorGuid;
+ }
+
efivar_entry_iter_begin();
- entry = efivar_entry_find(del_var->VariableName, del_var->VendorGuid,
- &efivar_sysfs_list, true);
+ entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true);
if (!entry)
err = -EINVAL;
else if (__efivar_entry_delete(entry))
err = -EIO;
- efivar_entry_iter_end();
-
- if (err)
+ if (err) {
+ efivar_entry_iter_end();
return err;
+ }
- efivar_unregister(entry);
+ if (!entry->scanning) {
+ efivar_entry_iter_end();
+ efivar_unregister(entry);
+ } else
+ efivar_entry_iter_end();
/* It's dead Jim.... */
return count;
@@ -564,7 +700,7 @@ static int efivar_sysfs_destroy(struct efivar_entry *entry, void *data)
return 0;
}
-void efivars_sysfs_exit(void)
+static void efivars_sysfs_exit(void)
{
/* Remove all entries and destroy */
__efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list, NULL, NULL);
@@ -583,6 +719,9 @@ int efivars_sysfs_init(void)
struct kobject *parent_kobj = efivars_kobject();
int error = 0;
+ if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ return -ENODEV;
+
/* No efivars has been registered yet */
if (!parent_kobj)
return 0;
diff --git a/drivers/firmware/efi/fdt.c b/drivers/firmware/efi/fdt.c
new file mode 100644
index 000000000000..507a3df46a5d
--- /dev/null
+++ b/drivers/firmware/efi/fdt.c
@@ -0,0 +1,275 @@
+/*
+ * FDT related Helper functions used by the EFI stub on multiple
+ * architectures. This should be #included by the EFI stub
+ * implementation files.
+ *
+ * Copyright 2013 Linaro Limited; author Roy Franz
+ *
+ * This file is part of the Linux kernel, and is made available
+ * under the terms of the GNU General Public License version 2.
+ *
+ */
+
+static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
+ unsigned long orig_fdt_size,
+ void *fdt, int new_fdt_size, char *cmdline_ptr,
+ u64 initrd_addr, u64 initrd_size,
+ efi_memory_desc_t *memory_map,
+ unsigned long map_size, unsigned long desc_size,
+ u32 desc_ver)
+{
+ int node, prev;
+ int status;
+ u32 fdt_val32;
+ u64 fdt_val64;
+
+ /* Do some checks on provided FDT, if it exists*/
+ if (orig_fdt) {
+ if (fdt_check_header(orig_fdt)) {
+ pr_efi_err(sys_table, "Device Tree header not valid!\n");
+ return EFI_LOAD_ERROR;
+ }
+ /*
+ * We don't get the size of the FDT if we get it from a
+ * configuration table.
+ */
+ if (orig_fdt_size && fdt_totalsize(orig_fdt) > orig_fdt_size) {
+ pr_efi_err(sys_table, "Truncated device tree! foo!\n");
+ return EFI_LOAD_ERROR;
+ }
+ }
+
+ if (orig_fdt)
+ status = fdt_open_into(orig_fdt, fdt, new_fdt_size);
+ else
+ status = fdt_create_empty_tree(fdt, new_fdt_size);
+
+ if (status != 0)
+ goto fdt_set_fail;
+
+ /*
+ * Delete any memory nodes present. We must delete nodes which
+ * early_init_dt_scan_memory may try to use.
+ */
+ prev = 0;
+ for (;;) {
+ const char *type;
+ int len;
+
+ node = fdt_next_node(fdt, prev, NULL);
+ if (node < 0)
+ break;
+
+ type = fdt_getprop(fdt, node, "device_type", &len);
+ if (type && strncmp(type, "memory", len) == 0) {
+ fdt_del_node(fdt, node);
+ continue;
+ }
+
+ prev = node;
+ }
+
+ node = fdt_subnode_offset(fdt, 0, "chosen");
+ if (node < 0) {
+ node = fdt_add_subnode(fdt, 0, "chosen");
+ if (node < 0) {
+ status = node; /* node is error code when negative */
+ goto fdt_set_fail;
+ }
+ }
+
+ if ((cmdline_ptr != NULL) && (strlen(cmdline_ptr) > 0)) {
+ status = fdt_setprop(fdt, node, "bootargs", cmdline_ptr,
+ strlen(cmdline_ptr) + 1);
+ if (status)
+ goto fdt_set_fail;
+ }
+
+ /* Set initrd address/end in device tree, if present */
+ if (initrd_size != 0) {
+ u64 initrd_image_end;
+ u64 initrd_image_start = cpu_to_fdt64(initrd_addr);
+
+ status = fdt_setprop(fdt, node, "linux,initrd-start",
+ &initrd_image_start, sizeof(u64));
+ if (status)
+ goto fdt_set_fail;
+ initrd_image_end = cpu_to_fdt64(initrd_addr + initrd_size);
+ status = fdt_setprop(fdt, node, "linux,initrd-end",
+ &initrd_image_end, sizeof(u64));
+ if (status)
+ goto fdt_set_fail;
+ }
+
+ /* Add FDT entries for EFI runtime services in chosen node. */
+ node = fdt_subnode_offset(fdt, 0, "chosen");
+ fdt_val64 = cpu_to_fdt64((u64)(unsigned long)sys_table);
+ status = fdt_setprop(fdt, node, "linux,uefi-system-table",
+ &fdt_val64, sizeof(fdt_val64));
+ if (status)
+ goto fdt_set_fail;
+
+ fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map);
+ status = fdt_setprop(fdt, node, "linux,uefi-mmap-start",
+ &fdt_val64, sizeof(fdt_val64));
+ if (status)
+ goto fdt_set_fail;
+
+ fdt_val32 = cpu_to_fdt32(map_size);
+ status = fdt_setprop(fdt, node, "linux,uefi-mmap-size",
+ &fdt_val32, sizeof(fdt_val32));
+ if (status)
+ goto fdt_set_fail;
+
+ fdt_val32 = cpu_to_fdt32(desc_size);
+ status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-size",
+ &fdt_val32, sizeof(fdt_val32));
+ if (status)
+ goto fdt_set_fail;
+
+ fdt_val32 = cpu_to_fdt32(desc_ver);
+ status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-ver",
+ &fdt_val32, sizeof(fdt_val32));
+ if (status)
+ goto fdt_set_fail;
+
+ /*
+ * Add kernel version banner so stub/kernel match can be
+ * verified.
+ */
+ status = fdt_setprop_string(fdt, node, "linux,uefi-stub-kern-ver",
+ linux_banner);
+ if (status)
+ goto fdt_set_fail;
+
+ return EFI_SUCCESS;
+
+fdt_set_fail:
+ if (status == -FDT_ERR_NOSPACE)
+ return EFI_BUFFER_TOO_SMALL;
+
+ return EFI_LOAD_ERROR;
+}
+
+#ifndef EFI_FDT_ALIGN
+#define EFI_FDT_ALIGN EFI_PAGE_SIZE
+#endif
+
+/*
+ * Allocate memory for a new FDT, then add EFI, commandline, and
+ * initrd related fields to the FDT. This routine increases the
+ * FDT allocation size until the allocated memory is large
+ * enough. EFI allocations are in EFI_PAGE_SIZE granules,
+ * which are fixed at 4K bytes, so in most cases the first
+ * allocation should succeed.
+ * EFI boot services are exited at the end of this function.
+ * There must be no allocations between the get_memory_map()
+ * call and the exit_boot_services() call, so the exiting of
+ * boot services is very tightly tied to the creation of the FDT
+ * with the final memory map in it.
+ */
+
+efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
+ void *handle,
+ unsigned long *new_fdt_addr,
+ unsigned long max_addr,
+ u64 initrd_addr, u64 initrd_size,
+ char *cmdline_ptr,
+ unsigned long fdt_addr,
+ unsigned long fdt_size)
+{
+ unsigned long map_size, desc_size;
+ u32 desc_ver;
+ unsigned long mmap_key;
+ efi_memory_desc_t *memory_map;
+ unsigned long new_fdt_size;
+ efi_status_t status;
+
+ /*
+ * Estimate size of new FDT, and allocate memory for it. We
+ * will allocate a bigger buffer if this ends up being too
+ * small, so a rough guess is OK here.
+ */
+ new_fdt_size = fdt_size + EFI_PAGE_SIZE;
+ while (1) {
+ status = efi_high_alloc(sys_table, new_fdt_size, EFI_FDT_ALIGN,
+ new_fdt_addr, max_addr);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table, "Unable to allocate memory for new device tree.\n");
+ goto fail;
+ }
+
+ /*
+ * Now that we have done our final memory allocation (and free)
+ * we can get the memory map key needed for
+ * exit_boot_services().
+ */
+ status = efi_get_memory_map(sys_table, &memory_map, &map_size,
+ &desc_size, &desc_ver, &mmap_key);
+ if (status != EFI_SUCCESS)
+ goto fail_free_new_fdt;
+
+ status = update_fdt(sys_table,
+ (void *)fdt_addr, fdt_size,
+ (void *)*new_fdt_addr, new_fdt_size,
+ cmdline_ptr, initrd_addr, initrd_size,
+ memory_map, map_size, desc_size, desc_ver);
+
+ /* Succeeding the first time is the expected case. */
+ if (status == EFI_SUCCESS)
+ break;
+
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ /*
+ * We need to allocate more space for the new
+ * device tree, so free the existing buffer that is
+ * too small. Also free the memory map, as we will need
+ * to get a new one that reflects the free/alloc we do
+ * on the device tree buffer.
+ */
+ efi_free(sys_table, new_fdt_size, *new_fdt_addr);
+ sys_table->boottime->free_pool(memory_map);
+ new_fdt_size += EFI_PAGE_SIZE;
+ } else {
+ pr_efi_err(sys_table, "Unable to constuct new device tree.\n");
+ goto fail_free_mmap;
+ }
+ }
+
+ /* Now we are ready to call exit_boot_services(). */
+ status = sys_table->boottime->exit_boot_services(handle, mmap_key);
+
+
+ if (status == EFI_SUCCESS)
+ return status;
+
+ pr_efi_err(sys_table, "Exit boot services failed.\n");
+
+fail_free_mmap:
+ sys_table->boottime->free_pool(memory_map);
+
+fail_free_new_fdt:
+ efi_free(sys_table, new_fdt_size, *new_fdt_addr);
+
+fail:
+ return EFI_LOAD_ERROR;
+}
+
+static void *get_fdt(efi_system_table_t *sys_table)
+{
+ efi_guid_t fdt_guid = DEVICE_TREE_GUID;
+ efi_config_table_t *tables;
+ void *fdt;
+ int i;
+
+ tables = (efi_config_table_t *) sys_table->tables;
+ fdt = NULL;
+
+ for (i = 0; i < sys_table->nr_tables; i++)
+ if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) {
+ fdt = (void *) tables[i].table;
+ break;
+ }
+
+ return fdt;
+}
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 7dbc319e1cf5..5abe943e3404 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -42,7 +42,7 @@ DECLARE_WORK(efivar_work, NULL);
EXPORT_SYMBOL_GPL(efivar_work);
static bool
-validate_device_path(struct efi_variable *var, int match, u8 *buffer,
+validate_device_path(efi_char16_t *var_name, int match, u8 *buffer,
unsigned long len)
{
struct efi_generic_dev_path *node;
@@ -75,7 +75,7 @@ validate_device_path(struct efi_variable *var, int match, u8 *buffer,
}
static bool
-validate_boot_order(struct efi_variable *var, int match, u8 *buffer,
+validate_boot_order(efi_char16_t *var_name, int match, u8 *buffer,
unsigned long len)
{
/* An array of 16-bit integers */
@@ -86,18 +86,18 @@ validate_boot_order(struct efi_variable *var, int match, u8 *buffer,
}
static bool
-validate_load_option(struct efi_variable *var, int match, u8 *buffer,
+validate_load_option(efi_char16_t *var_name, int match, u8 *buffer,
unsigned long len)
{
u16 filepathlength;
int i, desclength = 0, namelen;
- namelen = ucs2_strnlen(var->VariableName, sizeof(var->VariableName));
+ namelen = ucs2_strnlen(var_name, EFI_VAR_NAME_LEN);
/* Either "Boot" or "Driver" followed by four digits of hex */
for (i = match; i < match+4; i++) {
- if (var->VariableName[i] > 127 ||
- hex_to_bin(var->VariableName[i] & 0xff) < 0)
+ if (var_name[i] > 127 ||
+ hex_to_bin(var_name[i] & 0xff) < 0)
return true;
}
@@ -132,12 +132,12 @@ validate_load_option(struct efi_variable *var, int match, u8 *buffer,
/*
* And, finally, check the filepath
*/
- return validate_device_path(var, match, buffer + desclength + 6,
+ return validate_device_path(var_name, match, buffer + desclength + 6,
filepathlength);
}
static bool
-validate_uint16(struct efi_variable *var, int match, u8 *buffer,
+validate_uint16(efi_char16_t *var_name, int match, u8 *buffer,
unsigned long len)
{
/* A single 16-bit integer */
@@ -148,7 +148,7 @@ validate_uint16(struct efi_variable *var, int match, u8 *buffer,
}
static bool
-validate_ascii_string(struct efi_variable *var, int match, u8 *buffer,
+validate_ascii_string(efi_char16_t *var_name, int match, u8 *buffer,
unsigned long len)
{
int i;
@@ -166,7 +166,7 @@ validate_ascii_string(struct efi_variable *var, int match, u8 *buffer,
struct variable_validate {
char *name;
- bool (*validate)(struct efi_variable *var, int match, u8 *data,
+ bool (*validate)(efi_char16_t *var_name, int match, u8 *data,
unsigned long len);
};
@@ -189,10 +189,10 @@ static const struct variable_validate variable_validate[] = {
};
bool
-efivar_validate(struct efi_variable *var, u8 *data, unsigned long len)
+efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len)
{
int i;
- u16 *unicode_name = var->VariableName;
+ u16 *unicode_name = var_name;
for (i = 0; variable_validate[i].validate != NULL; i++) {
const char *name = variable_validate[i].name;
@@ -208,7 +208,7 @@ efivar_validate(struct efi_variable *var, u8 *data, unsigned long len)
/* Wildcard in the matching name means we've matched */
if (c == '*')
- return variable_validate[i].validate(var,
+ return variable_validate[i].validate(var_name,
match, data, len);
/* Case sensitive match */
@@ -217,7 +217,7 @@ efivar_validate(struct efi_variable *var, u8 *data, unsigned long len)
/* Reached the end of the string while matching */
if (!c)
- return variable_validate[i].validate(var,
+ return variable_validate[i].validate(var_name,
match, data, len);
}
}
@@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
if (!found)
return NULL;
- if (remove)
- list_del(&entry->list);
+ if (remove) {
+ if (entry->scanning) {
+ /*
+ * The entry will be deleted
+ * after scanning is completed.
+ */
+ entry->deleting = true;
+ } else
+ list_del(&entry->list);
+ }
return entry;
}
@@ -797,7 +805,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes,
*set = false;
- if (efivar_validate(&entry->var, data, *size) == false)
+ if (efivar_validate(name, data, *size) == false)
return -EINVAL;
/*
diff --git a/drivers/gator/Kconfig b/drivers/gator/Kconfig
new file mode 100644
index 000000000000..b2358bbc1293
--- /dev/null
+++ b/drivers/gator/Kconfig
@@ -0,0 +1,39 @@
+config GATOR
+ tristate "Gator module for ARM's Streamline Performance Analyzer"
+ default m if (ARM || ARM64)
+ depends on PROFILING
+ depends on HIGH_RES_TIMERS
+ depends on LOCAL_TIMERS || !(ARM && SMP)
+ depends on PERF_EVENTS
+ depends on HW_PERF_EVENTS || !(ARM || ARM64)
+ select TRACING
+ help
+ Gator module for ARM's Streamline Performance Analyzer
+
+config GATOR_WITH_MALI_SUPPORT
+ bool
+
+choice
+ prompt "Enable Mali GPU support in Gator"
+ depends on GATOR
+ optional
+ help
+ Enable Mali GPU support in Gator
+
+config GATOR_MALI_4XXMP
+ bool "Mali-400MP or Mali-450MP"
+ select GATOR_WITH_MALI_SUPPORT
+
+config GATOR_MALI_MIDGARD
+ bool "Mali-T60x, Mali-T62x, Mali-T72x or Mali-T76x"
+ select GATOR_WITH_MALI_SUPPORT
+
+endchoice
+
+config GATOR_MALI_PATH
+ string "Path to Mali driver"
+ depends on GATOR_WITH_MALI_SUPPORT
+ default "drivers/gpu/arm/mali400mp"
+ help
+ The gator code adds this to its include path so it can get the Mali
+ trace headers with: #include "linux/mali_linux_trace.h"
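A hypothetical .config fragment (not part of the patch) that satisfies the dependencies above and selects Midgard support; the Mali path is a placeholder:

	CONFIG_PROFILING=y
	CONFIG_HIGH_RES_TIMERS=y
	CONFIG_PERF_EVENTS=y
	CONFIG_HW_PERF_EVENTS=y
	CONFIG_GATOR=m
	CONFIG_GATOR_MALI_MIDGARD=y
	CONFIG_GATOR_MALI_PATH="drivers/gpu/arm/midgard"

Note that GATOR_WITH_MALI_SUPPORT is never set directly; it is selected by either Mali choice.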
diff --git a/drivers/gator/LICENSE b/drivers/gator/LICENSE
new file mode 100644
index 000000000000..d159169d1050
--- /dev/null
+++ b/drivers/gator/LICENSE
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile
new file mode 100644
index 000000000000..28d2070b11d5
--- /dev/null
+++ b/drivers/gator/Makefile
@@ -0,0 +1,79 @@
+ifneq ($(KERNELRELEASE),)
+
+# Uncomment the following line to enable kernel stack unwinding within gator, or update gator_backtrace.c
+# EXTRA_CFLAGS += -DGATOR_KERNEL_STACK_UNWINDING
+
+CONFIG_GATOR ?= m
+obj-$(CONFIG_GATOR) := gator.o
+
+gator-y := gator_main.o \
+ gator_events_block.o \
+ gator_events_irq.o \
+ gator_events_meminfo.o \
+ gator_events_mmapped.o \
+ gator_events_net.o \
+ gator_events_perf_pmu.o \
+ gator_events_sched.o \
+
+# Convert the old GATOR_WITH_MALI_SUPPORT to the new kernel flags
+ifneq ($(GATOR_WITH_MALI_SUPPORT),)
+ CONFIG_GATOR_WITH_MALI_SUPPORT := y
+ ifeq ($(GATOR_WITH_MALI_SUPPORT),MALI_MIDGARD)
+ CONFIG_GATOR_MALI_4XXMP := n
+ CONFIG_GATOR_MALI_MIDGARD := y
+ else
+ CONFIG_GATOR_MALI_4XXMP := y
+ CONFIG_GATOR_MALI_MIDGARD := n
+ endif
+ EXTRA_CFLAGS += -DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT)
+ ifneq ($(GATOR_MALI_INTERFACE_STYLE),)
+ EXTRA_CFLAGS += -DGATOR_MALI_INTERFACE_STYLE=$(GATOR_MALI_INTERFACE_STYLE)
+ endif
+endif
+
+ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y)
+ ifeq ($(CONFIG_GATOR_MALI_MIDGARD),y)
+ gator-y += gator_events_mali_midgard.o \
+ gator_events_mali_midgard_hw.o
+ include $(src)/mali_midgard.mk
+ else
+ gator-y += gator_events_mali_4xx.o
+ endif
+ gator-y += gator_events_mali_common.o
+
+ ifneq ($(CONFIG_GATOR_MALI_PATH),)
+ ccflags-y += -I$(CONFIG_GATOR_MALI_PATH)
+ endif
+ ccflags-$(CONFIG_GATOR_MALI_4XXMP) += -DMALI_SUPPORT=MALI_4xx
+ ccflags-$(CONFIG_GATOR_MALI_MIDGARD) += -DMALI_SUPPORT=MALI_MIDGARD
+endif
+
+# GATOR_TEST controls whether to include (=1) or exclude (=0) test code.
+GATOR_TEST ?= 0
+EXTRA_CFLAGS += -DGATOR_TEST=$(GATOR_TEST)
+
+# Should the original or new block_rq_complete API be used?
+OLD_BLOCK_RQ_COMPLETE := $(shell grep -A3 block_rq_complete $(srctree)/include/trace/events/block.h | grep nr_bytes -q; echo $$?)
+EXTRA_CFLAGS += -DOLD_BLOCK_RQ_COMPLETE=$(OLD_BLOCK_RQ_COMPLETE)
+
+gator-$(CONFIG_ARM) += gator_events_armv6.o \
+ gator_events_armv7.o \
+ gator_events_l2c-310.o \
+ gator_events_scorpion.o
+
+gator-$(CONFIG_ARM64) +=
+
+else
+
+all:
+ @echo
+ @echo "usage:"
+ @echo " make -C <kernel_build_dir> M=\`pwd\` ARCH=arm CROSS_COMPILE=<...> modules"
+ @echo
+ $(error)
+
+clean:
+ rm -f *.o .*.cmd modules.order Module.symvers gator.ko gator.mod.c
+ rm -rf .tmp_versions
+
+endif
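A usage sketch (not part of the patch) of the out-of-tree Mali conversion block above, using only variables this Makefile reads; the paths and interface style value are placeholders:

	make -C <kernel_build_dir> M=`pwd` ARCH=arm CROSS_COMPILE=<...> \
	     GATOR_WITH_MALI_SUPPORT=MALI_MIDGARD \
	     GATOR_MALI_INTERFACE_STYLE=<n> modules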
diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h
new file mode 100644
index 000000000000..5cc73a388c4f
--- /dev/null
+++ b/drivers/gator/gator.h
@@ -0,0 +1,152 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef GATOR_H_
+#define GATOR_H_
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+
+#define GATOR_PERF_SUPPORT (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0))
+#define GATOR_PERF_PMU_SUPPORT (GATOR_PERF_SUPPORT && defined(CONFIG_PERF_EVENTS) && (!(defined(__arm__) || defined(__aarch64__)) || defined(CONFIG_HW_PERF_EVENTS)))
+#define GATOR_NO_PERF_SUPPORT (!(GATOR_PERF_SUPPORT))
+#define GATOR_CPU_FREQ_SUPPORT ((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ))
+#define GATOR_IKS_SUPPORT defined(CONFIG_BL_SWITCHER)
+
+/* cpu ids */
+#define ARM1136 0xb36
+#define ARM1156 0xb56
+#define ARM1176 0xb76
+#define ARM11MPCORE 0xb02
+#define CORTEX_A5 0xc05
+#define CORTEX_A7 0xc07
+#define CORTEX_A8 0xc08
+#define CORTEX_A9 0xc09
+#define CORTEX_A15 0xc0f
+#define CORTEX_A17 0xc0e
+#define SCORPION 0x00f
+#define SCORPIONMP 0x02d
+#define KRAITSIM 0x049
+#define KRAIT 0x04d
+#define KRAIT_S4_PRO 0x06f
+#define CORTEX_A53 0xd03
+#define CORTEX_A57 0xd07
+#define AARCH64 0xd0f
+#define OTHER 0xfff
+
+/* gpu enums */
+#define MALI_4xx 1
+#define MALI_MIDGARD 2
+
+#define MAXSIZE_CORE_NAME 32
+
+struct gator_cpu {
+ const int cpuid;
+ /* Human readable name */
+ const char core_name[MAXSIZE_CORE_NAME];
+ /* gatorfs event and Perf PMU name */
+ const char *const pmnc_name;
+ /* compatible from Documentation/devicetree/bindings/arm/cpus.txt */
+ const char *const dt_name;
+ const int pmnc_counters;
+};
+
+const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid);
+const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name);
+
+/******************************************************************************
+ * Filesystem
+ ******************************************************************************/
+struct dentry *gatorfs_mkdir(struct super_block *sb, struct dentry *root,
+ char const *name);
+
+int gatorfs_create_ulong(struct super_block *sb, struct dentry *root,
+ char const *name, unsigned long *val);
+
+int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
+ char const *name, unsigned long *val);
+
+/******************************************************************************
+ * Tracepoints
+ ******************************************************************************/
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
+# error Kernels prior to 2.6.32 not supported
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+# define GATOR_DEFINE_PROBE(probe_name, proto) \
+ static void probe_##probe_name(PARAMS(proto))
+# define GATOR_REGISTER_TRACE(probe_name) \
+ register_trace_##probe_name(probe_##probe_name)
+# define GATOR_UNREGISTER_TRACE(probe_name) \
+ unregister_trace_##probe_name(probe_##probe_name)
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
+# define GATOR_DEFINE_PROBE(probe_name, proto) \
+ static void probe_##probe_name(void *data, PARAMS(proto))
+# define GATOR_REGISTER_TRACE(probe_name) \
+ register_trace_##probe_name(probe_##probe_name, NULL)
+# define GATOR_UNREGISTER_TRACE(probe_name) \
+ unregister_trace_##probe_name(probe_##probe_name, NULL)
+#else
+# define GATOR_DEFINE_PROBE(probe_name, proto) \
+ extern struct tracepoint *gator_tracepoint_##probe_name; \
+ static void probe_##probe_name(void *data, PARAMS(proto))
+# define GATOR_REGISTER_TRACE(probe_name) \
+ ((gator_tracepoint_##probe_name == NULL) || tracepoint_probe_register(gator_tracepoint_##probe_name, probe_##probe_name, NULL))
+# define GATOR_UNREGISTER_TRACE(probe_name) \
+ tracepoint_probe_unregister(gator_tracepoint_##probe_name, probe_##probe_name, NULL)
+#endif
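A minimal sketch (not part of the patch) of how an event source is expected to use these macros; the sched_switch prototype comes from <trace/events/sched.h>, and on 3.15+ kernels the extern gator_tracepoint_sched_switch must be resolved elsewhere in gator:

	#include <trace/events/sched.h>

	GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev,
						  struct task_struct *next))
	{
		/* runs in tracepoint context; record the switch here */
	}

	static int example_trace_start(void)
	{
		/* non-zero means the tracepoint could not be hooked */
		if (GATOR_REGISTER_TRACE(sched_switch))
			return -1;
		return 0;
	}

	static void example_trace_stop(void)
	{
		GATOR_UNREGISTER_TRACE(sched_switch);
	}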
+
+/******************************************************************************
+ * Events
+ ******************************************************************************/
+struct gator_interface {
+ /* Complementary function to init */
+ void (*shutdown)(void);
+ int (*create_files)(struct super_block *sb, struct dentry *root);
+ int (*start)(void);
+ /* Complementary function to start */
+ void (*stop)(void);
+ int (*online)(int **buffer, bool migrate);
+ int (*offline)(int **buffer, bool migrate);
+ /* called in process context but may not be running on core 'cpu' */
+ void (*online_dispatch)(int cpu, bool migrate);
+ /* called in process context but may not be running on core 'cpu' */
+ void (*offline_dispatch)(int cpu, bool migrate);
+ int (*read)(int **buffer, bool sched_switch);
+ int (*read64)(long long **buffer);
+ int (*read_proc)(long long **buffer, struct task_struct *);
+ struct list_head list;
+};
+
+int gator_events_install(struct gator_interface *interface);
+int gator_events_get_key(void);
+u32 gator_cpuid(void);
+
+void gator_backtrace_handler(struct pt_regs *const regs);
+
+void gator_marshal_activity_switch(int core, int key, int activity, int pid);
+
+#if !GATOR_IKS_SUPPORT
+
+#define get_physical_cpu() smp_processor_id()
+#define lcpu_to_pcpu(lcpu) lcpu
+#define pcpu_to_lcpu(pcpu) pcpu
+
+#else
+
+#define get_physical_cpu() lcpu_to_pcpu(get_logical_cpu())
+int lcpu_to_pcpu(const int lcpu);
+int pcpu_to_lcpu(const int pcpu);
+
+#endif
+
+#define get_logical_cpu() smp_processor_id()
+#define on_primary_core() (get_logical_cpu() == 0)
+
+#endif /* GATOR_H_ */
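A minimal sketch (not part of the patch) of an event source plugging into struct gator_interface above via gator_events_install(); unused callbacks may stay NULL and the counter logic is elided:

	static int example_start(void)
	{
		/* enable counters here */
		return 0;
	}

	static void example_stop(void)
	{
		/* disable counters here */
	}

	static struct gator_interface example_interface = {
		.start = example_start,
		.stop = example_stop,
	};

	int gator_events_example_init(void)
	{
		return gator_events_install(&example_interface);
	}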
diff --git a/drivers/gator/gator_annotate.c b/drivers/gator/gator_annotate.c
new file mode 100644
index 000000000000..ff9a3cef7b2e
--- /dev/null
+++ b/drivers/gator/gator_annotate.c
@@ -0,0 +1,189 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <asm/current.h>
+#include <linux/spinlock.h>
+
+static DEFINE_SPINLOCK(annotate_lock);
+static bool collect_annotations;
+
+static int annotate_copy(struct file *file, char const __user *buf, size_t count)
+{
+ int cpu = 0;
+ int write = per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF];
+
+ if (file == NULL) {
+ /* copy from kernel */
+ memcpy(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count);
+ } else {
+ /* copy from user space */
+ if (copy_from_user(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count) != 0)
+ return -1;
+ }
+ per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF] = (write + count) & gator_buffer_mask[ANNOTATE_BUF];
+
+ return 0;
+}
+
+static ssize_t annotate_write(struct file *file, char const __user *buf, size_t count_orig, loff_t *offset)
+{
+ int pid, cpu, header_size, available, contiguous, length1, length2, size, count = count_orig & 0x7fffffff;
+ bool interrupt_context;
+
+ if (*offset)
+ return -EINVAL;
+
+ interrupt_context = in_interrupt();
+	/* Annotations are not supported in interrupt context, but may work
+	 * if you comment out the next four lines of code. Be aware, though,
+	 * that annotations in interrupt context can result in deadlocks and
+	 * lost data.
+	 */
+ if (interrupt_context) {
+ pr_warning("gator: Annotations are not supported in interrupt context. Edit gator_annotate.c in the gator driver to enable annotations in interrupt context.\n");
+ return -EINVAL;
+ }
+
+ retry:
+ /* synchronize between cores and with collect_annotations */
+ spin_lock(&annotate_lock);
+
+ if (!collect_annotations) {
+ /* Not collecting annotations, tell the caller everything was written */
+ size = count_orig;
+ goto annotate_write_out;
+ }
+
+	/* Annotation uses a single per-cpu buffer because the data must reach the engine in order */
+ cpu = 0;
+
+ if (current == NULL)
+ pid = 0;
+ else
+ pid = current->pid;
+
+ /* determine total size of the payload */
+ header_size = MAXSIZE_PACK32 * 3 + MAXSIZE_PACK64;
+ available = buffer_bytes_available(cpu, ANNOTATE_BUF) - header_size;
+ size = count < available ? count : available;
+
+ if (size <= 0) {
+ /* Buffer is full, wait until space is available */
+ spin_unlock(&annotate_lock);
+
+ /* Drop the annotation as blocking is not allowed in interrupt context */
+ if (interrupt_context)
+ return -EINVAL;
+
+ wait_event_interruptible(gator_annotate_wait, buffer_bytes_available(cpu, ANNOTATE_BUF) > header_size || !collect_annotations);
+
+ /* Check to see if a signal is pending */
+ if (signal_pending(current))
+ return -EINTR;
+
+ goto retry;
+ }
+
+	/* write the annotation header and payload into the shared buffer */
+ if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF]) {
+ u64 time = gator_get_time();
+
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu());
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid);
+ gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, time);
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, size);
+
+		/* determine the sizes to capture; length1 + length2 will equal size */
+ contiguous = contiguous_space_available(cpu, ANNOTATE_BUF);
+ if (size < contiguous) {
+ length1 = size;
+ length2 = 0;
+ } else {
+ length1 = contiguous;
+ length2 = size - contiguous;
+ }
+
+ if (annotate_copy(file, buf, length1) != 0) {
+ size = -EINVAL;
+ goto annotate_write_out;
+ }
+
+ if (length2 > 0 && annotate_copy(file, &buf[length1], length2) != 0) {
+ size = -EINVAL;
+ goto annotate_write_out;
+ }
+
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, ANNOTATE_BUF, time);
+ }
+
+annotate_write_out:
+ spin_unlock(&annotate_lock);
+
+ /* return the number of bytes written */
+ return size;
+}
+
+#include "gator_annotate_kernel.c"
+
+static int annotate_release(struct inode *inode, struct file *file)
+{
+ int cpu = 0;
+
+ /* synchronize between cores */
+ spin_lock(&annotate_lock);
+
+ if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF] && buffer_check_space(cpu, ANNOTATE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
+ uint32_t pid = current->pid;
+
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu());
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid);
+ /* time */
+ gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, 0);
+ /* size */
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, 0);
+ }
+
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, ANNOTATE_BUF, gator_get_time());
+
+ spin_unlock(&annotate_lock);
+
+ return 0;
+}
+
+static const struct file_operations annotate_fops = {
+ .write = annotate_write,
+ .release = annotate_release
+};
+
+static int gator_annotate_create_files(struct super_block *sb, struct dentry *root)
+{
+ return gatorfs_create_file_perm(sb, root, "annotate", &annotate_fops, 0666);
+}
+
+static int gator_annotate_start(void)
+{
+ collect_annotations = true;
+ return 0;
+}
+
+static void gator_annotate_stop(void)
+{
+ /* the spinlock here will ensure that when this function exits, we are not in the middle of an annotation */
+ spin_lock(&annotate_lock);
+ collect_annotations = false;
+ wake_up(&gator_annotate_wait);
+ spin_unlock(&annotate_lock);
+}
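For completeness, a hypothetical user-space counterpart (not part of the patch); the /dev/gator mount point is an assumption, as gatorfs is normally mounted there by the daemon:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* each write() becomes one annotation record; the kernel side
		 * (annotate_write above) prepends cpu, pid, time and size */
		int fd = open("/dev/gator/annotate", O_WRONLY);
		const char *msg = "frame start";

		if (fd < 0)
			return 1;
		if (write(fd, msg, strlen(msg)) < 0) {
			close(fd);
			return 1;
		}
		close(fd);
		return 0;
	}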
diff --git a/drivers/gator/gator_annotate_kernel.c b/drivers/gator/gator_annotate_kernel.c
new file mode 100644
index 000000000000..69471f99e5fb
--- /dev/null
+++ b/drivers/gator/gator_annotate_kernel.c
@@ -0,0 +1,200 @@
+/**
+ * Copyright (C) ARM Limited 2012-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define ESCAPE_CODE 0x1c
+#define STRING_ANNOTATION 0x06
+#define NAME_CHANNEL_ANNOTATION 0x07
+#define NAME_GROUP_ANNOTATION 0x08
+#define VISUAL_ANNOTATION 0x04
+#define MARKER_ANNOTATION 0x05
+
+static void kannotate_write(const char *ptr, unsigned int size)
+{
+ int retval;
+ int pos = 0;
+ loff_t offset = 0;
+
+ while (pos < size) {
+ retval = annotate_write(NULL, &ptr[pos], size - pos, &offset);
+ if (retval < 0) {
+ pr_warning("gator: kannotate_write failed with return value %d\n", retval);
+ return;
+ }
+ pos += retval;
+ }
+}
+
+static void marshal_u16(char *buf, u16 val)
+{
+ buf[0] = val & 0xff;
+ buf[1] = (val >> 8) & 0xff;
+}
+
+static void marshal_u32(char *buf, u32 val)
+{
+ buf[0] = val & 0xff;
+ buf[1] = (val >> 8) & 0xff;
+ buf[2] = (val >> 16) & 0xff;
+ buf[3] = (val >> 24) & 0xff;
+}
+
+void gator_annotate_channel(int channel, const char *str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[8];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = STRING_ANNOTATION;
+ marshal_u32(header + 2, channel);
+ marshal_u16(header + 6, str_size);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+}
+EXPORT_SYMBOL(gator_annotate_channel);
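As a worked example of the resulting byte stream (little-endian, derived from the code above), gator_annotate_channel(1, "hi") writes:

	1c 06 01 00 00 00 02 00 68 69
	^  ^  ^           ^     ^
	|  |  channel=1   len=2 "hi"
	|  STRING_ANNOTATION
	ESCAPE_CODE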
+
+void gator_annotate(const char *str)
+{
+ gator_annotate_channel(0, str);
+}
+EXPORT_SYMBOL(gator_annotate);
+
+void gator_annotate_channel_color(int channel, int color, const char *str)
+{
+ const u16 str_size = (strlen(str) + 4) & 0xffff;
+ char header[12];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = STRING_ANNOTATION;
+ marshal_u32(header + 2, channel);
+ marshal_u16(header + 6, str_size);
+ marshal_u32(header + 8, color);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size - 4);
+}
+EXPORT_SYMBOL(gator_annotate_channel_color);
+
+void gator_annotate_color(int color, const char *str)
+{
+ gator_annotate_channel_color(0, color, str);
+}
+EXPORT_SYMBOL(gator_annotate_color);
+
+void gator_annotate_channel_end(int channel)
+{
+ char header[8];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = STRING_ANNOTATION;
+ marshal_u32(header + 2, channel);
+ marshal_u16(header + 6, 0);
+ kannotate_write(header, sizeof(header));
+}
+EXPORT_SYMBOL(gator_annotate_channel_end);
+
+void gator_annotate_end(void)
+{
+ gator_annotate_channel_end(0);
+}
+EXPORT_SYMBOL(gator_annotate_end);
+
+void gator_annotate_name_channel(int channel, int group, const char *str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[12];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = NAME_CHANNEL_ANNOTATION;
+ marshal_u32(header + 2, channel);
+ marshal_u32(header + 6, group);
+ marshal_u16(header + 10, str_size);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+}
+EXPORT_SYMBOL(gator_annotate_name_channel);
+
+void gator_annotate_name_group(int group, const char *str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[8];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = NAME_GROUP_ANNOTATION;
+ marshal_u32(header + 2, group);
+ marshal_u16(header + 6, str_size);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+}
+EXPORT_SYMBOL(gator_annotate_name_group);
+
+void gator_annotate_visual(const char *data, unsigned int length, const char *str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[4];
+ char header_length[4];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = VISUAL_ANNOTATION;
+ marshal_u16(header + 2, str_size);
+ marshal_u32(header_length, length);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+ kannotate_write(header_length, sizeof(header_length));
+ kannotate_write(data, length);
+}
+EXPORT_SYMBOL(gator_annotate_visual);
+
+void gator_annotate_marker(void)
+{
+ char header[4];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = MARKER_ANNOTATION;
+ marshal_u16(header + 2, 0);
+ kannotate_write(header, sizeof(header));
+}
+EXPORT_SYMBOL(gator_annotate_marker);
+
+void gator_annotate_marker_str(const char *str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[4];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = MARKER_ANNOTATION;
+ marshal_u16(header + 2, str_size);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+}
+EXPORT_SYMBOL(gator_annotate_marker_str);
+
+void gator_annotate_marker_color(int color)
+{
+ char header[8];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = MARKER_ANNOTATION;
+ marshal_u16(header + 2, 4);
+ marshal_u32(header + 4, color);
+ kannotate_write(header, sizeof(header));
+}
+EXPORT_SYMBOL(gator_annotate_marker_color);
+
+void gator_annotate_marker_color_str(int color, const char *str)
+{
+ const u16 str_size = (strlen(str) + 4) & 0xffff;
+ char header[8];
+
+ header[0] = ESCAPE_CODE;
+ header[1] = MARKER_ANNOTATION;
+ marshal_u16(header + 2, str_size);
+ marshal_u32(header + 4, color);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size - 4);
+}
+EXPORT_SYMBOL(gator_annotate_marker_color_str);
diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c
new file mode 100644
index 000000000000..76c941d009a9
--- /dev/null
+++ b/drivers/gator/gator_backtrace.c
@@ -0,0 +1,208 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/*
+ * EABI backtrace stores {fp,lr} on the stack.
+ */
+struct stack_frame_eabi {
+ union {
+ struct {
+ unsigned long fp;
+ /* May be the fp in the case of a leaf function or clang */
+ unsigned long lr;
+ /* If lr is really the fp, lr2 is the corresponding lr */
+ unsigned long lr2;
+ };
+ /* Used to read 32 bit fp/lr from a 64 bit kernel */
+ struct {
+ u32 fp_32;
+ /* same as lr above */
+ u32 lr_32;
+ /* same as lr2 above */
+ u32 lr2_32;
+ };
+ };
+};
+
+static void gator_add_trace(int cpu, unsigned long address)
+{
+ off_t offset = 0;
+ unsigned long cookie = get_address_cookie(cpu, current, address & ~1, &offset);
+
+ if (cookie == NO_COOKIE || cookie == UNRESOLVED_COOKIE)
+ offset = address;
+
+ marshal_backtrace(offset & ~1, cookie, 0);
+}
+
+static void arm_backtrace_eabi(int cpu, struct pt_regs *const regs, unsigned int depth)
+{
+#if defined(__arm__) || defined(__aarch64__)
+ struct stack_frame_eabi *curr;
+ struct stack_frame_eabi bufcurr;
+#if defined(__arm__)
+ const bool is_compat = false;
+ unsigned long fp = regs->ARM_fp;
+ unsigned long sp = regs->ARM_sp;
+ unsigned long lr = regs->ARM_lr;
+ const int gcc_frame_offset = sizeof(unsigned long);
+#else
+ /* Is userspace aarch32 (32 bit) */
+ const bool is_compat = compat_user_mode(regs);
+ unsigned long fp = (is_compat ? regs->regs[11] : regs->regs[29]);
+ unsigned long sp = (is_compat ? regs->compat_sp : regs->sp);
+ unsigned long lr = (is_compat ? regs->compat_lr : regs->regs[30]);
+ const int gcc_frame_offset = (is_compat ? sizeof(u32) : 0);
+#endif
+ /* clang frame offset is always zero */
+ int is_user_mode = user_mode(regs);
+
+ /* pc (current function) has already been added */
+
+ if (!is_user_mode)
+ return;
+
+ /* Add the lr (parent function), entry preamble may not have
+ * executed
+ */
+ gator_add_trace(cpu, lr);
+
+ /* check fp is valid */
+ if (fp == 0 || fp < sp)
+ return;
+
+ /* Get the current stack frame */
+ curr = (struct stack_frame_eabi *)(fp - gcc_frame_offset);
+ if ((unsigned long)curr & 3)
+ return;
+
+ while (depth-- && curr) {
+ if (!access_ok(VERIFY_READ, curr, sizeof(struct stack_frame_eabi)) ||
+ __copy_from_user_inatomic(&bufcurr, curr, sizeof(struct stack_frame_eabi))) {
+ return;
+ }
+
+ fp = (is_compat ? bufcurr.fp_32 : bufcurr.fp);
+ lr = (is_compat ? bufcurr.lr_32 : bufcurr.lr);
+
+#define calc_next(reg) ((reg) - gcc_frame_offset)
+ /* Returns true if reg is a valid fp */
+#define validate_next(reg, curr) \
+ ((reg) != 0 && (calc_next(reg) & 3) == 0 && (unsigned long)(curr) < calc_next(reg))
+
+ /* Try lr from the stack as the fp because gcc leaf functions do
+ * not push lr. If gcc_frame_offset is non-zero, the lr will also
+ * be the clang fp. This assumes code is at a lower address than
+ * the stack
+ */
+ if (validate_next(lr, curr)) {
+ fp = lr;
+ lr = (is_compat ? bufcurr.lr2_32 : bufcurr.lr2);
+ }
+
+ gator_add_trace(cpu, lr);
+
+ if (!validate_next(fp, curr))
+ return;
+
+ /* Move to the next stack frame */
+ curr = (struct stack_frame_eabi *)calc_next(fp);
+ }
+#endif
+}
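A sketch (not part of the patch) of the 32-bit gcc frame layout the walker above assumes (gcc_frame_offset == 4):

	/*
	 *   fp - 4 : saved fp of the caller   <-- curr points here
	 *   fp     : saved lr (return address)
	 *
	 * With gcc_frame_offset == 0 (clang, aarch64 gcc), {fp, lr} sit at
	 * fp itself; the lr-as-fp retry covers frames that saved only fp.
	 */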
+
+#if defined(__arm__) || defined(__aarch64__)
+static int report_trace(struct stackframe *frame, void *d)
+{
+ unsigned int *depth = d, cookie = NO_COOKIE;
+ unsigned long addr = frame->pc;
+
+ if (*depth) {
+#if defined(MODULE)
+ unsigned int cpu = get_physical_cpu();
+ struct module *mod = __module_address(addr);
+
+ if (mod) {
+ cookie = get_cookie(cpu, current, mod->name, false);
+ addr = addr - (unsigned long)mod->module_core;
+ }
+#endif
+ marshal_backtrace(addr & ~1, cookie, 1);
+ (*depth)--;
+ }
+
+ return *depth == 0;
+}
+#endif
+
+/* Uncomment the following line to enable kernel stack unwinding within gator; note that it can also be defined from the Makefile */
+/* #define GATOR_KERNEL_STACK_UNWINDING */
+
+#if (defined(__arm__) || defined(__aarch64__)) && !defined(GATOR_KERNEL_STACK_UNWINDING)
+/* Disabled by default */
+MODULE_PARM_DESC(kernel_stack_unwinding, "Allow kernel stack unwinding.");
+static bool kernel_stack_unwinding;
+module_param(kernel_stack_unwinding, bool, 0644);
+#endif
+
+static void kernel_backtrace(int cpu, struct pt_regs *const regs)
+{
+#if defined(__arm__) || defined(__aarch64__)
+#ifdef GATOR_KERNEL_STACK_UNWINDING
+ int depth = gator_backtrace_depth;
+#else
+ int depth = (kernel_stack_unwinding ? gator_backtrace_depth : 1);
+#endif
+ struct stackframe frame;
+
+ if (depth == 0)
+ depth = 1;
+#if defined(__arm__)
+ frame.fp = regs->ARM_fp;
+ frame.sp = regs->ARM_sp;
+ frame.lr = regs->ARM_lr;
+ frame.pc = regs->ARM_pc;
+#else
+ frame.fp = regs->regs[29];
+ frame.sp = regs->sp;
+ frame.pc = regs->pc;
+#endif
+ walk_stackframe(&frame, report_trace, &depth);
+#else
+ marshal_backtrace(PC_REG & ~1, NO_COOKIE, 1);
+#endif
+}
+
+static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time)
+{
+ bool in_kernel;
+ unsigned long exec_cookie;
+
+ if (!regs)
+ return;
+
+ in_kernel = !user_mode(regs);
+ exec_cookie = get_exec_cookie(cpu, current);
+
+ if (!marshal_backtrace_header(exec_cookie, current->tgid, current->pid, time))
+ return;
+
+ if (in_kernel) {
+ kernel_backtrace(cpu, regs);
+ } else {
+ /* Cookie+PC */
+ gator_add_trace(cpu, PC_REG);
+
+ /* Backtrace */
+ if (gator_backtrace_depth)
+ arm_backtrace_eabi(cpu, regs, gator_backtrace_depth);
+ }
+
+ marshal_backtrace_footer(time);
+}
diff --git a/drivers/gator/gator_buffer.c b/drivers/gator/gator_buffer.c
new file mode 100644
index 000000000000..910d5aa15066
--- /dev/null
+++ b/drivers/gator/gator_buffer.c
@@ -0,0 +1,171 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+static void marshal_frame(int cpu, int buftype)
+{
+ int frame;
+ bool write_cpu;
+
+ if (!per_cpu(gator_buffer, cpu)[buftype])
+ return;
+
+ switch (buftype) {
+ case SUMMARY_BUF:
+ write_cpu = false;
+ frame = FRAME_SUMMARY;
+ break;
+ case BACKTRACE_BUF:
+ write_cpu = true;
+ frame = FRAME_BACKTRACE;
+ break;
+ case NAME_BUF:
+ write_cpu = true;
+ frame = FRAME_NAME;
+ break;
+ case COUNTER_BUF:
+ write_cpu = false;
+ frame = FRAME_COUNTER;
+ break;
+ case BLOCK_COUNTER_BUF:
+ write_cpu = true;
+ frame = FRAME_BLOCK_COUNTER;
+ break;
+ case ANNOTATE_BUF:
+ write_cpu = false;
+ frame = FRAME_ANNOTATE;
+ break;
+ case SCHED_TRACE_BUF:
+ write_cpu = true;
+ frame = FRAME_SCHED_TRACE;
+ break;
+ case IDLE_BUF:
+ write_cpu = false;
+ frame = FRAME_IDLE;
+ break;
+ case ACTIVITY_BUF:
+ write_cpu = false;
+ frame = FRAME_ACTIVITY;
+ break;
+ default:
+ write_cpu = false;
+ frame = -1;
+ break;
+ }
+
+ /* add response type */
+ if (gator_response_type > 0)
+ gator_buffer_write_packed_int(cpu, buftype, gator_response_type);
+
+ /* leave space for 4-byte unpacked length */
+ per_cpu(gator_buffer_write, cpu)[buftype] = (per_cpu(gator_buffer_write, cpu)[buftype] + sizeof(s32)) & gator_buffer_mask[buftype];
+
+ /* add frame type and core number */
+ gator_buffer_write_packed_int(cpu, buftype, frame);
+ if (write_cpu)
+ gator_buffer_write_packed_int(cpu, buftype, cpu);
+}
+
+static int buffer_bytes_available(int cpu, int buftype)
+{
+ int remaining, filled;
+
+ filled = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_read, cpu)[buftype];
+ if (filled < 0)
+ filled += gator_buffer_size[buftype];
+
+ remaining = gator_buffer_size[buftype] - filled;
+
+ if (per_cpu(buffer_space_available, cpu)[buftype])
+ /* Give some extra room; also allows space to insert the overflow error packet */
+ remaining -= 200;
+ else
+ /* Hysteresis, prevents multiple overflow messages */
+ remaining -= 2000;
+
+ return remaining;
+}
+
+static bool buffer_check_space(int cpu, int buftype, int bytes)
+{
+ int remaining = buffer_bytes_available(cpu, buftype);
+
+ if (remaining < bytes)
+ per_cpu(buffer_space_available, cpu)[buftype] = false;
+ else
+ per_cpu(buffer_space_available, cpu)[buftype] = true;
+
+ return per_cpu(buffer_space_available, cpu)[buftype];
+}
+
+static int contiguous_space_available(int cpu, int buftype)
+{
+ int remaining = buffer_bytes_available(cpu, buftype);
+ int contiguous = gator_buffer_size[buftype] - per_cpu(gator_buffer_write, cpu)[buftype];
+
+ if (remaining < contiguous)
+ return remaining;
+ return contiguous;
+}
+
+static void gator_commit_buffer(int cpu, int buftype, u64 time)
+{
+ int type_length, commit, length, byte;
+ unsigned long flags;
+
+ if (!per_cpu(gator_buffer, cpu)[buftype])
+ return;
+
+	/* post-populate the length, which includes neither the response type nor the length field itself, i.e. only the length of the payload */
+ local_irq_save(flags);
+ type_length = gator_response_type ? 1 : 0;
+ commit = per_cpu(gator_buffer_commit, cpu)[buftype];
+ length = per_cpu(gator_buffer_write, cpu)[buftype] - commit;
+ if (length < 0)
+ length += gator_buffer_size[buftype];
+ length = length - type_length - sizeof(s32);
+
+ if (length <= FRAME_HEADER_SIZE) {
+ /* Nothing to write, only the frame header is present */
+ local_irq_restore(flags);
+ return;
+ }
+
+ for (byte = 0; byte < sizeof(s32); byte++)
+ per_cpu(gator_buffer, cpu)[buftype][(commit + type_length + byte) & gator_buffer_mask[buftype]] = (length >> byte * 8) & 0xFF;
+
+ per_cpu(gator_buffer_commit, cpu)[buftype] = per_cpu(gator_buffer_write, cpu)[buftype];
+
+ if (gator_live_rate > 0) {
+ while (time > per_cpu(gator_buffer_commit_time, cpu))
+ per_cpu(gator_buffer_commit_time, cpu) += gator_live_rate;
+ }
+
+ marshal_frame(cpu, buftype);
+ local_irq_restore(flags);
+
+	/* delay scheduling the work, as scheduling work during a context switch is illegal in kernel versions 3.5 and greater */
+ if (per_cpu(in_scheduler_context, cpu)) {
+#ifndef CONFIG_PREEMPT_RT_FULL
+ /* mod_timer can not be used in interrupt context in RT-Preempt full */
+ mod_timer(&gator_buffer_wake_up_timer, jiffies + 1);
+#endif
+ } else {
+ up(&gator_buffer_wake_sem);
+ }
+}
+
+static void buffer_check(int cpu, int buftype, u64 time)
+{
+ int filled = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_commit, cpu)[buftype];
+
+ if (filled < 0)
+ filled += gator_buffer_size[buftype];
+ if (filled >= ((gator_buffer_size[buftype] * 3) / 4))
+ gator_commit_buffer(cpu, buftype, time);
+}
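A worked example with a hypothetical 64 KiB buffer, showing the wrap-around fix-up and the commit threshold above:

	/* gator_buffer_size[buftype] == 65536 (hypothetical)           */
	/* write == 1000, commit == 64536:                              */
	/*   filled = 1000 - 64536 = -63536; + 65536 = 2000 bytes       */
	/* commit happens once filled >= 65536 * 3 / 4 == 49152 bytes   */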
diff --git a/drivers/gator/gator_buffer_write.c b/drivers/gator/gator_buffer_write.c
new file mode 100644
index 000000000000..654ec606cfad
--- /dev/null
+++ b/drivers/gator/gator_buffer_write.c
@@ -0,0 +1,83 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+static void gator_buffer_write_packed_int(int cpu, int buftype, int x)
+{
+ uint32_t write = per_cpu(gator_buffer_write, cpu)[buftype];
+ uint32_t mask = gator_buffer_mask[buftype];
+ char *buffer = per_cpu(gator_buffer, cpu)[buftype];
+ int packedBytes = 0;
+ int more = true;
+
+ while (more) {
+ /* low order 7 bits of x */
+ char b = x & 0x7f;
+
+ x >>= 7;
+
+ if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0))
+ more = false;
+ else
+ b |= 0x80;
+
+ buffer[(write + packedBytes) & mask] = b;
+ packedBytes++;
+ }
+
+ per_cpu(gator_buffer_write, cpu)[buftype] = (write + packedBytes) & mask;
+}
+
+static void gator_buffer_write_packed_int64(int cpu, int buftype, long long x)
+{
+ uint32_t write = per_cpu(gator_buffer_write, cpu)[buftype];
+ uint32_t mask = gator_buffer_mask[buftype];
+ char *buffer = per_cpu(gator_buffer, cpu)[buftype];
+ int packedBytes = 0;
+ int more = true;
+
+ while (more) {
+ /* low order 7 bits of x */
+ char b = x & 0x7f;
+
+ x >>= 7;
+
+ if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0))
+ more = false;
+ else
+ b |= 0x80;
+
+ buffer[(write + packedBytes) & mask] = b;
+ packedBytes++;
+ }
+
+ per_cpu(gator_buffer_write, cpu)[buftype] = (write + packedBytes) & mask;
+}
+
+static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len)
+{
+ int i;
+ u32 write = per_cpu(gator_buffer_write, cpu)[buftype];
+ u32 mask = gator_buffer_mask[buftype];
+ char *buffer = per_cpu(gator_buffer, cpu)[buftype];
+
+ for (i = 0; i < len; i++) {
+ buffer[write] = x[i];
+ write = (write + 1) & mask;
+ }
+
+ per_cpu(gator_buffer_write, cpu)[buftype] = write;
+}
+
+static void gator_buffer_write_string(int cpu, int buftype, const char *x)
+{
+ int len = strlen(x);
+
+ gator_buffer_write_packed_int(cpu, buftype, len);
+ gator_buffer_write_bytes(cpu, buftype, x, len);
+}
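A hypothetical host-side decoder (not part of the patch), the inverse of the signed LEB128-style writers above; buf and pos are placeholders for the reader's state:

	static long long read_packed_int64(const char *buf, int *pos)
	{
		long long result = 0;
		int shift = 0;
		char b;

		do {
			b = buf[(*pos)++];
			result |= (long long)(b & 0x7f) << shift;
			shift += 7;
		} while (b & 0x80);

		/* sign-extend when the last group's 0x40 bit is set */
		if (shift < 64 && (b & 0x40))
			result |= -(1LL << shift);

		return result;
	}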
diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c
new file mode 100644
index 000000000000..c43cce815226
--- /dev/null
+++ b/drivers/gator/gator_cookies.c
@@ -0,0 +1,446 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/* must be power of 2 */
+#define COOKIEMAP_ENTRIES 1024
+/* must be a power of 2 - 512/4 = 128 entries */
+#define TRANSLATE_BUFFER_SIZE 512
+#define TRANSLATE_TEXT_SIZE 256
+#define MAX_COLLISIONS 2
+
+static uint32_t *gator_crc32_table;
+static unsigned int translate_buffer_mask;
+
+struct cookie_args {
+ struct task_struct *task;
+ const char *text;
+};
+
+static DEFINE_PER_CPU(char *, translate_text);
+static DEFINE_PER_CPU(uint32_t, cookie_next_key);
+static DEFINE_PER_CPU(uint64_t *, cookie_keys);
+static DEFINE_PER_CPU(uint32_t *, cookie_values);
+static DEFINE_PER_CPU(int, translate_buffer_read);
+static DEFINE_PER_CPU(int, translate_buffer_write);
+static DEFINE_PER_CPU(struct cookie_args *, translate_buffer);
+
+static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq);
+static void wq_cookie_handler(struct work_struct *unused);
+static DECLARE_WORK(cookie_work, wq_cookie_handler);
+static struct timer_list app_process_wake_up_timer;
+static void app_process_wake_up_handler(unsigned long unused_data);
+
+static uint32_t cookiemap_code(uint64_t value64)
+{
+ uint32_t value = (uint32_t)((value64 >> 32) + value64);
+ uint32_t cookiecode = (value >> 24) & 0xff;
+
+ cookiecode = cookiecode * 31 + ((value >> 16) & 0xff);
+ cookiecode = cookiecode * 31 + ((value >> 8) & 0xff);
+ cookiecode = cookiecode * 31 + ((value >> 0) & 0xff);
+ cookiecode &= (COOKIEMAP_ENTRIES - 1);
+ return cookiecode * MAX_COLLISIONS;
+}
+
+static uint32_t gator_chksum_crc32(const char *data)
+{
+ register unsigned long crc;
+ const unsigned char *block = data;
+ int i, length = strlen(data);
+
+ crc = 0xFFFFFFFF;
+ for (i = 0; i < length; i++)
+ crc = ((crc >> 8) & 0x00FFFFFF) ^ gator_crc32_table[(crc ^ *block++) & 0xFF];
+
+ return (crc ^ 0xFFFFFFFF);
+}
+
+/*
+ * Exists
+ * Pre: [0][1][v][3]..[n-1]
+ * Post: [v][0][1][3]..[n-1]
+ */
+static uint32_t cookiemap_exists(uint64_t key)
+{
+ unsigned long x, flags, retval = 0;
+ int cpu = get_physical_cpu();
+ uint32_t cookiecode = cookiemap_code(key);
+ uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]);
+ uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]);
+
+ /* Can be called from interrupt handler or from work queue */
+ local_irq_save(flags);
+ for (x = 0; x < MAX_COLLISIONS; x++) {
+ if (keys[x] == key) {
+ uint32_t value = values[x];
+
+ for (; x > 0; x--) {
+ keys[x] = keys[x - 1];
+ values[x] = values[x - 1];
+ }
+ keys[0] = key;
+ values[0] = value;
+ retval = value;
+ break;
+ }
+ }
+ local_irq_restore(flags);
+
+ return retval;
+}
+
+/*
+ * Add
+ * Pre: [0][1][2][3]..[n-1]
+ * Post: [v][0][1][2]..[n-2]
+ */
+static void cookiemap_add(uint64_t key, uint32_t value)
+{
+ int cpu = get_physical_cpu();
+ int cookiecode = cookiemap_code(key);
+ uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]);
+ uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]);
+ int x;
+
+ for (x = MAX_COLLISIONS - 1; x > 0; x--) {
+ keys[x] = keys[x - 1];
+ values[x] = values[x - 1];
+ }
+ keys[0] = key;
+ values[0] = value;
+}
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+static void translate_buffer_write_args(int cpu, struct task_struct *task, const char *text)
+{
+ unsigned long flags;
+ int write;
+ int next_write;
+ struct cookie_args *args;
+
+ local_irq_save(flags);
+
+ write = per_cpu(translate_buffer_write, cpu);
+ next_write = (write + 1) & translate_buffer_mask;
+
+	/* At least one entry must always remain available: when read == write, the queue is empty, not full */
+ if (next_write != per_cpu(translate_buffer_read, cpu)) {
+ args = &per_cpu(translate_buffer, cpu)[write];
+ args->task = task;
+ args->text = text;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
+ get_task_struct(task);
+#endif
+ per_cpu(translate_buffer_write, cpu) = next_write;
+ }
+
+ local_irq_restore(flags);
+}
+#endif
+
+static void translate_buffer_read_args(int cpu, struct cookie_args *args)
+{
+ unsigned long flags;
+ int read;
+
+ local_irq_save(flags);
+
+ read = per_cpu(translate_buffer_read, cpu);
+ *args = per_cpu(translate_buffer, cpu)[read];
+ per_cpu(translate_buffer_read, cpu) = (read + 1) & translate_buffer_mask;
+
+ local_irq_restore(flags);
+}
+
+static void wq_cookie_handler(struct work_struct *unused)
+{
+ struct cookie_args args;
+ int cpu = get_physical_cpu(), cookie;
+
+ mutex_lock(&start_mutex);
+
+ if (gator_started != 0) {
+ while (per_cpu(translate_buffer_read, cpu) != per_cpu(translate_buffer_write, cpu)) {
+ translate_buffer_read_args(cpu, &args);
+ cookie = get_cookie(cpu, args.task, args.text, true);
+ marshal_link(cookie, args.task->tgid, args.task->pid);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
+ put_task_struct(args.task);
+#endif
+ }
+ }
+
+ mutex_unlock(&start_mutex);
+}
+
+static void app_process_wake_up_handler(unsigned long unused_data)
+{
+	/* delay scheduling the work, as scheduling work during a context switch is illegal in kernel versions 3.5 and greater */
+ schedule_work(&cookie_work);
+}
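+
+/* Note on the deferral chain: get_cookie() can run in interrupt or
+ * scheduler-trace context, so translate_app_process() queues the task
+ * and arms the timer above; the timer callback runs in softirq context,
+ * where schedule_work() is safe, and wq_cookie_handler() finally runs
+ * in process context, where get_task_mm() and friends may sleep
+ */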
+
+/* Retrieve the full name from /proc/<pid>/cmdline for java processes on Android */
+static int translate_app_process(const char **text, int cpu, struct task_struct *task, bool from_wq)
+{
+ void *maddr;
+ unsigned int len;
+ unsigned long addr;
+ struct mm_struct *mm;
+ struct page *page = NULL;
+ struct vm_area_struct *page_vma;
+ int bytes, offset, retval = 0;
+ char *buf = per_cpu(translate_text, cpu);
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+ /* Push the work into a work queue if in atomic context, as the kernel
+ * functions below might sleep. Rely on the from_wq flag rather than
+ * the in_irq() or in_interrupt() kernel functions, as their values
+ * seem inconsistent during a context switch between android/linux
+ * versions
+ */
+ if (!from_wq) {
+ /* Check if already in buffer */
+ int pos = per_cpu(translate_buffer_read, cpu);
+
+ while (pos != per_cpu(translate_buffer_write, cpu)) {
+ if (per_cpu(translate_buffer, cpu)[pos].task == task)
+ goto out;
+ pos = (pos + 1) & translate_buffer_mask;
+ }
+
+ translate_buffer_write_args(cpu, task, *text);
+
+ /* Not safe to call in RT-Preempt full in schedule switch context */
+ mod_timer(&app_process_wake_up_timer, jiffies + 1);
+ goto out;
+ }
+#endif
+
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out;
+ if (!mm->arg_end)
+ goto outmm;
+ addr = mm->arg_start;
+ len = mm->arg_end - mm->arg_start;
+
+ if (len > TRANSLATE_TEXT_SIZE)
+ len = TRANSLATE_TEXT_SIZE;
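+
+ /* mm->arg_start..arg_end holds the NUL-separated argv strings, so the
+ * strcmp() checks below effectively compare argv[0]; the clamp above
+ * keeps the copy within the per-cpu translate_text buffer
+ */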
+
+ down_read(&mm->mmap_sem);
+ while (len) {
+ if (get_user_pages(task, mm, addr, 1, 0, 1, &page, &page_vma) <= 0)
+ goto outsem;
+
+ maddr = kmap(page);
+ offset = addr & (PAGE_SIZE - 1);
+ bytes = len;
+ if (bytes > PAGE_SIZE - offset)
+ bytes = PAGE_SIZE - offset;
+
+ copy_from_user_page(page_vma, page, addr, buf, maddr + offset, bytes);
+
+ /* release the page reference taken by get_user_pages() */
+ kunmap(page);
+ page_cache_release(page);
+
+ len -= bytes;
+ buf += bytes;
+ addr += bytes;
+
+ *text = per_cpu(translate_text, cpu);
+ retval = 1;
+ }
+
+ /* On app_process startup, /proc/<pid>/cmdline initially reads "zygote", then "<pre-initialized>", and only changes to the real name after an initial startup period */
+ if (strcmp(*text, "zygote") == 0 || strcmp(*text, "<pre-initialized>") == 0)
+ retval = 0;
+
+outsem:
+ up_read(&mm->mmap_sem);
+outmm:
+ mmput(mm);
+out:
+ return retval;
+}
+
+static const char APP_PROCESS[] = "app_process";
+
+static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq)
+{
+ unsigned long flags, cookie;
+ uint64_t key;
+
+ key = gator_chksum_crc32(text);
+ key = (key << 32) | (uint32_t)task->tgid;
+
+ cookie = cookiemap_exists(key);
+ if (cookie)
+ return cookie;
+
+ /* On 64-bit Android, app_process may be named app_process32 or app_process64, so match on the prefix only */
+ if (strncmp(text, APP_PROCESS, sizeof(APP_PROCESS) - 1) == 0) {
+ if (!translate_app_process(&text, cpu, task, from_wq))
+ return UNRESOLVED_COOKIE;
+ }
+
+ /* Can be called from interrupt handler or from work queue or from scheduler trace */
+ local_irq_save(flags);
+
+ cookie = UNRESOLVED_COOKIE;
+ if (marshal_cookie_header(text)) {
+ cookie = per_cpu(cookie_next_key, cpu) += nr_cpu_ids;
+ cookiemap_add(key, cookie);
+ marshal_cookie(cookie, text);
+ }
+
+ local_irq_restore(flags);
+
+ return cookie;
+}
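+
+/* Cookie key and allocation scheme (worked example, assuming 4 cpus):
+ * key = (crc32(text) << 32) | tgid, so the same binary name in
+ * different processes yields distinct keys. cookie_next_key advances in
+ * strides of nr_cpu_ids from the seed nr_cpu_ids + cpu, so cpu 1 issues
+ * 9, 13, 17, ...; each cpu draws from a distinct residue class and
+ * cookie values never collide across cpus without any locking
+ */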
+
+static int get_exec_cookie(int cpu, struct task_struct *task)
+{
+ struct mm_struct *mm = task->mm;
+ const char *text;
+
+ /* kernel threads have no address space */
+ if (!mm)
+ return NO_COOKIE;
+
+ if (mm->exe_file) {
+ text = mm->exe_file->f_path.dentry->d_name.name;
+ return get_cookie(cpu, task, text, false);
+ }
+
+ return UNRESOLVED_COOKIE;
+}
+
+static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsigned long addr, off_t *offset)
+{
+ unsigned long cookie = NO_COOKIE;
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+ const char *text;
+
+ if (!mm)
+ return cookie;
+
+ for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ continue;
+
+ if (vma->vm_file) {
+ text = vma->vm_file->f_path.dentry->d_name.name;
+ cookie = get_cookie(cpu, task, text, false);
+ *offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start;
+ } else {
+ /* must be an anonymous map */
+ *offset = addr;
+ }
+
+ break;
+ }
+
+ if (!vma)
+ cookie = UNRESOLVED_COOKIE;
+
+ return cookie;
+}
+
+static int cookies_initialize(void)
+{
+ uint32_t crc, poly;
+ int i, j, cpu, size, err = 0;
+
+ translate_buffer_mask = TRANSLATE_BUFFER_SIZE / sizeof(per_cpu(translate_buffer, 0)[0]) - 1;
+
+ for_each_present_cpu(cpu) {
+ per_cpu(cookie_next_key, cpu) = nr_cpu_ids + cpu;
+
+ size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint64_t);
+ per_cpu(cookie_keys, cpu) = kmalloc(size, GFP_KERNEL);
+ if (!per_cpu(cookie_keys, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+ memset(per_cpu(cookie_keys, cpu), 0, size);
+
+ size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint32_t);
+ per_cpu(cookie_values, cpu) = kmalloc(size, GFP_KERNEL);
+ if (!per_cpu(cookie_values, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+ memset(per_cpu(cookie_values, cpu), 0, size);
+
+ per_cpu(translate_buffer, cpu) = kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL);
+ if (!per_cpu(translate_buffer, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+
+ per_cpu(translate_buffer_write, cpu) = 0;
+ per_cpu(translate_buffer_read, cpu) = 0;
+
+ per_cpu(translate_text, cpu) = kmalloc(TRANSLATE_TEXT_SIZE, GFP_KERNEL);
+ if (!per_cpu(translate_text, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+ }
+
+ /* build CRC32 table */
+ poly = 0x04c11db7;
+ gator_crc32_table = kmalloc(256 * sizeof(*gator_crc32_table), GFP_KERNEL);
+ if (!gator_crc32_table) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+ for (i = 0; i < 256; i++) {
+ crc = i;
+ for (j = 8; j > 0; j--) {
+ if (crc & 1)
+ crc = (crc >> 1) ^ poly;
+ else
+ crc >>= 1;
+ }
+ gator_crc32_table[i] = crc;
+ }
+
+ setup_timer(&app_process_wake_up_timer, app_process_wake_up_handler, 0);
+
+cookie_setup_error:
+ return err;
+}
+
+static void cookies_release(void)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu) {
+ kfree(per_cpu(cookie_keys, cpu));
+ per_cpu(cookie_keys, cpu) = NULL;
+
+ kfree(per_cpu(cookie_values, cpu));
+ per_cpu(cookie_values, cpu) = NULL;
+
+ kfree(per_cpu(translate_buffer, cpu));
+ per_cpu(translate_buffer, cpu) = NULL;
+ per_cpu(translate_buffer_read, cpu) = 0;
+ per_cpu(translate_buffer_write, cpu) = 0;
+
+ kfree(per_cpu(translate_text, cpu));
+ per_cpu(translate_text, cpu) = NULL;
+ }
+
+ del_timer_sync(&app_process_wake_up_timer);
+ kfree(gator_crc32_table);
+ gator_crc32_table = NULL;
+}
diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c
new file mode 100644
index 000000000000..a157a0013302
--- /dev/null
+++ b/drivers/gator/gator_events_armv6.c
@@ -0,0 +1,234 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+/* gator_events_perf_pmu.c is used if perf is supported */
+#if GATOR_NO_PERF_SUPPORT
+
+static const char *pmnc_name;
+
+/*
+ * Per-CPU PMCR
+ */
+#define PMCR_E (1 << 0) /* Enable */
+#define PMCR_P (1 << 1) /* Count reset */
+#define PMCR_C (1 << 2) /* Cycle counter reset */
+#define PMCR_OFL_PMN0 (1 << 8) /* Count reg 0 overflow */
+#define PMCR_OFL_PMN1 (1 << 9) /* Count reg 1 overflow */
+#define PMCR_OFL_CCNT (1 << 10) /* Cycle counter overflow */
+
+#define PMN0 0
+#define PMN1 1
+#define CCNT 2
+#define CNTMAX (CCNT+1)
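+
+/* The ARMv6 PMU provides two event counters (PMN0, PMN1) plus the cycle
+ * counter (CCNT), all controlled through the single PMCR register; the
+ * event for PMN0 is selected in PMCR bits [27:20] and for PMN1 in bits
+ * [19:12], which is what the shifts in gator_events_armv6_online()
+ * below encode
+ */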
+
+static int pmnc_counters;
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+static inline void armv6_pmnc_write(u32 val)
+{
+ /* the upper 4 bits and bits 7 and 11 are write-as-zero */
+ val &= 0x0ffff77f;
+ asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r" (val));
+}
+
+static inline u32 armv6_pmnc_read(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r" (val));
+ return val;
+}
+
+static void armv6_pmnc_reset_counter(unsigned int cnt)
+{
+ u32 val = 0;
+
+ switch (cnt) {
+ case CCNT:
+ asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r" (val));
+ break;
+ case PMN0:
+ asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r" (val));
+ break;
+ case PMN1:
+ asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r" (val));
+ break;
+ }
+}
+
+int gator_events_armv6_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+
+ pmnc_counters = 3;
+
+ for (i = PMN0; i <= CCNT; i++) {
+ char buf[40];
+
+ if (i == CCNT)
+ snprintf(buf, sizeof(buf), "ARM_%s_ccnt", pmnc_name);
+ else
+ snprintf(buf, sizeof(buf), "ARM_%s_cnt%d", pmnc_name, i);
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
+ if (i != CCNT)
+ gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
+ }
+
+ return 0;
+}
+
+static int gator_events_armv6_online(int **buffer, bool migrate)
+{
+ unsigned int cnt, len = 0, cpu = smp_processor_id();
+ u32 pmnc;
+
+ if (armv6_pmnc_read() & PMCR_E)
+ armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
+
+ /* initialize PMNC: clear the overflow flags and reset the cycle counter (C bit) and event counters (P bit) */
+ armv6_pmnc_write(PMCR_OFL_PMN0 | PMCR_OFL_PMN1 | PMCR_OFL_CCNT |
+ PMCR_C | PMCR_P);
+
+ /* configure control register */
+ for (pmnc = 0, cnt = PMN0; cnt <= CCNT; cnt++) {
+ unsigned long event;
+
+ if (!pmnc_enabled[cnt])
+ continue;
+
+ event = pmnc_event[cnt] & 255;
+
+ /* Set event (if destined for PMNx counters) */
+ if (cnt == PMN0)
+ pmnc |= event << 20;
+ else if (cnt == PMN1)
+ pmnc |= event << 12;
+
+ /* Reset counter */
+ armv6_pmnc_reset_counter(cnt);
+ }
+ armv6_pmnc_write(pmnc | PMCR_E);
+
+ /* return zero values, no need to read as the counters were just reset */
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = 0;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static int gator_events_armv6_offline(int **buffer, bool migrate)
+{
+ unsigned int cnt;
+
+ armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
+ for (cnt = PMN0; cnt <= CCNT; cnt++)
+ armv6_pmnc_reset_counter(cnt);
+
+ return 0;
+}
+
+static void gator_events_armv6_stop(void)
+{
+ unsigned int cnt;
+
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ }
+}
+
+static int gator_events_armv6_read(int **buffer, bool sched_switch)
+{
+ int cnt, len = 0;
+ int cpu = smp_processor_id();
+
+ /* a context switch may occur before the online hotplug event, so check that the PMU is enabled */
+ if (!(armv6_pmnc_read() & PMCR_E))
+ return 0;
+
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ u32 value = 0;
+
+ switch (cnt) {
+ case CCNT:
+ asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r" (value));
+ break;
+ case PMN0:
+ asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r" (value));
+ break;
+ case PMN1:
+ asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r" (value));
+ break;
+ }
+ armv6_pmnc_reset_counter(cnt);
+
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = value;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_armv6_interface = {
+ .create_files = gator_events_armv6_create_files,
+ .stop = gator_events_armv6_stop,
+ .online = gator_events_armv6_online,
+ .offline = gator_events_armv6_offline,
+ .read = gator_events_armv6_read,
+};
+
+int gator_events_armv6_init(void)
+{
+ unsigned int cnt;
+
+ switch (gator_cpuid()) {
+ case ARM1136:
+ case ARM1156:
+ case ARM1176:
+ pmnc_name = "ARM11";
+ break;
+ case ARM11MPCORE:
+ pmnc_name = "ARM11MPCore";
+ break;
+ default:
+ return -1;
+ }
+
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ pmnc_key[cnt] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_armv6_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c
new file mode 100644
index 000000000000..09c94220114c
--- /dev/null
+++ b/drivers/gator/gator_events_armv7.c
@@ -0,0 +1,314 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Disabling interrupts
+ * Many of the functions below disable interrupts via local_irq_save() to
+ * prevent races between multiple entities (e.g. hrtimer interrupts and
+ * event based interrupts) calling the same functions. Accessing the PMU
+ * involves several steps (disable, select, read, enable) that must be
+ * performed atomically, and normal synchronization routines cannot be
+ * used as these functions are called from interrupt context.
+ */
+
+#include "gator.h"
+
+/* gator_events_perf_pmu.c is used if perf is supported */
+#if GATOR_NO_PERF_SUPPORT
+
+/* Per-CPU PMNC: config reg */
+#define PMNC_E (1 << 0) /* Enable all counters */
+#define PMNC_P (1 << 1) /* Reset all counters */
+#define PMNC_C (1 << 2) /* Cycle counter reset */
+#define PMNC_MASK 0x3f /* Mask for writable bits */
+
+/* ccnt reg */
+#define CCNT_REG (1 << 31)
+
+#define CCNT 0
+#define CNT0 1
+#define CNTMAX (6+1)
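+
+/* Counter indexing: slot 0 is the cycle counter (selected via bit 31,
+ * CCNT_REG, in the count enable/disable set and clear registers) and
+ * slots CNT0.. map to event counters 0..; CNTMAX covers the largest
+ * implementation handled here (6 event counters), while the per-part
+ * pmnc_counters is set in gator_events_armv7_init()
+ */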
+
+static const char *pmnc_name;
+static int pmnc_counters;
+
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+inline void armv7_pmnc_write(u32 val)
+{
+ val &= PMNC_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val));
+}
+
+inline u32 armv7_pmnc_read(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+ return val;
+}
+
+inline u32 armv7_ccnt_read(u32 reset_value)
+{
+ unsigned long flags;
+ u32 newval = -reset_value;
+ u32 den = CCNT_REG;
+ u32 val;
+
+ local_irq_save(flags);
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den)); /* disable */
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); /* read */
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (newval)); /* new value */
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den)); /* enable */
+ local_irq_restore(flags);
+
+ return val;
+}
+
+inline u32 armv7_cntn_read(unsigned int cnt, u32 reset_value)
+{
+ unsigned long flags;
+ u32 newval = -reset_value;
+ u32 sel = (cnt - CNT0);
+ u32 den = 1 << sel;
+ u32 oldval;
+
+ local_irq_save(flags);
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den)); /* disable */
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel)); /* select */
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (oldval)); /* read */
+ asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (newval)); /* new value */
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den)); /* enable */
+ local_irq_restore(flags);
+
+ return oldval;
+}
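+
+/* Note: writing newval = -reset_value arms a counter to overflow after
+ * reset_value further events (the counters are 32-bit and count up);
+ * all callers in this file pass reset_value == 0, which simply clears
+ * the counter
+ */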
+
+static inline void armv7_pmnc_disable_interrupt(unsigned int cnt)
+{
+ u32 val = cnt ? (1 << (cnt - CNT0)) : (1 << 31);
+
+ asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+}
+
+inline u32 armv7_pmnc_reset_interrupt(void)
+{
+ /* Get and reset overflow status flags */
+ u32 flags;
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (flags));
+ flags &= 0x8000003f;
+ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (flags));
+ return flags;
+}
+
+static inline u32 armv7_pmnc_enable_counter(unsigned int cnt)
+{
+ u32 val = cnt ? (1 << (cnt - CNT0)) : CCNT_REG;
+
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
+ return cnt;
+}
+
+static inline u32 armv7_pmnc_disable_counter(unsigned int cnt)
+{
+ u32 val = cnt ? (1 << (cnt - CNT0)) : CCNT_REG;
+
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+ return cnt;
+}
+
+static inline int armv7_pmnc_select_counter(unsigned int cnt)
+{
+ u32 val = (cnt - CNT0);
+
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+ return cnt;
+}
+
+static inline void armv7_pmnc_write_evtsel(unsigned int cnt, u32 val)
+{
+ if (armv7_pmnc_select_counter(cnt) == cnt)
+ asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+}
+
+static int gator_events_armv7_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+
+ for (i = 0; i < pmnc_counters; i++) {
+ char buf[40];
+
+ if (i == 0)
+ snprintf(buf, sizeof(buf), "%s_ccnt", pmnc_name);
+ else
+ snprintf(buf, sizeof(buf), "%s_cnt%d", pmnc_name, i - 1);
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
+ if (i > 0)
+ gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
+ }
+
+ return 0;
+}
+
+static int gator_events_armv7_online(int **buffer, bool migrate)
+{
+ unsigned int cnt, len = 0, cpu = smp_processor_id();
+
+ if (armv7_pmnc_read() & PMNC_E)
+ armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
+
+ /* Initialize & Reset PMNC: C bit and P bit */
+ armv7_pmnc_write(PMNC_P | PMNC_C);
+
+ /* Reset overflow flags */
+ armv7_pmnc_reset_interrupt();
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ unsigned long event;
+
+ if (!pmnc_enabled[cnt])
+ continue;
+
+ /* Disable counter */
+ armv7_pmnc_disable_counter(cnt);
+
+ event = pmnc_event[cnt] & 255;
+
+ /* Set the event for the PMNx counters; the cycle counter needs no event */
+ if (cnt != CCNT)
+ armv7_pmnc_write_evtsel(cnt, event);
+
+ armv7_pmnc_disable_interrupt(cnt);
+
+ /* Reset counter */
+ cnt ? armv7_cntn_read(cnt, 0) : armv7_ccnt_read(0);
+
+ /* Enable counter */
+ armv7_pmnc_enable_counter(cnt);
+ }
+
+ /* enable */
+ armv7_pmnc_write(armv7_pmnc_read() | PMNC_E);
+
+ /* return zero values, no need to read as the counters were just reset */
+ for (cnt = 0; cnt < pmnc_counters; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = 0;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static int gator_events_armv7_offline(int **buffer, bool migrate)
+{
+ /* disable all counters, including PMCCNTR; overflow IRQs will not be signaled */
+ armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
+
+ return 0;
+}
+
+static void gator_events_armv7_stop(void)
+{
+ unsigned int cnt;
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ }
+}
+
+static int gator_events_armv7_read(int **buffer, bool sched_switch)
+{
+ int cnt, len = 0;
+ int cpu = smp_processor_id();
+
+ /* a context switch may occur before the online hotplug event, so check that the PMU is enabled */
+ if (!(armv7_pmnc_read() & PMNC_E))
+ return 0;
+
+ for (cnt = 0; cnt < pmnc_counters; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ int value;
+
+ if (cnt == CCNT)
+ value = armv7_ccnt_read(0);
+ else
+ value = armv7_cntn_read(cnt, 0);
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = value;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_armv7_interface = {
+ .create_files = gator_events_armv7_create_files,
+ .stop = gator_events_armv7_stop,
+ .online = gator_events_armv7_online,
+ .offline = gator_events_armv7_offline,
+ .read = gator_events_armv7_read,
+};
+
+int gator_events_armv7_init(void)
+{
+ unsigned int cnt;
+
+ switch (gator_cpuid()) {
+ case CORTEX_A5:
+ pmnc_name = "ARMv7_Cortex_A5";
+ pmnc_counters = 2;
+ break;
+ case CORTEX_A7:
+ pmnc_name = "ARMv7_Cortex_A7";
+ pmnc_counters = 4;
+ break;
+ case CORTEX_A8:
+ pmnc_name = "ARMv7_Cortex_A8";
+ pmnc_counters = 4;
+ break;
+ case CORTEX_A9:
+ pmnc_name = "ARMv7_Cortex_A9";
+ pmnc_counters = 6;
+ break;
+ case CORTEX_A15:
+ pmnc_name = "ARMv7_Cortex_A15";
+ pmnc_counters = 6;
+ break;
+ /* the ARM Cortex-A17 is not supported by versions of Linux before 3.0 */
+ default:
+ return -1;
+ }
+
+ pmnc_counters++; /* CNT[n] + CCNT */
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ pmnc_key[cnt] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_armv7_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c
new file mode 100644
index 000000000000..a352a54afa02
--- /dev/null
+++ b/drivers/gator/gator_events_block.c
@@ -0,0 +1,160 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/block.h>
+
+#define BLOCK_RQ_WR 0
+#define BLOCK_RQ_RD 1
+
+#define BLOCK_TOTAL (BLOCK_RQ_RD+1)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+#define EVENTWRITE REQ_RW
+#else
+#define EVENTWRITE REQ_WRITE
+#endif
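+
+/* REQ_RW was renamed REQ_WRITE in 2.6.36; EVENTWRITE hides the rename
+ * so the probe below can test the write bit uniformly
+ */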
+
+static ulong block_rq_wr_enabled;
+static ulong block_rq_rd_enabled;
+static ulong block_rq_wr_key;
+static ulong block_rq_rd_key;
+static atomic_t blockCnt[BLOCK_TOTAL];
+static int blockGet[BLOCK_TOTAL * 4];
+
+/* The tracepoint signature changed in 3.15 and has been backported to some older kernels. The Makefile tries to autodetect the correct value; if it fails, change the #if below */
+#if OLD_BLOCK_RQ_COMPLETE
+GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq))
+#else
+GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq, unsigned int nr_bytes))
+#endif
+{
+ int write;
+ unsigned int size;
+
+ if (!rq)
+ return;
+
+ write = rq->cmd_flags & EVENTWRITE;
+#if OLD_BLOCK_RQ_COMPLETE
+ size = rq->resid_len;
+#else
+ size = nr_bytes;
+#endif
+
+ if (!size)
+ return;
+
+ if (write) {
+ if (block_rq_wr_enabled)
+ atomic_add(size, &blockCnt[BLOCK_RQ_WR]);
+ } else {
+ if (block_rq_rd_enabled)
+ atomic_add(size, &blockCnt[BLOCK_RQ_RD]);
+ }
+}
+
+static int gator_events_block_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+
+ /* block_complete_wr */
+ dir = gatorfs_mkdir(sb, root, "Linux_block_rq_wr");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &block_rq_wr_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_wr_key);
+
+ /* block_complete_rd */
+ dir = gatorfs_mkdir(sb, root, "Linux_block_rq_rd");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &block_rq_rd_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_rd_key);
+
+ return 0;
+}
+
+static int gator_events_block_start(void)
+{
+ /* register tracepoints */
+ if (block_rq_wr_enabled || block_rq_rd_enabled)
+ if (GATOR_REGISTER_TRACE(block_rq_complete))
+ goto fail_block_rq_exit;
+ pr_debug("gator: registered block event tracepoints\n");
+
+ return 0;
+
+ /* unregister tracepoints on error */
+fail_block_rq_exit:
+ pr_err("gator: block event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+ return -1;
+}
+
+static void gator_events_block_stop(void)
+{
+ if (block_rq_wr_enabled || block_rq_rd_enabled)
+ GATOR_UNREGISTER_TRACE(block_rq_complete);
+ pr_debug("gator: unregistered block event tracepoints\n");
+
+ block_rq_wr_enabled = 0;
+ block_rq_rd_enabled = 0;
+}
+
+static int gator_events_block_read(int **buffer, bool sched_switch)
+{
+ int len, value, data = 0;
+
+ if (!on_primary_core())
+ return 0;
+
+ len = 0;
+ if (block_rq_wr_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_WR])) > 0) {
+ atomic_sub(value, &blockCnt[BLOCK_RQ_WR]);
+ blockGet[len++] = block_rq_wr_key;
+ /* Indicates to Streamline that value bytes were written now, not since the last message */
+ blockGet[len++] = 0;
+ blockGet[len++] = block_rq_wr_key;
+ blockGet[len++] = value;
+ data += value;
+ }
+ if (block_rq_rd_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_RD])) > 0) {
+ atomic_sub(value, &blockCnt[BLOCK_RQ_RD]);
+ blockGet[len++] = block_rq_rd_key;
+ /* Indicates to Streamline that value bytes were read now, not since the last message */
+ blockGet[len++] = 0;
+ blockGet[len++] = block_rq_rd_key;
+ blockGet[len++] = value;
+ data += value;
+ }
+
+ if (buffer)
+ *buffer = blockGet;
+
+ return len;
+}
+
+static struct gator_interface gator_events_block_interface = {
+ .create_files = gator_events_block_create_files,
+ .start = gator_events_block_start,
+ .stop = gator_events_block_stop,
+ .read = gator_events_block_read,
+};
+
+int gator_events_block_init(void)
+{
+ block_rq_wr_enabled = 0;
+ block_rq_rd_enabled = 0;
+
+ block_rq_wr_key = gator_events_get_key();
+ block_rq_rd_key = gator_events_get_key();
+
+ return gator_events_install(&gator_events_block_interface);
+}
diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c
new file mode 100644
index 000000000000..5221aac581b3
--- /dev/null
+++ b/drivers/gator/gator_events_irq.c
@@ -0,0 +1,163 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/irq.h>
+
+#define HARDIRQ 0
+#define SOFTIRQ 1
+#define TOTALIRQ (SOFTIRQ+1)
+
+static ulong hardirq_enabled;
+static ulong softirq_enabled;
+static ulong hardirq_key;
+static ulong softirq_key;
+static DEFINE_PER_CPU(atomic_t[TOTALIRQ], irqCnt);
+static DEFINE_PER_CPU(int[TOTALIRQ * 2], irqGet);
+
+GATOR_DEFINE_PROBE(irq_handler_exit,
+ TP_PROTO(int irq, struct irqaction *action, int ret))
+{
+ atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[HARDIRQ]);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
+GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec))
+#else
+GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(unsigned int vec_nr))
+#endif
+{
+ atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[SOFTIRQ]);
+}
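+
+/* Both probes run on the cpu that handled the interrupt, so per-cpu
+ * atomic counters suffice: no cross-cpu locking is needed, and
+ * atomic_inc() is safe against a hard irq nesting inside softirq
+ * accounting on the same cpu
+ */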
+
+static int gator_events_irq_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+
+ /* irq */
+ dir = gatorfs_mkdir(sb, root, "Linux_irq_irq");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &hardirq_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &hardirq_key);
+
+ /* soft irq */
+ dir = gatorfs_mkdir(sb, root, "Linux_irq_softirq");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &softirq_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &softirq_key);
+
+ return 0;
+}
+
+static int gator_events_irq_online(int **buffer, bool migrate)
+{
+ int len = 0, cpu = get_physical_cpu();
+
+ /* synchronization with the irq_exit functions is not necessary as the values are being reset */
+ if (hardirq_enabled) {
+ atomic_set(&per_cpu(irqCnt, cpu)[HARDIRQ], 0);
+ per_cpu(irqGet, cpu)[len++] = hardirq_key;
+ per_cpu(irqGet, cpu)[len++] = 0;
+ }
+
+ if (softirq_enabled) {
+ atomic_set(&per_cpu(irqCnt, cpu)[SOFTIRQ], 0);
+ per_cpu(irqGet, cpu)[len++] = softirq_key;
+ per_cpu(irqGet, cpu)[len++] = 0;
+ }
+
+ if (buffer)
+ *buffer = per_cpu(irqGet, cpu);
+
+ return len;
+}
+
+static int gator_events_irq_start(void)
+{
+ /* register tracepoints */
+ if (hardirq_enabled)
+ if (GATOR_REGISTER_TRACE(irq_handler_exit))
+ goto fail_hardirq_exit;
+ if (softirq_enabled)
+ if (GATOR_REGISTER_TRACE(softirq_exit))
+ goto fail_softirq_exit;
+ pr_debug("gator: registered irq tracepoints\n");
+
+ return 0;
+
+ /* unregister tracepoints on error */
+fail_softirq_exit:
+ if (hardirq_enabled)
+ GATOR_UNREGISTER_TRACE(irq_handler_exit);
+fail_hardirq_exit:
+ pr_err("gator: irq tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+ return -1;
+}
+
+static void gator_events_irq_stop(void)
+{
+ if (hardirq_enabled)
+ GATOR_UNREGISTER_TRACE(irq_handler_exit);
+ if (softirq_enabled)
+ GATOR_UNREGISTER_TRACE(softirq_exit);
+ pr_debug("gator: unregistered irq tracepoints\n");
+
+ hardirq_enabled = 0;
+ softirq_enabled = 0;
+}
+
+static int gator_events_irq_read(int **buffer, bool sched_switch)
+{
+ int len, value;
+ int cpu = get_physical_cpu();
+
+ len = 0;
+ if (hardirq_enabled) {
+ value = atomic_read(&per_cpu(irqCnt, cpu)[HARDIRQ]);
+ atomic_sub(value, &per_cpu(irqCnt, cpu)[HARDIRQ]);
+
+ per_cpu(irqGet, cpu)[len++] = hardirq_key;
+ per_cpu(irqGet, cpu)[len++] = value;
+ }
+
+ if (softirq_enabled) {
+ value = atomic_read(&per_cpu(irqCnt, cpu)[SOFTIRQ]);
+ atomic_sub(value, &per_cpu(irqCnt, cpu)[SOFTIRQ]);
+
+ per_cpu(irqGet, cpu)[len++] = softirq_key;
+ per_cpu(irqGet, cpu)[len++] = value;
+ }
+
+ if (buffer)
+ *buffer = per_cpu(irqGet, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_irq_interface = {
+ .create_files = gator_events_irq_create_files,
+ .online = gator_events_irq_online,
+ .start = gator_events_irq_start,
+ .stop = gator_events_irq_stop,
+ .read = gator_events_irq_read,
+};
+
+int gator_events_irq_init(void)
+{
+ hardirq_key = gator_events_get_key();
+ softirq_key = gator_events_get_key();
+
+ hardirq_enabled = 0;
+ softirq_enabled = 0;
+
+ return gator_events_install(&gator_events_irq_interface);
+}
diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c
new file mode 100644
index 000000000000..73aaac32327e
--- /dev/null
+++ b/drivers/gator/gator_events_l2c-310.c
@@ -0,0 +1,208 @@
+/**
+ * l2c310 (L2 Cache Controller) event counters for gator
+ *
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#if defined(CONFIG_OF)
+#include <linux/of.h>
+#include <linux/of_address.h>
+#endif
+#include <asm/hardware/cache-l2x0.h>
+
+#include "gator.h"
+
+#define L2C310_COUNTERS_NUM 2
+
+static struct {
+ unsigned long enabled;
+ unsigned long event;
+ unsigned long key;
+} l2c310_counters[L2C310_COUNTERS_NUM];
+
+static int l2c310_buffer[L2C310_COUNTERS_NUM * 2];
+
+static void __iomem *l2c310_base;
+
+static void gator_events_l2c310_reset_counters(void)
+{
+ u32 val = readl(l2c310_base + L2X0_EVENT_CNT_CTRL);
+
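+ /* In the event counter control register, bit 0 is the global enable
+ * and bits [2:1] are write-one-to-reset controls for the two event
+ * counters, hence the ((1 << L2C310_COUNTERS_NUM) - 1) << 1 mask below
+ */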
+ val |= ((1 << L2C310_COUNTERS_NUM) - 1) << 1;
+
+ writel(val, l2c310_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static int gator_events_l2c310_create_files(struct super_block *sb,
+ struct dentry *root)
+{
+ int i;
+
+ for (i = 0; i < L2C310_COUNTERS_NUM; i++) {
+ char buf[16];
+ struct dentry *dir;
+
+ snprintf(buf, sizeof(buf), "L2C-310_cnt%d", i);
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (WARN_ON(!dir))
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled",
+ &l2c310_counters[i].enabled);
+ gatorfs_create_ulong(sb, dir, "event",
+ &l2c310_counters[i].event);
+ gatorfs_create_ro_ulong(sb, dir, "key",
+ &l2c310_counters[i].key);
+ }
+
+ return 0;
+}
+
+static int gator_events_l2c310_start(void)
+{
+ static const unsigned long l2x0_event_cntx_cfg[L2C310_COUNTERS_NUM] = {
+ L2X0_EVENT_CNT0_CFG,
+ L2X0_EVENT_CNT1_CFG,
+ };
+ int i;
+
+ /* Counter event sources */
+ for (i = 0; i < L2C310_COUNTERS_NUM; i++)
+ writel((l2c310_counters[i].event & 0xf) << 2,
+ l2c310_base + l2x0_event_cntx_cfg[i]);
+
+ gator_events_l2c310_reset_counters();
+
+ /* Event counter enable */
+ writel(1, l2c310_base + L2X0_EVENT_CNT_CTRL);
+
+ return 0;
+}
+
+static void gator_events_l2c310_stop(void)
+{
+ /* Event counter disable */
+ writel(0, l2c310_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static int gator_events_l2c310_read(int **buffer, bool sched_switch)
+{
+ static const unsigned long l2x0_event_cntx_val[L2C310_COUNTERS_NUM] = {
+ L2X0_EVENT_CNT0_VAL,
+ L2X0_EVENT_CNT1_VAL,
+ };
+ int i;
+ int len = 0;
+
+ if (!on_primary_core())
+ return 0;
+
+ for (i = 0; i < L2C310_COUNTERS_NUM; i++) {
+ if (l2c310_counters[i].enabled) {
+ l2c310_buffer[len++] = l2c310_counters[i].key;
+ l2c310_buffer[len++] = readl(l2c310_base +
+ l2x0_event_cntx_val[i]);
+ }
+ }
+
+ /* the l2c310 counters saturate on overflow rather than wrapping, so reset them after every read */
+ gator_events_l2c310_reset_counters();
+
+ if (buffer)
+ *buffer = l2c310_buffer;
+
+ return len;
+}
+
+static struct gator_interface gator_events_l2c310_interface = {
+ .create_files = gator_events_l2c310_create_files,
+ .start = gator_events_l2c310_start,
+ .stop = gator_events_l2c310_stop,
+ .read = gator_events_l2c310_read,
+};
+
+#define L2C310_ADDR_PROBE (~0)
+
+MODULE_PARM_DESC(l2c310_addr, "L2C310 physical base address (0 to disable)");
+static unsigned long l2c310_addr = L2C310_ADDR_PROBE;
+module_param(l2c310_addr, ulong, 0444);
+
+static void __iomem *gator_events_l2c310_probe(void)
+{
+ phys_addr_t variants[] = {
+#if defined(CONFIG_ARCH_EXYNOS4) || defined(CONFIG_ARCH_S5PV310)
+ 0x10502000,
+#endif
+#if defined(CONFIG_ARCH_OMAP4)
+ 0x48242000,
+#endif
+#if defined(CONFIG_ARCH_TEGRA)
+ 0x50043000,
+#endif
+#if defined(CONFIG_ARCH_U8500)
+ 0xa0412000,
+#endif
+#if defined(CONFIG_ARCH_VEXPRESS)
+ 0x1e00a000, /* A9x4 core tile (HBI-0191) */
+ 0x2c0f0000, /* New memory map tiles */
+#endif
+ };
+ int i;
+ void __iomem *base;
+#if defined(CONFIG_OF)
+ struct device_node *node = of_find_all_nodes(NULL);
+
+ if (node) {
+ of_node_put(node);
+
+ node = of_find_compatible_node(NULL, NULL, "arm,pl310-cache");
+ base = of_iomap(node, 0);
+ of_node_put(node);
+
+ return base;
+ }
+#endif
+
+ for (i = 0; i < ARRAY_SIZE(variants); i++) {
+ base = ioremap(variants[i], SZ_4K);
+ if (base) {
+ u32 cache_id = readl(base + L2X0_CACHE_ID);
+
+ if ((cache_id & 0xff0003c0) == 0x410000c0)
+ return base;
+
+ iounmap(base);
+ }
+ }
+
+ return NULL;
+}
+
+int gator_events_l2c310_init(void)
+{
+ int i;
+
+ if (gator_cpuid() != CORTEX_A5 && gator_cpuid() != CORTEX_A9)
+ return -1;
+
+ if (l2c310_addr == L2C310_ADDR_PROBE)
+ l2c310_base = gator_events_l2c310_probe();
+ else if (l2c310_addr)
+ l2c310_base = ioremap(l2c310_addr, SZ_4K);
+
+ if (!l2c310_base)
+ return -1;
+
+ for (i = 0; i < L2C310_COUNTERS_NUM; i++) {
+ l2c310_counters[i].enabled = 0;
+ l2c310_counters[i].key = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_l2c310_interface);
+}
diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c
new file mode 100644
index 000000000000..9cf43fe2c29b
--- /dev/null
+++ b/drivers/gator/gator_events_mali_4xx.c
@@ -0,0 +1,622 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+
+#include "linux/mali_linux_trace.h"
+
+#include "gator_events_mali_common.h"
+#include "gator_events_mali_4xx.h"
+
+/*
+ * There have been four variants of the comms between gator and Mali,
+ * depending on driver version:
+ *
+ * # | DDK version range         | Support                                  | Notes
+ * 1 | (obsolete)                | No software counter support              | Obsolete patches
+ * 2 | (obsolete)                | Tracepoint called for each separate s/w  | Obsolete patches
+ *   |                           | counter value as it appears              |
+ * 3 | r3p0-04rel0 - r3p2-01rel2 | Single tracepoint for all s/w counters   |
+ *   |                           | in a bundle                              |
+ * 4 | r3p2-01rel3 to date       | As above, with extensions for MP devices | At least r4p0-00rel1
+ *   |                           | (Mali-450)                               |
+ */
+
+#if !defined(GATOR_MALI_INTERFACE_STYLE)
+#define GATOR_MALI_INTERFACE_STYLE (4)
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE == 1
+#error GATOR_MALI_INTERFACE_STYLE 1 is obsolete
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+#error GATOR_MALI_INTERFACE_STYLE 2 is obsolete
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+/* Valid GATOR_MALI_INTERFACE_STYLE */
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE < 4
+#include "mali/mali_mjollnir_profiling_gator_api.h"
+#else
+#include "mali/mali_utgard_profiling_gator_api.h"
+#endif
+
+/*
+ * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
+ */
+#if (MALI_SUPPORT != MALI_4xx)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
+#endif
+
+static const char mali_name[] = "4xx";
+
+/* gatorfs variables for counter enable state,
+ * the event the counter should count and the
+ * 'key' (a unique id set by gatord and returned
+ * by gator.ko)
+ */
+static unsigned long counter_enabled[NUMBER_OF_EVENTS];
+static unsigned long counter_event[NUMBER_OF_EVENTS];
+static unsigned long counter_key[NUMBER_OF_EVENTS];
+
+/* The data we have recorded */
+static u32 counter_data[NUMBER_OF_EVENTS];
+/* The address to sample (or 0 if samples are sent to us) */
+static u32 *counter_address[NUMBER_OF_EVENTS];
+
+/* An array used to return the data we recorded
+ * as key,value pairs hence the *2
+ */
+static int counter_dump[NUMBER_OF_EVENTS * 2];
+static int counter_prev[NUMBER_OF_EVENTS];
+static bool prev_set[NUMBER_OF_EVENTS];
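+
+/* The L2 counters read back as free-running totals, so read() reports
+ * deltas against counter_prev[]; prev_set[] suppresses a counter's
+ * first sample, for which no previous value exists
+ */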
+
+/* Note whether tracepoints have been registered */
+static int trace_registered;
+
+/*
+ * These numbers define the actual numbers of each block type that exist in the system. Initially
+ * these are set to the maxima defined above; if the driver is capable of being queried (newer
+ * drivers only) then the values may be revised.
+ */
+static unsigned int n_vp_cores = MAX_NUM_VP_CORES;
+static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
+static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
+
+extern struct mali_counter mali_activity[2];
+static const char *const mali_activity_names[] = {
+ "fragment",
+ "vertex",
+};
+
+/**
+ * Returns non-zero if the given counter ID is an activity counter.
+ */
+static inline int is_activity_counter(unsigned int event_id)
+{
+ return (event_id >= FIRST_ACTIVITY_EVENT &&
+ event_id <= LAST_ACTIVITY_EVENT);
+}
+
+/**
+ * Returns non-zero if the given counter ID is a hardware counter.
+ */
+static inline int is_hw_counter(unsigned int event_id)
+{
+ return (event_id >= FIRST_HW_COUNTER && event_id <= LAST_HW_COUNTER);
+}
+
+/* Probe for hardware counter events */
+GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value))
+{
+ if (is_hw_counter(event_id))
+ counter_data[event_id] = value;
+}
+
+GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
+{
+ u32 i;
+
+ /* Copy over the values for those counters which are enabled. */
+ for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
+ if (counter_enabled[i])
+ counter_data[i] = (u32)(counters[i - FIRST_SW_COUNTER]);
+ }
+}
+
+/**
+ * Create a single filesystem entry for a specified event.
+ * @param sb the superblock
+ * @param root Filesystem root
+ * @param name The name of the entry to create
+ * @param event The ID of the event
+ * @param create_event_item boolean indicating whether to create an 'event' filesystem entry. True to create.
+ *
+ * @return 0 if ok, non-zero if the create failed.
+ */
+static int create_fs_entry(struct super_block *sb, struct dentry *root, const char *name, int event, int create_event_item)
+{
+ struct dentry *dir;
+
+ dir = gatorfs_mkdir(sb, root, name);
+
+ if (!dir)
+ return -1;
+
+ if (create_event_item)
+ gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
+
+ gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
+
+ return 0;
+}
+
+#if GATOR_MALI_INTERFACE_STYLE > 3
+/*
+ * Read the version info structure if available
+ */
+static void initialise_version_info(void)
+{
+ void (*mali_profiling_get_mali_version_symbol)(struct _mali_profiling_mali_version *values);
+
+ mali_profiling_get_mali_version_symbol = symbol_get(_mali_profiling_get_mali_version);
+
+ if (mali_profiling_get_mali_version_symbol) {
+ struct _mali_profiling_mali_version version_info;
+
+ pr_debug("gator: mali online _mali_profiling_get_mali_version symbol @ %p\n",
+ mali_profiling_get_mali_version_symbol);
+
+ /*
+ * Revise the number of each different core type using information derived from the DDK.
+ */
+ mali_profiling_get_mali_version_symbol(&version_info);
+
+ n_fp_cores = version_info.num_of_fp_cores;
+ n_vp_cores = version_info.num_of_vp_cores;
+ n_l2_cores = version_info.num_of_l2_cores;
+
+ /* Release the function - we're done with it. */
+ symbol_put(_mali_profiling_get_mali_version);
+ } else {
+ pr_err("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
+ pr_err("gator: check your Mali DDK version versus the GATOR_MALI_INTERFACE_STYLE setting\n");
+ }
+}
+#endif
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+ int event;
+
+ char buf[40];
+ int core_id;
+ int counter_number;
+
+ pr_debug("gator: Initialising counters with style = %d\n", GATOR_MALI_INTERFACE_STYLE);
+
+#if GATOR_MALI_INTERFACE_STYLE > 3
+ /*
+ * Initialise first: this sets up the number of cores available (on compatible DDK versions).
+ * Ideally this would not need guarding but other parts of the code depend on the interface style being set
+ * correctly; if it is not then the system can enter an inconsistent state.
+ */
+ initialise_version_info();
+#endif
+
+ mali_activity[0].cores = n_fp_cores;
+ mali_activity[1].cores = n_vp_cores;
+ for (event = 0; event < ARRAY_SIZE(mali_activity); event++) {
+ if (gator_mali_create_file_system(mali_name, mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0)
+ return -1;
+ }
+
+ /* Vertex processor counters */
+ for (core_id = 0; core_id < n_vp_cores; core_id++) {
+ int activity_counter_id = ACTIVITY_VP_0;
+
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_VP_%d_active", mali_name, core_id);
+ if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0)
+ return -1;
+
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0)
+ return -1;
+ }
+ }
+
+ /* Fragment processors' counters */
+ for (core_id = 0; core_id < n_fp_cores; core_id++) {
+ int activity_counter_id = ACTIVITY_FP_0 + core_id;
+
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_FP_%d_active", mali_name, core_id);
+ if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0)
+ return -1;
+
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0)
+ return -1;
+ }
+ }
+
+ /* L2 Cache counters */
+ for (core_id = 0; core_id < n_l2_cores; core_id++) {
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0)
+ return -1;
+ }
+ }
+
+ /* Now set up the software counter entries */
+ for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) {
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_SW_%d", mali_name, event - FIRST_SW_COUNTER);
+
+ if (create_fs_entry(sb, root, buf, event, 0) != 0)
+ return -1;
+ }
+
+ /* Now set up the special counter entries */
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_Filmstrip_cnt0", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_FILMSTRIP, 1) != 0)
+ return -1;
+
+#ifdef DVFS_REPORTED_BY_DDK
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_Frequency", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_FREQUENCY, 1) != 0)
+ return -1;
+
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_Voltage", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_VOLTAGE, 1) != 0)
+ return -1;
+#endif
+
+ return 0;
+}
+
+/*
+ * Local store for the get_counters entry point into the DDK.
+ * This is stored here since it is used very regularly.
+ */
+static void (*mali_get_counters)(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
+static u32 (*mali_get_l2_counters)(struct _mali_profiling_l2_counter_values *values);
+
+/*
+ * Examine list of counters between two index limits and determine if any one is enabled.
+ * Returns 1 if any counter is enabled, 0 if none is.
+ */
+static int is_any_counter_enabled(unsigned int first_counter, unsigned int last_counter)
+{
+ unsigned int i;
+
+ for (i = first_counter; i <= last_counter; i++) {
+ if (counter_enabled[i])
+ return 1; /* At least one counter is enabled */
+ }
+
+ return 0; /* No s/w counters enabled */
+}
+
+static void init_counters(unsigned int from_counter, unsigned int to_counter)
+{
+ unsigned int counter_id;
+
+ /* If a Mali driver is present and exports the appropriate symbol,
+ * request that the HW counters (of which there are only 2) be
+ * configured to count the desired events
+ */
+ mali_profiling_set_event_type *mali_set_hw_event;
+
+ mali_set_hw_event = symbol_get(_mali_profiling_set_event);
+
+ if (mali_set_hw_event) {
+ pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
+
+ for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
+ if (counter_enabled[counter_id])
+ mali_set_hw_event(counter_id, counter_event[counter_id]);
+ else
+ mali_set_hw_event(counter_id, 0xFFFFFFFF);
+ }
+
+ symbol_put(_mali_profiling_set_event);
+ } else {
+ pr_err("gator: mali online _mali_profiling_set_event symbol not found\n");
+ }
+}
+
+static void mali_counter_initialize(void)
+{
+ int i;
+
+ mali_profiling_control_type *mali_control;
+
+ init_counters(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1);
+ init_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1);
+ init_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1);
+
+ /* Generic control interface for Mali DDK. */
+ mali_control = symbol_get(_mali_profiling_control);
+ if (mali_control) {
+ /* The event attribute in the XML file keeps the actual frame rate. */
+ unsigned int rate = counter_event[COUNTER_FILMSTRIP] & 0xff;
+ unsigned int resize_factor = (counter_event[COUNTER_FILMSTRIP] >> 8) & 0xff;
+
+ pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
+
+ mali_control(SW_COUNTER_ENABLE, (is_any_counter_enabled(FIRST_SW_COUNTER, LAST_SW_COUNTER) ? 1 : 0));
+ mali_control(FBDUMP_CONTROL_ENABLE, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0));
+ mali_control(FBDUMP_CONTROL_RATE, rate);
+ mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
+
+ pr_debug("gator: sent mali_control enabled=%d, rate=%d\n", (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0), rate);
+
+ symbol_put(_mali_profiling_control);
+ } else {
+ pr_err("gator: mali online _mali_profiling_control symbol not found\n");
+ }
+
+ mali_get_counters = symbol_get(_mali_profiling_get_counters);
+ if (mali_get_counters)
+ pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters);
+ else
+ pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined\n");
+
+ mali_get_l2_counters = symbol_get(_mali_profiling_get_l2_counters);
+ if (mali_get_l2_counters)
+ pr_debug("gator: mali online _mali_profiling_get_l2_counters symbol @ %p\n", mali_get_l2_counters);
+ else
+ pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined\n");
+
+ if (!mali_get_counters && !mali_get_l2_counters) {
+ pr_debug("gator: WARNING: no L2 counters available\n");
+ n_l2_cores = 0;
+ }
+
+ /* Clear the counters at the start */
+ for (i = 0; i < NUMBER_OF_EVENTS; i++) {
+ counter_data[i] = 0;
+ prev_set[i] = false;
+ }
+}
+
+static void mali_counter_deinitialize(void)
+{
+ mali_profiling_set_event_type *mali_set_hw_event;
+ mali_profiling_control_type *mali_control;
+
+ mali_set_hw_event = symbol_get(_mali_profiling_set_event);
+
+ if (mali_set_hw_event) {
+ int i;
+
+ pr_debug("gator: mali offline _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
+ for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++)
+ mali_set_hw_event(i, 0xFFFFFFFF);
+
+ symbol_put(_mali_profiling_set_event);
+ } else {
+ pr_err("gator: mali offline _mali_profiling_set_event symbol not found\n");
+ }
+
+ /* Generic control interface for Mali DDK. */
+ mali_control = symbol_get(_mali_profiling_control);
+
+ if (mali_control) {
+ pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control);
+
+ /* Reset the DDK state - disable counter collection */
+ mali_control(SW_COUNTER_ENABLE, 0);
+
+ mali_control(FBDUMP_CONTROL_ENABLE, 0);
+
+ symbol_put(_mali_profiling_control);
+ } else {
+ pr_err("gator: mali offline _mali_profiling_control symbol not found\n");
+ }
+
+ if (mali_get_counters)
+ symbol_put(_mali_profiling_get_counters);
+
+ if (mali_get_l2_counters)
+ symbol_put(_mali_profiling_get_l2_counters);
+}
+
+static int start(void)
+{
+ /* register tracepoints */
+ if (GATOR_REGISTER_TRACE(mali_hw_counter)) {
+ pr_err("gator: mali_hw_counter tracepoint failed to activate\n");
+ return -1;
+ }
+
+ /* For Mali drivers with built-in support. */
+ if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
+ pr_err("gator: mali_sw_counters tracepoint failed to activate\n");
+ return -1;
+ }
+
+ trace_registered = 1;
+
+ mali_counter_initialize();
+ return 0;
+}
+
+static void stop(void)
+{
+ unsigned int cnt;
+
+ pr_debug("gator: mali stop\n");
+
+ if (trace_registered) {
+ GATOR_UNREGISTER_TRACE(mali_hw_counter);
+
+ /* For Mali drivers with built-in support. */
+ GATOR_UNREGISTER_TRACE(mali_sw_counters);
+
+ pr_debug("gator: mali timeline tracepoint deactivated\n");
+
+ trace_registered = 0;
+ }
+
+ for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
+ counter_enabled[cnt] = 0;
+ counter_event[cnt] = 0;
+ counter_address[cnt] = NULL;
+ }
+
+ mali_counter_deinitialize();
+}
+
+static void dump_counters(unsigned int from_counter, unsigned int to_counter, unsigned int *len)
+{
+ unsigned int counter_id;
+
+ for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
+ if (counter_enabled[counter_id]) {
+ counter_dump[(*len)++] = counter_key[counter_id];
+ counter_dump[(*len)++] = counter_data[counter_id];
+
+ counter_data[counter_id] = 0;
+ }
+ }
+}
+
+static int read(int **buffer, bool sched_switch)
+{
+ int len = 0;
+
+ if (!on_primary_core())
+ return 0;
+
+ /* Read the L2 C0 and C1 here. */
+ if (n_l2_cores > 0 && is_any_counter_enabled(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores))) {
+ unsigned int unavailable_l2_caches = 0;
+ struct _mali_profiling_l2_counter_values cache_values;
+ unsigned int cache_id;
+ struct _mali_profiling_core_counters *per_core;
+
+ /* Poke the driver to get the counter values; the older mali_get_counters style supports only one L2 cache */
+ if (mali_get_l2_counters) {
+ unavailable_l2_caches = mali_get_l2_counters(&cache_values);
+ } else if (mali_get_counters) {
+ per_core = &cache_values.cores[0];
+ mali_get_counters(&per_core->source0, &per_core->value0, &per_core->source1, &per_core->value1);
+ } else {
+ /* This should never happen, as n_l2_cores is only set > 0 if one of the above functions is found. */
+ }
+
+ /* Fill in the two cache counter values for each cache block. */
+ for (cache_id = 0; cache_id < n_l2_cores; cache_id++) {
+ unsigned int counter_id_0 = COUNTER_L2_0_C0 + (2 * cache_id);
+ unsigned int counter_id_1 = counter_id_0 + 1;
+
+ if ((1 << cache_id) & unavailable_l2_caches)
+ continue; /* This cache is unavailable (powered-off, possibly). */
+
+ per_core = &cache_values.cores[cache_id];
+
+ if (counter_enabled[counter_id_0] && prev_set[counter_id_0]) {
+ /* Calculate and save src0's counter val0 */
+ counter_dump[len++] = counter_key[counter_id_0];
+ counter_dump[len++] = per_core->value0 - counter_prev[counter_id_0];
+ }
+
+ if (counter_enabled[counter_id_1] && prev_set[counter_id_1]) {
+ /* Calculate and save src1's counter val1 */
+ counter_dump[len++] = counter_key[counter_id_1];
+ counter_dump[len++] = per_core->value1 - counter_prev[counter_id_1];
+ }
+
+ /* Save the previous values for the counters. */
+ counter_prev[counter_id_0] = per_core->value0;
+ prev_set[counter_id_0] = true;
+ counter_prev[counter_id_1] = per_core->value1;
+ prev_set[counter_id_1] = true;
+ }
+ }
+
+ /* Process other (non-timeline) counters. */
+ dump_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1, &len);
+ dump_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1, &len);
+
+ dump_counters(FIRST_SW_COUNTER, LAST_SW_COUNTER, &len);
+
+#ifdef DVFS_REPORTED_BY_DDK
+ {
+ int cnt;
+ /*
+ * Add in the voltage and frequency counters if enabled. Note
+ * that, since these are actually passed as events, the counter
+ * value should not be cleared.
+ */
+ cnt = COUNTER_FREQUENCY;
+ if (counter_enabled[cnt]) {
+ counter_dump[len++] = counter_key[cnt];
+ counter_dump[len++] = counter_data[cnt];
+ }
+
+ cnt = COUNTER_VOLTAGE;
+ if (counter_enabled[cnt]) {
+ counter_dump[len++] = counter_key[cnt];
+ counter_dump[len++] = counter_data[cnt];
+ }
+ }
+#endif
+
+ if (buffer)
+ *buffer = counter_dump;
+
+ return len;
+}
+
+static struct gator_interface gator_events_mali_interface = {
+ .create_files = create_files,
+ .start = start,
+ .stop = stop,
+ .read = read,
+};
+
+extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv)
+{
+#ifdef DVFS_REPORTED_BY_DDK
+ counter_data[COUNTER_FREQUENCY] = frequency_mhz;
+ counter_data[COUNTER_VOLTAGE] = voltage_mv;
+#endif
+}
+
+int gator_events_mali_init(void)
+{
+ unsigned int cnt;
+
+ pr_debug("gator: mali init\n");
+
+ gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
+
+ for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
+ counter_enabled[cnt] = 0;
+ counter_event[cnt] = 0;
+ counter_key[cnt] = gator_events_get_key();
+ counter_address[cnt] = NULL;
+ counter_data[cnt] = 0;
+ }
+
+ trace_registered = 0;
+
+ return gator_events_install(&gator_events_mali_interface);
+}
diff --git a/drivers/gator/gator_events_mali_4xx.h b/drivers/gator/gator_events_mali_4xx.h
new file mode 100644
index 000000000000..976ca8c4cfa1
--- /dev/null
+++ b/drivers/gator/gator_events_mali_4xx.h
@@ -0,0 +1,18 @@
+/**
+ * Copyright (C) ARM Limited 2011-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/*
+ * Header contains common definitions for the Mali-4xx processors.
+ */
+#if !defined(GATOR_EVENTS_MALI_4xx_H)
+#define GATOR_EVENTS_MALI_4xx_H
+
+extern void gator_events_mali_log_dvfs_event(unsigned int d0, unsigned int d1);
+
+#endif /* GATOR_EVENTS_MALI_4xx_H */
diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c
new file mode 100644
index 000000000000..1af87d649afe
--- /dev/null
+++ b/drivers/gator/gator_events_mali_common.c
@@ -0,0 +1,72 @@
+/**
+ * Copyright (C) ARM Limited 2012-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include "gator_events_mali_common.h"
+
+extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, struct mali_counter *counter, unsigned long *event)
+{
+ int err;
+ char buf[255];
+ struct dentry *dir;
+
+ /* If the counter name is empty ignore it */
+ if (strlen(event_name) != 0) {
+ /* Set up the filesystem entry for this event. */
+ if (mali_name == NULL)
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s", event_name);
+ else
+ snprintf(buf, sizeof(buf), "ARM_Mali-%s_%s", mali_name, event_name);
+
+ dir = gatorfs_mkdir(sb, root, buf);
+
+ if (dir == NULL) {
+ pr_debug("gator: %s: error creating file system for: %s (%s)\n", mali_name, event_name, buf);
+ return -1;
+ }
+
+ err = gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled);
+ if (err != 0) {
+ pr_debug("gator: %s: error calling gatorfs_create_ulong for: %s (%s)\n", mali_name, event_name, buf);
+ return -1;
+ }
+ err = gatorfs_create_ro_ulong(sb, dir, "key", &counter->key);
+ if (err != 0) {
+ pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
+ return -1;
+ }
+ if (counter->cores != -1) {
+ err = gatorfs_create_ro_ulong(sb, dir, "cores", &counter->cores);
+ if (err != 0) {
+ pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
+ return -1;
+ }
+ }
+ if (event != NULL) {
+ err = gatorfs_create_ulong(sb, dir, "event", event);
+ if (err != 0) {
+ pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
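+
+/* Illustrative example (not part of the driver): for mali_name "Midgard"
+ * and event_name "GPU_ACTIVE", the function above creates the gatorfs
+ * directory "ARM_Mali-Midgard_GPU_ACTIVE" containing "enabled" and "key"
+ * attributes, plus "cores" when counter->cores != -1 and "event" when an
+ * event pointer is supplied.
+ */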
+
+extern void gator_mali_initialise_counters(struct mali_counter counters[], unsigned int n_counters)
+{
+ unsigned int cnt;
+
+ for (cnt = 0; cnt < n_counters; cnt++) {
+ struct mali_counter *counter = &counters[cnt];
+
+ counter->key = gator_events_get_key();
+ counter->enabled = 0;
+ counter->cores = -1;
+ }
+}
diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h
new file mode 100644
index 000000000000..e7082e62fe88
--- /dev/null
+++ b/drivers/gator/gator_events_mali_common.h
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) ARM Limited 2012-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#if !defined(GATOR_EVENTS_MALI_COMMON_H)
+#define GATOR_EVENTS_MALI_COMMON_H
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+/* Ensure that MALI_SUPPORT has been defined to something. */
+#ifndef MALI_SUPPORT
+#error MALI_SUPPORT not defined!
+#endif
+
+/* Values for the supported activity event types */
+#define ACTIVITY_START (1)
+#define ACTIVITY_STOP (2)
+
+/*
+ * Runtime state information for a counter.
+ */
+struct mali_counter {
+ /* 'key' (a unique id set by gatord and returned by gator.ko) */
+ unsigned long key;
+ /* counter enable state */
+ unsigned long enabled;
+ /* for activity counters, the number of cores, otherwise -1 */
+ unsigned long cores;
+};
+
+/*
+ * Mali-4xx
+ */
+typedef int mali_profiling_set_event_type(unsigned int, int);
+typedef void mali_profiling_control_type(unsigned int, unsigned int);
+
+/*
+ * Driver entry points for functions called directly by gator.
+ */
+extern int _mali_profiling_set_event(unsigned int, int);
+extern void _mali_profiling_control(unsigned int, unsigned int);
+extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
+
+/**
+ * Creates a filesystem entry under /dev/gator relating to the specified event name, and
+ * associates the counter's key/enable values with this entry point.
+ *
+ * @param mali_name Name of the Mali GPU, or NULL to omit it from the entry name.
+ * @param event_name The name of the event.
+ * @param sb Linux super block
+ * @param root Directory under which the entry will be created.
+ * @param counter Ptr to the counter whose key and enable state are exposed by this entry.
+ * @param event If not NULL, ptr to a location exposed as a writable "event" attribute.
+ *
+ * @return 0 if entry point was created, non-zero if not.
+ */
+extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, struct mali_counter *counter, unsigned long *event);
+
+/**
+ * Initializes the counter array.
+ *
+ * @param counters The array of counters to initialise.
+ * @param n_counters The number of entries in the array.
+ */
+extern void gator_mali_initialise_counters(struct mali_counter counters[], unsigned int n_counters);
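+
+/*
+ * Minimal usage sketch (hypothetical driver code; "names" and "NUM_EVENTS"
+ * are placeholders, not part of this API):
+ *
+ * static struct mali_counter counters[NUM_EVENTS];
+ *
+ * static int create_files(struct super_block *sb, struct dentry *root)
+ * {
+ * int i;
+ *
+ * for (i = 0; i < NUM_EVENTS; i++)
+ * if (gator_mali_create_file_system("Midgard", names[i], sb, root, &counters[i], NULL) != 0)
+ * return -1;
+ * return 0;
+ * }
+ *
+ * with gator_mali_initialise_counters(counters, NUM_EVENTS) called once at
+ * module init before the interface is installed.
+ */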
+
+#endif /* GATOR_EVENTS_MALI_COMMON_H */
diff --git a/drivers/gator/gator_events_mali_midgard.c b/drivers/gator/gator_events_mali_midgard.c
new file mode 100644
index 000000000000..0aec906d7ae5
--- /dev/null
+++ b/drivers/gator/gator_events_mali_midgard.c
@@ -0,0 +1,562 @@
+/**
+ * Copyright (C) ARM Limited 2011-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+#ifdef MALI_DIR_MIDGARD
+/* New DDK directory structure with kernel/drivers/gpu/arm/midgard */
+#include "mali_linux_trace.h"
+#else
+/* Old DDK directory structure with kernel/drivers/gpu/arm/t6xx */
+#include "linux/mali_linux_trace.h"
+#endif
+
+#include "gator_events_mali_common.h"
+
+/*
+ * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
+ */
+#if (MALI_SUPPORT != MALI_MIDGARD)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_MIDGARD
+#endif
+
+static const char mali_name[] = "Midgard";
+
+/* Counters for Mali-Midgard:
+ *
+ * - Timeline events
+ * These are tracepoints, but instead of reporting a number they report a START/STOP event.
+ * They are reported in Streamline as the number of microseconds during which that particular counter was active.
+ *
+ * - SW counters
+ * These are tracepoints reporting a particular number.
+ * They are accumulated in the sw_counter_data array until they are passed to Streamline, then they are zeroed.
+ *
+ * - Accumulators
+ * These are the same as software counters except that their value is not zeroed between samples.
+ */
+
+/* Timeline (start/stop) activity */
+static const char *const timeline_event_names[] = {
+ "PM_SHADER_0",
+ "PM_SHADER_1",
+ "PM_SHADER_2",
+ "PM_SHADER_3",
+ "PM_SHADER_4",
+ "PM_SHADER_5",
+ "PM_SHADER_6",
+ "PM_SHADER_7",
+ "PM_TILER_0",
+ "PM_L2_0",
+ "PM_L2_1",
+ "MMU_AS_0",
+ "MMU_AS_1",
+ "MMU_AS_2",
+ "MMU_AS_3"
+};
+
+enum {
+ PM_SHADER_0 = 0,
+ PM_SHADER_1,
+ PM_SHADER_2,
+ PM_SHADER_3,
+ PM_SHADER_4,
+ PM_SHADER_5,
+ PM_SHADER_6,
+ PM_SHADER_7,
+ PM_TILER_0,
+ PM_L2_0,
+ PM_L2_1,
+ MMU_AS_0,
+ MMU_AS_1,
+ MMU_AS_2,
+ MMU_AS_3
+};
+/* The number of shader blocks in the enum above */
+#define NUM_PM_SHADER (8)
+
+/* Software Counters */
+static const char *const software_counter_names[] = {
+ "MMU_PAGE_FAULT_0",
+ "MMU_PAGE_FAULT_1",
+ "MMU_PAGE_FAULT_2",
+ "MMU_PAGE_FAULT_3"
+};
+
+enum {
+ MMU_PAGE_FAULT_0 = 0,
+ MMU_PAGE_FAULT_1,
+ MMU_PAGE_FAULT_2,
+ MMU_PAGE_FAULT_3
+};
+
+/* Accumulators */
+static const char *const accumulators_names[] = {
+ "TOTAL_ALLOC_PAGES"
+};
+
+enum {
+ TOTAL_ALLOC_PAGES = 0
+};
+
+#define FIRST_TIMELINE_EVENT (0)
+#define NUMBER_OF_TIMELINE_EVENTS (sizeof(timeline_event_names) / sizeof(timeline_event_names[0]))
+#define FIRST_SOFTWARE_COUNTER (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS)
+#define NUMBER_OF_SOFTWARE_COUNTERS (sizeof(software_counter_names) / sizeof(software_counter_names[0]))
+#define FIRST_ACCUMULATOR (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS)
+#define NUMBER_OF_ACCUMULATORS (sizeof(accumulators_names) / sizeof(accumulators_names[0]))
+#define FILMSTRIP (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS)
+#define NUMBER_OF_EVENTS (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS + 1)
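+
+/* Worked example of the layout above: with 15 timeline events, 4 software
+ * counters and 1 accumulator, FIRST_TIMELINE_EVENT = 0,
+ * FIRST_SOFTWARE_COUNTER = 15, FIRST_ACCUMULATOR = 19, FILMSTRIP = 20 and
+ * NUMBER_OF_EVENTS = 21 (the +1 being the filmstrip event).
+ */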
+
+/*
+ * gatorfs variables for counter enable state
+ */
+static struct mali_counter counters[NUMBER_OF_EVENTS];
+static unsigned long filmstrip_event;
+
+/* An array used to return the data we recorded
+ * as key/value pairs, hence the *2.
+ */
+static int counter_dump[NUMBER_OF_EVENTS * 2];
+
+/*
+ * Array holding counter start times (in ns) for each counter. A zero
+ * here indicates that the activity monitored by this counter is not
+ * running.
+ */
+static struct timespec timeline_event_starttime[NUMBER_OF_TIMELINE_EVENTS];
+
+/* The data we have recorded */
+static unsigned int timeline_data[NUMBER_OF_TIMELINE_EVENTS];
+static unsigned int sw_counter_data[NUMBER_OF_SOFTWARE_COUNTERS];
+static unsigned int accumulators_data[NUMBER_OF_ACCUMULATORS];
+
+/* Hold the previous timestamp, used to calculate the sample interval. */
+static struct timespec prev_timestamp;
+
+/**
+ * Returns the timespan (in microseconds) between the two specified timestamps.
+ *
+ * @param start Ptr to the start timestamp
+ * @param end Ptr to the end timestamp
+ *
+ * @return Number of microseconds between the two timestamps (can be negative if start follows end).
+ */
+static inline long get_duration_us(const struct timespec *start, const struct timespec *end)
+{
+ long event_duration_us = (end->tv_nsec - start->tv_nsec) / 1000;
+
+ event_duration_us += (end->tv_sec - start->tv_sec) * 1000000;
+
+ return event_duration_us;
+}
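+
+/* For example, get_duration_us for start = {1s, 900000000ns} and
+ * end = {2s, 400000000ns} yields (400000000 - 900000000) / 1000
+ * + (2 - 1) * 1000000 = -500000 + 1000000 = 500000us.
+ */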
+
+static void record_timeline_event(unsigned int timeline_index, unsigned int type)
+{
+ struct timespec event_timestamp;
+ struct timespec *event_start = &timeline_event_starttime[timeline_index];
+
+ switch (type) {
+ case ACTIVITY_START:
+ /* Get the event time... */
+ getnstimeofday(&event_timestamp);
+
+ /* Remember the start time if the activity is not already started */
+ if (event_start->tv_sec == 0)
+ *event_start = event_timestamp; /* Structure copy */
+ break;
+
+ case ACTIVITY_STOP:
+ /* if the counter was started... */
+ if (event_start->tv_sec != 0) {
+ /* Get the event time... */
+ getnstimeofday(&event_timestamp);
+
+ /* Accumulate the duration in us */
+ timeline_data[timeline_index] += get_duration_us(event_start, &event_timestamp);
+
+ /* Reset the start time to indicate the activity is stopped. */
+ event_start->tv_sec = 0;
+ }
+ break;
+
+ default:
+ /* Other activity events are ignored. */
+ break;
+ }
+}
+
+/*
+ * Documentation about the following tracepoints is in mali_linux_trace.h
+ */
+
+GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long long value))
+{
+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
+#define BIT_AT(value, pos) ((value >> pos) & 1)
+
+ static unsigned long long previous_shader_bitmask;
+ static unsigned long long previous_tiler_bitmask;
+ static unsigned long long previous_l2_bitmask;
+
+ switch (event_id) {
+ case SHADER_PRESENT_LO:
+ {
+ unsigned long long changed_bitmask = previous_shader_bitmask ^ value;
+ int pos;
+
+ for (pos = 0; pos < NUM_PM_SHADER; ++pos) {
+ if (BIT_AT(changed_bitmask, pos))
+ record_timeline_event(PM_SHADER_0 + pos, BIT_AT(value, pos) ? ACTIVITY_START : ACTIVITY_STOP);
+ }
+
+ previous_shader_bitmask = value;
+ break;
+ }
+
+ case TILER_PRESENT_LO:
+ {
+ unsigned long long changed = previous_tiler_bitmask ^ value;
+
+ if (BIT_AT(changed, 0))
+ record_timeline_event(PM_TILER_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP);
+
+ previous_tiler_bitmask = value;
+ break;
+ }
+
+ case L2_PRESENT_LO:
+ {
+ unsigned long long changed = previous_l2_bitmask ^ value;
+
+ if (BIT_AT(changed, 0))
+ record_timeline_event(PM_L2_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP);
+ if (BIT_AT(changed, 4))
+ record_timeline_event(PM_L2_1, BIT_AT(value, 4) ? ACTIVITY_START : ACTIVITY_STOP);
+
+ previous_l2_bitmask = value;
+ break;
+ }
+
+ default:
+ /* No other blocks are supported at present */
+ break;
+ }
+
+#undef SHADER_PRESENT_LO
+#undef TILER_PRESENT_LO
+#undef L2_PRESENT_LO
+#undef BIT_AT
+}
+
+GATOR_DEFINE_PROBE(mali_page_fault_insert_pages, TP_PROTO(int event_id, unsigned long value))
+{
+ /* We add to the previous since we may receive many tracepoints in one sample period */
+ sw_counter_data[MMU_PAGE_FAULT_0 + event_id] += value;
+}
+
+GATOR_DEFINE_PROBE(mali_mmu_as_in_use, TP_PROTO(int event_id))
+{
+ record_timeline_event(MMU_AS_0 + event_id, ACTIVITY_START);
+}
+
+GATOR_DEFINE_PROBE(mali_mmu_as_released, TP_PROTO(int event_id))
+{
+ record_timeline_event(MMU_AS_0 + event_id, ACTIVITY_STOP);
+}
+
+GATOR_DEFINE_PROBE(mali_total_alloc_pages_change, TP_PROTO(long long int event_id))
+{
+ accumulators_data[TOTAL_ALLOC_PAGES] = event_id;
+}
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+ int event;
+ /*
+ * Create the filesystem for all events
+ */
+ int counter_index = 0;
+ mali_profiling_control_type *mali_control;
+
+ for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) {
+ if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, &counters[event], NULL) != 0)
+ return -1;
+ counter_index++;
+ }
+ counter_index = 0;
+ for (event = FIRST_SOFTWARE_COUNTER; event < FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS; event++) {
+ if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event], NULL) != 0)
+ return -1;
+ counter_index++;
+ }
+ counter_index = 0;
+ for (event = FIRST_ACCUMULATOR; event < FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS; event++) {
+ if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event], NULL) != 0)
+ return -1;
+ counter_index++;
+ }
+
+ mali_control = symbol_get(_mali_profiling_control);
+ if (mali_control) {
+ if (gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event) != 0)
+ return -1;
+ symbol_put(_mali_profiling_control);
+ }
+
+ return 0;
+}
+
+static int register_tracepoints(void)
+{
+ if (GATOR_REGISTER_TRACE(mali_pm_status)) {
+ pr_debug("gator: Mali-Midgard: mali_pm_status tracepoint failed to activate\n");
+ return 0;
+ }
+
+ if (GATOR_REGISTER_TRACE(mali_page_fault_insert_pages)) {
+ pr_debug("gator: Mali-Midgard: mali_page_fault_insert_pages tracepoint failed to activate\n");
+ return 0;
+ }
+
+ if (GATOR_REGISTER_TRACE(mali_mmu_as_in_use)) {
+ pr_debug("gator: Mali-Midgard: mali_mmu_as_in_use tracepoint failed to activate\n");
+ return 0;
+ }
+
+ if (GATOR_REGISTER_TRACE(mali_mmu_as_released)) {
+ pr_debug("gator: Mali-Midgard: mali_mmu_as_released tracepoint failed to activate\n");
+ return 0;
+ }
+
+ if (GATOR_REGISTER_TRACE(mali_total_alloc_pages_change)) {
+ pr_debug("gator: Mali-Midgard: mali_total_alloc_pages_change tracepoint failed to activate\n");
+ return 0;
+ }
+
+ pr_debug("gator: Mali-Midgard: start\n");
+ pr_debug("gator: Mali-Midgard: mali_pm_status probe is at %p\n", &probe_mali_pm_status);
+ pr_debug("gator: Mali-Midgard: mali_page_fault_insert_pages probe is at %p\n", &probe_mali_page_fault_insert_pages);
+ pr_debug("gator: Mali-Midgard: mali_mmu_as_in_use probe is at %p\n", &probe_mali_mmu_as_in_use);
+ pr_debug("gator: Mali-Midgard: mali_mmu_as_released probe is at %p\n", &probe_mali_mmu_as_released);
+ pr_debug("gator: Mali-Midgard: mali_total_alloc_pages_change probe is at %p\n", &probe_mali_total_alloc_pages_change);
+
+ return 1;
+}
+
+static int start(void)
+{
+ unsigned int cnt;
+ mali_profiling_control_type *mali_control;
+
+ /* Clean all data for the next capture */
+ for (cnt = 0; cnt < NUMBER_OF_TIMELINE_EVENTS; cnt++) {
+ timeline_event_starttime[cnt].tv_sec = timeline_event_starttime[cnt].tv_nsec = 0;
+ timeline_data[cnt] = 0;
+ }
+
+ for (cnt = 0; cnt < NUMBER_OF_SOFTWARE_COUNTERS; cnt++)
+ sw_counter_data[cnt] = 0;
+
+ for (cnt = 0; cnt < NUMBER_OF_ACCUMULATORS; cnt++)
+ accumulators_data[cnt] = 0;
+
+ /* Register tracepoints */
+ if (register_tracepoints() == 0)
+ return -1;
+
+ /* Generic control interface for Mali DDK. */
+ mali_control = symbol_get(_mali_profiling_control);
+ if (mali_control) {
+ /* The event attribute in the XML file keeps the actual frame rate. */
+ unsigned int enabled = counters[FILMSTRIP].enabled ? 1 : 0;
+ unsigned int rate = filmstrip_event & 0xff;
+ unsigned int resize_factor = (filmstrip_event >> 8) & 0xff;
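+ /* For example, a (hypothetical) filmstrip_event value of 0x0205
+ * would request a rate of 5 with a resize factor of 2.
+ */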
+
+ pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
+
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+ mali_control(FBDUMP_CONTROL_ENABLE, enabled);
+ mali_control(FBDUMP_CONTROL_RATE, rate);
+ mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
+
+ pr_debug("gator: sent mali_control enabled=%d, rate=%d, resize_factor=%d\n", enabled, rate, resize_factor);
+
+ symbol_put(_mali_profiling_control);
+ } else {
+ pr_err("gator: mali online _mali_profiling_control symbol not found\n");
+ }
+
+ /*
+ * Set the first timestamp for calculating the sample interval. The first interval could be quite long,
+ * since it will be the time between 'start' and the first 'read'.
+ * This means that timeline values will be divided by a big number for the first sample.
+ */
+ getnstimeofday(&prev_timestamp);
+
+ return 0;
+}
+
+static void stop(void)
+{
+ mali_profiling_control_type *mali_control;
+
+ pr_debug("gator: Mali-Midgard: stop\n");
+
+ /*
+ * It is safe to unregister traces even if they were not successfully
+ * registered, so no need to check.
+ */
+ GATOR_UNREGISTER_TRACE(mali_pm_status);
+ pr_debug("gator: Mali-Midgard: mali_pm_status tracepoint deactivated\n");
+
+ GATOR_UNREGISTER_TRACE(mali_page_fault_insert_pages);
+ pr_debug("gator: Mali-Midgard: mali_page_fault_insert_pages tracepoint deactivated\n");
+
+ GATOR_UNREGISTER_TRACE(mali_mmu_as_in_use);
+ pr_debug("gator: Mali-Midgard: mali_mmu_as_in_use tracepoint deactivated\n");
+
+ GATOR_UNREGISTER_TRACE(mali_mmu_as_released);
+ pr_debug("gator: Mali-Midgard: mali_mmu_as_released tracepoint deactivated\n");
+
+ GATOR_UNREGISTER_TRACE(mali_total_alloc_pages_change);
+ pr_debug("gator: Mali-Midgard: mali_total_alloc_pages_change tracepoint deactivated\n");
+
+ /* Generic control interface for Mali DDK. */
+ mali_control = symbol_get(_mali_profiling_control);
+ if (mali_control) {
+ pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control);
+
+ mali_control(FBDUMP_CONTROL_ENABLE, 0);
+
+ symbol_put(_mali_profiling_control);
+ } else {
+ pr_err("gator: mali offline _mali_profiling_control symbol not found\n");
+ }
+}
+
+static int read(int **buffer, bool sched_switch)
+{
+ int cnt;
+ int len = 0;
+ long sample_interval_us = 0;
+ struct timespec read_timestamp;
+
+ if (!on_primary_core())
+ return 0;
+
+ /* Get the start of this sample period. */
+ getnstimeofday(&read_timestamp);
+
+ /*
+ * Calculate the sample interval if the previous sample time is valid.
+ * We use tv_sec since it will not be 0.
+ */
+ if (prev_timestamp.tv_sec != 0)
+ sample_interval_us = get_duration_us(&prev_timestamp, &read_timestamp);
+
+ /* Structure copy. Update the previous timestamp. */
+ prev_timestamp = read_timestamp;
+
+ /*
+ * Report the timeline counters (ACTIVITY_START/STOP)
+ */
+ for (cnt = FIRST_TIMELINE_EVENT; cnt < (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS); cnt++) {
+ struct mali_counter *counter = &counters[cnt];
+
+ if (counter->enabled) {
+ const int index = cnt - FIRST_TIMELINE_EVENT;
+ unsigned int value;
+
+ /* If the activity is still running, add the time up to the
+ * end of this sample onto the count, then reset its start
+ * time to the start of the next sample period.
+ */
+ if (timeline_event_starttime[index].tv_sec != 0) {
+ const long event_duration = get_duration_us(&timeline_event_starttime[index], &read_timestamp);
+
+ timeline_data[index] += event_duration;
+ timeline_event_starttime[index] = read_timestamp; /* Activity is still running. */
+ }
+
+ if (sample_interval_us != 0) {
+ /* Convert the counter to a percent-of-sample value,
+ * e.g. 5000us of activity in a 10000us sample reports 50.
+ */
+ value = (timeline_data[index] * 100) / sample_interval_us;
+ } else {
+ pr_debug("gator: Mali-Midgard: setting value to zero\n");
+ value = 0;
+ }
+
+ /* Clear the counter value ready for the next sample. */
+ timeline_data[index] = 0;
+
+ counter_dump[len++] = counter->key;
+ counter_dump[len++] = value;
+ }
+ }
+
+ /* Report the software counters */
+ for (cnt = FIRST_SOFTWARE_COUNTER; cnt < (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS); cnt++) {
+ const struct mali_counter *counter = &counters[cnt];
+
+ if (counter->enabled) {
+ const int index = cnt - FIRST_SOFTWARE_COUNTER;
+
+ counter_dump[len++] = counter->key;
+ counter_dump[len++] = sw_counter_data[index];
+ /* Set the value to zero for the next time */
+ sw_counter_data[index] = 0;
+ }
+ }
+
+ /* Report the accumulators */
+ for (cnt = FIRST_ACCUMULATOR; cnt < (FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS); cnt++) {
+ const struct mali_counter *counter = &counters[cnt];
+
+ if (counter->enabled) {
+ const int index = cnt - FIRST_ACCUMULATOR;
+
+ counter_dump[len++] = counter->key;
+ counter_dump[len++] = accumulators_data[index];
+ /* Do not zero the accumulator */
+ }
+ }
+
+ /* Update the buffer */
+ if (buffer)
+ *buffer = counter_dump;
+
+ return len;
+}
+
+static struct gator_interface gator_events_mali_midgard_interface = {
+ .create_files = create_files,
+ .start = start,
+ .stop = stop,
+ .read = read
+};
+
+extern int gator_events_mali_midgard_init(void)
+{
+ pr_debug("gator: Mali-Midgard: sw_counters init\n");
+
+ gator_mali_initialise_counters(counters, NUMBER_OF_EVENTS);
+
+ return gator_events_install(&gator_events_mali_midgard_interface);
+}
diff --git a/drivers/gator/gator_events_mali_midgard_hw.c b/drivers/gator/gator_events_mali_midgard_hw.c
new file mode 100644
index 000000000000..c8065da56815
--- /dev/null
+++ b/drivers/gator/gator_events_mali_midgard_hw.c
@@ -0,0 +1,977 @@
+/**
+ * Copyright (C) ARM Limited 2012-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+/* Mali Midgard DDK includes */
+#if defined(MALI_SIMPLE_API)
+/* Header with wrapper functions to kbase structures and functions */
+#include "mali/mali_kbase_gator_api.h"
+#elif defined(MALI_DIR_MIDGARD)
+/* New DDK Directory structure with kernel/drivers/gpu/arm/midgard */
+#include "mali_linux_trace.h"
+#include "mali_kbase.h"
+#include "mali_kbase_mem_linux.h"
+#else
+/* Old DDK Directory structure with kernel/drivers/gpu/arm/t6xx */
+#include "linux/mali_linux_trace.h"
+#include "kbase/src/common/mali_kbase.h"
+#include "kbase/src/linux/mali_kbase_mem_linux.h"
+#endif
+
+/* If API version is not specified then assume API version 1. */
+#ifndef MALI_DDK_GATOR_API_VERSION
+#define MALI_DDK_GATOR_API_VERSION 1
+#endif
+
+#if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2) && (MALI_DDK_GATOR_API_VERSION != 3)
+#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3/r4 DDK, or 3 for r5 and later DDK).
+#endif
+
+#include "gator_events_mali_common.h"
+
+/*
+ * Mali-Midgard
+ */
+#if MALI_DDK_GATOR_API_VERSION == 3
+static uint32_t (*kbase_gator_instr_hwcnt_dump_irq_symbol)(struct kbase_gator_hwcnt_handles *);
+static uint32_t (*kbase_gator_instr_hwcnt_dump_complete_symbol)(struct kbase_gator_hwcnt_handles *, uint32_t *const);
+static struct kbase_gator_hwcnt_handles *(*kbase_gator_hwcnt_init_symbol)(struct kbase_gator_hwcnt_info *);
+static void (*kbase_gator_hwcnt_term_symbol)(struct kbase_gator_hwcnt_info *, struct kbase_gator_hwcnt_handles *);
+
+#else
+static struct kbase_device *(*kbase_find_device_symbol)(int);
+static struct kbase_context *(*kbase_create_context_symbol)(struct kbase_device *);
+static void (*kbase_destroy_context_symbol)(struct kbase_context *);
+
+#if MALI_DDK_GATOR_API_VERSION == 1
+static void *(*kbase_va_alloc_symbol)(struct kbase_context *, u32);
+static void (*kbase_va_free_symbol)(struct kbase_context *, void *);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+static void *(*kbase_va_alloc_symbol)(struct kbase_context *, u32, struct kbase_hwc_dma_mapping *);
+static void (*kbase_va_free_symbol)(struct kbase_context *, struct kbase_hwc_dma_mapping *);
+#endif
+
+static mali_error (*kbase_instr_hwcnt_enable_symbol)(struct kbase_context *, struct kbase_uk_hwcnt_setup *);
+static mali_error (*kbase_instr_hwcnt_disable_symbol)(struct kbase_context *);
+static mali_error (*kbase_instr_hwcnt_clear_symbol)(struct kbase_context *);
+static mali_error (*kbase_instr_hwcnt_dump_irq_symbol)(struct kbase_context *);
+static mali_bool (*kbase_instr_hwcnt_dump_complete_symbol)(struct kbase_context *, mali_bool *);
+
+static long shader_present_low;
+#endif
+
+/** The interval between reads, in ns.
+ *
+ * Earlier we introduced a 'hold off for 1ms after last read' to
+ * resolve MIDBASE-2178 and MALINE-724. However, the 1ms hold off is
+ * too long if no context switches occur as there is a race between
+ * this value and the tick of the read clock in gator which is also
+ * 1ms. If we 'miss' the current read, the counter values are
+ * effectively 'spread' over 2ms and the values seen are half what
+ * they should be (since Streamline averages over sample time). In the
+ * presence of context switches this spread can vary and markedly
+ * affect the counters. Currently there is no 'proper' solution to
+ * this, but empirically we have found that reducing the minimum read
+ * interval to 950us causes the counts to be much more stable.
+ */
+static const int READ_INTERVAL_NSEC = 950000;
+
+#if GATOR_TEST
+#include "gator_events_mali_midgard_hw_test.c"
+#endif
+
+#if MALI_DDK_GATOR_API_VERSION != 3
+/* Blocks for HW counters */
+enum {
+ JM_BLOCK = 0,
+ TILER_BLOCK,
+ SHADER_BLOCK,
+ MMU_BLOCK
+};
+#endif
+
+static const char *mali_name;
+
+/* Counters for Mali-Midgard:
+ *
+ * For HW counters we need strings to create /dev/gator/events files.
+ * Enums are not needed because the position of the HW counter name in the array matches
+ * the position of the corresponding value in the received block of memory.
+ * HW counters are requested by calculating a bitmask, which is then passed to the driver.
+ * Every millisecond a HW counter dump is requested, and if the previous dump has completed the counters are read.
+ */
+
+/* Hardware Counters */
+#if MALI_DDK_GATOR_API_VERSION == 3
+
+static const char *const *hardware_counter_names;
+static int number_of_hardware_counters;
+
+#else
+
+static const char *const hardware_counter_names[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "MESSAGES_SENT",
+ "MESSAGES_RECEIVED",
+ "GPU_ACTIVE", /* 6 */
+ "IRQ_ACTIVE",
+ "JS0_JOBS",
+ "JS0_TASKS",
+ "JS0_ACTIVE",
+ "",
+ "JS0_WAIT_READ",
+ "JS0_WAIT_ISSUE",
+ "JS0_WAIT_DEPEND",
+ "JS0_WAIT_FINISH",
+ "JS1_JOBS",
+ "JS1_TASKS",
+ "JS1_ACTIVE",
+ "",
+ "JS1_WAIT_READ",
+ "JS1_WAIT_ISSUE",
+ "JS1_WAIT_DEPEND",
+ "JS1_WAIT_FINISH",
+ "JS2_JOBS",
+ "JS2_TASKS",
+ "JS2_ACTIVE",
+ "",
+ "JS2_WAIT_READ",
+ "JS2_WAIT_ISSUE",
+ "JS2_WAIT_DEPEND",
+ "JS2_WAIT_FINISH",
+ "JS3_JOBS",
+ "JS3_TASKS",
+ "JS3_ACTIVE",
+ "",
+ "JS3_WAIT_READ",
+ "JS3_WAIT_ISSUE",
+ "JS3_WAIT_DEPEND",
+ "JS3_WAIT_FINISH",
+ "JS4_JOBS",
+ "JS4_TASKS",
+ "JS4_ACTIVE",
+ "",
+ "JS4_WAIT_READ",
+ "JS4_WAIT_ISSUE",
+ "JS4_WAIT_DEPEND",
+ "JS4_WAIT_FINISH",
+ "JS5_JOBS",
+ "JS5_TASKS",
+ "JS5_ACTIVE",
+ "",
+ "JS5_WAIT_READ",
+ "JS5_WAIT_ISSUE",
+ "JS5_WAIT_DEPEND",
+ "JS5_WAIT_FINISH",
+ "JS6_JOBS",
+ "JS6_TASKS",
+ "JS6_ACTIVE",
+ "",
+ "JS6_WAIT_READ",
+ "JS6_WAIT_ISSUE",
+ "JS6_WAIT_DEPEND",
+ "JS6_WAIT_FINISH",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "JOBS_PROCESSED",
+ "TRIANGLES",
+ "QUADS",
+ "POLYGONS",
+ "POINTS",
+ "LINES",
+ "VCACHE_HIT",
+ "VCACHE_MISS",
+ "FRONT_FACING",
+ "BACK_FACING",
+ "PRIM_VISIBLE",
+ "PRIM_CULLED",
+ "PRIM_CLIPPED",
+ "LEVEL0",
+ "LEVEL1",
+ "LEVEL2",
+ "LEVEL3",
+ "LEVEL4",
+ "LEVEL5",
+ "LEVEL6",
+ "LEVEL7",
+ "COMMAND_1",
+ "COMMAND_2",
+ "COMMAND_3",
+ "COMMAND_4",
+ "COMMAND_4_7",
+ "COMMAND_8_15",
+ "COMMAND_16_63",
+ "COMMAND_64",
+ "COMPRESS_IN",
+ "COMPRESS_OUT",
+ "COMPRESS_FLUSH",
+ "TIMESTAMPS",
+ "PCACHE_HIT",
+ "PCACHE_MISS",
+ "PCACHE_LINE",
+ "PCACHE_STALL",
+ "WRBUF_HIT",
+ "WRBUF_MISS",
+ "WRBUF_LINE",
+ "WRBUF_PARTIAL",
+ "WRBUF_STALL",
+ "ACTIVE",
+ "LOADING_DESC",
+ "INDEX_WAIT",
+ "INDEX_RANGE_WAIT",
+ "VERTEX_WAIT",
+ "PCACHE_WAIT",
+ "WRBUF_WAIT",
+ "BUS_READ",
+ "BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "UTLB_STALL",
+ "UTLB_REPLAY_MISS",
+ "UTLB_REPLAY_FULL",
+ "UTLB_NEW_MISS",
+ "UTLB_HIT",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "SHADER_CORE_ACTIVE",
+ "FRAG_ACTIVE",
+ "FRAG_PRIMATIVES",
+ "FRAG_PRIMATIVES_DROPPED",
+ "FRAG_CYCLE_DESC",
+ "FRAG_CYCLES_PLR",
+ "FRAG_CYCLES_VERT",
+ "FRAG_CYCLES_TRISETUP",
+ "FRAG_CYCLES_RAST",
+ "FRAG_THREADS",
+ "FRAG_DUMMY_THREADS",
+ "FRAG_QUADS_RAST",
+ "FRAG_QUADS_EZS_TEST",
+ "FRAG_QUADS_EZS_KILLED",
+ "FRAG_QUADS_LZS_TEST",
+ "FRAG_QUADS_LZS_KILLED",
+ "FRAG_CYCLE_NO_TILE",
+ "FRAG_NUM_TILES",
+ "FRAG_TRANS_ELIM",
+ "COMPUTE_ACTIVE",
+ "COMPUTE_TASKS",
+ "COMPUTE_THREADS",
+ "COMPUTE_CYCLES_DESC",
+ "TRIPIPE_ACTIVE",
+ "ARITH_WORDS",
+ "ARITH_CYCLES_REG",
+ "ARITH_CYCLES_L0",
+ "ARITH_FRAG_DEPEND",
+ "LS_WORDS",
+ "LS_ISSUES",
+ "LS_RESTARTS",
+ "LS_REISSUES_MISS",
+ "LS_REISSUES_VD",
+ "LS_REISSUE_ATTRIB_MISS",
+ "LS_NO_WB",
+ "TEX_WORDS",
+ "TEX_BUBBLES",
+ "TEX_WORDS_L0",
+ "TEX_WORDS_DESC",
+ "TEX_THREADS",
+ "TEX_RECIRC_FMISS",
+ "TEX_RECIRC_DESC",
+ "TEX_RECIRC_MULTI",
+ "TEX_RECIRC_PMISS",
+ "TEX_RECIRC_CONF",
+ "LSC_READ_HITS",
+ "LSC_READ_MISSES",
+ "LSC_WRITE_HITS",
+ "LSC_WRITE_MISSES",
+ "LSC_ATOMIC_HITS",
+ "LSC_ATOMIC_MISSES",
+ "LSC_LINE_FETCHES",
+ "LSC_DIRTY_LINE",
+ "LSC_SNOOPS",
+ "AXI_TLB_STALL",
+ "AXI_TLB_MIESS",
+ "AXI_TLB_TRANSACTION",
+ "LS_TLB_MISS",
+ "LS_TLB_HIT",
+ "AXI_BEATS_READ",
+ "AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "MMU_HIT",
+ "MMU_NEW_MISS",
+ "MMU_REPLAY_FULL",
+ "MMU_REPLAY_MISS",
+ "MMU_TABLE_WALK",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "UTLB_HIT",
+ "UTLB_NEW_MISS",
+ "UTLB_REPLAY_FULL",
+ "UTLB_REPLAY_MISS",
+ "UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "L2_WRITE_BEATS",
+ "L2_READ_BEATS",
+ "L2_ANY_LOOKUP",
+ "L2_READ_LOOKUP",
+ "L2_SREAD_LOOKUP",
+ "L2_READ_REPLAY",
+ "L2_READ_SNOOP",
+ "L2_READ_HIT",
+ "L2_CLEAN_MISS",
+ "L2_WRITE_LOOKUP",
+ "L2_SWRITE_LOOKUP",
+ "L2_WRITE_REPLAY",
+ "L2_WRITE_SNOOP",
+ "L2_WRITE_HIT",
+ "L2_EXT_READ_FULL",
+ "L2_EXT_READ_HALF",
+ "L2_EXT_WRITE_FULL",
+ "L2_EXT_WRITE_HALF",
+ "L2_EXT_READ",
+ "L2_EXT_READ_LINE",
+ "L2_EXT_WRITE",
+ "L2_EXT_WRITE_LINE",
+ "L2_EXT_WRITE_SMALL",
+ "L2_EXT_BARRIER",
+ "L2_EXT_AR_STALL",
+ "L2_EXT_R_BUF_FULL",
+ "L2_EXT_RD_BUF_FULL",
+ "L2_EXT_R_RAW",
+ "L2_EXT_W_STALL",
+ "L2_EXT_W_BUF_FULL",
+ "L2_EXT_R_W_HAZARD",
+ "L2_TAG_HAZARD",
+ "L2_SNOOP_FULL",
+ "L2_REPLAY_FULL"
+};
+
+static const int number_of_hardware_counters = ARRAY_SIZE(hardware_counter_names);
+
+#endif
+
+#define GET_HW_BLOCK(c) (((c) >> 6) & 0x3)
+#define GET_COUNTER_OFFSET(c) ((c) & 0x3f)
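+
+/* For example, counter index 70 decodes to block (70 >> 6) & 0x3 = 1
+ * (the tiler block in the pre-API-3 enum) with counter offset
+ * 70 & 0x3f = 6.
+ */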
+
+#if MALI_DDK_GATOR_API_VERSION == 3
+/* Opaque handles for kbase_context and kbase_hwc_dma_mapping */
+static struct kbase_gator_hwcnt_handles *handles;
+
+/* Information about hardware counters */
+static struct kbase_gator_hwcnt_info *in_out_info;
+
+#else
+/* Memory to dump hardware counters into */
+static void *kernel_dump_buffer;
+
+#if MALI_DDK_GATOR_API_VERSION == 2
+/* DMA state used to manage lifetime of the buffer */
+struct kbase_hwc_dma_mapping kernel_dump_buffer_handle;
+#endif
+
+/* kbase context and device */
+static struct kbase_context *kbcontext;
+static struct kbase_device *kbdevice;
+
+/*
+ * The following function has no external prototype in older DDK
+ * revisions. When the DDK is updated then this should be removed.
+ */
+struct kbase_device *kbase_find_device(int minor);
+#endif
+
+static volatile bool kbase_device_busy;
+static unsigned int num_hardware_counters_enabled;
+
+/* gatorfs variables for counter enable state */
+static struct mali_counter *counters;
+
+/* An array used to return the data we recorded as key,value pairs */
+static int *counter_dump;
+
+extern struct mali_counter mali_activity[3];
+
+static const char *const mali_activity_names[] = {
+ "fragment",
+ "vertex",
+ "opencl",
+};
+
+#define SYMBOL_GET(FUNCTION, ERROR_COUNT) \
+ do { \
+ if (FUNCTION ## _symbol) { \
+ pr_err("gator: mali " #FUNCTION " symbol was already registered\n"); \
+ (ERROR_COUNT)++; \
+ } else { \
+ FUNCTION ## _symbol = symbol_get(FUNCTION); \
+ if (!FUNCTION ## _symbol) { \
+ pr_err("gator: mali online " #FUNCTION " symbol not found\n"); \
+ (ERROR_COUNT)++; \
+ } \
+ } \
+ } while (0)
+
+#define SYMBOL_CLEANUP(FUNCTION) \
+ do { \
+ if (FUNCTION ## _symbol) { \
+ symbol_put(FUNCTION); \
+ FUNCTION ## _symbol = NULL; \
+ } \
+ } while (0)
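+
+/* As a sketch of the expansion, SYMBOL_GET(kbase_find_device, error_count)
+ * becomes a guarded symbol_get(kbase_find_device) that stores the result in
+ * kbase_find_device_symbol and increments error_count if the symbol was
+ * already set or could not be found; SYMBOL_CLEANUP undoes this with
+ * symbol_put() and clears the pointer.
+ */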
+
+/**
+ * Execute symbol_get for all the Mali symbols and check for success.
+ * @return the number of symbols not loaded.
+ */
+static int init_symbols(void)
+{
+ int error_count = 0;
+#if MALI_DDK_GATOR_API_VERSION == 3
+ SYMBOL_GET(kbase_gator_instr_hwcnt_dump_irq, error_count);
+ SYMBOL_GET(kbase_gator_instr_hwcnt_dump_complete, error_count);
+ SYMBOL_GET(kbase_gator_hwcnt_init, error_count);
+ SYMBOL_GET(kbase_gator_hwcnt_term, error_count);
+#else
+ SYMBOL_GET(kbase_find_device, error_count);
+ SYMBOL_GET(kbase_create_context, error_count);
+ SYMBOL_GET(kbase_va_alloc, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_enable, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_clear, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_dump_irq, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_dump_complete, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_disable, error_count);
+ SYMBOL_GET(kbase_va_free, error_count);
+ SYMBOL_GET(kbase_destroy_context, error_count);
+#endif
+
+ return error_count;
+}
+
+/**
+ * Execute symbol_put for all the registered Mali symbols.
+ */
+static void clean_symbols(void)
+{
+#if MALI_DDK_GATOR_API_VERSION == 3
+ SYMBOL_CLEANUP(kbase_gator_instr_hwcnt_dump_irq);
+ SYMBOL_CLEANUP(kbase_gator_instr_hwcnt_dump_complete);
+ SYMBOL_CLEANUP(kbase_gator_hwcnt_init);
+ SYMBOL_CLEANUP(kbase_gator_hwcnt_term);
+#else
+ SYMBOL_CLEANUP(kbase_find_device);
+ SYMBOL_CLEANUP(kbase_create_context);
+ SYMBOL_CLEANUP(kbase_va_alloc);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_enable);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_clear);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_irq);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_complete);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_disable);
+ SYMBOL_CLEANUP(kbase_va_free);
+ SYMBOL_CLEANUP(kbase_destroy_context);
+#endif
+}
+
+/**
+ * Determines whether a read should take place.
+ * @param current_time The current time, obtained from getnstimeofday()
+ * @param prev_time_s The number of seconds at the previous read attempt.
+ * @param next_read_time_ns The time (in ns) when the next read should be allowed.
+ *
+ * @return 1 if the read should go ahead, 0 if it is too soon after the previous read.
+ *
+ * Note that this function has been separated out here to allow it to be tested.
+ */
+static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns)
+{
+ /* If the current ns count rolls over a second, roll the next read time too. */
+ if (current_time->tv_sec != *prev_time_s)
+ *next_read_time_ns = *next_read_time_ns - NSEC_PER_SEC;
+
+ /* Abort the read if the next read time has not arrived. */
+ if (current_time->tv_nsec < *next_read_time_ns)
+ return 0;
+
+ /* Set the next read some fixed time after this one, and update the read timestamp. */
+ *next_read_time_ns = current_time->tv_nsec + READ_INTERVAL_NSEC;
+
+ *prev_time_s = current_time->tv_sec;
+ return 1;
+}
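+
+/* Worked example: with READ_INTERVAL_NSEC = 950000, a first read at
+ * 100s+1000ns proceeds and schedules the next read for 951000ns; a second
+ * attempt at 100s+500000ns is rejected, while one at 100s+960000ns proceeds
+ * and moves the next read time to 1910000ns.
+ */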
+
+static int start(void)
+{
+#if MALI_DDK_GATOR_API_VERSION != 3
+ struct kbase_uk_hwcnt_setup setup;
+ unsigned long long shadersPresent = 0;
+ u16 bitmask[] = { 0, 0, 0, 0 };
+ mali_error err;
+#endif
+ int cnt;
+
+#if MALI_DDK_GATOR_API_VERSION == 3
+ /* Setup HW counters */
+ num_hardware_counters_enabled = 0;
+
+ /* Declare and initialise kbase_gator_hwcnt_info structure */
+ in_out_info = kmalloc(sizeof(*in_out_info), GFP_KERNEL);
+ for (cnt = 0; cnt < ARRAY_SIZE(in_out_info->bitmask); cnt++)
+ in_out_info->bitmask[cnt] = 0;
+
+ /* Calculate enable bitmasks based on counters_enabled array */
+ for (cnt = 0; cnt < number_of_hardware_counters; cnt++) {
+ if (counters[cnt].enabled) {
+ int block = GET_HW_BLOCK(cnt);
+ int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
+
+ in_out_info->bitmask[block] |= (1 << enable_bit);
+ pr_debug("gator: Mali-Midgard: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
+ num_hardware_counters_enabled++;
+ }
+ }
+
+ /* Create a kbase context for HW counters */
+ if (num_hardware_counters_enabled > 0) {
+ if (init_symbols() > 0) {
+ clean_symbols();
+ /* No Mali driver code entrypoints found - not a fault. */
+ return 0;
+ }
+
+ handles = kbase_gator_hwcnt_init_symbol(in_out_info);
+
+ if (handles == NULL)
+ goto out;
+
+ kbase_device_busy = false;
+ }
+
+ return 0;
+#else
+ /* Setup HW counters */
+ num_hardware_counters_enabled = 0;
+
+ /* Calculate enable bitmasks based on counters_enabled array */
+ for (cnt = 0; cnt < number_of_hardware_counters; cnt++) {
+ const struct mali_counter *counter = &counters[cnt];
+
+ if (counter->enabled) {
+ int block = GET_HW_BLOCK(cnt);
+ int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
+
+ bitmask[block] |= (1 << enable_bit);
+ pr_debug("gator: Mali-Midgard: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
+ num_hardware_counters_enabled++;
+ }
+ }
+
+ /* Create a kbase context for HW counters */
+ if (num_hardware_counters_enabled > 0) {
+ if (init_symbols() > 0) {
+ clean_symbols();
+ /* No Mali driver code entrypoints found - not a fault. */
+ return 0;
+ }
+
+ kbdevice = kbase_find_device_symbol(-1);
+
+ /* If we already got a context, fail */
+ if (kbcontext) {
+ pr_debug("gator: Mali-Midgard: error context already present\n");
+ goto out;
+ }
+
+ /* kbcontext will only be valid after all the Mali symbols are loaded successfully */
+ kbcontext = kbase_create_context_symbol(kbdevice);
+ if (!kbcontext) {
+ pr_debug("gator: Mali-Midgard: error creating kbase context\n");
+ goto out;
+ }
+
+ /* See if we can get the number of shader cores */
+ shadersPresent = kbdevice->shader_present_bitmap;
+ shader_present_low = (unsigned long)shadersPresent;
+
+ /*
+ * The amount of memory needed to store the dump (bytes)
+ * DUMP_SIZE = number of core groups
+ * * number of blocks (always 8 for midgard)
+ * * number of counters per block (always 64 for midgard)
+ * * number of bytes per counter (always 4 in midgard)
+ * For a Mali-Midgard with a single core group = 1 * 8 * 64 * 4 = 2048
+ * For a Mali-Midgard with a dual core group = 2 * 8 * 64 * 4 = 4096
+ */
+#if MALI_DDK_GATOR_API_VERSION == 1
+ kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+ kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096, &kernel_dump_buffer_handle);
+#endif
+ if (!kernel_dump_buffer) {
+ pr_debug("gator: Mali-Midgard: error trying to allocate va\n");
+ goto destroy_context;
+ }
+
+ setup.dump_buffer = (uintptr_t)kernel_dump_buffer;
+ setup.jm_bm = bitmask[JM_BLOCK];
+ setup.tiler_bm = bitmask[TILER_BLOCK];
+ setup.shader_bm = bitmask[SHADER_BLOCK];
+ setup.mmu_l2_bm = bitmask[MMU_BLOCK];
+ /* These counters do not exist on Mali-T60x */
+ setup.l3_cache_bm = 0;
+
+ /* Use kbase API to enable hardware counters and provide dump buffer */
+ err = kbase_instr_hwcnt_enable_symbol(kbcontext, &setup);
+ if (err != MALI_ERROR_NONE) {
+ pr_debug("gator: Mali-Midgard: can't setup hardware counters\n");
+ goto free_buffer;
+ }
+ pr_debug("gator: Mali-Midgard: hardware counters enabled\n");
+ kbase_instr_hwcnt_clear_symbol(kbcontext);
+ pr_debug("gator: Mali-Midgard: hardware counters cleared\n");
+
+ kbase_device_busy = false;
+ }
+
+ return 0;
+
+free_buffer:
+#if MALI_DDK_GATOR_API_VERSION == 1
+ kbase_va_free_symbol(kbcontext, kernel_dump_buffer);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+ kbase_va_free_symbol(kbcontext, &kernel_dump_buffer_handle);
+#endif
+
+destroy_context:
+ kbase_destroy_context_symbol(kbcontext);
+#endif
+
+out:
+ clean_symbols();
+ return -1;
+}
+
+static void stop(void)
+{
+ unsigned int cnt;
+#if MALI_DDK_GATOR_API_VERSION == 3
+ struct kbase_gator_hwcnt_handles *temp_hand;
+#else
+ struct kbase_context *temp_kbcontext;
+#endif
+
+ pr_debug("gator: Mali-Midgard: stop\n");
+
+ /* Set all counters as disabled */
+ for (cnt = 0; cnt < number_of_hardware_counters; cnt++)
+ counters[cnt].enabled = 0;
+
+ /* Destroy the context for HW counters */
+#if MALI_DDK_GATOR_API_VERSION == 3
+ if (num_hardware_counters_enabled > 0 && handles != NULL) {
+ /*
+ * Set the global variable to NULL before destroying it, because
+ * other functions will check it before using it.
+ */
+ temp_hand = handles;
+ handles = NULL;
+
+ kbase_gator_hwcnt_term_symbol(in_out_info, temp_hand);
+
+ kfree(in_out_info);
+
+#else
+ if (num_hardware_counters_enabled > 0 && kbcontext != NULL) {
+ /*
+ * Set the global variable to NULL before destroying it, because
+ * other functions will check it before using it.
+ */
+ temp_kbcontext = kbcontext;
+ kbcontext = NULL;
+
+ kbase_instr_hwcnt_disable_symbol(temp_kbcontext);
+
+#if MALI_DDK_GATOR_API_VERSION == 1
+ kbase_va_free_symbol(temp_kbcontext, kernel_dump_buffer);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+ kbase_va_free_symbol(temp_kbcontext, &kernel_dump_buffer_handle);
+#endif
+
+ kbase_destroy_context_symbol(temp_kbcontext);
+#endif
+
+ pr_debug("gator: Mali-Midgard: hardware counters stopped\n");
+
+ clean_symbols();
+ }
+}
+
+static int read_counter(const int cnt, const int len, const struct mali_counter *counter)
+{
+ const int block = GET_HW_BLOCK(cnt);
+ const int counter_offset = GET_COUNTER_OFFSET(cnt);
+
+#if MALI_DDK_GATOR_API_VERSION == 3
+ const char *block_base_address = (char *)in_out_info->kernel_dump_buffer;
+ int i;
+ int shader_core_count = 0;
+ u32 value = 0;
+
+ for (i = 0; i < in_out_info->nr_hwc_blocks; i++) {
+ if (block == in_out_info->hwc_layout[i]) {
+ value += *((u32 *)(block_base_address + (0x100 * i)) + counter_offset);
+ if (block == SHADER_BLOCK)
+ ++shader_core_count;
+ }
+ }
+
+ if (shader_core_count > 1)
+ value /= shader_core_count;
+#else
+ const char *block_base_address = (char *)kernel_dump_buffer + vithar_blocks[block];
+
+ /* If counter belongs to shader block need to take into account all cores */
+ if (block == SHADER_BLOCK) {
+ int i = 0;
+ int shader_core_count = 0;
+
+ value = 0;
+
+ for (i = 0; i < 4; i++) {
+ if ((shader_present_low >> i) & 1) {
+ value += *((u32 *)(block_base_address + (0x100 * i)) + counter_offset);
+ shader_core_count++;
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if ((shader_present_low >> (i+4)) & 1) {
+ value += *((u32 *)(block_base_address + (0x100 * i) + 0x800) + counter_offset);
+ shader_core_count++;
+ }
+ }
+
+ /* Divide the total by the number of cores to produce an average */
+ if (shader_core_count != 0)
+ value /= shader_core_count;
+ } else {
+ value = *((u32 *)block_base_address + counter_offset);
+ }
+#endif
+
+ counter_dump[len + 0] = counter->key;
+ counter_dump[len + 1] = value;
+
+ return 2;
+}
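+
+/* For example, if a shader-block counter is enabled and three shader cores
+ * report 300, 360 and 240, read_counter() emits the counter's key followed
+ * by the per-core average (300 + 360 + 240) / 3 = 300.
+ */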
+
+static int read(int **buffer, bool sched_switch)
+{
+ int cnt;
+ int len = 0;
+ uint32_t success;
+
+ struct timespec current_time;
+ static u32 prev_time_s;
+ static s32 next_read_time_ns;
+
+ if (!on_primary_core() || sched_switch)
+ return 0;
+
+ getnstimeofday(&current_time);
+
+ /*
+ * Discard reads unless a respectable time has passed. This
+ * reduces the load on the GPU without sacrificing accuracy on
+ * the Streamline display.
+ */
+ if (!is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns))
+ return 0;
+
+ /*
+ * Report the HW counters
+ * Only process hardware counters if at least one of the hardware counters is enabled.
+ */
+ if (num_hardware_counters_enabled > 0) {
+#if MALI_DDK_GATOR_API_VERSION != 3
+ const unsigned int vithar_blocks[] = {
+ 0x700, /* VITHAR_JOB_MANAGER, Block 0 */
+ 0x400, /* VITHAR_TILER, Block 1 */
+ 0x000, /* VITHAR_SHADER_CORE, Block 2 */
+ 0x500 /* VITHAR_MEMORY_SYSTEM, Block 3 */
+ };
+#endif
+
+#if MALI_DDK_GATOR_API_VERSION == 3
+ if (!handles)
+ return -1;
+
+ /* Mali symbols can be called safely since a kbcontext is valid */
+ if (kbase_gator_instr_hwcnt_dump_complete_symbol(handles, &success) == MALI_TRUE) {
+#else
+ if (!kbcontext)
+ return -1;
+
+ /* Mali symbols can be called safely since a kbcontext is valid */
+ if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) {
+#endif
+ kbase_device_busy = false;
+
+ if (success == MALI_TRUE) {
+ /* Cycle through hardware counters and accumulate totals */
+ for (cnt = 0; cnt < number_of_hardware_counters; cnt++) {
+ const struct mali_counter *counter = &counters[cnt];
+
+ if (counter->enabled)
+ len += read_counter(cnt, len, counter);
+ }
+ }
+ }
+
+ if (!kbase_device_busy) {
+ kbase_device_busy = true;
+#if MALI_DDK_GATOR_API_VERSION == 3
+ kbase_gator_instr_hwcnt_dump_irq_symbol(handles);
+#else
+ kbase_instr_hwcnt_dump_irq_symbol(kbcontext);
+#endif
+ }
+ }
+
+ /* Update the buffer */
+ if (buffer)
+ *buffer = counter_dump;
+
+ return len;
+}
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+ unsigned int event;
+ /*
+ * Create the filesystem for all events
+ */
+ for (event = 0; event < ARRAY_SIZE(mali_activity); event++) {
+ if (gator_mali_create_file_system("Midgard", mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0)
+ return -1;
+ }
+
+ for (event = 0; event < number_of_hardware_counters; event++) {
+ if (gator_mali_create_file_system(mali_name, hardware_counter_names[event], sb, root, &counters[event], NULL) != 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static void shutdown(void)
+{
+#if MALI_DDK_GATOR_API_VERSION == 3
+ void (*kbase_gator_hwcnt_term_names_symbol)(void) = NULL;
+ int error_count = 0;
+#endif
+
+ kfree(counters);
+ kfree(counter_dump);
+
+#if MALI_DDK_GATOR_API_VERSION == 3
+ SYMBOL_GET(kbase_gator_hwcnt_term_names, error_count);
+
+ number_of_hardware_counters = -1;
+ hardware_counter_names = NULL;
+ if (kbase_gator_hwcnt_term_names_symbol != NULL) {
+ kbase_gator_hwcnt_term_names_symbol();
+ pr_err("Released symbols\n");
+ }
+
+ SYMBOL_CLEANUP(kbase_gator_hwcnt_term_names);
+#endif
+}
+
+static struct gator_interface gator_events_mali_midgard_interface = {
+ .shutdown = shutdown,
+ .create_files = create_files,
+ .start = start,
+ .stop = stop,
+ .read = read
+};
+
+int gator_events_mali_midgard_hw_init(void)
+{
+#if MALI_DDK_GATOR_API_VERSION == 3
+ const char *const *(*kbase_gator_hwcnt_init_names_symbol)(uint32_t *) = NULL;
+ int error_count = 0;
+#endif
+
+ pr_debug("gator: Mali-Midgard: sw_counters init\n");
+
+#if GATOR_TEST
+ test_all_is_read_scheduled();
+#endif
+
+#if MALI_DDK_GATOR_API_VERSION == 3
+ SYMBOL_GET(kbase_gator_hwcnt_init_names, error_count);
+ if (error_count > 0) {
+ SYMBOL_CLEANUP(kbase_gator_hwcnt_init_names);
+ return 1;
+ }
+
+ number_of_hardware_counters = -1;
+ hardware_counter_names = kbase_gator_hwcnt_init_names_symbol(&number_of_hardware_counters);
+
+ SYMBOL_CLEANUP(kbase_gator_hwcnt_init_names);
+
+ if ((hardware_counter_names == NULL) || (number_of_hardware_counters <= 0)) {
+ pr_err("gator: Error reading hardware counters names: got %d names\n", number_of_hardware_counters);
+ return -1;
+ }
+#else
+ mali_name = "Midgard";
+#endif
+
+ counters = kmalloc(sizeof(*counters)*number_of_hardware_counters, GFP_KERNEL);
+ counter_dump = kmalloc(sizeof(*counter_dump)*number_of_hardware_counters*2, GFP_KERNEL);
+
+ gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
+ gator_mali_initialise_counters(counters, number_of_hardware_counters);
+
+ return gator_events_install(&gator_events_mali_midgard_interface);
+}
diff --git a/drivers/gator/gator_events_mali_midgard_hw_test.c b/drivers/gator/gator_events_mali_midgard_hw_test.c
new file mode 100644
index 000000000000..31a91e1c72b2
--- /dev/null
+++ b/drivers/gator/gator_events_mali_midgard_hw_test.c
@@ -0,0 +1,55 @@
+/**
+ * Copyright (C) ARM Limited 2012-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/**
+ * Test functions for the gator_events_mali_midgard_hw code.
+ */
+
+static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns);
+
+static int test_is_read_scheduled(u32 s, u32 ns, u32 prev_s, s32 next_ns, int expected_result, s32 expected_next_ns)
+{
+ struct timespec current_time;
+ u32 prev_time_s = prev_s;
+ s32 next_read_time_ns = next_ns;
+
+ current_time.tv_sec = s;
+ current_time.tv_nsec = ns;
+
+ if (is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns) != expected_result) {
+ pr_err("Failed do_read(%u, %u, %u, %d): expected %d\n", s, ns, prev_s, next_ns, expected_result);
+ return 0;
+ }
+
+ if (next_read_time_ns != expected_next_ns) {
+ pr_err("Failed: next_read_ns expected=%d, actual=%d\n", expected_next_ns, next_read_time_ns);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void test_all_is_read_scheduled(void)
+{
+ const int HIGHEST_NS = 999999999;
+ int n_tests_passed = 0;
+
+ pr_err("gator: running tests on %s\n", __FILE__);
+
+ n_tests_passed += test_is_read_scheduled(0, 0, 0, 0, 1, READ_INTERVAL_NSEC); /* Null time */
+ n_tests_passed += test_is_read_scheduled(100, 1000, 0, 0, 1, READ_INTERVAL_NSEC + 1000); /* Initial values */
+
+ n_tests_passed += test_is_read_scheduled(100, HIGHEST_NS, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500);
+ n_tests_passed += test_is_read_scheduled(101, 0001, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500 - NSEC_PER_SEC);
+ n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500 - NSEC_PER_SEC, 1, 600 + READ_INTERVAL_NSEC);
+
+ n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500, 1, 600 + READ_INTERVAL_NSEC);
+
+ pr_err("gator: %d tests passed\n", n_tests_passed);
+}
diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c
new file mode 100644
index 000000000000..c625ac5af9cd
--- /dev/null
+++ b/drivers/gator/gator_events_meminfo.c
@@ -0,0 +1,470 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/hardirq.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <trace/events/kmem.h>
+
+#define USE_THREAD defined(CONFIG_PREEMPT_RT_FULL)
+
+enum {
+ MEMINFO_MEMFREE,
+ MEMINFO_MEMUSED,
+ MEMINFO_BUFFERRAM,
+ MEMINFO_TOTAL,
+};
+
+enum {
+ PROC_SIZE,
+ PROC_SHARE,
+ PROC_TEXT,
+ PROC_DATA,
+ PROC_COUNT,
+};
+
+static const char * const meminfo_names[] = {
+ "Linux_meminfo_memfree",
+ "Linux_meminfo_memused",
+ "Linux_meminfo_bufferram",
+};
+
+static const char * const proc_names[] = {
+ "Linux_proc_statm_size",
+ "Linux_proc_statm_share",
+ "Linux_proc_statm_text",
+ "Linux_proc_statm_data",
+};
+
+static bool meminfo_global_enabled;
+static ulong meminfo_enabled[MEMINFO_TOTAL];
+static ulong meminfo_keys[MEMINFO_TOTAL];
+static long long meminfo_buffer[2 * (MEMINFO_TOTAL + 2)];
+static int meminfo_length;
+static bool new_data_avail;
+
+static bool proc_global_enabled;
+static ulong proc_enabled[PROC_COUNT];
+static ulong proc_keys[PROC_COUNT];
+static DEFINE_PER_CPU(long long, proc_buffer[2 * (PROC_COUNT + 3)]);
+
+#if USE_THREAD
+
+static int gator_meminfo_func(void *data);
+static bool gator_meminfo_run;
+/* The semaphore starts unlocked so that the memory values are initialised on the first pass */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+static DECLARE_MUTEX(gator_meminfo_sem);
+#else
+static DEFINE_SEMAPHORE(gator_meminfo_sem);
+#endif
+
+static void notify(void)
+{
+ up(&gator_meminfo_sem);
+}
+
+#else
+
+static unsigned int mem_event;
+static void wq_sched_handler(struct work_struct *wsptr);
+DECLARE_WORK(work, wq_sched_handler);
+static struct timer_list meminfo_wake_up_timer;
+static void meminfo_wake_up_handler(unsigned long unused_data);
+
+static void notify(void)
+{
+ mem_event++;
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+GATOR_DEFINE_PROBE(mm_page_free_direct, TP_PROTO(struct page *page, unsigned int order))
+#else
+GATOR_DEFINE_PROBE(mm_page_free, TP_PROTO(struct page *page, unsigned int order))
+#endif
+{
+ notify();
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+GATOR_DEFINE_PROBE(mm_pagevec_free, TP_PROTO(struct page *page, int cold))
+#else
+GATOR_DEFINE_PROBE(mm_page_free_batched, TP_PROTO(struct page *page, int cold))
+#endif
+{
+ notify();
+}
+
+GATOR_DEFINE_PROBE(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype))
+{
+ notify();
+}
+
+static int gator_events_meminfo_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+
+ for (i = 0; i < MEMINFO_TOTAL; i++) {
+ dir = gatorfs_mkdir(sb, root, meminfo_names[i]);
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &meminfo_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &meminfo_keys[i]);
+ }
+
+ for (i = 0; i < PROC_COUNT; ++i) {
+ dir = gatorfs_mkdir(sb, root, proc_names[i]);
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &proc_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &proc_keys[i]);
+ }
+
+ return 0;
+}
+
+static int gator_events_meminfo_start(void)
+{
+ int i;
+
+ new_data_avail = false;
+ meminfo_global_enabled = 0;
+ for (i = 0; i < MEMINFO_TOTAL; i++) {
+ if (meminfo_enabled[i]) {
+ meminfo_global_enabled = 1;
+ break;
+ }
+ }
+
+ proc_global_enabled = 0;
+ for (i = 0; i < PROC_COUNT; ++i) {
+ if (proc_enabled[i]) {
+ proc_global_enabled = 1;
+ break;
+ }
+ }
+ if (meminfo_enabled[MEMINFO_MEMUSED])
+ proc_global_enabled = 1;
+
+ if (meminfo_global_enabled == 0)
+ return 0;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ if (GATOR_REGISTER_TRACE(mm_page_free_direct))
+#else
+ if (GATOR_REGISTER_TRACE(mm_page_free))
+#endif
+ goto mm_page_free_exit;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ if (GATOR_REGISTER_TRACE(mm_pagevec_free))
+#else
+ if (GATOR_REGISTER_TRACE(mm_page_free_batched))
+#endif
+ goto mm_page_free_batched_exit;
+ if (GATOR_REGISTER_TRACE(mm_page_alloc))
+ goto mm_page_alloc_exit;
+
+#if USE_THREAD
+ /* Start worker thread */
+ gator_meminfo_run = true;
+ /* Since the semaphore starts unlocked, memory values will be initialized */
+ if (IS_ERR(kthread_run(gator_meminfo_func, NULL, "gator_meminfo")))
+ goto kthread_run_exit;
+#else
+ setup_timer(&meminfo_wake_up_timer, meminfo_wake_up_handler, 0);
+#endif
+
+ return 0;
+
+#if USE_THREAD
+kthread_run_exit:
+ GATOR_UNREGISTER_TRACE(mm_page_alloc);
+#endif
+mm_page_alloc_exit:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ GATOR_UNREGISTER_TRACE(mm_pagevec_free);
+#else
+ GATOR_UNREGISTER_TRACE(mm_page_free_batched);
+#endif
+mm_page_free_batched_exit:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ GATOR_UNREGISTER_TRACE(mm_page_free_direct);
+#else
+ GATOR_UNREGISTER_TRACE(mm_page_free);
+#endif
+mm_page_free_exit:
+ return -1;
+}
+
+static void gator_events_meminfo_stop(void)
+{
+ if (meminfo_global_enabled) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ GATOR_UNREGISTER_TRACE(mm_page_free_direct);
+ GATOR_UNREGISTER_TRACE(mm_pagevec_free);
+#else
+ GATOR_UNREGISTER_TRACE(mm_page_free);
+ GATOR_UNREGISTER_TRACE(mm_page_free_batched);
+#endif
+ GATOR_UNREGISTER_TRACE(mm_page_alloc);
+
+#if USE_THREAD
+ /* Stop worker thread */
+ gator_meminfo_run = false;
+ up(&gator_meminfo_sem);
+#else
+ del_timer_sync(&meminfo_wake_up_timer);
+#endif
+ }
+}
+
+static void do_read(void)
+{
+ struct sysinfo info;
+ int i, len;
+ unsigned long long value;
+
+ meminfo_length = len = 0;
+
+ si_meminfo(&info);
+ for (i = 0; i < MEMINFO_TOTAL; i++) {
+ if (meminfo_enabled[i]) {
+ switch (i) {
+ case MEMINFO_MEMFREE:
+ value = info.freeram * PAGE_SIZE;
+ break;
+ case MEMINFO_MEMUSED:
+ /* pid -1 means system wide */
+ meminfo_buffer[len++] = 1;
+ meminfo_buffer[len++] = -1;
+ /* Emit value */
+ meminfo_buffer[len++] = meminfo_keys[MEMINFO_MEMUSED];
+ meminfo_buffer[len++] = (info.totalram - info.freeram) * PAGE_SIZE;
+ /* Clear pid */
+ meminfo_buffer[len++] = 1;
+ meminfo_buffer[len++] = 0;
+ continue;
+ case MEMINFO_BUFFERRAM:
+ value = info.bufferram * PAGE_SIZE;
+ break;
+ default:
+ value = 0;
+ break;
+ }
+ meminfo_buffer[len++] = meminfo_keys[i];
+ meminfo_buffer[len++] = value;
+ }
+ }
+
+ meminfo_length = len;
+ new_data_avail = true;
+}
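+
+/* As an illustration, with MEMINFO_MEMFREE and MEMINFO_MEMUSED both enabled
+ * the buffer is laid out as:
+ * { memfree_key, free_bytes, 1, -1, memused_key, used_bytes, 1, 0 }
+ * where the surrounding (1, -1) / (1, 0) pairs set and then clear the
+ * system-wide pid (-1) that the memused value is attributed to.
+ */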
+
+#if USE_THREAD
+
+static int gator_meminfo_func(void *data)
+{
+ for (;;) {
+ if (down_killable(&gator_meminfo_sem))
+ break;
+
+ /* Eat up any pending events */
+ while (!down_trylock(&gator_meminfo_sem))
+ ;
+
+ if (!gator_meminfo_run)
+ break;
+
+ do_read();
+ }
+
+ return 0;
+}
+
+#else
+
+/* Must be run in process context as the kernel function si_meminfo() can sleep */
+static void wq_sched_handler(struct work_struct *wsptr)
+{
+ do_read();
+}
+
+static void meminfo_wake_up_handler(unsigned long unused_data)
+{
+ /* Scheduling work must be deferred through a timer because scheduling work during a context switch is not allowed in kernel versions 3.5 and later */
+ schedule_work(&work);
+}
+
+#endif
+
+static int gator_events_meminfo_read(long long **buffer)
+{
+#if !USE_THREAD
+ static unsigned int last_mem_event;
+#endif
+
+ if (!on_primary_core() || !meminfo_global_enabled)
+ return 0;
+
+#if !USE_THREAD
+ if (last_mem_event != mem_event) {
+ last_mem_event = mem_event;
+ mod_timer(&meminfo_wake_up_timer, jiffies + 1);
+ }
+#endif
+
+ if (!new_data_avail)
+ return 0;
+
+ new_data_avail = false;
+
+ if (buffer)
+ *buffer = meminfo_buffer;
+
+ return meminfo_length;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+
+static inline unsigned long gator_get_mm_counter(struct mm_struct *mm, int member)
+{
+#ifdef SPLIT_RSS_COUNTING
+ long val = atomic_long_read(&mm->rss_stat.count[member]);
+
+ if (val < 0)
+ val = 0;
+ return (unsigned long)val;
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+ return mm->rss_stat.count[member];
+#else
+ return atomic_long_read(&mm->rss_stat.count[member]);
+#endif
+#endif
+}
+
+#define get_mm_counter(mm, member) gator_get_mm_counter(mm, member)
+
+#endif
+
+static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct *task)
+{
+ struct mm_struct *mm;
+ u64 share = 0;
+ int i;
+ long long value;
+ int len = 0;
+ int cpu = get_physical_cpu();
+ long long *buf = per_cpu(proc_buffer, cpu);
+
+ if (!proc_global_enabled)
+ return 0;
+
+ /* Collect the memory stats of the process instead of the thread */
+ if (task->group_leader != NULL)
+ task = task->group_leader;
+
+ /* get_task_mm()/mmput() are not needed in this context because the task and its mm are guaranteed to be valid as part of the sched_switch tracepoint */
+ mm = task->mm;
+ if (mm == NULL)
+ return 0;
+
+ /* Derived from task_statm in fs/proc/task_mmu.c */
+ if (meminfo_enabled[MEMINFO_MEMUSED] || proc_enabled[PROC_SHARE]) {
+ share = get_mm_counter(mm,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)
+ file_rss
+#else
+ MM_FILEPAGES
+#endif
+ );
+ }
+
+ /* key of 1 indicates a pid */
+ buf[len++] = 1;
+ buf[len++] = task->pid;
+
+ for (i = 0; i < PROC_COUNT; ++i) {
+ if (proc_enabled[i]) {
+ switch (i) {
+ case PROC_SIZE:
+ value = mm->total_vm;
+ break;
+ case PROC_SHARE:
+ value = share;
+ break;
+ case PROC_TEXT:
+ value = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT;
+ break;
+ case PROC_DATA:
+ value = mm->total_vm - mm->shared_vm;
+ break;
+ }
+
+ buf[len++] = proc_keys[i];
+ buf[len++] = value * PAGE_SIZE;
+ }
+ }
+
+ if (meminfo_enabled[MEMINFO_MEMUSED]) {
+ value = share + get_mm_counter(mm,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)
+ anon_rss
+#else
+ MM_ANONPAGES
+#endif
+ );
+ /* Send resident for this pid */
+ buf[len++] = meminfo_keys[MEMINFO_MEMUSED];
+ buf[len++] = value * PAGE_SIZE;
+ }
+
+ /* Clear pid */
+ buf[len++] = 1;
+ buf[len++] = 0;
+
+ if (buffer)
+ *buffer = buf;
+
+ return len;
+}
+
+static struct gator_interface gator_events_meminfo_interface = {
+ .create_files = gator_events_meminfo_create_files,
+ .start = gator_events_meminfo_start,
+ .stop = gator_events_meminfo_stop,
+ .read64 = gator_events_meminfo_read,
+ .read_proc = gator_events_meminfo_read_proc,
+};
+
+int gator_events_meminfo_init(void)
+{
+ int i;
+
+ meminfo_global_enabled = 0;
+ for (i = 0; i < MEMINFO_TOTAL; i++) {
+ meminfo_enabled[i] = 0;
+ meminfo_keys[i] = gator_events_get_key();
+ }
+
+ proc_global_enabled = 0;
+ for (i = 0; i < PROC_COUNT; ++i) {
+ proc_enabled[i] = 0;
+ proc_keys[i] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_meminfo_interface);
+}
diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c
new file mode 100644
index 000000000000..6b2af995ed41
--- /dev/null
+++ b/drivers/gator/gator_events_mmapped.c
@@ -0,0 +1,209 @@
+/*
+ * Example events provider
+ *
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Similar entries to those below must be present in the events.xml file.
+ * To add them to the events.xml, create an events-mmap.xml with the
+ * following contents and rebuild gatord:
+ *
+ * <category name="mmapped">
+ * <event counter="mmapped_cnt0" title="Simulated1" name="Sine" display="maximum" class="absolute" description="Sort-of-sine"/>
+ * <event counter="mmapped_cnt1" title="Simulated2" name="Triangle" display="maximum" class="absolute" description="Triangular wave"/>
+ * <event counter="mmapped_cnt2" title="Simulated3" name="PWM" display="maximum" class="absolute" description="PWM Signal"/>
+ * </category>
+ *
+ * When adding custom events, be sure to do the following:
+ * - add any needed .c files to the gator driver Makefile
+ * - call gator_events_install in the events init function
+ * - add the init function to GATOR_EVENTS_LIST in gator_main.c
+ * - add a new events-*.xml file to the gator daemon and rebuild
+ *
+ * Troubleshooting:
+ * - verify the new events are part of events.xml, which is created when building the daemon
+ * - verify the new events exist at /dev/gator/events/ once gatord is launched
+ * - verify the counter name in the XML matches the name at /dev/gator/events
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ratelimit.h>
+
+#include "gator.h"
+
+#define MMAPPED_COUNTERS_NUM 3
+
+static int mmapped_global_enabled;
+
+static struct {
+ unsigned long enabled;
+ unsigned long key;
+} mmapped_counters[MMAPPED_COUNTERS_NUM];
+
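+/* Interleaved key/value pairs reported back to gatord via *buffer in the read callback */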
+static int mmapped_buffer[MMAPPED_COUNTERS_NUM * 2];
+
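+/* Timestamp of the previous read, used to derive the delta passed to the simulator */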
+static s64 prev_time;
+
+/* Adds mmapped_cntX directories and enabled, event, and key files to /dev/gator/events */
+static int gator_events_mmapped_create_files(struct super_block *sb,
+ struct dentry *root)
+{
+ int i;
+
+ for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+ char buf[16];
+ struct dentry *dir;
+
+ snprintf(buf, sizeof(buf), "mmapped_cnt%d", i);
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (WARN_ON(!dir))
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled",
+ &mmapped_counters[i].enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key",
+ &mmapped_counters[i].key);
+ }
+
+ return 0;
+}
+
+static int gator_events_mmapped_start(void)
+{
+ int i;
+ struct timespec ts;
+
+ getnstimeofday(&ts);
+ prev_time = timespec_to_ns(&ts);
+
+ mmapped_global_enabled = 0;
+ for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+ if (mmapped_counters[i].enabled) {
+ mmapped_global_enabled = 1;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static void gator_events_mmapped_stop(void)
+{
+}
+
+/* This function "simulates" counters, generating values of fancy
+ * functions like sine or triangle... */
+static int mmapped_simulate(int counter, int delta_in_us)
+{
+ int result = 0;
+
+ switch (counter) {
+ case 0: /* sort-of-sine */
+ {
+ static int t;
+ int x;
+
+ t += delta_in_us;
+ if (t > 2048000)
+ t = 0;
+
+ if (t % 1024000 < 512000)
+ x = 512000 - (t % 512000);
+ else
+ x = t % 512000;
+
+ result = 32 * x / 512000;
+ result = result * result;
+
+ if (t < 1024000)
+ result = 1922 - result;
+ }
+ break;
+ case 1: /* triangle */
+ {
+ static int v, d = 1;
+
+ v = v + d * delta_in_us;
+ if (v < 0) {
+ v = 0;
+ d = 1;
+ } else if (v > 1000000) {
+ v = 1000000;
+ d = -1;
+ }
+
+ result = v;
+ }
+ break;
+ case 2: /* PWM signal */
+ {
+ static int dc, x, t;
+
+ t += delta_in_us;
+ if (t > 1000000)
+ t = 0;
+ if (x / 1000000 != (x + delta_in_us) / 1000000)
+ dc = (dc + 100000) % 1000000;
+ x += delta_in_us;
+
+ result = t < dc ? 0 : 10;
+ }
+ break;
+ }
+
+ return result;
+}
+
+static int gator_events_mmapped_read(int **buffer, bool sched_switch)
+{
+ int i;
+ int len = 0;
+ int delta_in_us;
+ struct timespec ts;
+ s64 time;
+
+ /* System wide counters - read from one core only */
+ if (!on_primary_core() || !mmapped_global_enabled)
+ return 0;
+
+ getnstimeofday(&ts);
+ time = timespec_to_ns(&ts);
+ delta_in_us = (int)(time - prev_time) / 1000;
+ prev_time = time;
+
+ for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+ if (mmapped_counters[i].enabled) {
+ mmapped_buffer[len++] = mmapped_counters[i].key;
+ mmapped_buffer[len++] =
+ mmapped_simulate(i, delta_in_us);
+ }
+ }
+
+ if (buffer)
+ *buffer = mmapped_buffer;
+
+ return len;
+}
+
+static struct gator_interface gator_events_mmapped_interface = {
+ .create_files = gator_events_mmapped_create_files,
+ .start = gator_events_mmapped_start,
+ .stop = gator_events_mmapped_stop,
+ .read = gator_events_mmapped_read,
+};
+
+/* Must not be static! */
+int __init gator_events_mmapped_init(void)
+{
+ int i;
+
+ for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+ mmapped_counters[i].enabled = 0;
+ mmapped_counters[i].key = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_mmapped_interface);
+}
diff --git a/drivers/gator/gator_events_net.c b/drivers/gator/gator_events_net.c
new file mode 100644
index 000000000000..d21b4db7b77c
--- /dev/null
+++ b/drivers/gator/gator_events_net.c
@@ -0,0 +1,172 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <linux/netdevice.h>
+#include <linux/hardirq.h>
+
+#define NETRX 0
+#define NETTX 1
+#define TOTALNET 2
+
+static ulong netrx_enabled;
+static ulong nettx_enabled;
+static ulong netrx_key;
+static ulong nettx_key;
+static int rx_total, tx_total;
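+/* netPrev holds the totals already reported; netGet holds up to two key/value pairs per counter (a zero marker followed by the delta) */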
+static ulong netPrev[TOTALNET];
+static int netGet[TOTALNET * 4];
+
+static struct timer_list net_wake_up_timer;
+
+/* Must be run in process context as the kernel function dev_get_stats() can sleep */
+static void get_network_stats(struct work_struct *wsptr)
+{
+ int rx = 0, tx = 0;
+ struct net_device *dev;
+
+ for_each_netdev(&init_net, dev) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+ const struct net_device_stats *stats = dev_get_stats(dev);
+#else
+ struct rtnl_link_stats64 temp;
+ const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+#endif
+ rx += stats->rx_bytes;
+ tx += stats->tx_bytes;
+ }
+ rx_total = rx;
+ tx_total = tx;
+}
+
+DECLARE_WORK(wq_get_stats, get_network_stats);
+
+static void net_wake_up_handler(unsigned long unused_data)
+{
+ /* Scheduling work must be deferred through a timer because scheduling work during a context switch is not allowed in kernel versions 3.5 and later */
+ schedule_work(&wq_get_stats);
+}
+
+static void calculate_delta(int *rx, int *tx)
+{
+ int rx_calc, tx_calc;
+
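+ /* Clamp negative deltas, which can occur when an interface resets its statistics */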
+ rx_calc = (int)(rx_total - netPrev[NETRX]);
+ if (rx_calc < 0)
+ rx_calc = 0;
+ netPrev[NETRX] += rx_calc;
+
+ tx_calc = (int)(tx_total - netPrev[NETTX]);
+ if (tx_calc < 0)
+ tx_calc = 0;
+ netPrev[NETTX] += tx_calc;
+
+ *rx = rx_calc;
+ *tx = tx_calc;
+}
+
+static int gator_events_net_create_files(struct super_block *sb, struct dentry *root)
+{
+ /* Network counters are not currently supported with full RT-Preempt (CONFIG_PREEMPT_RT_FULL) because mod_timer is used */
+#ifndef CONFIG_PREEMPT_RT_FULL
+ struct dentry *dir;
+
+ dir = gatorfs_mkdir(sb, root, "Linux_net_rx");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &netrx_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &netrx_key);
+
+ dir = gatorfs_mkdir(sb, root, "Linux_net_tx");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &nettx_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &nettx_key);
+#endif
+
+ return 0;
+}
+
+static int gator_events_net_start(void)
+{
+ get_network_stats(NULL);
+ netPrev[NETRX] = rx_total;
+ netPrev[NETTX] = tx_total;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+ setup_timer(&net_wake_up_timer, net_wake_up_handler, 0);
+#else
+ setup_deferrable_timer_on_stack(&net_wake_up_timer, net_wake_up_handler, 0);
+#endif
+ return 0;
+}
+
+static void gator_events_net_stop(void)
+{
+ del_timer_sync(&net_wake_up_timer);
+ netrx_enabled = 0;
+ nettx_enabled = 0;
+}
+
+static int gator_events_net_read(int **buffer, bool sched_switch)
+{
+ int len, rx_delta, tx_delta;
+ static int last_rx_delta, last_tx_delta;
+
+ if (!on_primary_core())
+ return 0;
+
+ if (!netrx_enabled && !nettx_enabled)
+ return 0;
+
+ mod_timer(&net_wake_up_timer, jiffies + 1);
+
+ calculate_delta(&rx_delta, &tx_delta);
+
+ len = 0;
+ if (netrx_enabled && last_rx_delta != rx_delta) {
+ last_rx_delta = rx_delta;
+ netGet[len++] = netrx_key;
+ /* indicates to Streamline that rx_delta bytes were received now, not since the last message */
+ netGet[len++] = 0;
+ netGet[len++] = netrx_key;
+ netGet[len++] = rx_delta;
+ }
+
+ if (nettx_enabled && last_tx_delta != tx_delta) {
+ last_tx_delta = tx_delta;
+ netGet[len++] = nettx_key;
+ /* indicates to Streamline that tx_delta bytes were transmitted now, not since the last message */
+ netGet[len++] = 0;
+ netGet[len++] = nettx_key;
+ netGet[len++] = tx_delta;
+ }
+
+ if (buffer)
+ *buffer = netGet;
+
+ return len;
+}
+
+static struct gator_interface gator_events_net_interface = {
+ .create_files = gator_events_net_create_files,
+ .start = gator_events_net_start,
+ .stop = gator_events_net_stop,
+ .read = gator_events_net_read,
+};
+
+int gator_events_net_init(void)
+{
+ netrx_key = gator_events_get_key();
+ nettx_key = gator_events_get_key();
+
+ netrx_enabled = 0;
+ nettx_enabled = 0;
+
+ return gator_events_install(&gator_events_net_interface);
+}
diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c
new file mode 100644
index 000000000000..47cf278e508b
--- /dev/null
+++ b/drivers/gator/gator_events_perf_pmu.c
@@ -0,0 +1,574 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+/* gator_events_armvX.c is used for Linux 2.6.x */
+#if GATOR_PERF_PMU_SUPPORT
+
+#include <linux/io.h>
+#ifdef CONFIG_OF
+#include <linux/of_address.h>
+#endif
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+extern bool event_based_sampling;
+
+/* Maximum number of per-core counters - currently reserves enough space for two full hardware PMUs for big.LITTLE */
+#define CNTMAX 16
+#define CCI_400 4
+#define CCN_5XX 8
+/* Maximum number of uncore counters */
+/* + 1 for the cci-400 cycles counter */
+/* + 1 for the CCN-5xx cycles counter */
+#define UCCNT (CCI_400 + 1 + CCN_5XX + 1)
+
+/* Default to 0 if the revision cannot be probed, matching the previous behavior */
+#define DEFAULT_CCI_REVISION 0
+
+/* A gator_attr is needed for every counter */
+struct gator_attr {
+ /* Set once in gator_events_perf_pmu_*_init - the name of the event in the gatorfs */
+ char name[40];
+ /* Exposed in gatorfs - set by gatord to enable this counter */
+ unsigned long enabled;
+ /* Set once in gator_events_perf_pmu_*_init - the perf type to use, see perf_type_id in the perf_event.h header file. */
+ unsigned long type;
+ /* Exposed in gatorfs - set by gatord to select the event to collect */
+ unsigned long event;
+ /* Exposed in gatorfs - set by gatord with the sample period to use and enable EBS for this counter */
+ unsigned long count;
+ /* Exposed as read only in gatorfs - set once in __attr_init as the key to use in the APC data */
+ unsigned long key;
+};
+
+/* Per-core counter attributes */
+static struct gator_attr attrs[CNTMAX];
+/* Number of initialized per-core counters */
+static int attr_count;
+/* Uncore counter attributes */
+static struct gator_attr uc_attrs[UCCNT];
+/* Number of initialized uncore counters */
+static int uc_attr_count;
+
+struct gator_event {
+ int curr;
+ int prev;
+ int prev_delta;
+ bool zero;
+ struct perf_event *pevent;
+ struct perf_event_attr *pevent_attr;
+};
+
+static DEFINE_PER_CPU(struct gator_event[CNTMAX], events);
+static struct gator_event uc_events[UCCNT];
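+/* Per-cpu buffer of interleaved key/value pairs returned from the read callback */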
+static DEFINE_PER_CPU(int[(CNTMAX + UCCNT)*2], perf_cnt);
+
+static void gator_events_perf_pmu_stop(void);
+
+static int __create_files(struct super_block *sb, struct dentry *root, struct gator_attr *const attr)
+{
+ struct dentry *dir;
+
+ if (attr->name[0] == '\0')
+ return 0;
+ dir = gatorfs_mkdir(sb, root, attr->name);
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &attr->enabled);
+ gatorfs_create_ulong(sb, dir, "count", &attr->count);
+ gatorfs_create_ro_ulong(sb, dir, "key", &attr->key);
+ gatorfs_create_ulong(sb, dir, "event", &attr->event);
+
+ return 0;
+}
+
+static int gator_events_perf_pmu_create_files(struct super_block *sb, struct dentry *root)
+{
+ int cnt;
+
+ for (cnt = 0; cnt < attr_count; cnt++) {
+ if (__create_files(sb, root, &attrs[cnt]) != 0)
+ return -1;
+ }
+
+ for (cnt = 0; cnt < uc_attr_count; cnt++) {
+ if (__create_files(sb, root, &uc_attrs[cnt]) != 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+static void ebs_overflow_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs)
+#else
+static void ebs_overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs)
+#endif
+{
+ gator_backtrace_handler(regs);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+static void dummy_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs)
+#else
+static void dummy_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs)
+#endif
+{
+ /* perf_event_create_kernel_counter() requires an overflow handler, even though all we do is poll */
+}
+
+static int gator_events_perf_pmu_read(int **buffer, bool sched_switch);
+
+static int gator_events_perf_pmu_online(int **buffer, bool migrate)
+{
+ return gator_events_perf_pmu_read(buffer, false);
+}
+
+static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const attr, struct gator_event *const event)
+{
+ perf_overflow_handler_t handler;
+
+ event->zero = true;
+
+ if (event->pevent != NULL || event->pevent_attr == NULL || migrate)
+ return;
+
+ if (attr->count > 0)
+ handler = ebs_overflow_handler;
+ else
+ handler = dummy_handler;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+ event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler);
+#else
+ event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler, 0);
+#endif
+ if (IS_ERR(event->pevent)) {
+ pr_debug("gator: unable to online a counter on cpu %d\n", cpu);
+ event->pevent = NULL;
+ return;
+ }
+
+ if (event->pevent->state != PERF_EVENT_STATE_ACTIVE) {
+ pr_debug("gator: inactive counter on cpu %d\n", cpu);
+ perf_event_release_kernel(event->pevent);
+ event->pevent = NULL;
+ return;
+ }
+}
+
+static void gator_events_perf_pmu_online_dispatch(int cpu, bool migrate)
+{
+ int cnt;
+
+ cpu = pcpu_to_lcpu(cpu);
+
+ for (cnt = 0; cnt < attr_count; cnt++)
+ __online_dispatch(cpu, migrate, &attrs[cnt], &per_cpu(events, cpu)[cnt]);
+
+ if (cpu == 0) {
+ for (cnt = 0; cnt < uc_attr_count; cnt++)
+ __online_dispatch(cpu, migrate, &uc_attrs[cnt], &uc_events[cnt]);
+ }
+}
+
+static void __offline_dispatch(int cpu, struct gator_event *const event)
+{
+ struct perf_event *pe = NULL;
+
+ if (event->pevent) {
+ pe = event->pevent;
+ event->pevent = NULL;
+ }
+
+ if (pe)
+ perf_event_release_kernel(pe);
+}
+
+static void gator_events_perf_pmu_offline_dispatch(int cpu, bool migrate)
+{
+ int cnt;
+
+ if (migrate)
+ return;
+ cpu = pcpu_to_lcpu(cpu);
+
+ for (cnt = 0; cnt < attr_count; cnt++)
+ __offline_dispatch(cpu, &per_cpu(events, cpu)[cnt]);
+
+ if (cpu == 0) {
+ for (cnt = 0; cnt < uc_attr_count; cnt++)
+ __offline_dispatch(cpu, &uc_events[cnt]);
+ }
+}
+
+static int __check_ebs(struct gator_attr *const attr)
+{
+ if (attr->count > 0) {
+ if (!event_based_sampling) {
+ event_based_sampling = true;
+ } else {
+ pr_warning("gator: Only one ebs counter is allowed\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int __start(struct gator_attr *const attr, struct gator_event *const event)
+{
+ u32 size = sizeof(struct perf_event_attr);
+
+ event->pevent = NULL;
+ /* Skip disabled counters */
+ if (!attr->enabled)
+ return 0;
+
+ event->prev = 0;
+ event->curr = 0;
+ event->prev_delta = 0;
+ event->pevent_attr = kmalloc(size, GFP_KERNEL);
+ if (!event->pevent_attr) {
+ gator_events_perf_pmu_stop();
+ return -1;
+ }
+
+ memset(event->pevent_attr, 0, size);
+ event->pevent_attr->type = attr->type;
+ event->pevent_attr->size = size;
+ event->pevent_attr->config = attr->event;
+ event->pevent_attr->sample_period = attr->count;
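+ /* Pin the event so the PMU keeps it scheduled and it is not multiplexed out */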
+ event->pevent_attr->pinned = 1;
+
+ return 0;
+}
+
+static int gator_events_perf_pmu_start(void)
+{
+ int cnt, cpu;
+
+ event_based_sampling = false;
+ for (cnt = 0; cnt < attr_count; cnt++) {
+ if (__check_ebs(&attrs[cnt]) != 0)
+ return -1;
+ }
+
+ for (cnt = 0; cnt < uc_attr_count; cnt++) {
+ if (__check_ebs(&uc_attrs[cnt]) != 0)
+ return -1;
+ }
+
+ for_each_present_cpu(cpu) {
+ for (cnt = 0; cnt < attr_count; cnt++) {
+ if (__start(&attrs[cnt], &per_cpu(events, cpu)[cnt]) != 0)
+ return -1;
+ }
+ }
+
+ for (cnt = 0; cnt < uc_attr_count; cnt++) {
+ if (__start(&uc_attrs[cnt], &uc_events[cnt]) != 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static void __event_stop(struct gator_event *const event)
+{
+ kfree(event->pevent_attr);
+ event->pevent_attr = NULL;
+}
+
+static void __attr_stop(struct gator_attr *const attr)
+{
+ attr->enabled = 0;
+ attr->event = 0;
+ attr->count = 0;
+}
+
+static void gator_events_perf_pmu_stop(void)
+{
+ unsigned int cnt, cpu;
+
+ for_each_present_cpu(cpu) {
+ for (cnt = 0; cnt < attr_count; cnt++)
+ __event_stop(&per_cpu(events, cpu)[cnt]);
+ }
+
+ for (cnt = 0; cnt < uc_attr_count; cnt++)
+ __event_stop(&uc_events[cnt]);
+
+ for (cnt = 0; cnt < attr_count; cnt++)
+ __attr_stop(&attrs[cnt]);
+
+ for (cnt = 0; cnt < uc_attr_count; cnt++)
+ __attr_stop(&uc_attrs[cnt]);
+}
+
+static void __read(int *const len, int cpu, struct gator_attr *const attr, struct gator_event *const event)
+{
+ int delta;
+ struct perf_event *const ev = event->pevent;
+
+ if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) {
+ /* After creating the perf counter in __online_dispatch, there
+ * is a race condition between gator_events_perf_pmu_online and
+ * gator_events_perf_pmu_read. To handle it,
+ * gator_events_perf_pmu_online calls gator_events_perf_pmu_read,
+ * and __read checks whether this is the first call after
+ * __online_dispatch; if so, it runs the online code.
+ */
+ if (event->zero) {
+ ev->pmu->read(ev);
+ event->prev = event->curr = local64_read(&ev->count);
+ event->prev_delta = 0;
+ per_cpu(perf_cnt, cpu)[(*len)++] = attr->key;
+ per_cpu(perf_cnt, cpu)[(*len)++] = 0;
+ event->zero = false;
+ } else {
+ ev->pmu->read(ev);
+ event->curr = local64_read(&ev->count);
+ delta = event->curr - event->prev;
+ if (delta != 0 || delta != event->prev_delta) {
+ event->prev_delta = delta;
+ event->prev = event->curr;
+ per_cpu(perf_cnt, cpu)[(*len)++] = attr->key;
+ if (delta < 0)
+ delta *= -1;
+ per_cpu(perf_cnt, cpu)[(*len)++] = delta;
+ }
+ }
+ }
+}
+
+static int gator_events_perf_pmu_read(int **buffer, bool sched_switch)
+{
+ int cnt, len = 0;
+ const int cpu = get_logical_cpu();
+
+ for (cnt = 0; cnt < attr_count; cnt++)
+ __read(&len, cpu, &attrs[cnt], &per_cpu(events, cpu)[cnt]);
+
+ if (cpu == 0) {
+ for (cnt = 0; cnt < uc_attr_count; cnt++)
+ __read(&len, cpu, &uc_attrs[cnt], &uc_events[cnt]);
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perf_cnt, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_perf_pmu_interface = {
+ .create_files = gator_events_perf_pmu_create_files,
+ .start = gator_events_perf_pmu_start,
+ .stop = gator_events_perf_pmu_stop,
+ .online = gator_events_perf_pmu_online,
+ .online_dispatch = gator_events_perf_pmu_online_dispatch,
+ .offline_dispatch = gator_events_perf_pmu_offline_dispatch,
+ .read = gator_events_perf_pmu_read,
+};
+
+static void __attr_init(struct gator_attr *const attr)
+{
+ attr->name[0] = '\0';
+ attr->enabled = 0;
+ attr->type = 0;
+ attr->event = 0;
+ attr->count = 0;
+ attr->key = gator_events_get_key();
+}
+
+#ifdef CONFIG_OF
+
+static const struct of_device_id arm_cci_matches[] = {
+ {.compatible = "arm,cci-400" },
+ {},
+};
+
+static int probe_cci_revision(void)
+{
+ struct device_node *np;
+ struct resource res;
+ void __iomem *cci_ctrl_base;
+ int rev;
+ int ret = DEFAULT_CCI_REVISION;
+
+ np = of_find_matching_node(NULL, arm_cci_matches);
+ if (!np)
+ return ret;
+
+ if (of_address_to_resource(np, 0, &res))
+ goto node_put;
+
+ cci_ctrl_base = ioremap(res.start, resource_size(&res));
+ if (!cci_ctrl_base)
+ goto node_put;
+
+ rev = (readl_relaxed(cci_ctrl_base + 0xfe8) >> 4) & 0xf;
+
+ if (rev <= 4)
+ ret = 0;
+ else if (rev <= 6)
+ ret = 1;
+
+ iounmap(cci_ctrl_base);
+
+ node_put:
+ of_node_put(np);
+
+ return ret;
+}
+
+#else
+
+static int probe_cci_revision(void)
+{
+ return DEFAULT_CCI_REVISION;
+}
+
+#endif
+
+static void gator_events_perf_pmu_uncore_init(const char *const name, const int type, const int count)
+{
+ int cnt;
+
+ snprintf(uc_attrs[uc_attr_count].name, sizeof(uc_attrs[uc_attr_count].name), "%s_ccnt", name);
+ uc_attrs[uc_attr_count].type = type;
+ ++uc_attr_count;
+
+ for (cnt = 0; cnt < count; ++cnt, ++uc_attr_count) {
+ struct gator_attr *const attr = &uc_attrs[uc_attr_count];
+
+ snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", name, cnt);
+ attr->type = type;
+ }
+}
+
+static void gator_events_perf_pmu_cci_init(const int type)
+{
+ const char *cci_name;
+
+ switch (probe_cci_revision()) {
+ case 0:
+ cci_name = "CCI_400";
+ break;
+ case 1:
+ cci_name = "CCI_400-r1";
+ break;
+ default:
+ pr_debug("gator: unrecognized cci-400 revision\n");
+ return;
+ }
+
+ gator_events_perf_pmu_uncore_init(cci_name, type, CCI_400);
+}
+
+static void gator_events_perf_pmu_cpu_init(const struct gator_cpu *const gator_cpu, const int type)
+{
+ int cnt;
+
+ snprintf(attrs[attr_count].name, sizeof(attrs[attr_count].name), "%s_ccnt", gator_cpu->pmnc_name);
+ attrs[attr_count].type = type;
+ ++attr_count;
+
+ for (cnt = 0; cnt < gator_cpu->pmnc_counters; ++cnt, ++attr_count) {
+ struct gator_attr *const attr = &attrs[attr_count];
+
+ snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", gator_cpu->pmnc_name, cnt);
+ attr->type = type;
+ }
+}
+
+int gator_events_perf_pmu_init(void)
+{
+ struct perf_event_attr pea;
+ struct perf_event *pe;
+ const struct gator_cpu *gator_cpu;
+ int type;
+ int cpu;
+ int cnt;
+ bool found_cpu = false;
+
+ for (cnt = 0; cnt < CNTMAX; cnt++)
+ __attr_init(&attrs[cnt]);
+ for (cnt = 0; cnt < UCCNT; cnt++)
+ __attr_init(&uc_attrs[cnt]);
+
+ memset(&pea, 0, sizeof(pea));
+ pea.size = sizeof(pea);
+ pea.config = 0xFF;
+ attr_count = 0;
+ uc_attr_count = 0;
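+ /* Dynamically allocated PMU types are assigned ids starting at PERF_TYPE_MAX, so probe a range above the fixed types */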
+ for (type = PERF_TYPE_MAX; type < 0x20; ++type) {
+ pea.type = type;
+
+ /* A particular PMU may work on some but not all cores, so try on each core */
+ pe = NULL;
+ for_each_present_cpu(cpu) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+ pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler);
+#else
+ pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler, 0);
+#endif
+ if (!IS_ERR(pe))
+ break;
+ }
+ /* Assume that valid PMUs are contiguous */
+ if (IS_ERR(pe)) {
+ pea.config = 0xff00;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+ pe = perf_event_create_kernel_counter(&pea, 0, 0, dummy_handler);
+#else
+ pe = perf_event_create_kernel_counter(&pea, 0, 0, dummy_handler, 0);
+#endif
+ if (IS_ERR(pe))
+ break;
+ }
+
+ if (pe->pmu != NULL && type == pe->pmu->type) {
+ if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0 || strcmp("CCI_400-r1", pe->pmu->name) == 0) {
+ gator_events_perf_pmu_cci_init(type);
+ } else if (strcmp("ccn", pe->pmu->name) == 0) {
+ gator_events_perf_pmu_uncore_init("ARM_CCN_5XX", type, CCN_5XX);
+ } else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) {
+ found_cpu = true;
+ gator_events_perf_pmu_cpu_init(gator_cpu, type);
+ }
+ /* Initialize gator_attrs for dynamic PMUs here */
+ }
+
+ perf_event_release_kernel(pe);
+ }
+
+ if (!found_cpu) {
+ const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(gator_cpuid());
+
+ if (gator_cpu == NULL)
+ return -1;
+ gator_events_perf_pmu_cpu_init(gator_cpu, PERF_TYPE_RAW);
+ }
+
+ /* Initialize gator_attrs for non-dynamic PMUs here */
+
+ if (attr_count > CNTMAX) {
+ pr_err("gator: Too many perf counters\n");
+ return -1;
+ }
+
+ if (uc_attr_count > UCCNT) {
+ pr_err("gator: Too many perf uncore counters\n");
+ return -1;
+ }
+
+ return gator_events_install(&gator_events_perf_pmu_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_sched.c b/drivers/gator/gator_events_sched.c
new file mode 100644
index 000000000000..637107d6af1d
--- /dev/null
+++ b/drivers/gator/gator_events_sched.c
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/sched.h>
+
+#define SCHED_SWITCH 0
+#define SCHED_TOTAL (SCHED_SWITCH+1)
+
+static ulong sched_switch_enabled;
+static ulong sched_switch_key;
+static DEFINE_PER_CPU(int[SCHED_TOTAL], schedCnt);
+static DEFINE_PER_CPU(int[SCHED_TOTAL * 2], schedGet);
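+/* schedCnt accumulates context-switch counts per cpu; schedGet holds the key/value pairs returned to gatord */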
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
+#else
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
+#endif
+{
+ unsigned long flags;
+
+ /* Disable interrupts to synchronize with gator_events_sched_read();
+ * spinlocks are not needed since per-cpu buffers are used
+ */
+ local_irq_save(flags);
+ per_cpu(schedCnt, get_physical_cpu())[SCHED_SWITCH]++;
+ local_irq_restore(flags);
+}
+
+static int gator_events_sched_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+
+ /* switch */
+ dir = gatorfs_mkdir(sb, root, "Linux_sched_switch");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &sched_switch_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &sched_switch_key);
+
+ return 0;
+}
+
+static int gator_events_sched_start(void)
+{
+ /* register tracepoints */
+ if (sched_switch_enabled)
+ if (GATOR_REGISTER_TRACE(sched_switch))
+ goto sched_switch_exit;
+ pr_debug("gator: registered scheduler event tracepoints\n");
+
+ return 0;
+
+ /* unregister tracepoints on error */
+sched_switch_exit:
+ pr_err("gator: scheduler event tracepoints failed to activate, please verify that tracepoints are enabled in the Linux kernel\n");
+
+ return -1;
+}
+
+static void gator_events_sched_stop(void)
+{
+ if (sched_switch_enabled)
+ GATOR_UNREGISTER_TRACE(sched_switch);
+ pr_debug("gator: unregistered scheduler event tracepoints\n");
+
+ sched_switch_enabled = 0;
+}
+
+static int gator_events_sched_read(int **buffer, bool sched_switch)
+{
+ unsigned long flags;
+ int len, value;
+ int cpu = get_physical_cpu();
+
+ len = 0;
+ if (sched_switch_enabled) {
+ local_irq_save(flags);
+ value = per_cpu(schedCnt, cpu)[SCHED_SWITCH];
+ per_cpu(schedCnt, cpu)[SCHED_SWITCH] = 0;
+ local_irq_restore(flags);
+ per_cpu(schedGet, cpu)[len++] = sched_switch_key;
+ per_cpu(schedGet, cpu)[len++] = value;
+ }
+
+ if (buffer)
+ *buffer = per_cpu(schedGet, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_sched_interface = {
+ .create_files = gator_events_sched_create_files,
+ .start = gator_events_sched_start,
+ .stop = gator_events_sched_stop,
+ .read = gator_events_sched_read,
+};
+
+int gator_events_sched_init(void)
+{
+ sched_switch_enabled = 0;
+
+ sched_switch_key = gator_events_get_key();
+
+ return gator_events_install(&gator_events_sched_interface);
+}
diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c
new file mode 100644
index 000000000000..49219362db09
--- /dev/null
+++ b/drivers/gator/gator_events_scorpion.c
@@ -0,0 +1,674 @@
+/**
+ * Copyright (C) ARM Limited 2011-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+/* gator_events_perf_pmu.c is used if perf is supported */
+#if GATOR_NO_PERF_SUPPORT
+
+static const char *pmnc_name;
+static int pmnc_counters;
+
+/* Per-CPU PMNC: config reg */
+#define PMNC_E (1 << 0) /* Enable all counters */
+#define PMNC_P (1 << 1) /* Reset all counters */
+#define PMNC_C (1 << 2) /* Cycle counter reset */
+#define PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define PMNC_X (1 << 4) /* Export to ETM */
+#define PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug */
+#define PMNC_MASK 0x3f /* Mask for writable bits */
+
+/* ccnt reg */
+#define CCNT_REG (1 << 31)
+
+#define CCNT 0
+#define CNT0 1
+#define CNTMAX (4+1)
+
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+enum scorpion_perf_types {
+ SCORPION_ICACHE_EXPL_INV = 0x4c,
+ SCORPION_ICACHE_MISS = 0x4d,
+ SCORPION_ICACHE_ACCESS = 0x4e,
+ SCORPION_ICACHE_CACHEREQ_L2 = 0x4f,
+ SCORPION_ICACHE_NOCACHE_L2 = 0x50,
+ SCORPION_HIQUP_NOPED = 0x51,
+ SCORPION_DATA_ABORT = 0x52,
+ SCORPION_IRQ = 0x53,
+ SCORPION_FIQ = 0x54,
+ SCORPION_ALL_EXCPT = 0x55,
+ SCORPION_UNDEF = 0x56,
+ SCORPION_SVC = 0x57,
+ SCORPION_SMC = 0x58,
+ SCORPION_PREFETCH_ABORT = 0x59,
+ SCORPION_INDEX_CHECK = 0x5a,
+ SCORPION_NULL_CHECK = 0x5b,
+ SCORPION_EXPL_ICIALLU = 0x5c,
+ SCORPION_IMPL_ICIALLU = 0x5d,
+ SCORPION_NONICIALLU_BTAC_INV = 0x5e,
+ SCORPION_ICIMVAU_IMPL_ICIALLU = 0x5f,
+ SCORPION_SPIPE_ONLY_CYCLES = 0x60,
+ SCORPION_XPIPE_ONLY_CYCLES = 0x61,
+ SCORPION_DUAL_CYCLES = 0x62,
+ SCORPION_DISPATCH_ANY_CYCLES = 0x63,
+ SCORPION_FIFO_FULLBLK_CMT = 0x64,
+ SCORPION_FAIL_COND_INST = 0x65,
+ SCORPION_PASS_COND_INST = 0x66,
+ SCORPION_ALLOW_VU_CLK = 0x67,
+ SCORPION_VU_IDLE = 0x68,
+ SCORPION_ALLOW_L2_CLK = 0x69,
+ SCORPION_L2_IDLE = 0x6a,
+ SCORPION_DTLB_IMPL_INV_SCTLR_DACR = 0x6b,
+ SCORPION_DTLB_EXPL_INV = 0x6c,
+ SCORPION_DTLB_MISS = 0x6d,
+ SCORPION_DTLB_ACCESS = 0x6e,
+ SCORPION_ITLB_MISS = 0x6f,
+ SCORPION_ITLB_IMPL_INV = 0x70,
+ SCORPION_ITLB_EXPL_INV = 0x71,
+ SCORPION_UTLB_D_MISS = 0x72,
+ SCORPION_UTLB_D_ACCESS = 0x73,
+ SCORPION_UTLB_I_MISS = 0x74,
+ SCORPION_UTLB_I_ACCESS = 0x75,
+ SCORPION_UTLB_INV_ASID = 0x76,
+ SCORPION_UTLB_INV_MVA = 0x77,
+ SCORPION_UTLB_INV_ALL = 0x78,
+ SCORPION_S2_HOLD_RDQ_UNAVAIL = 0x79,
+ SCORPION_S2_HOLD = 0x7a,
+ SCORPION_S2_HOLD_DEV_OP = 0x7b,
+ SCORPION_S2_HOLD_ORDER = 0x7c,
+ SCORPION_S2_HOLD_BARRIER = 0x7d,
+ SCORPION_VIU_DUAL_CYCLE = 0x7e,
+ SCORPION_VIU_SINGLE_CYCLE = 0x7f,
+ SCORPION_VX_PIPE_WAR_STALL_CYCLES = 0x80,
+ SCORPION_VX_PIPE_WAW_STALL_CYCLES = 0x81,
+ SCORPION_VX_PIPE_RAW_STALL_CYCLES = 0x82,
+ SCORPION_VX_PIPE_LOAD_USE_STALL = 0x83,
+ SCORPION_VS_PIPE_WAR_STALL_CYCLES = 0x84,
+ SCORPION_VS_PIPE_WAW_STALL_CYCLES = 0x85,
+ SCORPION_VS_PIPE_RAW_STALL_CYCLES = 0x86,
+ SCORPION_EXCEPTIONS_INV_OPERATION = 0x87,
+ SCORPION_EXCEPTIONS_DIV_BY_ZERO = 0x88,
+ SCORPION_COND_INST_FAIL_VX_PIPE = 0x89,
+ SCORPION_COND_INST_FAIL_VS_PIPE = 0x8a,
+ SCORPION_EXCEPTIONS_OVERFLOW = 0x8b,
+ SCORPION_EXCEPTIONS_UNDERFLOW = 0x8c,
+ SCORPION_EXCEPTIONS_DENORM = 0x8d,
+#ifdef CONFIG_ARCH_MSM_SCORPIONMP
+ SCORPIONMP_NUM_BARRIERS = 0x8e,
+ SCORPIONMP_BARRIER_CYCLES = 0x8f,
+#else
+ SCORPION_BANK_AB_HIT = 0x8e,
+ SCORPION_BANK_AB_ACCESS = 0x8f,
+ SCORPION_BANK_CD_HIT = 0x90,
+ SCORPION_BANK_CD_ACCESS = 0x91,
+ SCORPION_BANK_AB_DSIDE_HIT = 0x92,
+ SCORPION_BANK_AB_DSIDE_ACCESS = 0x93,
+ SCORPION_BANK_CD_DSIDE_HIT = 0x94,
+ SCORPION_BANK_CD_DSIDE_ACCESS = 0x95,
+ SCORPION_BANK_AB_ISIDE_HIT = 0x96,
+ SCORPION_BANK_AB_ISIDE_ACCESS = 0x97,
+ SCORPION_BANK_CD_ISIDE_HIT = 0x98,
+ SCORPION_BANK_CD_ISIDE_ACCESS = 0x99,
+ SCORPION_ISIDE_RD_WAIT = 0x9a,
+ SCORPION_DSIDE_RD_WAIT = 0x9b,
+ SCORPION_BANK_BYPASS_WRITE = 0x9c,
+ SCORPION_BANK_AB_NON_CASTOUT = 0x9d,
+ SCORPION_BANK_AB_L2_CASTOUT = 0x9e,
+ SCORPION_BANK_CD_NON_CASTOUT = 0x9f,
+ SCORPION_BANK_CD_L2_CASTOUT = 0xa0,
+#endif
+ MSM_MAX_EVT
+};
+
+struct scorp_evt {
+ u32 evt_type;
+ u32 val;
+ u8 grp;
+ u32 evt_type_act;
+};
+
+static const struct scorp_evt sc_evt[] = {
+ {SCORPION_ICACHE_EXPL_INV, 0x80000500, 0, 0x4d},
+ {SCORPION_ICACHE_MISS, 0x80050000, 0, 0x4e},
+ {SCORPION_ICACHE_ACCESS, 0x85000000, 0, 0x4f},
+ {SCORPION_ICACHE_CACHEREQ_L2, 0x86000000, 0, 0x4f},
+ {SCORPION_ICACHE_NOCACHE_L2, 0x87000000, 0, 0x4f},
+ {SCORPION_HIQUP_NOPED, 0x80080000, 0, 0x4e},
+ {SCORPION_DATA_ABORT, 0x8000000a, 0, 0x4c},
+ {SCORPION_IRQ, 0x80000a00, 0, 0x4d},
+ {SCORPION_FIQ, 0x800a0000, 0, 0x4e},
+ {SCORPION_ALL_EXCPT, 0x8a000000, 0, 0x4f},
+ {SCORPION_UNDEF, 0x8000000b, 0, 0x4c},
+ {SCORPION_SVC, 0x80000b00, 0, 0x4d},
+ {SCORPION_SMC, 0x800b0000, 0, 0x4e},
+ {SCORPION_PREFETCH_ABORT, 0x8b000000, 0, 0x4f},
+ {SCORPION_INDEX_CHECK, 0x8000000c, 0, 0x4c},
+ {SCORPION_NULL_CHECK, 0x80000c00, 0, 0x4d},
+ {SCORPION_EXPL_ICIALLU, 0x8000000d, 0, 0x4c},
+ {SCORPION_IMPL_ICIALLU, 0x80000d00, 0, 0x4d},
+ {SCORPION_NONICIALLU_BTAC_INV, 0x800d0000, 0, 0x4e},
+ {SCORPION_ICIMVAU_IMPL_ICIALLU, 0x8d000000, 0, 0x4f},
+
+ {SCORPION_SPIPE_ONLY_CYCLES, 0x80000600, 1, 0x51},
+ {SCORPION_XPIPE_ONLY_CYCLES, 0x80060000, 1, 0x52},
+ {SCORPION_DUAL_CYCLES, 0x86000000, 1, 0x53},
+ {SCORPION_DISPATCH_ANY_CYCLES, 0x89000000, 1, 0x53},
+ {SCORPION_FIFO_FULLBLK_CMT, 0x8000000d, 1, 0x50},
+ {SCORPION_FAIL_COND_INST, 0x800d0000, 1, 0x52},
+ {SCORPION_PASS_COND_INST, 0x8d000000, 1, 0x53},
+ {SCORPION_ALLOW_VU_CLK, 0x8000000e, 1, 0x50},
+ {SCORPION_VU_IDLE, 0x80000e00, 1, 0x51},
+ {SCORPION_ALLOW_L2_CLK, 0x800e0000, 1, 0x52},
+ {SCORPION_L2_IDLE, 0x8e000000, 1, 0x53},
+
+ {SCORPION_DTLB_IMPL_INV_SCTLR_DACR, 0x80000001, 2, 0x54},
+ {SCORPION_DTLB_EXPL_INV, 0x80000100, 2, 0x55},
+ {SCORPION_DTLB_MISS, 0x80010000, 2, 0x56},
+ {SCORPION_DTLB_ACCESS, 0x81000000, 2, 0x57},
+ {SCORPION_ITLB_MISS, 0x80000200, 2, 0x55},
+ {SCORPION_ITLB_IMPL_INV, 0x80020000, 2, 0x56},
+ {SCORPION_ITLB_EXPL_INV, 0x82000000, 2, 0x57},
+ {SCORPION_UTLB_D_MISS, 0x80000003, 2, 0x54},
+ {SCORPION_UTLB_D_ACCESS, 0x80000300, 2, 0x55},
+ {SCORPION_UTLB_I_MISS, 0x80030000, 2, 0x56},
+ {SCORPION_UTLB_I_ACCESS, 0x83000000, 2, 0x57},
+ {SCORPION_UTLB_INV_ASID, 0x80000400, 2, 0x55},
+ {SCORPION_UTLB_INV_MVA, 0x80040000, 2, 0x56},
+ {SCORPION_UTLB_INV_ALL, 0x84000000, 2, 0x57},
+ {SCORPION_S2_HOLD_RDQ_UNAVAIL, 0x80000800, 2, 0x55},
+ {SCORPION_S2_HOLD, 0x88000000, 2, 0x57},
+ {SCORPION_S2_HOLD_DEV_OP, 0x80000900, 2, 0x55},
+ {SCORPION_S2_HOLD_ORDER, 0x80090000, 2, 0x56},
+ {SCORPION_S2_HOLD_BARRIER, 0x89000000, 2, 0x57},
+
+ {SCORPION_VIU_DUAL_CYCLE, 0x80000001, 4, 0x5c},
+ {SCORPION_VIU_SINGLE_CYCLE, 0x80000100, 4, 0x5d},
+ {SCORPION_VX_PIPE_WAR_STALL_CYCLES, 0x80000005, 4, 0x5c},
+ {SCORPION_VX_PIPE_WAW_STALL_CYCLES, 0x80000500, 4, 0x5d},
+ {SCORPION_VX_PIPE_RAW_STALL_CYCLES, 0x80050000, 4, 0x5e},
+ {SCORPION_VX_PIPE_LOAD_USE_STALL, 0x80000007, 4, 0x5c},
+ {SCORPION_VS_PIPE_WAR_STALL_CYCLES, 0x80000008, 4, 0x5c},
+ {SCORPION_VS_PIPE_WAW_STALL_CYCLES, 0x80000800, 4, 0x5d},
+ {SCORPION_VS_PIPE_RAW_STALL_CYCLES, 0x80080000, 4, 0x5e},
+ {SCORPION_EXCEPTIONS_INV_OPERATION, 0x8000000b, 4, 0x5c},
+ {SCORPION_EXCEPTIONS_DIV_BY_ZERO, 0x80000b00, 4, 0x5d},
+ {SCORPION_COND_INST_FAIL_VX_PIPE, 0x800b0000, 4, 0x5e},
+ {SCORPION_COND_INST_FAIL_VS_PIPE, 0x8b000000, 4, 0x5f},
+ {SCORPION_EXCEPTIONS_OVERFLOW, 0x8000000c, 4, 0x5c},
+ {SCORPION_EXCEPTIONS_UNDERFLOW, 0x80000c00, 4, 0x5d},
+ {SCORPION_EXCEPTIONS_DENORM, 0x8c000000, 4, 0x5f},
+
+#ifdef CONFIG_ARCH_MSM_SCORPIONMP
+ {SCORPIONMP_NUM_BARRIERS, 0x80000e00, 3, 0x59},
+ {SCORPIONMP_BARRIER_CYCLES, 0x800e0000, 3, 0x5a},
+#else
+ {SCORPION_BANK_AB_HIT, 0x80000001, 3, 0x58},
+ {SCORPION_BANK_AB_ACCESS, 0x80000100, 3, 0x59},
+ {SCORPION_BANK_CD_HIT, 0x80010000, 3, 0x5a},
+ {SCORPION_BANK_CD_ACCESS, 0x81000000, 3, 0x5b},
+ {SCORPION_BANK_AB_DSIDE_HIT, 0x80000002, 3, 0x58},
+ {SCORPION_BANK_AB_DSIDE_ACCESS, 0x80000200, 3, 0x59},
+ {SCORPION_BANK_CD_DSIDE_HIT, 0x80020000, 3, 0x5a},
+ {SCORPION_BANK_CD_DSIDE_ACCESS, 0x82000000, 3, 0x5b},
+ {SCORPION_BANK_AB_ISIDE_HIT, 0x80000003, 3, 0x58},
+ {SCORPION_BANK_AB_ISIDE_ACCESS, 0x80000300, 3, 0x59},
+ {SCORPION_BANK_CD_ISIDE_HIT, 0x80030000, 3, 0x5a},
+ {SCORPION_BANK_CD_ISIDE_ACCESS, 0x83000000, 3, 0x5b},
+ {SCORPION_ISIDE_RD_WAIT, 0x80000009, 3, 0x58},
+ {SCORPION_DSIDE_RD_WAIT, 0x80090000, 3, 0x5a},
+ {SCORPION_BANK_BYPASS_WRITE, 0x8000000a, 3, 0x58},
+ {SCORPION_BANK_AB_NON_CASTOUT, 0x8000000c, 3, 0x58},
+ {SCORPION_BANK_AB_L2_CASTOUT, 0x80000c00, 3, 0x59},
+ {SCORPION_BANK_CD_NON_CASTOUT, 0x800c0000, 3, 0x5a},
+ {SCORPION_BANK_CD_L2_CASTOUT, 0x8c000000, 3, 0x5b},
+#endif
+};
+
+static inline void scorpion_pmnc_write(u32 val)
+{
+ val &= PMNC_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val));
+}
+
+static inline u32 scorpion_pmnc_read(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+ return val;
+}
+
+static inline u32 scorpion_ccnt_read(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+ return val;
+}
+
+static inline u32 scorpion_cntn_read(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+ return val;
+}
+
+static inline u32 scorpion_pmnc_enable_counter(unsigned int cnt)
+{
+ u32 val;
+
+ if (cnt >= CNTMAX) {
+ pr_err("gator: CPU%u enabling wrong PMNC counter %d\n", smp_processor_id(), cnt);
+ return -1;
+ }
+
+ if (cnt == CCNT)
+ val = CCNT_REG;
+ else
+ val = (1 << (cnt - CNT0));
+
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
+
+ return cnt;
+}
+
+static inline u32 scorpion_pmnc_disable_counter(unsigned int cnt)
+{
+ u32 val;
+
+ if (cnt >= CNTMAX) {
+ pr_err("gator: CPU%u disabling wrong PMNC counter %d\n", smp_processor_id(), cnt);
+ return -1;
+ }
+
+ if (cnt == CCNT)
+ val = CCNT_REG;
+ else
+ val = (1 << (cnt - CNT0));
+
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+
+ return cnt;
+}
+
+static inline int scorpion_pmnc_select_counter(unsigned int cnt)
+{
+ u32 val;
+
+ if ((cnt == CCNT) || (cnt >= CNTMAX)) {
+ pr_err("gator: CPU%u selecting wrong PMNC counter %d\n", smp_processor_id(), cnt);
+ return -1;
+ }
+
+ val = (cnt - CNT0);
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+
+ return cnt;
+}
+
+static u32 scorpion_read_lpm0(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+ return val;
+}
+
+static void scorpion_write_lpm0(u32 val)
+{
+ asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+}
+
+static u32 scorpion_read_lpm1(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+ return val;
+}
+
+static void scorpion_write_lpm1(u32 val)
+{
+ asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+}
+
+static u32 scorpion_read_lpm2(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+ return val;
+}
+
+static void scorpion_write_lpm2(u32 val)
+{
+ asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+}
+
+static u32 scorpion_read_l2lpm(void)
+{
+ u32 val;
+
+ asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+ return val;
+}
+
+static void scorpion_write_l2lpm(u32 val)
+{
+ asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+}
+
+static u32 scorpion_read_vlpm(void)
+{
+ u32 val;
+
+ asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
+ return val;
+}
+
+static void scorpion_write_vlpm(u32 val)
+{
+ asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
+}
+
+struct scorpion_access_funcs {
+ u32 (*read)(void);
+ void (*write)(u32);
+};
+
+struct scorpion_access_funcs scor_func[] = {
+ {scorpion_read_lpm0, scorpion_write_lpm0},
+ {scorpion_read_lpm1, scorpion_write_lpm1},
+ {scorpion_read_lpm2, scorpion_write_lpm2},
+ {scorpion_read_l2lpm, scorpion_write_l2lpm},
+ {scorpion_read_vlpm, scorpion_write_vlpm},
+};
+
+u32 venum_orig_val;
+u32 fp_orig_val;
+
+static void scorpion_pre_vlpm(void)
+{
+ u32 venum_new_val;
+ u32 fp_new_val;
+
+ /* CPACR Enable CP10 access */
+ asm volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (venum_orig_val));
+ venum_new_val = venum_orig_val | 0x00300000;
+ asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (venum_new_val));
+ /* Enable FPEXC */
+ asm volatile("mrc p10, 7, %0, c8, c0, 0" : "=r" (fp_orig_val));
+ fp_new_val = fp_orig_val | 0x40000000;
+ asm volatile("mcr p10, 7, %0, c8, c0, 0" : : "r" (fp_new_val));
+}
+
+static void scorpion_post_vlpm(void)
+{
+ /* Restore FPEXC */
+ asm volatile("mcr p10, 7, %0, c8, c0, 0" : : "r" (fp_orig_val));
+ /* Restore CPACR */
+ asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (venum_orig_val));
+}
+
+#define COLMN0MASK 0x000000ff
+#define COLMN1MASK 0x0000ff00
+#define COLMN2MASK 0x00ff0000
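+/* Each event occupies one byte-wide column of an LPM register; build a mask that clears only the column being programmed */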
+static u32 scorpion_get_columnmask(u32 setval)
+{
+ if (setval & COLMN0MASK)
+ return 0xffffff00;
+ if (setval & COLMN1MASK)
+ return 0xffff00ff;
+ if (setval & COLMN2MASK)
+ return 0xff00ffff;
+ return 0x80ffffff;
+}
+
+static void scorpion_evt_setup(u32 gr, u32 setval)
+{
+ u32 val;
+
+ if (gr == 4)
+ scorpion_pre_vlpm();
+ val = scorpion_get_columnmask(setval) & scor_func[gr].read();
+ val = val | setval;
+ scor_func[gr].write(val);
+ if (gr == 4)
+ scorpion_post_vlpm();
+}
+
+static int get_scorpion_evtinfo(unsigned int evt_type, struct scorp_evt *evtinfo)
+{
+ u32 idx;
+
+ if ((evt_type < 0x4c) || (evt_type >= MSM_MAX_EVT))
+ return 0;
+ idx = evt_type - 0x4c;
+ if (sc_evt[idx].evt_type == evt_type) {
+ evtinfo->val = sc_evt[idx].val;
+ evtinfo->grp = sc_evt[idx].grp;
+ evtinfo->evt_type_act = sc_evt[idx].evt_type_act;
+ return 1;
+ }
+ return 0;
+}
+
+static inline void scorpion_pmnc_write_evtsel(unsigned int cnt, u32 val)
+{
+ if (scorpion_pmnc_select_counter(cnt) == cnt) {
+ if (val < 0x40) {
+ asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+ } else {
+ u32 zero = 0;
+ struct scorp_evt evtinfo;
+ /* extract evtinfo.grp and evtinfo.evt_type_act from val */
+ if (get_scorpion_evtinfo(val, &evtinfo) == 0)
+ return;
+ asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (evtinfo.evt_type_act));
+ asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (zero));
+ scorpion_evt_setup(evtinfo.grp, val);
+ }
+ }
+}
+
+static void scorpion_pmnc_reset_counter(unsigned int cnt)
+{
+ u32 val = 0;
+
+ if (cnt == CCNT) {
+ scorpion_pmnc_disable_counter(cnt);
+
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (val));
+
+ if (pmnc_enabled[cnt] != 0)
+ scorpion_pmnc_enable_counter(cnt);
+
+ } else if (cnt >= CNTMAX) {
+ pr_err("gator: CPU%u resetting wrong PMNC counter %d\n", smp_processor_id(), cnt);
+ } else {
+ scorpion_pmnc_disable_counter(cnt);
+
+ if (scorpion_pmnc_select_counter(cnt) == cnt)
+ asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (val));
+
+ if (pmnc_enabled[cnt] != 0)
+ scorpion_pmnc_enable_counter(cnt);
+ }
+}
+
+static int gator_events_scorpion_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+
+ for (i = 0; i < pmnc_counters; i++) {
+ char buf[40];
+
+ if (i == 0)
+ snprintf(buf, sizeof(buf), "%s_ccnt", pmnc_name);
+ else
+ snprintf(buf, sizeof(buf), "%s_cnt%d", pmnc_name, i - 1);
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
+ if (i > 0)
+ gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
+ }
+
+ return 0;
+}
+
+static int gator_events_scorpion_online(int **buffer, bool migrate)
+{
+ unsigned int cnt, len = 0, cpu = smp_processor_id();
+
+ if (scorpion_pmnc_read() & PMNC_E)
+ scorpion_pmnc_write(scorpion_pmnc_read() & ~PMNC_E);
+
+ /* Initialize & Reset PMNC: C bit and P bit */
+ scorpion_pmnc_write(PMNC_P | PMNC_C);
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ unsigned long event;
+
+ if (!pmnc_enabled[cnt])
+ continue;
+
+ /* disable counter */
+ scorpion_pmnc_disable_counter(cnt);
+
+ event = pmnc_event[cnt] & 255;
+
+ /* Set the event for the PMNx counters; the cycle counter needs no event set */
+ if (cnt != CCNT)
+ scorpion_pmnc_write_evtsel(cnt, event);
+
+ /* reset counter */
+ scorpion_pmnc_reset_counter(cnt);
+
+ /* Enable counter, do not enable interrupt for this counter */
+ scorpion_pmnc_enable_counter(cnt);
+ }
+
+ /* enable */
+ scorpion_pmnc_write(scorpion_pmnc_read() | PMNC_E);
+
+ /* Read the counters, discard the stale values, and report zero instead */
+ for (cnt = 0; cnt < pmnc_counters; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ if (cnt == CCNT)
+ scorpion_ccnt_read();
+ else if (scorpion_pmnc_select_counter(cnt) == cnt)
+ scorpion_cntn_read();
+ scorpion_pmnc_reset_counter(cnt);
+
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = 0;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static int gator_events_scorpion_offline(int **buffer, bool migrate)
+{
+ scorpion_pmnc_write(scorpion_pmnc_read() & ~PMNC_E);
+ return 0;
+}
+
+static void gator_events_scorpion_stop(void)
+{
+ unsigned int cnt;
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ }
+}
+
+static int gator_events_scorpion_read(int **buffer, bool sched_switch)
+{
+ int cnt, len = 0;
+ int cpu = smp_processor_id();
+
+ /* A context switch may occur before the online hotplug event, so check that the PMU is enabled */
+ if (!(scorpion_pmnc_read() & PMNC_E))
+ return 0;
+
+ for (cnt = 0; cnt < pmnc_counters; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ int value;
+
+ if (cnt == CCNT)
+ value = scorpion_ccnt_read();
+ else if (scorpion_pmnc_select_counter(cnt) == cnt)
+ value = scorpion_cntn_read();
+ else
+ value = 0;
+ scorpion_pmnc_reset_counter(cnt);
+
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = value;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_scorpion_interface = {
+ .create_files = gator_events_scorpion_create_files,
+ .stop = gator_events_scorpion_stop,
+ .online = gator_events_scorpion_online,
+ .offline = gator_events_scorpion_offline,
+ .read = gator_events_scorpion_read,
+};
+
+int gator_events_scorpion_init(void)
+{
+ unsigned int cnt;
+
+ switch (gator_cpuid()) {
+ case SCORPION:
+ pmnc_name = "Scorpion";
+ pmnc_counters = 4;
+ break;
+ case SCORPIONMP:
+ pmnc_name = "ScorpionMP";
+ pmnc_counters = 4;
+ break;
+ default:
+ return -1;
+ }
+
+ /* CNT[n] + CCNT */
+ pmnc_counters++;
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ pmnc_key[cnt] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_scorpion_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_fs.c b/drivers/gator/gator_fs.c
new file mode 100644
index 000000000000..d8fb357b9eda
--- /dev/null
+++ b/drivers/gator/gator_fs.c
@@ -0,0 +1,370 @@
+/**
+ * @file gatorfs.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * A simple filesystem for configuration and
+ * access of oprofile.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/uaccess.h>
+
+#define gatorfs_MAGIC 0x24051020
+#define TMPBUFSIZE 50
+DEFINE_SPINLOCK(gatorfs_lock);
+
+static struct inode *gatorfs_get_inode(struct super_block *sb, int mode)
+{
+ struct inode *inode = new_inode(sb);
+
+ if (inode) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
+ inode->i_ino = get_next_ino();
+#endif
+ inode->i_mode = mode;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ }
+ return inode;
+}
+
+static const struct super_operations s_ops = {
+ .statfs = simple_statfs,
+ .drop_inode = generic_delete_inode,
+};
+
+static ssize_t gatorfs_ulong_to_user(unsigned long val, char __user *buf, size_t count, loff_t *offset)
+{
+ char tmpbuf[TMPBUFSIZE];
+ size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val);
+
+ if (maxlen > TMPBUFSIZE)
+ maxlen = TMPBUFSIZE;
+ return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen);
+}
+
+static ssize_t gatorfs_u64_to_user(u64 val, char __user *buf, size_t count, loff_t *offset)
+{
+ char tmpbuf[TMPBUFSIZE];
+ size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%llu\n", val);
+
+ if (maxlen > TMPBUFSIZE)
+ maxlen = TMPBUFSIZE;
+ return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen);
+}
+
+static int gatorfs_ulong_from_user(unsigned long *val, char const __user *buf, size_t count)
+{
+ char tmpbuf[TMPBUFSIZE];
+ unsigned long flags;
+
+ if (!count)
+ return 0;
+
+ if (count > TMPBUFSIZE - 1)
+ return -EINVAL;
+
+ memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+ if (copy_from_user(tmpbuf, buf, count))
+ return -EFAULT;
+
+ spin_lock_irqsave(&gatorfs_lock, flags);
+ *val = simple_strtoul(tmpbuf, NULL, 0);
+ spin_unlock_irqrestore(&gatorfs_lock, flags);
+ return 0;
+}
+
+static int gatorfs_u64_from_user(u64 *val, char const __user *buf, size_t count)
+{
+ char tmpbuf[TMPBUFSIZE];
+ unsigned long flags;
+
+ if (!count)
+ return 0;
+
+ if (count > TMPBUFSIZE - 1)
+ return -EINVAL;
+
+ memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+ if (copy_from_user(tmpbuf, buf, count))
+ return -EFAULT;
+
+ spin_lock_irqsave(&gatorfs_lock, flags);
+ *val = simple_strtoull(tmpbuf, NULL, 0);
+ spin_unlock_irqrestore(&gatorfs_lock, flags);
+ return 0;
+}
+
+static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ unsigned long *val = file->private_data;
+
+ return gatorfs_ulong_to_user(*val, buf, count, offset);
+}
+
+static ssize_t u64_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ u64 *val = file->private_data;
+
+ return gatorfs_u64_to_user(*val, buf, count, offset);
+}
+
+static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+ unsigned long *value = file->private_data;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = gatorfs_ulong_from_user(value, buf, count);
+
+ if (retval)
+ return retval;
+ return count;
+}
+
+static ssize_t u64_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+ u64 *value = file->private_data;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = gatorfs_u64_from_user(value, buf, count);
+
+ if (retval)
+ return retval;
+ return count;
+}
+
+static int default_open(struct inode *inode, struct file *filp)
+{
+ if (inode->i_private)
+ filp->private_data = inode->i_private;
+ return 0;
+}
+
+static const struct file_operations ulong_fops = {
+ .read = ulong_read_file,
+ .write = ulong_write_file,
+ .open = default_open,
+};
+
+static const struct file_operations u64_fops = {
+ .read = u64_read_file,
+ .write = u64_write_file,
+ .open = default_open,
+};
+
+static const struct file_operations ulong_ro_fops = {
+ .read = ulong_read_file,
+ .open = default_open,
+};
+
+static const struct file_operations u64_ro_fops = {
+ .read = u64_read_file,
+ .open = default_open,
+};
+
+static struct dentry *__gatorfs_create_file(struct super_block *sb,
+ struct dentry *root,
+ char const *name,
+ const struct file_operations *fops,
+ int perm)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ dentry = d_alloc_name(root, name);
+ if (!dentry)
+ return NULL;
+ inode = gatorfs_get_inode(sb, S_IFREG | perm);
+ if (!inode) {
+ dput(dentry);
+ return NULL;
+ }
+ inode->i_fop = fops;
+ d_add(dentry, inode);
+ return dentry;
+}
+
+int gatorfs_create_ulong(struct super_block *sb, struct dentry *root,
+ char const *name, unsigned long *val)
+{
+ struct dentry *d = __gatorfs_create_file(sb, root, name,
+ &ulong_fops, 0644);
+ if (!d)
+ return -EFAULT;
+
+ d->d_inode->i_private = val;
+ return 0;
+}
+
+static int gatorfs_create_u64(struct super_block *sb, struct dentry *root,
+ char const *name, u64 *val)
+{
+ struct dentry *d = __gatorfs_create_file(sb, root, name,
+ &u64_fops, 0644);
+ if (!d)
+ return -EFAULT;
+
+ d->d_inode->i_private = val;
+ return 0;
+}
+
+int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
+ char const *name, unsigned long *val)
+{
+ struct dentry *d = __gatorfs_create_file(sb, root, name,
+ &ulong_ro_fops, 0444);
+ if (!d)
+ return -EFAULT;
+
+ d->d_inode->i_private = val;
+ return 0;
+}
+
+static int gatorfs_create_ro_u64(struct super_block *sb, struct dentry *root,
+ char const *name, u64 *val)
+{
+ struct dentry *d =
+ __gatorfs_create_file(sb, root, name, &u64_ro_fops, 0444);
+ if (!d)
+ return -EFAULT;
+
+ d->d_inode->i_private = val;
+ return 0;
+}
+
+static ssize_t atomic_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ atomic_t *val = file->private_data;
+
+ return gatorfs_ulong_to_user(atomic_read(val), buf, count, offset);
+}
+
+static const struct file_operations atomic_ro_fops = {
+ .read = atomic_read_file,
+ .open = default_open,
+};
+
+static int gatorfs_create_file(struct super_block *sb, struct dentry *root,
+ char const *name, const struct file_operations *fops)
+{
+ if (!__gatorfs_create_file(sb, root, name, fops, 0644))
+ return -EFAULT;
+ return 0;
+}
+
+static int gatorfs_create_file_perm(struct super_block *sb, struct dentry *root,
+ char const *name,
+ const struct file_operations *fops, int perm)
+{
+ if (!__gatorfs_create_file(sb, root, name, fops, perm))
+ return -EFAULT;
+ return 0;
+}
+
+struct dentry *gatorfs_mkdir(struct super_block *sb,
+ struct dentry *root, char const *name)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ dentry = d_alloc_name(root, name);
+ if (!dentry)
+ return NULL;
+ inode = gatorfs_get_inode(sb, S_IFDIR | 0755);
+ if (!inode) {
+ dput(dentry);
+ return NULL;
+ }
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ d_add(dentry, inode);
+ return dentry;
+}
+
+static int gatorfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct inode *root_inode;
+ struct dentry *root_dentry;
+
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = gatorfs_MAGIC;
+ sb->s_op = &s_ops;
+ sb->s_time_gran = 1;
+
+ root_inode = gatorfs_get_inode(sb, S_IFDIR | 0755);
+ if (!root_inode)
+ return -ENOMEM;
+ root_inode->i_op = &simple_dir_inode_operations;
+ root_inode->i_fop = &simple_dir_operations;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+ root_dentry = d_alloc_root(root_inode);
+#else
+ root_dentry = d_make_root(root_inode);
+#endif
+
+ if (!root_dentry) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+ iput(root_inode);
+#endif
+ return -ENOMEM;
+ }
+
+ sb->s_root = root_dentry;
+
+ gator_op_create_files(sb, root_dentry);
+
+ return 0;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
+static int gatorfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data,
+ struct vfsmount *mnt)
+{
+ return get_sb_single(fs_type, flags, data, gatorfs_fill_super, mnt);
+}
+#else
+static struct dentry *gatorfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ return mount_nodev(fs_type, flags, data, gatorfs_fill_super);
+}
+#endif
+
+static struct file_system_type gatorfs_type = {
+ .owner = THIS_MODULE,
+ .name = "gatorfs",
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
+ .get_sb = gatorfs_get_sb,
+#else
+ .mount = gatorfs_mount,
+#endif
+
+ .kill_sb = kill_litter_super,
+};
+
+static int __init gatorfs_register(void)
+{
+ return register_filesystem(&gatorfs_type);
+}
+
+static void gatorfs_unregister(void)
+{
+ unregister_filesystem(&gatorfs_type);
+}
diff --git a/drivers/gator/gator_hrtimer_gator.c b/drivers/gator/gator_hrtimer_gator.c
new file mode 100644
index 000000000000..c1525e10a8da
--- /dev/null
+++ b/drivers/gator/gator_hrtimer_gator.c
@@ -0,0 +1,80 @@
+/**
+ * Copyright (C) ARM Limited 2011-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+void (*callback)(void);
+DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer);
+DEFINE_PER_CPU(ktime_t, hrtimer_expire);
+DEFINE_PER_CPU(int, hrtimer_is_active);
+static ktime_t profiling_interval;
+static void gator_hrtimer_online(void);
+static void gator_hrtimer_offline(void);
+
+static enum hrtimer_restart gator_hrtimer_notify(struct hrtimer *hrtimer)
+{
+ int cpu = get_logical_cpu();
+
+ hrtimer_forward(hrtimer, per_cpu(hrtimer_expire, cpu), profiling_interval);
+ per_cpu(hrtimer_expire, cpu) = ktime_add(per_cpu(hrtimer_expire, cpu), profiling_interval);
+ (*callback)();
+ return HRTIMER_RESTART;
+}
+
+static void gator_hrtimer_online(void)
+{
+ int cpu = get_logical_cpu();
+ struct hrtimer *hrtimer = &per_cpu(percpu_hrtimer, cpu);
+
+ if (per_cpu(hrtimer_is_active, cpu) || profiling_interval.tv64 == 0)
+ return;
+
+ per_cpu(hrtimer_is_active, cpu) = 1;
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer->function = gator_hrtimer_notify;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ hrtimer->irqsafe = 1;
+#endif
+ per_cpu(hrtimer_expire, cpu) = ktime_add(hrtimer->base->get_time(), profiling_interval);
+ hrtimer_start(hrtimer, per_cpu(hrtimer_expire, cpu), HRTIMER_MODE_ABS_PINNED);
+}
+
+static void gator_hrtimer_offline(void)
+{
+ int cpu = get_logical_cpu();
+ struct hrtimer *hrtimer = &per_cpu(percpu_hrtimer, cpu);
+
+ if (!per_cpu(hrtimer_is_active, cpu))
+ return;
+
+ per_cpu(hrtimer_is_active, cpu) = 0;
+ hrtimer_cancel(hrtimer);
+}
+
+static int gator_hrtimer_init(int interval, void (*func)(void))
+{
+ int cpu;
+
+	callback = func;
+
+ for_each_present_cpu(cpu) {
+ per_cpu(hrtimer_is_active, cpu) = 0;
+ }
+
+	/* Calculate the profiling interval: convert the sample rate (samples/sec) into an hrtimer period, e.g. 1000 -> 1 ms */
+ if (interval > 0)
+ profiling_interval = ns_to_ktime(1000000000UL / interval);
+ else
+ profiling_interval.tv64 = 0;
+
+ return 0;
+}
+
+static void gator_hrtimer_shutdown(void)
+{
+ /* empty */
+}
diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c
new file mode 100644
index 000000000000..fb78c10fd987
--- /dev/null
+++ b/drivers/gator/gator_iks.c
@@ -0,0 +1,197 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#if GATOR_IKS_SUPPORT
+
+#include <linux/of.h>
+#include <asm/bL_switcher.h>
+#include <asm/smp_plat.h>
+#include <trace/events/power_cpu_migrate.h>
+
+static bool map_cpuids;
+static int mpidr_cpuids[NR_CPUS];
+static const struct gator_cpu *mpidr_cpus[NR_CPUS];
+static int __lcpu_to_pcpu[NR_CPUS];
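+
+/* A "logical" cpu id is the kernel's view (smp_processor_id); a
+ * "physical" id is the index of the matching MPIDR in mpidr_cpuids,
+ * built from the device tree below. Mapping between the two keeps
+ * counters attached to the same physical core across a bL switch.
+ */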
+
+static const struct gator_cpu *gator_find_cpu_by_dt_name(const char *const name)
+{
+ int i;
+
+ for (i = 0; gator_cpus[i].cpuid != 0; ++i) {
+ const struct gator_cpu *const gator_cpu = &gator_cpus[i];
+
+ if (gator_cpu->dt_name != NULL && strcmp(gator_cpu->dt_name, name) == 0)
+ return gator_cpu;
+ }
+
+ return NULL;
+}
+
+static void calc_first_cluster_size(void)
+{
+ int len;
+ const u32 *val;
+ const char *compatible;
+ struct device_node *cn = NULL;
+ int mpidr_cpuids_count = 0;
+
+ /* Zero is a valid cpuid, so initialize the array to 0xff's */
+ memset(&mpidr_cpuids, 0xff, sizeof(mpidr_cpuids));
+ memset(&mpidr_cpus, 0, sizeof(mpidr_cpus));
+
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+ BUG_ON(mpidr_cpuids_count >= NR_CPUS);
+
+ val = of_get_property(cn, "reg", &len);
+ if (!val || len != 4) {
+ pr_err("%s missing reg property\n", cn->full_name);
+ continue;
+ }
+ compatible = of_get_property(cn, "compatible", NULL);
+ if (compatible == NULL) {
+ pr_err("%s missing compatible property\n", cn->full_name);
+ continue;
+ }
+
+ mpidr_cpuids[mpidr_cpuids_count] = be32_to_cpup(val);
+ mpidr_cpus[mpidr_cpuids_count] = gator_find_cpu_by_dt_name(compatible);
+ ++mpidr_cpuids_count;
+ }
+
+ map_cpuids = (mpidr_cpuids_count == nr_cpu_ids);
+}
+
+static int linearize_mpidr(int mpidr)
+{
+ int i;
+
+ for (i = 0; i < nr_cpu_ids; ++i) {
+ if (mpidr_cpuids[i] == mpidr)
+ return i;
+ }
+
+ BUG();
+}
+
+int lcpu_to_pcpu(const int lcpu)
+{
+ int pcpu;
+
+ if (!map_cpuids)
+ return lcpu;
+
+ BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0);
+ pcpu = __lcpu_to_pcpu[lcpu];
+ BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0);
+ return pcpu;
+}
+
+int pcpu_to_lcpu(const int pcpu)
+{
+ int lcpu;
+
+ if (!map_cpuids)
+ return pcpu;
+
+ BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0);
+ for (lcpu = 0; lcpu < nr_cpu_ids; ++lcpu) {
+ if (__lcpu_to_pcpu[lcpu] == pcpu) {
+ BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0);
+ return lcpu;
+ }
+ }
+ BUG();
+}
+
+static void gator_update_cpu_mapping(u32 cpu_hwid)
+{
+ int lcpu = smp_processor_id();
+ int pcpu = linearize_mpidr(cpu_hwid & MPIDR_HWID_BITMASK);
+
+ BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0);
+ BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0);
+ __lcpu_to_pcpu[lcpu] = pcpu;
+}
+
+GATOR_DEFINE_PROBE(cpu_migrate_begin, TP_PROTO(u64 timestamp, u32 cpu_hwid))
+{
+ const int cpu = get_physical_cpu();
+
+ gator_timer_offline((void *)1);
+ gator_timer_offline_dispatch(cpu, true);
+}
+
+GATOR_DEFINE_PROBE(cpu_migrate_finish, TP_PROTO(u64 timestamp, u32 cpu_hwid))
+{
+ int cpu;
+
+ gator_update_cpu_mapping(cpu_hwid);
+
+ /* get_physical_cpu must be called after gator_update_cpu_mapping */
+ cpu = get_physical_cpu();
+ gator_timer_online_dispatch(cpu, true);
+ gator_timer_online((void *)1);
+}
+
+GATOR_DEFINE_PROBE(cpu_migrate_current, TP_PROTO(u64 timestamp, u32 cpu_hwid))
+{
+ gator_update_cpu_mapping(cpu_hwid);
+}
+
+static void gator_send_iks_core_names(void)
+{
+ int cpu;
+ /* Send the cpu names */
+ preempt_disable();
+ for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
+ if (mpidr_cpus[cpu] != NULL)
+ gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid);
+ }
+ preempt_enable();
+}
+
+static int gator_migrate_start(void)
+{
+ int retval = 0;
+
+ if (!map_cpuids)
+ return retval;
+
+ if (retval == 0)
+ retval = GATOR_REGISTER_TRACE(cpu_migrate_begin);
+ if (retval == 0)
+ retval = GATOR_REGISTER_TRACE(cpu_migrate_finish);
+ if (retval == 0)
+ retval = GATOR_REGISTER_TRACE(cpu_migrate_current);
+ if (retval == 0) {
+ /* Initialize the logical to physical cpu mapping */
+ memset(&__lcpu_to_pcpu, 0xff, sizeof(__lcpu_to_pcpu));
+ bL_switcher_trace_trigger();
+ }
+ return retval;
+}
+
+static void gator_migrate_stop(void)
+{
+ if (!map_cpuids)
+ return;
+
+ GATOR_UNREGISTER_TRACE(cpu_migrate_current);
+ GATOR_UNREGISTER_TRACE(cpu_migrate_finish);
+ GATOR_UNREGISTER_TRACE(cpu_migrate_begin);
+}
+
+#else
+
+#define calc_first_cluster_size()
+#define gator_send_iks_core_names()
+#define gator_migrate_start() 0
+#define gator_migrate_stop()
+
+#endif
diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c
new file mode 100644
index 000000000000..30bf60d95286
--- /dev/null
+++ b/drivers/gator/gator_main.c
@@ -0,0 +1,1491 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/* This version must match the gator daemon version */
+#define PROTOCOL_VERSION 20
+static unsigned long gator_protocol_version = PROTOCOL_VERSION;
+
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+#include <linux/hardirq.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/utsname.h>
+#include <linux/kthread.h>
+#include <asm/stacktrace.h>
+#include <linux/uaccess.h>
+
+#include "gator.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
+#error kernels prior to 2.6.32 are not supported
+#endif
+
+#if defined(MODULE) && !defined(CONFIG_MODULES)
+#error Cannot build a module against a kernel that does not support modules. To resolve, either rebuild the kernel to support modules or build gator as part of the kernel.
+#endif
+
+#if !defined(CONFIG_GENERIC_TRACER) && !defined(CONFIG_TRACING)
+#error gator requires the kernel to have CONFIG_GENERIC_TRACER or CONFIG_TRACING defined
+#endif
+
+#ifndef CONFIG_PROFILING
+#error gator requires the kernel to have CONFIG_PROFILING defined
+#endif
+
+#ifndef CONFIG_HIGH_RES_TIMERS
+#error gator requires the kernel to have CONFIG_HIGH_RES_TIMERS defined to support PC sampling
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && defined(__arm__) && defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS)
+#error gator requires the kernel to have CONFIG_LOCAL_TIMERS defined on SMP systems
+#endif
+
+#if (GATOR_PERF_SUPPORT) && (!(GATOR_PERF_PMU_SUPPORT))
+#ifndef CONFIG_PERF_EVENTS
+#error gator requires the kernel to have CONFIG_PERF_EVENTS defined to support pmu hardware counters
+#elif !defined CONFIG_HW_PERF_EVENTS
+#error gator requires the kernel to have CONFIG_HW_PERF_EVENTS defined to support pmu hardware counters
+#endif
+#endif
+
+/******************************************************************************
+ * DEFINES
+ ******************************************************************************/
+#define SUMMARY_BUFFER_SIZE (1*1024)
+#define BACKTRACE_BUFFER_SIZE (128*1024)
+#define NAME_BUFFER_SIZE (64*1024)
+#define COUNTER_BUFFER_SIZE (64*1024) /* counters have the core as part of the data and the core value in the frame header may be discarded */
+#define BLOCK_COUNTER_BUFFER_SIZE (128*1024)
+#define ANNOTATE_BUFFER_SIZE (128*1024) /* annotate counters have the core as part of the data and the core value in the frame header may be discarded */
+#define SCHED_TRACE_BUFFER_SIZE (128*1024)
+#define IDLE_BUFFER_SIZE (32*1024) /* idle counters have the core as part of the data and the core value in the frame header may be discarded */
+#define ACTIVITY_BUFFER_SIZE (128*1024)
+
+#define NO_COOKIE 0U
+#define UNRESOLVED_COOKIE ~0U
+
+#define FRAME_SUMMARY 1
+#define FRAME_BACKTRACE 2
+#define FRAME_NAME 3
+#define FRAME_COUNTER 4
+#define FRAME_BLOCK_COUNTER 5
+#define FRAME_ANNOTATE 6
+#define FRAME_SCHED_TRACE 7
+#define FRAME_IDLE 9
+#define FRAME_ACTIVITY 13
+
+#define MESSAGE_END_BACKTRACE 1
+
+/* Name Frame Messages */
+#define MESSAGE_COOKIE 1
+#define MESSAGE_THREAD_NAME 2
+#define MESSAGE_LINK 4
+
+/* Scheduler Trace Frame Messages */
+#define MESSAGE_SCHED_SWITCH 1
+#define MESSAGE_SCHED_EXIT 2
+
+/* Idle Frame Messages */
+#define MESSAGE_IDLE_ENTER 1
+#define MESSAGE_IDLE_EXIT 2
+
+/* Summary Frame Messages */
+#define MESSAGE_SUMMARY 1
+#define MESSAGE_CORE_NAME 3
+
+/* Activity Frame Messages */
+#define MESSAGE_SWITCH 2
+#define MESSAGE_EXIT 3
+
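+/* Worst-case encoded sizes of the packed integers written by
+ * gator_buffer_write_packed_int{,64}; the values imply a varint with 7
+ * payload bits per byte: ceil(32/7) = 5 and ceil(64/7) = 10 bytes.
+ */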
+#define MAXSIZE_PACK32 5
+#define MAXSIZE_PACK64 10
+
+#define FRAME_HEADER_SIZE 3
+
+#if defined(__arm__)
+#define PC_REG regs->ARM_pc
+#elif defined(__aarch64__)
+#define PC_REG regs->pc
+#else
+#define PC_REG regs->ip
+#endif
+
+enum {
+ SUMMARY_BUF,
+ BACKTRACE_BUF,
+ NAME_BUF,
+ COUNTER_BUF,
+ BLOCK_COUNTER_BUF,
+ ANNOTATE_BUF,
+ SCHED_TRACE_BUF,
+ IDLE_BUF,
+ ACTIVITY_BUF,
+ NUM_GATOR_BUFS
+};
+
+/******************************************************************************
+ * Globals
+ ******************************************************************************/
+static unsigned long gator_cpu_cores;
+/* Size of the largest buffer. Effectively constant, set in gator_op_create_files */
+static unsigned long userspace_buffer_size;
+static unsigned long gator_backtrace_depth;
+/* How often to commit the buffers for live display, in nanoseconds */
+static u64 gator_live_rate;
+
+static unsigned long gator_started;
+static u64 gator_monotonic_started;
+static u64 gator_sync_time;
+static u64 gator_hibernate_time;
+static unsigned long gator_buffer_opened;
+static unsigned long gator_timer_count;
+static unsigned long gator_response_type;
+static DEFINE_MUTEX(start_mutex);
+static DEFINE_MUTEX(gator_buffer_mutex);
+
+bool event_based_sampling;
+
+static DECLARE_WAIT_QUEUE_HEAD(gator_buffer_wait);
+static DECLARE_WAIT_QUEUE_HEAD(gator_annotate_wait);
+static struct timer_list gator_buffer_wake_up_timer;
+static bool gator_buffer_wake_run;
+/* Initialize the semaphore unlocked; the wake thread downs it to wait for wake-up requests */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+static DECLARE_MUTEX(gator_buffer_wake_sem);
+#else
+static DEFINE_SEMAPHORE(gator_buffer_wake_sem);
+#endif
+static struct task_struct *gator_buffer_wake_thread;
+static LIST_HEAD(gator_events);
+
+static DEFINE_PER_CPU(u64, last_timestamp);
+
+static bool printed_monotonic_warning;
+
+static u32 gator_cpuids[NR_CPUS];
+static bool sent_core_name[NR_CPUS];
+
+static DEFINE_PER_CPU(bool, in_scheduler_context);
+
+/******************************************************************************
+ * Prototypes
+ ******************************************************************************/
+static u64 gator_get_time(void);
+static void gator_emit_perf_time(u64 time);
+static void gator_op_create_files(struct super_block *sb, struct dentry *root);
+
+/* gator_buffer is protected by being per_cpu and by having IRQs
+ * disabled when writing to it. Most marshal_* calls take care of this
+ * except for marshal_cookie*, marshal_backtrace* and marshal_frame
+ * where the caller is responsible for doing so. No synchronization is
+ * needed with the backtrace buffer as it is per cpu and is only used
+ * from the hrtimer. The annotate_lock must be held when using the
+ * annotation buffer as it is not per cpu. collect_counters which is
+ * the sole writer to the block counter frame is additionally
+ * protected by the per cpu collecting flag.
+ */
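+
+/* A minimal sketch of the expected write pattern (compare
+ * marshal_event_single in gator_marshaling.c); bytes_needed and value
+ * are placeholders, not driver symbols:
+ *
+ *	unsigned long flags;
+ *	int cpu;
+ *
+ *	local_irq_save(flags);
+ *	cpu = get_physical_cpu();
+ *	if (buffer_check_space(cpu, COUNTER_BUF, bytes_needed))
+ *		gator_buffer_write_packed_int(cpu, COUNTER_BUF, value);
+ *	local_irq_restore(flags);
+ */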
+
+/* Size of the buffer, must be a power of 2. Effectively constant, set in gator_op_setup. */
+static uint32_t gator_buffer_size[NUM_GATOR_BUFS];
+/* gator_buffer_size - 1, bitwise and with pos to get offset into the array. Effectively constant, set in gator_op_setup. */
+static uint32_t gator_buffer_mask[NUM_GATOR_BUFS];
+/* Read position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are read by userspace in userspace_buffer_read */
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_read);
+/* Write position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are written to the buffer */
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_write);
+/* Commit position in the buffer. Initialized to zero in gator_op_setup and incremented after a frame is ready to be read by userspace */
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_commit);
+/* If set to false, decreases the number of bytes returned by
+ * buffer_bytes_available. Set in buffer_check_space if no space is
+ * remaining. Initialized to true in gator_op_setup. This means that
+ * if we run out of space, continue to report that no space is
+ * available until bytes are read by userspace
+ */
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], buffer_space_available);
+/* The buffer. Allocated in gator_op_setup */
+static DEFINE_PER_CPU(char *[NUM_GATOR_BUFS], gator_buffer);
+/* The time after which the buffer should be committed for live display */
+static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
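+
+/* Each buffer is a simple per-cpu ring: the byte at logical position
+ * pos lives at gator_buffer[buftype][pos & gator_buffer_mask[buftype]],
+ * which is why the sizes are checked to be powers of two in
+ * gator_op_setup.
+ */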
+
+/* List of all gator events - new events must be added to this list */
+#define GATOR_EVENTS_LIST \
+ GATOR_EVENT(gator_events_armv6_init) \
+ GATOR_EVENT(gator_events_armv7_init) \
+ GATOR_EVENT(gator_events_block_init) \
+ GATOR_EVENT(gator_events_ccn504_init) \
+ GATOR_EVENT(gator_events_irq_init) \
+ GATOR_EVENT(gator_events_l2c310_init) \
+ GATOR_EVENT(gator_events_mali_init) \
+ GATOR_EVENT(gator_events_mali_midgard_hw_init) \
+ GATOR_EVENT(gator_events_mali_midgard_init) \
+ GATOR_EVENT(gator_events_meminfo_init) \
+ GATOR_EVENT(gator_events_mmapped_init) \
+ GATOR_EVENT(gator_events_net_init) \
+ GATOR_EVENT(gator_events_perf_pmu_init) \
+ GATOR_EVENT(gator_events_sched_init) \
+	GATOR_EVENT(gator_events_scorpion_init)
+
+#define GATOR_EVENT(EVENT_INIT) __weak int EVENT_INIT(void);
+GATOR_EVENTS_LIST
+#undef GATOR_EVENT
+
+static int (*gator_events_list[])(void) = {
+#define GATOR_EVENT(EVENT_INIT) EVENT_INIT,
+GATOR_EVENTS_LIST
+#undef GATOR_EVENT
+};
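+
+/* The X-macro list above is expanded twice: once into weak prototypes,
+ * so the build succeeds when an event source is not compiled in and its
+ * entry resolves to NULL, and once into the array of init functions
+ * that gator_init invokes.
+ */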
+
+/******************************************************************************
+ * Application Includes
+ ******************************************************************************/
+#include "gator_fs.c"
+#include "gator_buffer_write.c"
+#include "gator_buffer.c"
+#include "gator_marshaling.c"
+#include "gator_hrtimer_gator.c"
+#include "gator_cookies.c"
+#include "gator_annotate.c"
+#include "gator_trace_sched.c"
+#include "gator_trace_power.c"
+#include "gator_trace_gpu.c"
+#include "gator_backtrace.c"
+
+/******************************************************************************
+ * Misc
+ ******************************************************************************/
+
+static const struct gator_cpu gator_cpus[] = {
+ {
+ .cpuid = ARM1136,
+ .core_name = "ARM1136",
+ .pmnc_name = "ARM_ARM11",
+ .dt_name = "arm,arm1136",
+ .pmnc_counters = 3,
+ },
+ {
+ .cpuid = ARM1156,
+ .core_name = "ARM1156",
+ .pmnc_name = "ARM_ARM11",
+ .dt_name = "arm,arm1156",
+ .pmnc_counters = 3,
+ },
+ {
+ .cpuid = ARM1176,
+ .core_name = "ARM1176",
+ .pmnc_name = "ARM_ARM11",
+ .dt_name = "arm,arm1176",
+ .pmnc_counters = 3,
+ },
+ {
+ .cpuid = ARM11MPCORE,
+ .core_name = "ARM11MPCore",
+ .pmnc_name = "ARM_ARM11MPCore",
+ .dt_name = "arm,arm11mpcore",
+ .pmnc_counters = 3,
+ },
+ {
+ .cpuid = CORTEX_A5,
+ .core_name = "Cortex-A5",
+ .pmnc_name = "ARMv7_Cortex_A5",
+ .dt_name = "arm,cortex-a5",
+ .pmnc_counters = 2,
+ },
+ {
+ .cpuid = CORTEX_A7,
+ .core_name = "Cortex-A7",
+ .pmnc_name = "ARMv7_Cortex_A7",
+ .dt_name = "arm,cortex-a7",
+ .pmnc_counters = 4,
+ },
+ {
+ .cpuid = CORTEX_A8,
+ .core_name = "Cortex-A8",
+ .pmnc_name = "ARMv7_Cortex_A8",
+ .dt_name = "arm,cortex-a8",
+ .pmnc_counters = 4,
+ },
+ {
+ .cpuid = CORTEX_A9,
+ .core_name = "Cortex-A9",
+ .pmnc_name = "ARMv7_Cortex_A9",
+ .dt_name = "arm,cortex-a9",
+ .pmnc_counters = 6,
+ },
+ {
+ .cpuid = CORTEX_A15,
+ .core_name = "Cortex-A15",
+ .pmnc_name = "ARMv7_Cortex_A15",
+ .dt_name = "arm,cortex-a15",
+ .pmnc_counters = 6,
+ },
+ {
+ .cpuid = CORTEX_A17,
+ .core_name = "Cortex-A17",
+ .pmnc_name = "ARMv7_Cortex_A17",
+ .dt_name = "arm,cortex-a17",
+ .pmnc_counters = 6,
+ },
+ {
+ .cpuid = SCORPION,
+ .core_name = "Scorpion",
+ .pmnc_name = "Scorpion",
+ .pmnc_counters = 4,
+ },
+ {
+ .cpuid = SCORPIONMP,
+ .core_name = "ScorpionMP",
+ .pmnc_name = "ScorpionMP",
+ .pmnc_counters = 4,
+ },
+ {
+ .cpuid = KRAITSIM,
+ .core_name = "KraitSIM",
+ .pmnc_name = "Krait",
+ .pmnc_counters = 4,
+ },
+ {
+ .cpuid = KRAIT,
+ .core_name = "Krait",
+ .pmnc_name = "Krait",
+ .pmnc_counters = 4,
+ },
+ {
+ .cpuid = KRAIT_S4_PRO,
+ .core_name = "Krait S4 Pro",
+ .pmnc_name = "Krait",
+ .pmnc_counters = 4,
+ },
+ {
+ .cpuid = CORTEX_A53,
+ .core_name = "Cortex-A53",
+ .pmnc_name = "ARM_Cortex-A53",
+ .dt_name = "arm,cortex-a53",
+ .pmnc_counters = 6,
+ },
+ {
+ .cpuid = CORTEX_A57,
+ .core_name = "Cortex-A57",
+ .pmnc_name = "ARM_Cortex-A57",
+ .dt_name = "arm,cortex-a57",
+ .pmnc_counters = 6,
+ },
+ {
+ .cpuid = AARCH64,
+ .core_name = "AArch64",
+ .pmnc_name = "ARM_AArch64",
+ .pmnc_counters = 6,
+ },
+ {
+ .cpuid = OTHER,
+ .core_name = "Other",
+ .pmnc_name = "Other",
+ .pmnc_counters = 6,
+ },
+ {}
+};
+
+const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid)
+{
+ int i;
+
+ for (i = 0; gator_cpus[i].cpuid != 0; ++i) {
+ const struct gator_cpu *const gator_cpu = &gator_cpus[i];
+
+ if (gator_cpu->cpuid == cpuid)
+ return gator_cpu;
+ }
+
+ return NULL;
+}
+
+static const char OLD_PMU_PREFIX[] = "ARMv7 Cortex-";
+static const char NEW_PMU_PREFIX[] = "ARMv7_Cortex_";
+
+const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name)
+{
+ int i;
+
+ for (i = 0; gator_cpus[i].cpuid != 0; ++i) {
+ const struct gator_cpu *const gator_cpu = &gator_cpus[i];
+
+ if (gator_cpu->pmnc_name != NULL &&
+ /* Do the names match exactly? */
+ (strcasecmp(gator_cpu->pmnc_name, name) == 0 ||
+ /* Do these names match but have the old vs new prefix? */
+ ((strncasecmp(name, OLD_PMU_PREFIX, sizeof(OLD_PMU_PREFIX) - 1) == 0 &&
+ strncasecmp(gator_cpu->pmnc_name, NEW_PMU_PREFIX, sizeof(NEW_PMU_PREFIX) - 1) == 0 &&
+ strcasecmp(name + sizeof(OLD_PMU_PREFIX) - 1, gator_cpu->pmnc_name + sizeof(NEW_PMU_PREFIX) - 1) == 0))))
+ return gator_cpu;
+ }
+
+ return NULL;
+}
+
+u32 gator_cpuid(void)
+{
+#if defined(__arm__) || defined(__aarch64__)
+ u32 val;
+#if !defined(__aarch64__)
+ asm volatile("mrc p15, 0, %0, c0, c0, 0" : "=r" (val));
+#else
+ asm volatile("mrs %0, midr_el1" : "=r" (val));
+#endif
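+	/* Bits [15:4] of the MIDR hold the primary part number, used here as the cpuid */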
+ return (val >> 4) & 0xfff;
+#else
+ return OTHER;
+#endif
+}
+
+static void gator_buffer_wake_up(unsigned long data)
+{
+ wake_up(&gator_buffer_wait);
+}
+
+static int gator_buffer_wake_func(void *data)
+{
+ for (;;) {
+ if (down_killable(&gator_buffer_wake_sem))
+ break;
+
+ /* Eat up any pending events */
+ while (!down_trylock(&gator_buffer_wake_sem))
+ ;
+
+ if (!gator_buffer_wake_run)
+ break;
+
+ gator_buffer_wake_up(0);
+ }
+
+ return 0;
+}
+
+/******************************************************************************
+ * Commit interface
+ ******************************************************************************/
+static bool buffer_commit_ready(int *cpu, int *buftype)
+{
+ int cpu_x, x;
+
+ for_each_present_cpu(cpu_x) {
+ for (x = 0; x < NUM_GATOR_BUFS; x++)
+ if (per_cpu(gator_buffer_commit, cpu_x)[x] != per_cpu(gator_buffer_read, cpu_x)[x]) {
+ *cpu = cpu_x;
+ *buftype = x;
+ return true;
+ }
+ }
+ *cpu = -1;
+ *buftype = -1;
+ return false;
+}
+
+/******************************************************************************
+ * hrtimer interrupt processing
+ ******************************************************************************/
+static void gator_timer_interrupt(void)
+{
+ struct pt_regs *const regs = get_irq_regs();
+
+ gator_backtrace_handler(regs);
+}
+
+void gator_backtrace_handler(struct pt_regs *const regs)
+{
+ u64 time = gator_get_time();
+ int cpu = get_physical_cpu();
+
+ /* Output backtrace */
+ gator_add_sample(cpu, regs, time);
+
+ /* Collect counters */
+ if (!per_cpu(collecting, cpu))
+ collect_counters(time, current, false);
+
+	/*
+	 * No buffer flushing occurs during sched switch for RT-Preempt full.
+	 * The block counter frame will be flushed by collect_counters, but
+	 * the sched buffer needs to be explicitly flushed.
+	 */
+#ifdef CONFIG_PREEMPT_RT_FULL
+ buffer_check(cpu, SCHED_TRACE_BUF, time);
+#endif
+}
+
+static int gator_running;
+
+/* This function runs in interrupt context and on the appropriate core */
+static void gator_timer_offline(void *migrate)
+{
+ struct gator_interface *gi;
+ int i, len, cpu = get_physical_cpu();
+ int *buffer;
+ u64 time;
+
+ gator_trace_sched_offline();
+ gator_trace_power_offline();
+
+ if (!migrate)
+ gator_hrtimer_offline();
+
+ /* Offline any events and output counters */
+ time = gator_get_time();
+ if (marshal_event_header(time)) {
+ list_for_each_entry(gi, &gator_events, list) {
+ if (gi->offline) {
+ len = gi->offline(&buffer, migrate);
+ marshal_event(len, buffer);
+ }
+ }
+ /* Only check after writing all counters so that time and corresponding counters appear in the same frame */
+ buffer_check(cpu, BLOCK_COUNTER_BUF, time);
+ }
+
+ /* Flush all buffers on this core */
+ for (i = 0; i < NUM_GATOR_BUFS; i++)
+ gator_commit_buffer(cpu, i, time);
+}
+
+/* This function runs in interrupt context and may be running on a core other than core 'cpu' */
+static void gator_timer_offline_dispatch(int cpu, bool migrate)
+{
+ struct gator_interface *gi;
+
+ list_for_each_entry(gi, &gator_events, list) {
+ if (gi->offline_dispatch)
+ gi->offline_dispatch(cpu, migrate);
+ }
+}
+
+static void gator_timer_stop(void)
+{
+ int cpu;
+
+ if (gator_running) {
+ on_each_cpu(gator_timer_offline, NULL, 1);
+ for_each_online_cpu(cpu) {
+ gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false);
+ }
+
+ gator_running = 0;
+ gator_hrtimer_shutdown();
+ }
+}
+
+static void gator_send_core_name(const int cpu, const u32 cpuid)
+{
+#if defined(__arm__) || defined(__aarch64__)
+ if (!sent_core_name[cpu] || (cpuid != gator_cpuids[cpu])) {
+ const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(cpuid);
+ const char *core_name = NULL;
+ char core_name_buf[32];
+
+ /* Save off this cpuid */
+ gator_cpuids[cpu] = cpuid;
+ if (gator_cpu != NULL) {
+ core_name = gator_cpu->core_name;
+ } else {
+ if (cpuid == -1)
+ snprintf(core_name_buf, sizeof(core_name_buf), "Unknown");
+ else
+ snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+ core_name = core_name_buf;
+ }
+
+ marshal_core_name(cpu, cpuid, core_name);
+ sent_core_name[cpu] = true;
+ }
+#endif
+}
+
+static void gator_read_cpuid(void *arg)
+{
+ gator_cpuids[get_physical_cpu()] = gator_cpuid();
+}
+
+/* This function runs in interrupt context and on the appropriate core */
+static void gator_timer_online(void *migrate)
+{
+ struct gator_interface *gi;
+ int len, cpu = get_physical_cpu();
+ int *buffer;
+ u64 time;
+
+ /* Send what is currently running on this core */
+ marshal_sched_trace_switch(current->pid, 0);
+
+ gator_trace_power_online();
+
+ /* online any events and output counters */
+ time = gator_get_time();
+ if (marshal_event_header(time)) {
+ list_for_each_entry(gi, &gator_events, list) {
+ if (gi->online) {
+ len = gi->online(&buffer, migrate);
+ marshal_event(len, buffer);
+ }
+ }
+ /* Only check after writing all counters so that time and corresponding counters appear in the same frame */
+ buffer_check(cpu, BLOCK_COUNTER_BUF, time);
+ }
+
+ if (!migrate)
+ gator_hrtimer_online();
+
+ gator_send_core_name(cpu, gator_cpuid());
+}
+
+/* This function runs in interrupt context and may be running on a core other than core 'cpu' */
+static void gator_timer_online_dispatch(int cpu, bool migrate)
+{
+ struct gator_interface *gi;
+
+ list_for_each_entry(gi, &gator_events, list) {
+ if (gi->online_dispatch)
+ gi->online_dispatch(cpu, migrate);
+ }
+}
+
+#include "gator_iks.c"
+
+static int gator_timer_start(unsigned long sample_rate)
+{
+ int cpu;
+
+ if (gator_running) {
+ pr_notice("gator: already running\n");
+ return 0;
+ }
+
+ gator_running = 1;
+
+ /* event based sampling trumps hr timer based sampling */
+ if (event_based_sampling)
+ sample_rate = 0;
+
+ if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1)
+ return -1;
+
+ /* Send off the previously saved cpuids */
+ for_each_present_cpu(cpu) {
+ preempt_disable();
+ gator_send_core_name(cpu, gator_cpuids[cpu]);
+ preempt_enable();
+ }
+
+ gator_send_iks_core_names();
+ for_each_online_cpu(cpu) {
+ gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
+ }
+ on_each_cpu(gator_timer_online, NULL, 1);
+
+ return 0;
+}
+
+static u64 gator_get_time(void)
+{
+ struct timespec ts;
+ u64 timestamp;
+ u64 prev_timestamp;
+ u64 delta;
+ int cpu = smp_processor_id();
+
+ /* Match clock_gettime(CLOCK_MONOTONIC_RAW, &ts) from userspace */
+ getrawmonotonic(&ts);
+ timestamp = timespec_to_ns(&ts);
+
+	/* getrawmonotonic is not monotonic on all systems. Detect and
+	 * attempt to correct these cases. Up to 0.5 ms of delta has been
+	 * seen on some systems, which can skew Streamline data when viewed
+	 * at high resolution. This doesn't work well with interrupts, but
+	 * that's OK - the real concern is to catch big jumps in time.
+	 */
+ prev_timestamp = per_cpu(last_timestamp, cpu);
+ if (prev_timestamp <= timestamp) {
+ per_cpu(last_timestamp, cpu) = timestamp;
+ } else {
+ delta = prev_timestamp - timestamp;
+ /* Log the error once */
+ if (!printed_monotonic_warning && delta > 500000) {
+ pr_err("%s: getrawmonotonic is not monotonic cpu: %i delta: %lli\nSkew in Streamline data may be present at the fine zoom levels\n", __func__, cpu, delta);
+ printed_monotonic_warning = true;
+ }
+ timestamp = prev_timestamp;
+ }
+
+ return timestamp - gator_monotonic_started;
+}
+
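+/* Roughly once a second, emit local_clock() (the perf timebase) as a
+ * counter record so that userspace can correlate perf timestamps with
+ * gator's CLOCK_MONOTONIC_RAW stream; the key of -1 appears to mark
+ * this sync record.
+ */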
+static void gator_emit_perf_time(u64 time)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
+ if (time >= gator_sync_time) {
+ int cpu = get_physical_cpu();
+
+ marshal_event_single64(0, -1, local_clock());
+ gator_sync_time += NSEC_PER_SEC;
+ gator_commit_buffer(cpu, COUNTER_BUF, time);
+ }
+#endif
+}
+
+/******************************************************************************
+ * cpu hotplug and pm notifiers
+ ******************************************************************************/
+static int __cpuinit gator_hotcpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ int cpu = lcpu_to_pcpu((long)hcpu);
+
+ switch (action) {
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ smp_call_function_single(cpu, gator_timer_offline, NULL, 1);
+ gator_timer_offline_dispatch(cpu, false);
+ break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ gator_timer_online_dispatch(cpu, false);
+ smp_call_function_single(cpu, gator_timer_online, NULL, 1);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __refdata gator_hotcpu_notifier = {
+ .notifier_call = gator_hotcpu_notify,
+};
+
+/* N.B. calling "on_each_cpu" only runs on those cpus that are online.
+ * Registered linux events are not disabled, so their counters will
+ * continue to be collected.
+ */
+static int gator_pm_notify(struct notifier_block *nb, unsigned long event, void *dummy)
+{
+ int cpu;
+ struct timespec ts;
+
+ switch (event) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ unregister_hotcpu_notifier(&gator_hotcpu_notifier);
+ unregister_scheduler_tracepoints();
+ on_each_cpu(gator_timer_offline, NULL, 1);
+ for_each_online_cpu(cpu) {
+ gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false);
+ }
+
+ /* Record the wallclock hibernate time */
+ getnstimeofday(&ts);
+ gator_hibernate_time = timespec_to_ns(&ts) - gator_get_time();
+ break;
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ /* Adjust gator_monotonic_started for the time spent sleeping, as gator_get_time does not account for it */
+ if (gator_hibernate_time > 0) {
+ getnstimeofday(&ts);
+ gator_monotonic_started += gator_hibernate_time + gator_get_time() - timespec_to_ns(&ts);
+ gator_hibernate_time = 0;
+ }
+
+ for_each_online_cpu(cpu) {
+ gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
+ }
+ on_each_cpu(gator_timer_online, NULL, 1);
+ register_scheduler_tracepoints();
+ register_hotcpu_notifier(&gator_hotcpu_notifier);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block gator_pm_notifier = {
+ .notifier_call = gator_pm_notify,
+};
+
+static int gator_notifier_start(void)
+{
+ int retval;
+
+ retval = register_hotcpu_notifier(&gator_hotcpu_notifier);
+ if (retval == 0)
+ retval = register_pm_notifier(&gator_pm_notifier);
+ return retval;
+}
+
+static void gator_notifier_stop(void)
+{
+ unregister_pm_notifier(&gator_pm_notifier);
+ unregister_hotcpu_notifier(&gator_hotcpu_notifier);
+}
+
+/******************************************************************************
+ * Main
+ ******************************************************************************/
+static void gator_summary(void)
+{
+ u64 timestamp, uptime;
+ struct timespec ts;
+ char uname_buf[512];
+
+ snprintf(uname_buf, sizeof(uname_buf), "%s %s %s %s %s GNU/Linux", utsname()->sysname, utsname()->nodename, utsname()->release, utsname()->version, utsname()->machine);
+
+ getnstimeofday(&ts);
+ timestamp = timespec_to_ns(&ts);
+
+ /* Similar to reading /proc/uptime from fs/proc/uptime.c, calculate uptime */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
+ {
+ void (*m2b)(struct timespec *ts);
+
+ do_posix_clock_monotonic_gettime(&ts);
+ /* monotonic_to_bootbased is not defined for some versions of Android */
+ m2b = symbol_get(monotonic_to_bootbased);
+ if (m2b)
+ m2b(&ts);
+ }
+#else
+ get_monotonic_boottime(&ts);
+#endif
+ uptime = timespec_to_ns(&ts);
+
+ /* Disable preemption as gator_get_time calls smp_processor_id to verify time is monotonic */
+ preempt_disable();
+ /* Set monotonic_started to zero as gator_get_time is uptime minus monotonic_started */
+ gator_monotonic_started = 0;
+ gator_monotonic_started = gator_get_time();
+
+ marshal_summary(timestamp, uptime, gator_monotonic_started, uname_buf);
+ gator_sync_time = 0;
+ gator_emit_perf_time(gator_monotonic_started);
+ preempt_enable();
+}
+
+int gator_events_install(struct gator_interface *interface)
+{
+ list_add_tail(&interface->list, &gator_events);
+
+ return 0;
+}
+
+int gator_events_get_key(void)
+{
+ /* key 0 is reserved as a timestamp. key 1 is reserved as the marker
+ * for thread specific counters. key 2 is reserved as the marker for
+ * core. Odd keys are assigned by the driver, even keys by the
+ * daemon.
+ */
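+	/* For example, the first three driver-side allocations return keys 3, 5 and 7 */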
+ static int key = 3;
+ const int ret = key;
+
+ key += 2;
+ return ret;
+}
+
+static int gator_init(void)
+{
+ int i;
+
+ calc_first_cluster_size();
+
+ /* events sources */
+ for (i = 0; i < ARRAY_SIZE(gator_events_list); i++)
+ if (gator_events_list[i])
+ gator_events_list[i]();
+
+ gator_trace_sched_init();
+ gator_trace_power_init();
+
+ return 0;
+}
+
+static void gator_exit(void)
+{
+ struct gator_interface *gi;
+
+ list_for_each_entry(gi, &gator_events, list)
+ if (gi->shutdown)
+ gi->shutdown();
+}
+
+static int gator_start(void)
+{
+ unsigned long cpu, i;
+ struct gator_interface *gi;
+
+ gator_buffer_wake_run = true;
+ gator_buffer_wake_thread = kthread_run(gator_buffer_wake_func, NULL, "gator_bwake");
+ if (IS_ERR(gator_buffer_wake_thread))
+ goto bwake_failure;
+
+ if (gator_migrate_start())
+ goto migrate_failure;
+
+ /* Initialize the buffer with the frame type and core */
+ for_each_present_cpu(cpu) {
+ for (i = 0; i < NUM_GATOR_BUFS; i++)
+ marshal_frame(cpu, i);
+ per_cpu(last_timestamp, cpu) = 0;
+ }
+ printed_monotonic_warning = false;
+
+ /* Capture the start time */
+ gator_summary();
+
+ /* start all events */
+ list_for_each_entry(gi, &gator_events, list) {
+ if (gi->start && gi->start() != 0) {
+ struct list_head *ptr = gi->list.prev;
+
+ while (ptr != &gator_events) {
+ gi = list_entry(ptr, struct gator_interface, list);
+
+ if (gi->stop)
+ gi->stop();
+
+ ptr = ptr->prev;
+ }
+ goto events_failure;
+ }
+ }
+
+ /* cookies shall be initialized before trace_sched_start() and gator_timer_start() */
+ if (cookies_initialize())
+ goto cookies_failure;
+ if (gator_annotate_start())
+ goto annotate_failure;
+ if (gator_trace_sched_start())
+ goto sched_failure;
+ if (gator_trace_power_start())
+ goto power_failure;
+ if (gator_trace_gpu_start())
+ goto gpu_failure;
+ if (gator_timer_start(gator_timer_count))
+ goto timer_failure;
+ if (gator_notifier_start())
+ goto notifier_failure;
+
+ return 0;
+
+notifier_failure:
+ gator_timer_stop();
+timer_failure:
+ gator_trace_gpu_stop();
+gpu_failure:
+ gator_trace_power_stop();
+power_failure:
+ gator_trace_sched_stop();
+sched_failure:
+ gator_annotate_stop();
+annotate_failure:
+ cookies_release();
+cookies_failure:
+ /* stop all events */
+ list_for_each_entry(gi, &gator_events, list)
+ if (gi->stop)
+ gi->stop();
+events_failure:
+ gator_migrate_stop();
+migrate_failure:
+ gator_buffer_wake_run = false;
+ up(&gator_buffer_wake_sem);
+ gator_buffer_wake_thread = NULL;
+bwake_failure:
+
+ return -1;
+}
+
+static void gator_stop(void)
+{
+ struct gator_interface *gi;
+
+ gator_annotate_stop();
+ gator_trace_sched_stop();
+ gator_trace_power_stop();
+ gator_trace_gpu_stop();
+
+ /* stop all interrupt callback reads before tearing down other interfaces */
+ gator_notifier_stop(); /* should be called before gator_timer_stop to avoid re-enabling the hrtimer after it has been offlined */
+ gator_timer_stop();
+
+ /* stop all events */
+ list_for_each_entry(gi, &gator_events, list)
+ if (gi->stop)
+ gi->stop();
+
+ gator_migrate_stop();
+
+ gator_buffer_wake_run = false;
+ up(&gator_buffer_wake_sem);
+ gator_buffer_wake_thread = NULL;
+}
+
+/******************************************************************************
+ * Filesystem
+ ******************************************************************************/
+/* fopen("buffer") */
+static int gator_op_setup(void)
+{
+ int err = 0;
+ int cpu, i;
+
+ mutex_lock(&start_mutex);
+
+ gator_buffer_size[SUMMARY_BUF] = SUMMARY_BUFFER_SIZE;
+ gator_buffer_mask[SUMMARY_BUF] = SUMMARY_BUFFER_SIZE - 1;
+
+ gator_buffer_size[BACKTRACE_BUF] = BACKTRACE_BUFFER_SIZE;
+ gator_buffer_mask[BACKTRACE_BUF] = BACKTRACE_BUFFER_SIZE - 1;
+
+ gator_buffer_size[NAME_BUF] = NAME_BUFFER_SIZE;
+ gator_buffer_mask[NAME_BUF] = NAME_BUFFER_SIZE - 1;
+
+ gator_buffer_size[COUNTER_BUF] = COUNTER_BUFFER_SIZE;
+ gator_buffer_mask[COUNTER_BUF] = COUNTER_BUFFER_SIZE - 1;
+
+ gator_buffer_size[BLOCK_COUNTER_BUF] = BLOCK_COUNTER_BUFFER_SIZE;
+ gator_buffer_mask[BLOCK_COUNTER_BUF] = BLOCK_COUNTER_BUFFER_SIZE - 1;
+
+ gator_buffer_size[ANNOTATE_BUF] = ANNOTATE_BUFFER_SIZE;
+ gator_buffer_mask[ANNOTATE_BUF] = ANNOTATE_BUFFER_SIZE - 1;
+
+ gator_buffer_size[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE;
+ gator_buffer_mask[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE - 1;
+
+ gator_buffer_size[IDLE_BUF] = IDLE_BUFFER_SIZE;
+ gator_buffer_mask[IDLE_BUF] = IDLE_BUFFER_SIZE - 1;
+
+ gator_buffer_size[ACTIVITY_BUF] = ACTIVITY_BUFFER_SIZE;
+ gator_buffer_mask[ACTIVITY_BUF] = ACTIVITY_BUFFER_SIZE - 1;
+
+ /* Initialize percpu per buffer variables */
+ for (i = 0; i < NUM_GATOR_BUFS; i++) {
+ /* Verify buffers are a power of 2 */
+ if (gator_buffer_size[i] & (gator_buffer_size[i] - 1)) {
+ err = -ENOEXEC;
+ goto setup_error;
+ }
+
+ for_each_present_cpu(cpu) {
+ per_cpu(gator_buffer_read, cpu)[i] = 0;
+ per_cpu(gator_buffer_write, cpu)[i] = 0;
+ per_cpu(gator_buffer_commit, cpu)[i] = 0;
+ per_cpu(buffer_space_available, cpu)[i] = true;
+ per_cpu(gator_buffer_commit_time, cpu) = gator_live_rate;
+
+ /* Annotation is a special case that only uses a single buffer */
+ if (cpu > 0 && i == ANNOTATE_BUF) {
+ per_cpu(gator_buffer, cpu)[i] = NULL;
+ continue;
+ }
+
+ per_cpu(gator_buffer, cpu)[i] = vmalloc(gator_buffer_size[i]);
+ if (!per_cpu(gator_buffer, cpu)[i]) {
+ err = -ENOMEM;
+ goto setup_error;
+ }
+ }
+ }
+
+setup_error:
+ mutex_unlock(&start_mutex);
+ return err;
+}
+
+/* Actually start profiling (echo 1>/dev/gator/enable) */
+static int gator_op_start(void)
+{
+ int err = 0;
+
+ mutex_lock(&start_mutex);
+
+ if (gator_started || gator_start())
+ err = -EINVAL;
+ else
+ gator_started = 1;
+
+ mutex_unlock(&start_mutex);
+
+ return err;
+}
+
+/* echo 0>/dev/gator/enable */
+static void gator_op_stop(void)
+{
+ mutex_lock(&start_mutex);
+
+ if (gator_started) {
+ gator_stop();
+
+ mutex_lock(&gator_buffer_mutex);
+
+ gator_started = 0;
+ gator_monotonic_started = 0;
+ cookies_release();
+ wake_up(&gator_buffer_wait);
+
+ mutex_unlock(&gator_buffer_mutex);
+ }
+
+ mutex_unlock(&start_mutex);
+}
+
+static void gator_shutdown(void)
+{
+ int cpu, i;
+
+ mutex_lock(&start_mutex);
+
+ for_each_present_cpu(cpu) {
+ mutex_lock(&gator_buffer_mutex);
+ for (i = 0; i < NUM_GATOR_BUFS; i++) {
+ vfree(per_cpu(gator_buffer, cpu)[i]);
+ per_cpu(gator_buffer, cpu)[i] = NULL;
+ per_cpu(gator_buffer_read, cpu)[i] = 0;
+ per_cpu(gator_buffer_write, cpu)[i] = 0;
+ per_cpu(gator_buffer_commit, cpu)[i] = 0;
+ per_cpu(buffer_space_available, cpu)[i] = true;
+ per_cpu(gator_buffer_commit_time, cpu) = 0;
+ }
+ mutex_unlock(&gator_buffer_mutex);
+ }
+
+ memset(&sent_core_name, 0, sizeof(sent_core_name));
+
+ mutex_unlock(&start_mutex);
+}
+
+static int gator_set_backtrace(unsigned long val)
+{
+ int err = 0;
+
+ mutex_lock(&start_mutex);
+
+ if (gator_started)
+ err = -EBUSY;
+ else
+ gator_backtrace_depth = val;
+
+ mutex_unlock(&start_mutex);
+
+ return err;
+}
+
+static ssize_t enable_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ return gatorfs_ulong_to_user(gator_started, buf, count, offset);
+}
+
+static ssize_t enable_write(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = gatorfs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (val)
+ retval = gator_op_start();
+ else
+ gator_op_stop();
+
+ if (retval)
+ return retval;
+ return count;
+}
+
+static const struct file_operations enable_fops = {
+ .read = enable_read,
+ .write = enable_write,
+};
+
+static int userspace_buffer_open(struct inode *inode, struct file *file)
+{
+	int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (test_and_set_bit_lock(0, &gator_buffer_opened))
+ return -EBUSY;
+
+ err = gator_op_setup();
+ if (err)
+ goto fail;
+
+ /* NB: the actual start happens from userspace
+ * echo 1 >/dev/gator/enable
+ */
+
+ return 0;
+
+fail:
+ __clear_bit_unlock(0, &gator_buffer_opened);
+ return err;
+}
+
+static int userspace_buffer_release(struct inode *inode, struct file *file)
+{
+ gator_op_stop();
+ gator_shutdown();
+ __clear_bit_unlock(0, &gator_buffer_opened);
+ return 0;
+}
+
+static ssize_t userspace_buffer_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ int commit, length1, length2, read;
+ char *buffer1;
+ char *buffer2;
+ int cpu, buftype;
+ int written = 0;
+
+ /* ensure there is enough space for a whole frame */
+ if (count < userspace_buffer_size || *offset)
+ return -EINVAL;
+
+	/* Sleep until the condition is true or a signal is received; the
+	 * condition is checked each time gator_buffer_wait is woken up.
+	 */
+ wait_event_interruptible(gator_buffer_wait, buffer_commit_ready(&cpu, &buftype) || !gator_started);
+
+ if (signal_pending(current))
+ return -EINTR;
+
+ if (buftype == -1 || cpu == -1)
+ return 0;
+
+ mutex_lock(&gator_buffer_mutex);
+
+ do {
+ read = per_cpu(gator_buffer_read, cpu)[buftype];
+ commit = per_cpu(gator_buffer_commit, cpu)[buftype];
+
+ /* May happen if the buffer is freed during pending reads. */
+ if (!per_cpu(gator_buffer, cpu)[buftype])
+ break;
+
+ /* determine the size of two halves */
+ length1 = commit - read;
+ length2 = 0;
+ buffer1 = &(per_cpu(gator_buffer, cpu)[buftype][read]);
+ buffer2 = &(per_cpu(gator_buffer, cpu)[buftype][0]);
+ if (length1 < 0) {
+ length1 = gator_buffer_size[buftype] - read;
+ length2 = commit;
+ }
+
+ if (length1 + length2 > count - written)
+ break;
+
+ /* start, middle or end */
+ if (length1 > 0 && copy_to_user(&buf[written], buffer1, length1))
+ break;
+
+ /* possible wrap around */
+ if (length2 > 0 && copy_to_user(&buf[written + length1], buffer2, length2))
+ break;
+
+ per_cpu(gator_buffer_read, cpu)[buftype] = commit;
+ written += length1 + length2;
+
+ /* Wake up annotate_write if more space is available */
+ if (buftype == ANNOTATE_BUF)
+ wake_up(&gator_annotate_wait);
+ } while (buffer_commit_ready(&cpu, &buftype));
+
+ mutex_unlock(&gator_buffer_mutex);
+
+ /* kick just in case we've lost an SMP event */
+ wake_up(&gator_buffer_wait);
+
+ return written > 0 ? written : -EFAULT;
+}
+
+static const struct file_operations gator_event_buffer_fops = {
+ .open = userspace_buffer_open,
+ .release = userspace_buffer_release,
+ .read = userspace_buffer_read,
+};
+
+static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+ return gatorfs_ulong_to_user(gator_backtrace_depth, buf, count, offset);
+}
+
+static ssize_t depth_write(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = gatorfs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ retval = gator_set_backtrace(val);
+
+ if (retval)
+ return retval;
+ return count;
+}
+
+static const struct file_operations depth_fops = {
+ .read = depth_read,
+ .write = depth_write
+};
+
+static void gator_op_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ struct gator_interface *gi;
+ int cpu;
+
+ /* reinitialize default values */
+ gator_cpu_cores = 0;
+ for_each_present_cpu(cpu) {
+ gator_cpu_cores++;
+ }
+ userspace_buffer_size = BACKTRACE_BUFFER_SIZE;
+ gator_response_type = 1;
+ gator_live_rate = 0;
+
+ gatorfs_create_file(sb, root, "enable", &enable_fops);
+ gatorfs_create_file(sb, root, "buffer", &gator_event_buffer_fops);
+ gatorfs_create_file(sb, root, "backtrace_depth", &depth_fops);
+ gatorfs_create_ro_ulong(sb, root, "cpu_cores", &gator_cpu_cores);
+ gatorfs_create_ro_ulong(sb, root, "buffer_size", &userspace_buffer_size);
+ gatorfs_create_ulong(sb, root, "tick", &gator_timer_count);
+ gatorfs_create_ulong(sb, root, "response_type", &gator_response_type);
+ gatorfs_create_ro_ulong(sb, root, "version", &gator_protocol_version);
+ gatorfs_create_ro_u64(sb, root, "started", &gator_monotonic_started);
+ gatorfs_create_u64(sb, root, "live_rate", &gator_live_rate);
+
+ /* Annotate interface */
+ gator_annotate_create_files(sb, root);
+
+ /* Linux Events */
+ dir = gatorfs_mkdir(sb, root, "events");
+ list_for_each_entry(gi, &gator_events, list)
+ if (gi->create_files)
+ gi->create_files(sb, dir);
+
+ /* Sched Events */
+ sched_trace_create_files(sb, dir);
+
+ /* Power interface */
+ gator_trace_power_create_files(sb, dir);
+}
+
+/******************************************************************************
+ * Module
+ ******************************************************************************/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
+
+#define GATOR_TRACEPOINTS \
+ GATOR_HANDLE_TRACEPOINT(block_rq_complete); \
+ GATOR_HANDLE_TRACEPOINT(cpu_frequency); \
+ GATOR_HANDLE_TRACEPOINT(cpu_idle); \
+ GATOR_HANDLE_TRACEPOINT(cpu_migrate_begin); \
+ GATOR_HANDLE_TRACEPOINT(cpu_migrate_current); \
+ GATOR_HANDLE_TRACEPOINT(cpu_migrate_finish); \
+ GATOR_HANDLE_TRACEPOINT(irq_handler_exit); \
+ GATOR_HANDLE_TRACEPOINT(mali_hw_counter); \
+ GATOR_HANDLE_TRACEPOINT(mali_job_slots_event); \
+ GATOR_HANDLE_TRACEPOINT(mali_mmu_as_in_use); \
+ GATOR_HANDLE_TRACEPOINT(mali_mmu_as_released); \
+ GATOR_HANDLE_TRACEPOINT(mali_page_fault_insert_pages); \
+ GATOR_HANDLE_TRACEPOINT(mali_pm_status); \
+ GATOR_HANDLE_TRACEPOINT(mali_sw_counter); \
+ GATOR_HANDLE_TRACEPOINT(mali_sw_counters); \
+ GATOR_HANDLE_TRACEPOINT(mali_timeline_event); \
+ GATOR_HANDLE_TRACEPOINT(mali_total_alloc_pages_change); \
+ GATOR_HANDLE_TRACEPOINT(mm_page_alloc); \
+ GATOR_HANDLE_TRACEPOINT(mm_page_free); \
+ GATOR_HANDLE_TRACEPOINT(mm_page_free_batched); \
+ GATOR_HANDLE_TRACEPOINT(sched_process_exec); \
+ GATOR_HANDLE_TRACEPOINT(sched_process_fork); \
+ GATOR_HANDLE_TRACEPOINT(sched_process_free); \
+ GATOR_HANDLE_TRACEPOINT(sched_switch); \
+ GATOR_HANDLE_TRACEPOINT(softirq_exit); \
+	GATOR_HANDLE_TRACEPOINT(task_rename);
+
+#define GATOR_HANDLE_TRACEPOINT(probe_name) \
+ struct tracepoint *gator_tracepoint_##probe_name
+GATOR_TRACEPOINTS;
+#undef GATOR_HANDLE_TRACEPOINT
+
+static void gator_save_tracepoint(struct tracepoint *tp, void *priv)
+{
+#define GATOR_HANDLE_TRACEPOINT(probe_name) \
+ do { \
+ if (strcmp(tp->name, #probe_name) == 0) { \
+ gator_tracepoint_##probe_name = tp; \
+ return; \
+ } \
+ } while (0)
+GATOR_TRACEPOINTS;
+#undef GATOR_HANDLE_TRACEPOINT
+}
+
+#else
+
+#define for_each_kernel_tracepoint(fct, priv)
+
+#endif
+
+static int __init gator_module_init(void)
+{
+ for_each_kernel_tracepoint(gator_save_tracepoint, NULL);
+
+ if (gatorfs_register())
+ return -1;
+
+ if (gator_init()) {
+ gatorfs_unregister();
+ return -1;
+ }
+
+ setup_timer(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0);
+
+ /* Initialize the list of cpuids */
+ memset(gator_cpuids, -1, sizeof(gator_cpuids));
+ on_each_cpu(gator_read_cpuid, NULL, 1);
+
+ return 0;
+}
+
+static void __exit gator_module_exit(void)
+{
+ del_timer_sync(&gator_buffer_wake_up_timer);
+ tracepoint_synchronize_unregister();
+ gator_exit();
+ gatorfs_unregister();
+}
+
+module_init(gator_module_init);
+module_exit(gator_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd");
+MODULE_DESCRIPTION("Gator system profiler");
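+/* Two-level stringification so PROTOCOL_VERSION is macro-expanded before being quoted */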
+#define STRIFY2(ARG) #ARG
+#define STRIFY(ARG) STRIFY2(ARG)
+MODULE_VERSION(STRIFY(PROTOCOL_VERSION));
diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c
new file mode 100644
index 000000000000..0d1167643642
--- /dev/null
+++ b/drivers/gator/gator_marshaling.c
@@ -0,0 +1,371 @@
+/**
+ * Copyright (C) ARM Limited 2012-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define NEWLINE_CANARY \
+ /* Unix */ \
+ "1\n" \
+ /* Windows */ \
+ "2\r\n" \
+ /* Mac OS */ \
+ "3\r" \
+ /* RISC OS */ \
+ "4\n\r" \
+ /* Add another character so the length isn't 0x0a bytes */ \
+ "5"
+
+#ifdef MALI_SUPPORT
+#include "gator_events_mali_common.h"
+#endif
+
+static void marshal_summary(long long timestamp, long long uptime, long long monotonic_delta, const char *uname)
+{
+ unsigned long flags;
+ int cpu = 0;
+
+ local_irq_save(flags);
+ gator_buffer_write_packed_int(cpu, SUMMARY_BUF, MESSAGE_SUMMARY);
+ gator_buffer_write_string(cpu, SUMMARY_BUF, NEWLINE_CANARY);
+ gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, timestamp);
+ gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, uptime);
+ gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, monotonic_delta);
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "uname");
+ gator_buffer_write_string(cpu, SUMMARY_BUF, uname);
+#if GATOR_IKS_SUPPORT
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "iks");
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "");
+#endif
+#ifdef CONFIG_PREEMPT_RTB
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "preempt_rtb");
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "");
+#endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "preempt_rt_full");
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "");
+#endif
+	/*
+	 * Let Streamline know which GPU is used so that it can label the GPU
+	 * Activity appropriately. This is a temporary fix, to be improved in
+	 * a future release.
+	 */
+#ifdef MALI_SUPPORT
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "mali_type");
+#if (MALI_SUPPORT == MALI_4xx)
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "4xx");
+#elif (MALI_SUPPORT == MALI_MIDGARD)
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "6xx");
+#else
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "unknown");
+#endif
+#endif
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "");
+ /* Commit the buffer now so it can be one of the first frames read by Streamline */
+ local_irq_restore(flags);
+ gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
+}
+
+static bool marshal_cookie_header(const char *text)
+{
+ int cpu = get_physical_cpu();
+
+ return buffer_check_space(cpu, NAME_BUF, strlen(text) + 3 * MAXSIZE_PACK32);
+}
+
+static void marshal_cookie(int cookie, const char *text)
+{
+ int cpu = get_physical_cpu();
+ /* buffer_check_space already called by marshal_cookie_header */
+ gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_COOKIE);
+ gator_buffer_write_packed_int(cpu, NAME_BUF, cookie);
+ gator_buffer_write_string(cpu, NAME_BUF, text);
+ buffer_check(cpu, NAME_BUF, gator_get_time());
+}
+
+static void marshal_thread_name(int pid, char *name)
+{
+ unsigned long flags, cpu;
+ u64 time;
+
+ local_irq_save(flags);
+ cpu = get_physical_cpu();
+ time = gator_get_time();
+ if (buffer_check_space(cpu, NAME_BUF, TASK_COMM_LEN + 3 * MAXSIZE_PACK32 + MAXSIZE_PACK64)) {
+ gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_THREAD_NAME);
+ gator_buffer_write_packed_int64(cpu, NAME_BUF, time);
+ gator_buffer_write_packed_int(cpu, NAME_BUF, pid);
+ gator_buffer_write_string(cpu, NAME_BUF, name);
+ }
+ local_irq_restore(flags);
+ buffer_check(cpu, NAME_BUF, time);
+}
+
+static void marshal_link(int cookie, int tgid, int pid)
+{
+ unsigned long cpu = get_physical_cpu(), flags;
+ u64 time;
+
+ local_irq_save(flags);
+ time = gator_get_time();
+ if (buffer_check_space(cpu, NAME_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+ gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_LINK);
+ gator_buffer_write_packed_int64(cpu, NAME_BUF, time);
+ gator_buffer_write_packed_int(cpu, NAME_BUF, cookie);
+ gator_buffer_write_packed_int(cpu, NAME_BUF, tgid);
+ gator_buffer_write_packed_int(cpu, NAME_BUF, pid);
+ }
+ local_irq_restore(flags);
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, NAME_BUF, time);
+}
+
+static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, u64 time)
+{
+ int cpu = get_physical_cpu();
+
+ if (!buffer_check_space(cpu, BACKTRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32 + gator_backtrace_depth * 2 * MAXSIZE_PACK32)) {
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, BACKTRACE_BUF, time);
+
+ return false;
+ }
+
+ gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, time);
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, exec_cookie);
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, tgid);
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, pid);
+
+ return true;
+}
+
+static void marshal_backtrace(unsigned long address, int cookie, int in_kernel)
+{
+ int cpu = get_physical_cpu();
+
+ if (cookie == 0 && !in_kernel)
+ cookie = UNRESOLVED_COOKIE;
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, cookie);
+ gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, address);
+}
+
+static void marshal_backtrace_footer(u64 time)
+{
+ int cpu = get_physical_cpu();
+
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, MESSAGE_END_BACKTRACE);
+
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, BACKTRACE_BUF, time);
+}
+
+static bool marshal_event_header(u64 time)
+{
+ unsigned long flags, cpu = get_physical_cpu();
+ bool retval = false;
+
+ local_irq_save(flags);
+ if (buffer_check_space(cpu, BLOCK_COUNTER_BUF, MAXSIZE_PACK32 + MAXSIZE_PACK64)) {
+ gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, 0); /* key of zero indicates a timestamp */
+ gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, time);
+ retval = true;
+ }
+ local_irq_restore(flags);
+
+ return retval;
+}
+
+static void marshal_event(int len, int *buffer)
+{
+ unsigned long i, flags, cpu = get_physical_cpu();
+
+ if (len <= 0)
+ return;
+
+ /* length must be even since all data is a (key, value) pair */
+ if (len & 0x1) {
+ pr_err("gator: invalid counter data detected and discarded\n");
+ return;
+ }
+
+ /* events must be written in key,value pairs */
+ local_irq_save(flags);
+ for (i = 0; i < len; i += 2) {
+ if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK32))
+ break;
+ gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, buffer[i]);
+ gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, buffer[i + 1]);
+ }
+ local_irq_restore(flags);
+}
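
For reference, marshal_event() expects its caller to supply an even-length array of interleaved (key, value) integers. A minimal sketch of a counter source's read() callback producing such an array (example_key_a/b and the read_counter_*() helpers are hypothetical placeholders, not part of this patch):

static int example_pairs[4];

/* Hypothetical gi->read() implementation: returns the number of ints
 * written, always even, laid out as (key, value) pairs. */
static int example_read(int **buffer, bool sched_switch)
{
	example_pairs[0] = example_key_a;    /* key of the first counter */
	example_pairs[1] = read_counter_a(); /* its sampled value */
	example_pairs[2] = example_key_b;
	example_pairs[3] = read_counter_b();
	*buffer = example_pairs;
	return 4; /* two (key, value) pairs */
}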
+
+static void marshal_event64(int len, long long *buffer64)
+{
+ unsigned long i, flags, cpu = get_physical_cpu();
+
+ if (len <= 0)
+ return;
+
+ /* length must be even since all data is a (key, value) pair */
+ if (len & 0x1) {
+ pr_err("gator: invalid counter data detected and discarded\n");
+ return;
+ }
+
+ /* events must be written in key,value pairs */
+ local_irq_save(flags);
+ for (i = 0; i < len; i += 2) {
+ if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK64))
+ break;
+ gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, buffer64[i]);
+ gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, buffer64[i + 1]);
+ }
+ local_irq_restore(flags);
+}
+
+static void __maybe_unused marshal_event_single(int core, int key, int value)
+{
+ unsigned long flags, cpu;
+ u64 time;
+
+ local_irq_save(flags);
+ cpu = get_physical_cpu();
+ time = gator_get_time();
+ if (buffer_check_space(cpu, COUNTER_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
+ gator_buffer_write_packed_int64(cpu, COUNTER_BUF, time);
+ gator_buffer_write_packed_int(cpu, COUNTER_BUF, core);
+ gator_buffer_write_packed_int(cpu, COUNTER_BUF, key);
+ gator_buffer_write_packed_int(cpu, COUNTER_BUF, value);
+ }
+ local_irq_restore(flags);
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, COUNTER_BUF, time);
+}
+
+static void __maybe_unused marshal_event_single64(int core, int key, long long value)
+{
+ unsigned long flags, cpu;
+ u64 time;
+
+ local_irq_save(flags);
+ cpu = get_physical_cpu();
+ time = gator_get_time();
+ if (buffer_check_space(cpu, COUNTER_BUF, 2 * MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) {
+ gator_buffer_write_packed_int64(cpu, COUNTER_BUF, time);
+ gator_buffer_write_packed_int(cpu, COUNTER_BUF, core);
+ gator_buffer_write_packed_int(cpu, COUNTER_BUF, key);
+ gator_buffer_write_packed_int64(cpu, COUNTER_BUF, value);
+ }
+ local_irq_restore(flags);
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, COUNTER_BUF, time);
+}
+
+static void marshal_sched_trace_switch(int pid, int state)
+{
+ unsigned long cpu = get_physical_cpu(), flags;
+ u64 time;
+
+ if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])
+ return;
+
+ local_irq_save(flags);
+ time = gator_get_time();
+ if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_SWITCH);
+ gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, state);
+ }
+ local_irq_restore(flags);
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, SCHED_TRACE_BUF, time);
+}
+
+static void marshal_sched_trace_exit(int tgid, int pid)
+{
+ unsigned long cpu = get_physical_cpu(), flags;
+ u64 time;
+
+ if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])
+ return;
+
+ local_irq_save(flags);
+ time = gator_get_time();
+ if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) {
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_EXIT);
+ gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
+ }
+ local_irq_restore(flags);
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, SCHED_TRACE_BUF, time);
+}
+
+#if GATOR_CPU_FREQ_SUPPORT
+static void marshal_idle(int core, int state)
+{
+ unsigned long flags, cpu;
+ u64 time;
+
+ local_irq_save(flags);
+ cpu = get_physical_cpu();
+ time = gator_get_time();
+ if (buffer_check_space(cpu, IDLE_BUF, MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) {
+ gator_buffer_write_packed_int(cpu, IDLE_BUF, state);
+ gator_buffer_write_packed_int64(cpu, IDLE_BUF, time);
+ gator_buffer_write_packed_int(cpu, IDLE_BUF, core);
+ }
+ local_irq_restore(flags);
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, IDLE_BUF, time);
+}
+#endif
+
+#if defined(__arm__) || defined(__aarch64__)
+static void marshal_core_name(const int core, const int cpuid, const char *name)
+{
+ int cpu = get_physical_cpu();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (buffer_check_space(cpu, SUMMARY_BUF, MAXSIZE_PACK32 + MAXSIZE_CORE_NAME)) {
+ gator_buffer_write_packed_int(cpu, SUMMARY_BUF, MESSAGE_CORE_NAME);
+ gator_buffer_write_packed_int(cpu, SUMMARY_BUF, core);
+ gator_buffer_write_packed_int(cpu, SUMMARY_BUF, cpuid);
+ gator_buffer_write_string(cpu, SUMMARY_BUF, name);
+ }
+ /* Commit core names now so that they can show up in live */
+ local_irq_restore(flags);
+ gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
+}
+#endif
+
+static void marshal_activity_switch(int core, int key, int activity, int pid, int state)
+{
+ unsigned long cpu = get_physical_cpu(), flags;
+ u64 time;
+
+ if (!per_cpu(gator_buffer, cpu)[ACTIVITY_BUF])
+ return;
+
+ local_irq_save(flags);
+ time = gator_get_time();
+ if (buffer_check_space(cpu, ACTIVITY_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+ gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, MESSAGE_SWITCH);
+ gator_buffer_write_packed_int64(cpu, ACTIVITY_BUF, time);
+ gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, core);
+ gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, key);
+ gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, activity);
+ gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, pid);
+ gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, state);
+ }
+ local_irq_restore(flags);
+ /* Check and commit; commit is set to occur once buffer is 3/4 full */
+ buffer_check(cpu, ACTIVITY_BUF, time);
+}
+
+void gator_marshal_activity_switch(int core, int key, int activity, int pid)
+{
+ /* state is reserved for cpu use only */
+ marshal_activity_switch(core, key, activity, pid, 0);
+}
diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c
new file mode 100644
index 000000000000..5de9152e365a
--- /dev/null
+++ b/drivers/gator/gator_trace_gpu.c
@@ -0,0 +1,321 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+
+#ifdef MALI_SUPPORT
+#ifdef MALI_DIR_MIDGARD
+/* New DDK directory structure with kernel/drivers/gpu/arm/midgard */
+#include "mali_linux_trace.h"
+#else
+/* Old DDK directory structure with kernel/drivers/gpu/arm/t6xx */
+#include "linux/mali_linux_trace.h"
+#endif
+#endif
+
+/*
+ * Taken from MALI_PROFILING_EVENT_TYPE_* items in Mali DDK.
+ */
+#define EVENT_TYPE_SINGLE 0
+#define EVENT_TYPE_START 1
+#define EVENT_TYPE_STOP 2
+#define EVENT_TYPE_SUSPEND 3
+#define EVENT_TYPE_RESUME 4
+
+/* Note whether tracepoints have been registered */
+static int mali_timeline_trace_registered;
+static int mali_job_slots_trace_registered;
+
+enum {
+ GPU_UNIT_NONE = 0,
+ GPU_UNIT_VP,
+ GPU_UNIT_FP,
+ GPU_UNIT_CL,
+ NUMBER_OF_GPU_UNITS
+};
+
+#if defined(MALI_SUPPORT)
+
+struct mali_activity {
+ int core;
+ int key;
+ int count;
+ int last_activity;
+ int last_pid;
+};
+
+#define NUMBER_OF_GPU_CORES 16
+static struct mali_activity mali_activities[NUMBER_OF_GPU_UNITS*NUMBER_OF_GPU_CORES];
+static DEFINE_SPINLOCK(mali_activities_lock);
+
+/* Only one event should be running on a unit and core at a time (i.e.,
+ * a start event can only be followed by a stop and vice versa), but
+ * because the kernel only knows when a job is enqueued and not
+ * started, it is possible to observe start1, start2, stop1, stop2.
+ * Change this back into start1, stop1, start2, stop2 by queueing up
+ * start2 and releasing it when stop1 is received.
+ */
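
A minimal standalone model of that reordering, assuming a single unit/core pair; emit_switch() is a hypothetical stand-in for gator_marshal_activity_switch(), and the queue is depth one because only the most recent pending start ever needs to be retained:

struct pending_unit {
	int count;         /* outstanding starts on this unit/core */
	int last_activity; /* most recently enqueued start */
	int last_pid;
};

static void unit_start(struct pending_unit *u, int activity, int pid)
{
	if (u->count++ == 0) {
		emit_switch(activity, pid);  /* first start is emitted at once */
	} else {
		u->last_activity = activity; /* later starts are queued */
		u->last_pid = pid;
	}
}

static void unit_stop(struct pending_unit *u)
{
	if (u->count == 0)
		return;            /* stop without a matching start */
	emit_switch(0, 0);         /* terminate the running activity */
	if (--u->count)
		emit_switch(u->last_activity, u->last_pid); /* release the queued start */
}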
+
+static int mali_activity_index(int core, int key)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mali_activities); ++i) {
+ if ((mali_activities[i].core == core) && (mali_activities[i].key == key))
+ break;
+ if ((mali_activities[i].core == 0) && (mali_activities[i].key == 0)) {
+ mali_activities[i].core = core;
+ mali_activities[i].key = key;
+ break;
+ }
+ }
+ BUG_ON(i >= ARRAY_SIZE(mali_activities));
+
+ return i;
+}
+
+static void mali_activity_enqueue(int core, int key, int activity, int pid)
+{
+ int i;
+ int count;
+
+ spin_lock(&mali_activities_lock);
+ i = mali_activity_index(core, key);
+
+ count = mali_activities[i].count;
+ BUG_ON(count < 0);
+ ++mali_activities[i].count;
+ if (count) {
+ mali_activities[i].last_activity = activity;
+ mali_activities[i].last_pid = pid;
+ }
+ spin_unlock(&mali_activities_lock);
+
+ if (!count)
+ gator_marshal_activity_switch(core, key, activity, pid);
+}
+
+static void mali_activity_stop(int core, int key)
+{
+ int i;
+ int count;
+ int last_activity = 0;
+ int last_pid = 0;
+
+ spin_lock(&mali_activities_lock);
+ i = mali_activity_index(core, key);
+
+ if (mali_activities[i].count == 0) {
+ spin_unlock(&mali_activities_lock);
+ return;
+ }
+ --mali_activities[i].count;
+ count = mali_activities[i].count;
+ if (count) {
+ last_activity = mali_activities[i].last_activity;
+ last_pid = mali_activities[i].last_pid;
+ }
+ spin_unlock(&mali_activities_lock);
+
+ gator_marshal_activity_switch(core, key, 0, 0);
+ if (count)
+ gator_marshal_activity_switch(core, key, last_activity, last_pid);
+}
+
+void mali_activity_clear(struct mali_counter mali_activity[], size_t mali_activity_size)
+{
+ int activity;
+ int cores;
+ int core;
+
+ for (activity = 0; activity < mali_activity_size; ++activity) {
+ cores = mali_activity[activity].cores;
+ if (cores < 0)
+ cores = 1;
+ for (core = 0; core < cores; ++core) {
+ if (mali_activity[activity].enabled) {
+ preempt_disable();
+ gator_marshal_activity_switch(core, mali_activity[activity].key, 0, 0);
+ preempt_enable();
+ }
+ }
+ }
+}
+
+#endif
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_MIDGARD)
+#include "gator_events_mali_4xx.h"
+
+/*
+ * Taken from MALI_PROFILING_EVENT_CHANNEL_* in Mali DDK.
+ */
+enum {
+ EVENT_CHANNEL_SOFTWARE = 0,
+ EVENT_CHANNEL_VP0 = 1,
+ EVENT_CHANNEL_FP0 = 5,
+ EVENT_CHANNEL_FP1,
+ EVENT_CHANNEL_FP2,
+ EVENT_CHANNEL_FP3,
+ EVENT_CHANNEL_FP4,
+ EVENT_CHANNEL_FP5,
+ EVENT_CHANNEL_FP6,
+ EVENT_CHANNEL_FP7,
+ EVENT_CHANNEL_GPU = 21
+};
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel
+ */
+enum {
+ EVENT_REASON_SINGLE_GPU_NONE = 0,
+ EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1,
+};
+
+struct mali_counter mali_activity[2];
+
+GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned int d2, unsigned int d3, unsigned int d4))
+{
+ unsigned int component, state;
+
+ /* do as much work as possible before disabling interrupts */
+ component = (event_id >> 16) & 0xFF; /* component is an 8-bit field */
+ state = (event_id >> 24) & 0xF; /* state is a 4-bit field */
+
+ switch (state) {
+ case EVENT_TYPE_START:
+ if (component == EVENT_CHANNEL_VP0) {
+ /* tgid = d0; pid = d1; */
+ if (mali_activity[1].enabled)
+ mali_activity_enqueue(0, mali_activity[1].key, 1, d1);
+ } else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
+ /* tgid = d0; pid = d1; */
+ if (mali_activity[0].enabled)
+ mali_activity_enqueue(component - EVENT_CHANNEL_FP0, mali_activity[0].key, 1, d1);
+ }
+ break;
+
+ case EVENT_TYPE_STOP:
+ if (component == EVENT_CHANNEL_VP0) {
+ if (mali_activity[1].enabled)
+ mali_activity_stop(0, mali_activity[1].key);
+ } else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
+ if (mali_activity[0].enabled)
+ mali_activity_stop(component - EVENT_CHANNEL_FP0, mali_activity[0].key);
+ }
+ break;
+
+ case EVENT_TYPE_SINGLE:
+ if (component == EVENT_CHANNEL_GPU) {
+ unsigned int reason = (event_id & 0xffff);
+
+ if (reason == EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE)
+ gator_events_mali_log_dvfs_event(d0, d1);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+#endif
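
The probes above unpack event_id as a 16-bit reason in bits 0-15, an 8-bit component in bits 16-23 and a 4-bit state in bits 24-27. A pair of hedged helpers making that layout explicit (not part of the patch):

/* Helpers mirroring the field extraction in the probes above */
static inline unsigned int event_component(unsigned int event_id)
{
	return (event_id >> 16) & 0xFF; /* 8-bit component field */
}

static inline unsigned int event_state(unsigned int event_id)
{
	return (event_id >> 24) & 0xF;  /* 4-bit state field */
}

/* e.g. event_id 0x01050000 decodes to component 5 (EVENT_CHANNEL_FP0)
 * and state 1 (EVENT_TYPE_START) */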
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_MIDGARD)
+
+struct mali_counter mali_activity[3];
+
+#if defined(MALI_JOB_SLOTS_EVENT_CHANGED)
+GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id))
+#else
+GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid))
+#endif
+{
+ unsigned int component, state, unit;
+#if !defined(MALI_JOB_SLOTS_EVENT_CHANGED)
+ unsigned char job_id = 0;
+#endif
+
+ component = (event_id >> 16) & 0xFF; /* component is an 8-bit field */
+ state = (event_id >> 24) & 0xF; /* state is a 4-bit field */
+
+ switch (component) {
+ case 0:
+ unit = GPU_UNIT_FP;
+ break;
+ case 1:
+ unit = GPU_UNIT_VP;
+ break;
+ case 2:
+ unit = GPU_UNIT_CL;
+ break;
+ default:
+ unit = GPU_UNIT_NONE;
+ }
+
+ if (unit != GPU_UNIT_NONE) {
+ switch (state) {
+ case EVENT_TYPE_START:
+ if (mali_activity[component].enabled)
+ mali_activity_enqueue(0, mali_activity[component].key, 1, (pid != 0 ? pid : tgid));
+ break;
+ case EVENT_TYPE_STOP:
+ default: /* Some jobs can be soft-stopped, so ensure that this terminates the activity trace. */
+ if (mali_activity[component].enabled)
+ mali_activity_stop(0, mali_activity[component].key);
+ break;
+ }
+ }
+}
+#endif
+
+static int gator_trace_gpu_start(void)
+{
+ /*
+ * Returns nonzero for installation failed
+ * Absence of gpu trace points is not an error
+ */
+
+#if defined(MALI_SUPPORT)
+ memset(&mali_activities, 0, sizeof(mali_activities));
+#endif
+ mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_MIDGARD)
+ mali_activity_clear(mali_activity, ARRAY_SIZE(mali_activity));
+ if (!GATOR_REGISTER_TRACE(mali_timeline_event))
+ mali_timeline_trace_registered = 1;
+#endif
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_MIDGARD)
+ mali_activity_clear(mali_activity, ARRAY_SIZE(mali_activity));
+ if (!GATOR_REGISTER_TRACE(mali_job_slots_event))
+ mali_job_slots_trace_registered = 1;
+#endif
+
+ return 0;
+}
+
+static void gator_trace_gpu_stop(void)
+{
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_MIDGARD)
+ if (mali_timeline_trace_registered)
+ GATOR_UNREGISTER_TRACE(mali_timeline_event);
+#endif
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_MIDGARD)
+ if (mali_job_slots_trace_registered)
+ GATOR_UNREGISTER_TRACE(mali_job_slots_event);
+#endif
+
+ mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
+}
diff --git a/drivers/gator/gator_trace_power.c b/drivers/gator/gator_trace_power.c
new file mode 100644
index 000000000000..46e04b29a187
--- /dev/null
+++ b/drivers/gator/gator_trace_power.c
@@ -0,0 +1,192 @@
+/**
+ * Copyright (C) ARM Limited 2011-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/cpufreq.h>
+#include <trace/events/power.h>
+
+#if defined(__arm__)
+
+#include <asm/mach-types.h>
+
+#define implements_wfi() (!machine_is_omap3_beagle())
+
+#else
+
+#define implements_wfi() false
+
+#endif
+
+/* cpu_frequency and cpu_idle trace points were introduced in Linux
+ * kernel v2.6.38; the now-deprecated power_frequency trace point was
+ * available prior to 2.6.38, but only for x86
+ */
+#if GATOR_CPU_FREQ_SUPPORT
+enum {
+ POWER_CPU_FREQ,
+ POWER_TOTAL
+};
+
+static DEFINE_PER_CPU(ulong, idle_prev_state);
+static ulong power_cpu_enabled[POWER_TOTAL];
+static ulong power_cpu_key[POWER_TOTAL];
+static ulong power_cpu_cores;
+
+static int gator_trace_power_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int cpu;
+ bool found_nonzero_freq = false;
+
+ /* Even if CONFIG_CPU_FREQ is defined, it still may not be
+ * used. Check for non-zero values from cpufreq_quick_get
+ */
+ for_each_online_cpu(cpu) {
+ if (cpufreq_quick_get(cpu) > 0) {
+ found_nonzero_freq = true;
+ break;
+ }
+ }
+
+ if (found_nonzero_freq) {
+ /* cpu_frequency */
+ dir = gatorfs_mkdir(sb, root, "Linux_power_cpu_freq");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &power_cpu_enabled[POWER_CPU_FREQ]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &power_cpu_key[POWER_CPU_FREQ]);
+ }
+
+ return 0;
+}
+
+/* 'cpu' may not equal smp_processor_id(), i.e. this callback may not be running on the core whose freq/idle state is changing */
+GATOR_DEFINE_PROBE(cpu_frequency, TP_PROTO(unsigned int frequency, unsigned int cpu))
+{
+ cpu = lcpu_to_pcpu(cpu);
+ marshal_event_single64(cpu, power_cpu_key[POWER_CPU_FREQ], frequency * 1000L);
+}
+
+GATOR_DEFINE_PROBE(cpu_idle, TP_PROTO(unsigned int state, unsigned int cpu))
+{
+ cpu = lcpu_to_pcpu(cpu);
+
+ if (state == per_cpu(idle_prev_state, cpu))
+ return;
+
+ if (implements_wfi()) {
+ if (state == PWR_EVENT_EXIT) {
+ /* transition from wfi to non-wfi */
+ marshal_idle(cpu, MESSAGE_IDLE_EXIT);
+ } else {
+ /* transition from non-wfi to wfi */
+ marshal_idle(cpu, MESSAGE_IDLE_ENTER);
+ }
+ }
+
+ per_cpu(idle_prev_state, cpu) = state;
+}
+
+static void gator_trace_power_online(void)
+{
+ int pcpu = get_physical_cpu();
+ int lcpu = get_logical_cpu();
+
+ if (power_cpu_enabled[POWER_CPU_FREQ])
+ marshal_event_single64(pcpu, power_cpu_key[POWER_CPU_FREQ], cpufreq_quick_get(lcpu) * 1000L);
+}
+
+static void gator_trace_power_offline(void)
+{
+ /* Set frequency to zero on an offline */
+ int cpu = get_physical_cpu();
+
+ if (power_cpu_enabled[POWER_CPU_FREQ])
+ marshal_event_single(cpu, power_cpu_key[POWER_CPU_FREQ], 0);
+}
+
+static int gator_trace_power_start(void)
+{
+ int cpu;
+
+ /* register tracepoints */
+ if (power_cpu_enabled[POWER_CPU_FREQ])
+ if (GATOR_REGISTER_TRACE(cpu_frequency))
+ goto fail_cpu_frequency_exit;
+
+ /* Always register for cpu_idle for detecting WFI */
+ if (GATOR_REGISTER_TRACE(cpu_idle))
+ goto fail_cpu_idle_exit;
+ pr_debug("gator: registered power event tracepoints\n");
+
+ for_each_present_cpu(cpu) {
+ per_cpu(idle_prev_state, cpu) = 0;
+ }
+
+ return 0;
+
+ /* unregister tracepoints on error */
+fail_cpu_idle_exit:
+ if (power_cpu_enabled[POWER_CPU_FREQ])
+ GATOR_UNREGISTER_TRACE(cpu_frequency);
+fail_cpu_frequency_exit:
+ pr_err("gator: power event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+ return -1;
+}
+
+static void gator_trace_power_stop(void)
+{
+ int i;
+
+ if (power_cpu_enabled[POWER_CPU_FREQ])
+ GATOR_UNREGISTER_TRACE(cpu_frequency);
+ GATOR_UNREGISTER_TRACE(cpu_idle);
+ pr_debug("gator: unregistered power event tracepoints\n");
+
+ for (i = 0; i < POWER_TOTAL; i++)
+ power_cpu_enabled[i] = 0;
+}
+
+static void gator_trace_power_init(void)
+{
+ int i;
+
+ power_cpu_cores = nr_cpu_ids;
+ for (i = 0; i < POWER_TOTAL; i++) {
+ power_cpu_enabled[i] = 0;
+ power_cpu_key[i] = gator_events_get_key();
+ }
+}
+#else
+static int gator_trace_power_create_files(struct super_block *sb, struct dentry *root)
+{
+ return 0;
+}
+
+static void gator_trace_power_online(void)
+{
+}
+
+static void gator_trace_power_offline(void)
+{
+}
+
+static int gator_trace_power_start(void)
+{
+ return 0;
+}
+
+static void gator_trace_power_stop(void)
+{
+}
+
+static void gator_trace_power_init(void)
+{
+}
+#endif
diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c
new file mode 100644
index 000000000000..6d7cbd7348e1
--- /dev/null
+++ b/drivers/gator/gator_trace_sched.c
@@ -0,0 +1,321 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <trace/events/sched.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+#include <trace/events/task.h>
+#endif
+
+#include "gator.h"
+
+#define TASK_MAP_ENTRIES 1024 /* must be a power of 2 */
+#define TASK_MAX_COLLISIONS 2
+
+enum {
+ STATE_WAIT_ON_OTHER = 0,
+ STATE_CONTENTION,
+ STATE_WAIT_ON_IO,
+ CPU_WAIT_TOTAL
+};
+
+static DEFINE_PER_CPU(uint64_t *, taskname_keys);
+static DEFINE_PER_CPU(int, collecting);
+
+/* this array is never read, as the cpu wait charts are derived
+ * counters; the files are needed, nonetheless, to show that these
+ * counters are available
+ */
+static ulong cpu_wait_enabled[CPU_WAIT_TOTAL];
+static ulong sched_cpu_key[CPU_WAIT_TOTAL];
+
+static int sched_trace_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+
+ /* CPU Wait - Contention */
+ dir = gatorfs_mkdir(sb, root, "Linux_cpu_wait_contention");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &cpu_wait_enabled[STATE_CONTENTION]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &sched_cpu_key[STATE_CONTENTION]);
+
+ /* CPU Wait - I/O */
+ dir = gatorfs_mkdir(sb, root, "Linux_cpu_wait_io");
+ if (!dir)
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled", &cpu_wait_enabled[STATE_WAIT_ON_IO]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &sched_cpu_key[STATE_WAIT_ON_IO]);
+
+ return 0;
+}
+
+static void emit_pid_name(const char *comm, struct task_struct *task)
+{
+ bool found = false;
+ char taskcomm[TASK_COMM_LEN + 3];
+ unsigned long x, cpu = get_physical_cpu();
+ uint64_t *keys = &(per_cpu(taskname_keys, cpu)[(task->pid & 0xFF) * TASK_MAX_COLLISIONS]);
+ uint64_t value;
+
+ value = gator_chksum_crc32(comm);
+ value = (value << 32) | (uint32_t)task->pid;
+
+ /* determine if the thread name was emitted already */
+ for (x = 0; x < TASK_MAX_COLLISIONS; x++) {
+ if (keys[x] == value) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* shift values, new value always in front */
+ uint64_t oldv, newv = value;
+
+ for (x = 0; x < TASK_MAX_COLLISIONS; x++) {
+ oldv = keys[x];
+ keys[x] = newv;
+ newv = oldv;
+ }
+
+ /* emit pid names; cannot use get_task_comm, as it is not exported on all kernel versions */
+ if (strlcpy(taskcomm, comm, TASK_COMM_LEN) == TASK_COMM_LEN - 1) {
+ /* append an ellipsis if comm has a length of TASK_COMM_LEN - 1 */
+ strcat(taskcomm, "...");
+ }
+
+ marshal_thread_name(task->pid, taskcomm);
+ }
+}
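
The per-cpu cache above keys each emitted name on a 64-bit value that combines a CRC of the name with the pid, so a rename or pid reuse misses the cache and re-emits the name. A hedged restatement of the key construction:

/* crc32(comm) in the high 32 bits, the pid in the low 32 bits */
static uint64_t taskname_key(const char *comm, pid_t pid)
{
	uint64_t key = gator_chksum_crc32(comm);

	return (key << 32) | (uint32_t)pid;
}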
+
+static void collect_counters(u64 time, struct task_struct *task, bool sched_switch)
+{
+ int *buffer, len, cpu = get_physical_cpu();
+ long long *buffer64;
+ struct gator_interface *gi;
+
+ if (marshal_event_header(time)) {
+ list_for_each_entry(gi, &gator_events, list) {
+ if (gi->read) {
+ len = gi->read(&buffer, sched_switch);
+ marshal_event(len, buffer);
+ } else if (gi->read64) {
+ len = gi->read64(&buffer64);
+ marshal_event64(len, buffer64);
+ }
+ if (gi->read_proc && task != NULL) {
+ len = gi->read_proc(&buffer64, task);
+ marshal_event64(len, buffer64);
+ }
+ }
+ if (cpu == 0)
+ gator_emit_perf_time(time);
+ /* Only check after writing all counters so that time and corresponding counters appear in the same frame */
+ buffer_check(cpu, BLOCK_COUNTER_BUF, time);
+
+ /* Commit buffers on timeout */
+ if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) {
+ static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF, ACTIVITY_BUF };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(buftypes); ++i)
+ gator_commit_buffer(cpu, buftypes[i], time);
+
+ /* spinlocks are no-ops on uniprocessor machines and mutexes do
+ * not work in sched_switch context in RT-Preempt full, so
+ * disable proactive flushing of the annotate frame on
+ * uniprocessor machines.
+ */
+#ifdef CONFIG_SMP
+ /* Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full */
+ if (on_primary_core() && spin_trylock(&annotate_lock)) {
+ gator_commit_buffer(0, ANNOTATE_BUF, time);
+ spin_unlock(&annotate_lock);
+ }
+#endif
+ }
+ }
+}
+
+/* special case used during a suspend of the system */
+static void trace_sched_insert_idle(void)
+{
+ marshal_sched_trace_switch(0, 0);
+}
+
+static void gator_trace_emit_link(struct task_struct *p)
+{
+ int cookie;
+ int cpu = get_physical_cpu();
+
+ cookie = get_exec_cookie(cpu, p);
+ emit_pid_name(p->comm, p);
+
+ marshal_link(cookie, p->tgid, p->pid);
+}
+
+GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
+{
+ gator_trace_emit_link(child);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+GATOR_DEFINE_PROBE(sched_process_exec, TP_PROTO(struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm))
+{
+ gator_trace_emit_link(p);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
+GATOR_DEFINE_PROBE(task_rename, TP_PROTO(struct task_struct *task, char *comm))
+#else
+GATOR_DEFINE_PROBE(task_rename, TP_PROTO(struct task_struct *task, const char *comm))
+#endif
+{
+ emit_pid_name(comm, task);
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
+#else
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
+#endif
+{
+ int state;
+ int cpu = get_physical_cpu();
+
+ per_cpu(in_scheduler_context, cpu) = true;
+
+ /* do as much work as possible before disabling interrupts */
+ if (prev->state == TASK_RUNNING)
+ state = STATE_CONTENTION;
+ else if (prev->in_iowait)
+ state = STATE_WAIT_ON_IO;
+ else
+ state = STATE_WAIT_ON_OTHER;
+
+ per_cpu(collecting, cpu) = 1;
+ collect_counters(gator_get_time(), prev, true);
+ per_cpu(collecting, cpu) = 0;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+ gator_trace_emit_link(next);
+#endif
+ marshal_sched_trace_switch(next->pid, state);
+
+ per_cpu(in_scheduler_context, cpu) = false;
+}
+
+GATOR_DEFINE_PROBE(sched_process_free, TP_PROTO(struct task_struct *p))
+{
+ marshal_sched_trace_exit(p->tgid, p->pid);
+}
+
+static void do_nothing(void *info)
+{
+ /* Intentionally do nothing */
+ (void)info;
+}
+
+static int register_scheduler_tracepoints(void)
+{
+ /* register tracepoints */
+ if (GATOR_REGISTER_TRACE(sched_process_fork))
+ goto fail_sched_process_fork;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ if (GATOR_REGISTER_TRACE(sched_process_exec))
+ goto fail_sched_process_exec;
+ if (GATOR_REGISTER_TRACE(task_rename))
+ goto fail_task_rename;
+#endif
+ if (GATOR_REGISTER_TRACE(sched_switch))
+ goto fail_sched_switch;
+ if (GATOR_REGISTER_TRACE(sched_process_free))
+ goto fail_sched_process_free;
+ pr_debug("gator: registered tracepoints\n");
+
+ /* Now that the scheduler tracepoint is registered, force a context
+ * switch on all cpus to capture what is currently running.
+ */
+ on_each_cpu(do_nothing, NULL, 0);
+
+ return 0;
+
+ /* unregister tracepoints on error */
+fail_sched_process_free:
+ GATOR_UNREGISTER_TRACE(sched_switch);
+fail_sched_switch:
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ GATOR_UNREGISTER_TRACE(task_rename);
+fail_task_rename:
+ GATOR_UNREGISTER_TRACE(sched_process_exec);
+fail_sched_process_exec:
+#endif
+ GATOR_UNREGISTER_TRACE(sched_process_fork);
+fail_sched_process_fork:
+ pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+ return -1;
+}
+
+static void unregister_scheduler_tracepoints(void)
+{
+ GATOR_UNREGISTER_TRACE(sched_process_fork);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ GATOR_UNREGISTER_TRACE(sched_process_exec);
+ GATOR_UNREGISTER_TRACE(task_rename);
+#endif
+ GATOR_UNREGISTER_TRACE(sched_switch);
+ GATOR_UNREGISTER_TRACE(sched_process_free);
+ pr_debug("gator: unregistered tracepoints\n");
+}
+
+static void gator_trace_sched_stop(void)
+{
+ int cpu;
+
+ unregister_scheduler_tracepoints();
+
+ for_each_present_cpu(cpu) {
+ kfree(per_cpu(taskname_keys, cpu));
+ }
+}
+
+static int gator_trace_sched_start(void)
+{
+ int cpu, size;
+ int ret;
+
+ for_each_present_cpu(cpu) {
+ size = TASK_MAP_ENTRIES * TASK_MAX_COLLISIONS * sizeof(uint64_t);
+ per_cpu(taskname_keys, cpu) = kmalloc(size, GFP_KERNEL);
+ if (!per_cpu(taskname_keys, cpu))
+ return -1;
+ memset(per_cpu(taskname_keys, cpu), 0, size);
+ }
+
+ ret = register_scheduler_tracepoints();
+
+ return ret;
+}
+
+static void gator_trace_sched_offline(void)
+{
+ trace_sched_insert_idle();
+}
+
+static void gator_trace_sched_init(void)
+{
+ int i;
+
+ for (i = 0; i < CPU_WAIT_TOTAL; i++) {
+ cpu_wait_enabled[i] = 0;
+ sched_cpu_key[i] = gator_events_get_key();
+ }
+}
diff --git a/drivers/gator/mali/mali_kbase_gator_api.h b/drivers/gator/mali/mali_kbase_gator_api.h
new file mode 100644
index 000000000000..5ed069797e36
--- /dev/null
+++ b/drivers/gator/mali/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to collect hardware counter data from a Mali device.
+ */
+
+/* This define is used when compiling the gator kernel module to select
+ * which DDK API calling convention to use. If not defined (legacy DDK),
+ * gator assumes version 1. The version to DDK release mapping is:
+ * Version 1 API: DDK versions r1px, r2px
+ * Version 2 API: DDK versions r3px, r4px
+ * Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the enable bits for
+ * the counters you want collected. All of the enables can be set for
+ * simplicity in most use cases, but disabling some will minimize the
+ * bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the previously
+ * requested dump has been successful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested, as the dump
+ * may not have been successful. If it returns zero the counter dump was
+ * abandoned because the device was busy (typically because another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ * kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ * In pseudo code you can find all of the counters via this approach (a C
+ * rendering of this loop follows the structure definitions below):
+ *
+ *
+ * hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ * hwcnt_name # pointer to name list
+ *
+ * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ * # Iterate over each 64-counter block in this GPU configuration
+ * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ * hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ * # Skip reserved type blocks - they contain no counters at all
+ * if( type == RESERVED_BLOCK ) {
+ * continue;
+ * }
+ *
+ * size_t name_offset = type * 64;
+ * size_t data_offset = i * 64;
+ *
+ * # Iterate over the names of the counters in this block type
+ * for( j = 0; j < 64; j++) {
+ * const char * name = hwcnt_name[name_offset+j];
+ *
+ * # Skip empty name strings - there is no counter here
+ * if( name[0] == '\0' ) {
+ * continue;
+ * }
+ *
+ * u32 data = hwcnt_data[data_offset+j];
+ *
+ * printk( "COUNTER: %s DATA: %u\n", name, data );
+ * }
+ * }
+ *
+ *
+ * Note that in most implementations you typically want to either SUM or
+ * AVERAGE multiple instances of the same counter if, for example, you have
+ * multiple shader cores or multiple L2 caches. The most sensible view for
+ * analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ * counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling kbase_gator_hwcnt_term_names().
+ * This function must only be called if init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+#if !defined(MALI_TRUE)
+ #define MALI_TRUE ((uint32_t)1)
+#endif
+
+#if !defined(MALI_FALSE)
+ #define MALI_FALSE ((uint32_t)0)
+#endif
+
+enum hwc_type {
+ JM_BLOCK = 0,
+ TILER_BLOCK,
+ SHADER_BLOCK,
+ MMU_L2_BLOCK,
+ RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+
+ /* Passed from Gator to kbase */
+
+ /* the bitmask of enabled hardware counters for each counter block */
+ uint16_t bitmask[4];
+
+ /* Passed from kbase to Gator */
+
+ /* ptr to counter dump memory */
+ void *kernel_dump_buffer;
+
+ /* size of counter dump memory */
+ uint32_t size;
+
+ /* the ID of the Mali device */
+ uint32_t gpu_id;
+
+ /* the number of shader cores in the GPU */
+ uint32_t nr_cores;
+
+ /* the number of core groups */
+ uint32_t nr_core_groups;
+
+ /* the memory layout of the performance counters */
+ enum hwc_type *hwc_layout;
+
+ /* the total number of hardware counter blocks */
+ uint32_t nr_hwc_blocks;
+};
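
With the block types and info structure now declared, the pseudo code from the usage notes above can be rendered as real C. A sketch only; printk() is carried over from the pseudo code and names is the table returned by kbase_gator_hwcnt_init_names():

static void dump_all_counters(const struct kbase_gator_hwcnt_info *info,
			      const char * const *names)
{
	const uint32_t *data = (const uint32_t *)info->kernel_dump_buffer;
	uint32_t i, j;

	/* Iterate over each 64-counter block in this GPU configuration */
	for (i = 0; i < info->nr_hwc_blocks; i++) {
		enum hwc_type type = info->hwc_layout[i];

		/* Reserved blocks contain no counters at all */
		if (type == RESERVED_BLOCK)
			continue;

		for (j = 0; j < 64; j++) {
			const char *name = names[type * 64 + j];

			/* An empty name string means there is no counter here */
			if (name[0] == '\0')
				continue;

			printk("COUNTER: %s DATA: %u\n", name, data[i * 64 + j]);
		}
	}
}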
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ * specific information that will be returned to Gator. On entry Gator must have populated the
+ * 'bitmask' field with the counters it wishes to enable for each class of counter block.
+ * Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ * the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ * kbase_gator_hwcnt_get_names() for the index values of each counter for the current GPU.
+ *
+ * @return Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
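
A hedged usage sketch for this call, inside a caller's setup path; an all-ones bitmask enables every counter in each block class, the simple case the usage notes above suggest:

struct kbase_gator_hwcnt_info info = {
	.bitmask = { 0xffff, 0xffff, 0xffff, 0xffff } /* enable everything */
};
struct kbase_gator_hwcnt_handles *handles;

handles = kbase_gator_hwcnt_init(&info);
if (handles != NULL) {
	/* info.kernel_dump_buffer, info.size, info.hwc_layout and
	 * info.nr_hwc_blocks have now been populated by the DDK */
}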
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the
+ * Mali specific information that will be returned to Gator.
+ * @param opaque_handles A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles A wrapper structure for kbase structures.
+ * @param[out] success Non-zero on success, zero on failure.
+ *
+ * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ * completed dump may not have dumped successfully, so the caller must test for both
+ * a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles A wrapper structure for kbase structures.
+ *
+ * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_number_of_counters The total number of counter short names in the Mali device's list.
+ *
+ * @return Pointer to an array of strings of length *total_number_of_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_number_of_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
new file mode 100644
index 000000000000..2bc0b037eee6
--- /dev/null
+++ b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
@@ -0,0 +1,159 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __MALI_MJOLLNIR_PROFILING_GATOR_API_H__
+#define __MALI_MJOLLNIR_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/*
+ * The number of processor cores. Update to suit your hardware implementation.
+ */
+#define MAX_NUM_FP_CORES (4)
+#define MAX_NUM_VP_CORES (1)
+#define MAX_NUM_L2_CACHE_CORES (1)
+
+enum counters {
+ /* Timeline activity */
+ ACTIVITY_VP_0 = 0,
+ ACTIVITY_FP_0,
+ ACTIVITY_FP_1,
+ ACTIVITY_FP_2,
+ ACTIVITY_FP_3,
+
+ /* L2 cache counters */
+ COUNTER_L2_0_C0,
+ COUNTER_L2_0_C1,
+
+ /* Vertex processor counters */
+ COUNTER_VP_0_C0,
+ COUNTER_VP_0_C1,
+
+ /* Fragment processor counters */
+ COUNTER_FP_0_C0,
+ COUNTER_FP_0_C1,
+ COUNTER_FP_1_C0,
+ COUNTER_FP_1_C1,
+ COUNTER_FP_2_C0,
+ COUNTER_FP_2_C1,
+ COUNTER_FP_3_C0,
+ COUNTER_FP_3_C1,
+
+ /* EGL Software Counters */
+ COUNTER_EGL_BLIT_TIME,
+
+ /* GLES Software Counters */
+ COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_ARRAYS_CALLS,
+ COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_POINTS,
+ COUNTER_GLES_DRAW_LINES,
+ COUNTER_GLES_DRAW_LINE_LOOP,
+ COUNTER_GLES_DRAW_LINE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLES,
+ COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLE_FAN,
+ COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+ COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+ COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+ COUNTER_GLES_UPLOAD_VBO_TIME,
+ COUNTER_GLES_NUM_FLUSHES,
+ COUNTER_GLES_NUM_VSHADERS_GENERATED,
+ COUNTER_GLES_NUM_FSHADERS_GENERATED,
+ COUNTER_GLES_VSHADER_GEN_TIME,
+ COUNTER_GLES_FSHADER_GEN_TIME,
+ COUNTER_GLES_INPUT_TRIANGLES,
+ COUNTER_GLES_VXCACHE_HIT,
+ COUNTER_GLES_VXCACHE_MISS,
+ COUNTER_GLES_VXCACHE_COLLISION,
+ COUNTER_GLES_CULLED_TRIANGLES,
+ COUNTER_GLES_CULLED_LINES,
+ COUNTER_GLES_BACKFACE_TRIANGLES,
+ COUNTER_GLES_GBCLIP_TRIANGLES,
+ COUNTER_GLES_GBCLIP_LINES,
+ COUNTER_GLES_TRIANGLES_DRAWN,
+ COUNTER_GLES_DRAWCALL_TIME,
+ COUNTER_GLES_TRIANGLES_COUNT,
+ COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+ COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+ COUNTER_GLES_FAN_TRIANGLES_COUNT,
+ COUNTER_GLES_LINES_COUNT,
+ COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+ COUNTER_GLES_STRIP_LINES_COUNT,
+ COUNTER_GLES_LOOP_LINES_COUNT,
+
+ COUNTER_FILMSTRIP,
+ COUNTER_FREQUENCY,
+ COUNTER_VOLTAGE,
+
+ NUMBER_OF_EVENTS
+};
+
+#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT ACTIVITY_FP_3
+
+#define FIRST_HW_COUNTER COUNTER_L2_0_C0
+#define LAST_HW_COUNTER COUNTER_FP_3_C1
+
+#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT
+
+/* Signifies that the system is able to report voltage and frequency numbers. */
+#define DVFS_REPORTED_BY_DDK 1
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+struct _mali_profiling_core_counters {
+ u32 source0;
+ u32 value0;
+ u32 source1;
+ u32 value1;
+};
+
+/*
+ * For compatibility with utgard.
+ */
+struct _mali_profiling_l2_counter_values {
+ struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+};
+
+struct _mali_profiling_mali_version {
+ u32 mali_product_id;
+ u32 mali_version_major;
+ u32 mali_version_minor;
+ u32 num_of_l2_cores;
+ u32 num_of_fp_cores;
+ u32 num_of_vp_cores;
+};
+
+extern void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+extern u32 _mali_profiling_get_l2_counters(struct _mali_profiling_l2_counter_values *values);
+
+/*
+ * List of possible actions allowing DDK to be controlled by Streamline.
+ * The following numbers are used by DDK to control the frame buffer dumping.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MJOLLNIR_PROFILING_GATOR_API_H__ */
diff --git a/drivers/gator/mali/mali_utgard_profiling_gator_api.h b/drivers/gator/mali/mali_utgard_profiling_gator_api.h
new file mode 100644
index 000000000000..d6465312628e
--- /dev/null
+++ b/drivers/gator/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,197 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+/** The list of events supported by the Mali DDK. */
+enum {
+ /* Vertex processor activity */
+ ACTIVITY_VP_0 = 0,
+
+ /* Fragment processor activity */
+ ACTIVITY_FP_0, /* 1 */
+ ACTIVITY_FP_1,
+ ACTIVITY_FP_2,
+ ACTIVITY_FP_3,
+ ACTIVITY_FP_4,
+ ACTIVITY_FP_5,
+ ACTIVITY_FP_6,
+ ACTIVITY_FP_7,
+
+ /* L2 cache counters */
+ COUNTER_L2_0_C0,
+ COUNTER_L2_0_C1,
+ COUNTER_L2_1_C0,
+ COUNTER_L2_1_C1,
+ COUNTER_L2_2_C0,
+ COUNTER_L2_2_C1,
+
+ /* Vertex processor counters */
+ COUNTER_VP_0_C0, /*15*/
+ COUNTER_VP_0_C1,
+
+ /* Fragment processor counters */
+ COUNTER_FP_0_C0,
+ COUNTER_FP_0_C1,
+ COUNTER_FP_1_C0,
+ COUNTER_FP_1_C1,
+ COUNTER_FP_2_C0,
+ COUNTER_FP_2_C1,
+ COUNTER_FP_3_C0,
+ COUNTER_FP_3_C1,
+ COUNTER_FP_4_C0,
+ COUNTER_FP_4_C1,
+ COUNTER_FP_5_C0,
+ COUNTER_FP_5_C1,
+ COUNTER_FP_6_C0,
+ COUNTER_FP_6_C1,
+ COUNTER_FP_7_C0,
+ COUNTER_FP_7_C1, /* 32 */
+
+ /*
+ * If more hardware counters are added, the _mali_osk_hw_counter_table
+ * below should also be updated.
+ */
+
+ /* EGL software counters */
+ COUNTER_EGL_BLIT_TIME,
+
+ /* GLES software counters */
+ COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_ARRAYS_CALLS,
+ COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_POINTS,
+ COUNTER_GLES_DRAW_LINES,
+ COUNTER_GLES_DRAW_LINE_LOOP,
+ COUNTER_GLES_DRAW_LINE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLES,
+ COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLE_FAN,
+ COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+ COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+ COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+ COUNTER_GLES_UPLOAD_VBO_TIME,
+ COUNTER_GLES_NUM_FLUSHES,
+ COUNTER_GLES_NUM_VSHADERS_GENERATED,
+ COUNTER_GLES_NUM_FSHADERS_GENERATED,
+ COUNTER_GLES_VSHADER_GEN_TIME,
+ COUNTER_GLES_FSHADER_GEN_TIME,
+ COUNTER_GLES_INPUT_TRIANGLES,
+ COUNTER_GLES_VXCACHE_HIT,
+ COUNTER_GLES_VXCACHE_MISS,
+ COUNTER_GLES_VXCACHE_COLLISION,
+ COUNTER_GLES_CULLED_TRIANGLES,
+ COUNTER_GLES_CULLED_LINES,
+ COUNTER_GLES_BACKFACE_TRIANGLES,
+ COUNTER_GLES_GBCLIP_TRIANGLES,
+ COUNTER_GLES_GBCLIP_LINES,
+ COUNTER_GLES_TRIANGLES_DRAWN,
+ COUNTER_GLES_DRAWCALL_TIME,
+ COUNTER_GLES_TRIANGLES_COUNT,
+ COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+ COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+ COUNTER_GLES_FAN_TRIANGLES_COUNT,
+ COUNTER_GLES_LINES_COUNT,
+ COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+ COUNTER_GLES_STRIP_LINES_COUNT,
+ COUNTER_GLES_LOOP_LINES_COUNT,
+
+ /* Framebuffer capture pseudo-counter */
+ COUNTER_FILMSTRIP,
+
+ NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER COUNTER_L2_0_C0
+#define LAST_HW_COUNTER COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT
+
+#define FIRST_SPECIAL_COUNTER COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER COUNTER_FILMSTRIP
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+struct _mali_profiling_core_counters {
+ u32 source0;
+ u32 value0;
+ u32 source1;
+ u32 value1;
+};
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+struct _mali_profiling_l2_counter_values {
+ struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+};
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+struct _mali_profiling_mali_version {
+ u32 mali_product_id;
+ u32 mali_version_major;
+ u32 mali_version_minor;
+ u32 num_of_l2_cores;
+ u32 num_of_fp_cores;
+ u32 num_of_vp_cores;
+};
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(struct _mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */
diff --git a/drivers/gator/mali_midgard.mk b/drivers/gator/mali_midgard.mk
new file mode 100644
index 000000000000..1b784d5c3d58
--- /dev/null
+++ b/drivers/gator/mali_midgard.mk
@@ -0,0 +1,39 @@
+# Defines for Mali-Midgard driver
+EXTRA_CFLAGS += -DMALI_USE_UMP=1 \
+ -DMALI_LICENSE_IS_GPL=1 \
+ -DMALI_BASE_TRACK_MEMLEAK=0 \
+ -DMALI_DEBUG=0 \
+ -DMALI_ERROR_INJECT_ON=0 \
+ -DMALI_CUSTOMER_RELEASE=1 \
+ -DMALI_UNIT_TEST=0 \
+ -DMALI_BACKEND_KERNEL=1 \
+ -DMALI_NO_MALI=0
+
+DDK_DIR ?= .
+ifneq ($(wildcard $(DDK_DIR)/drivers/gpu/arm/t6xx),)
+KBASE_DIR = $(DDK_DIR)/drivers/gpu/arm/t6xx/kbase
+OSK_DIR = $(DDK_DIR)/drivers/gpu/arm/t6xx/kbase/osk
+endif
+
+ifneq ($(wildcard $(DDK_DIR)/drivers/gpu/arm/midgard),)
+KBASE_DIR = $(DDK_DIR)/drivers/gpu/arm/midgard
+OSK_DIR = $(DDK_DIR)/drivers/gpu/arm/midgard/osk
+EXTRA_CFLAGS += -DMALI_DIR_MIDGARD=1
+endif
+
+ifneq ($(wildcard $(DDK_DIR)/drivers/gpu/arm/midgard/mali_kbase_gator_api.h),)
+EXTRA_CFLAGS += -DMALI_SIMPLE_API=1
+endif
+
+UMP_DIR = $(DDK_DIR)/include/linux
+
+# Include directories in the DDK
+EXTRA_CFLAGS += -I$(KBASE_DIR)/ \
+ -I$(KBASE_DIR)/.. \
+ -I$(OSK_DIR)/.. \
+ -I$(UMP_DIR)/.. \
+ -I$(DDK_DIR)/include \
+ -I$(KBASE_DIR)/osk/src/linux/include \
+ -I$(KBASE_DIR)/platform_dummy \
+ -I$(KBASE_DIR)/src
+
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index c03b490bba81..1d3600aa4ca5 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -27,6 +27,8 @@
#include <linux/hwmon-sysfs.h>
#include <linux/err.h>
#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/thermal.h>
#include "lm75.h"
@@ -70,6 +72,7 @@ static const u8 LM75_REG_TEMP[3] = {
/* Each client has this additional data */
struct lm75_data {
struct device *hwmon_dev;
+ struct thermal_zone_device *tz;
struct mutex update_lock;
u8 orig_conf;
u8 resolution; /* In bits, between 9 and 12 */
@@ -90,22 +93,36 @@ static struct lm75_data *lm75_update_device(struct device *dev);
/*-----------------------------------------------------------------------*/
+static inline long lm75_reg_to_mc(s16 temp, u8 resolution)
+{
+ return ((temp >> (16 - resolution)) * 1000) >> (resolution - 8);
+}
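
A worked example of the helper above (hedged arithmetic, not part of the patch): at the default 9-bit resolution, a register value of 0x3200 shifts right by 16 - 9 = 7 to the raw step count 100; scaling by 1000 and then shifting right by 9 - 8 = 1 yields 50000 millidegrees, i.e. 50.0 degrees C at the LM75's 0.5 degree/LSB step.

long mc = lm75_reg_to_mc(0x3200, 9); /* ((0x3200 >> 7) * 1000) >> 1 == 50000 */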
+
/* sysfs attributes for hwmon */
+static int lm75_read_temp(void *dev, long *temp)
+{
+ struct lm75_data *data = lm75_update_device(dev);
+
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ *temp = lm75_reg_to_mc(data->temp[0], data->resolution);
+
+ return 0;
+}
+
static ssize_t show_temp(struct device *dev, struct device_attribute *da,
char *buf)
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
struct lm75_data *data = lm75_update_device(dev);
- long temp;
if (IS_ERR(data))
return PTR_ERR(data);
- temp = ((data->temp[attr->index] >> (16 - data->resolution)) * 1000)
- >> (data->resolution - 8);
-
- return sprintf(buf, "%ld\n", temp);
+ return sprintf(buf, "%ld\n", lm75_reg_to_mc(data->temp[attr->index],
+ data->resolution));
}
static ssize_t set_temp(struct device *dev, struct device_attribute *da,
@@ -271,6 +288,13 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
goto exit_remove;
}
+ data->tz = thermal_zone_of_sensor_register(&client->dev,
+ 0,
+ &client->dev,
+ lm75_read_temp, NULL);
+ if (IS_ERR(data->tz))
+ data->tz = NULL;
+
dev_info(&client->dev, "%s: sensor '%s'\n",
dev_name(data->hwmon_dev), client->name);
@@ -285,6 +309,7 @@ static int lm75_remove(struct i2c_client *client)
{
struct lm75_data *data = i2c_get_clientdata(client);
+ thermal_zone_of_sensor_unregister(&client->dev, data->tz);
hwmon_device_unregister(data->hwmon_dev);
sysfs_remove_group(&client->dev.kobj, &lm75_group);
lm75_write_value(client, LM75_REG_CONF, data->orig_conf);
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index d7b47abf37fe..6748b4583e7b 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -27,6 +27,8 @@
#include <linux/mutex.h>
#include <linux/device.h>
#include <linux/jiffies.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
#define DRIVER_NAME "tmp102"
@@ -50,6 +52,7 @@
struct tmp102 {
struct device *hwmon_dev;
+ struct thermal_zone_device *tz;
struct mutex lock;
u16 config_orig;
unsigned long last_update;
@@ -93,6 +96,15 @@ static struct tmp102 *tmp102_update_device(struct i2c_client *client)
return tmp102;
}
+static int tmp102_read_temp(void *dev, long *temp)
+{
+ struct tmp102 *tmp102 = tmp102_update_device(to_i2c_client(dev));
+
+ *temp = tmp102->temp[0];
+
+ return 0;
+}
+
static ssize_t tmp102_show_temp(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -204,6 +216,12 @@ static int tmp102_probe(struct i2c_client *client,
goto fail_remove_sysfs;
}
+ tmp102->tz = thermal_zone_of_sensor_register(&client->dev, 0,
+ &client->dev,
+ tmp102_read_temp, NULL);
+ if (IS_ERR(tmp102->tz))
+ tmp102->tz = NULL;
+
dev_info(&client->dev, "initialized\n");
return 0;
@@ -220,6 +238,7 @@ static int tmp102_remove(struct i2c_client *client)
{
struct tmp102 *tmp102 = i2c_get_clientdata(client);
+ thermal_zone_of_sensor_unregister(&client->dev, tmp102->tz);
hwmon_device_unregister(tmp102->hwmon_dev);
sysfs_remove_group(&client->dev.kobj, &tmp102_attr_group);
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 4a33351c25dc..7709b1dbf6cc 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -10,6 +10,11 @@ config ARM_GIC
config GIC_NON_BANKED
bool
+config ARM_GIC_V3
+ bool
+ select IRQ_DOMAIN
+ select MULTI_IRQ_HANDLER
+
config ARM_VIC
bool
select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index cda4cb5f7327..bf4667b34306 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -9,7 +9,8 @@ obj-$(CONFIG_METAG) += irq-metag-ext.o
obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o
obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o
obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o
-obj-$(CONFIG_ARM_GIC) += irq-gic.o
+obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o
+obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o
obj-$(CONFIG_ARM_VIC) += irq-vic.o
obj-$(CONFIG_SIRF_IRQ) += irq-sirfsoc.o
obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
new file mode 100644
index 000000000000..60ac704d2090
--- /dev/null
+++ b/drivers/irqchip/irq-gic-common.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include "irq-gic-common.h"
+
+void gic_configure_irq(unsigned int irq, unsigned int type,
+ void __iomem *base, void (*sync_access)(void))
+{
+ u32 enablemask = 1 << (irq % 32);
+ u32 enableoff = (irq / 32) * 4;
+ u32 confmask = 0x2 << ((irq % 16) * 2);
+ u32 confoff = (irq / 16) * 4;
+ bool enabled = false;
+ u32 val;
+
+ /*
+ * Read current configuration register, and insert the config
+ * for "irq", depending on "type".
+ */
+ val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
+ if (type == IRQ_TYPE_LEVEL_HIGH)
+ val &= ~confmask;
+ else if (type == IRQ_TYPE_EDGE_RISING)
+ val |= confmask;
+
+ /*
+ * As recommended by the spec, disable the interrupt before changing
+ * the configuration
+ */
+ if (readl_relaxed(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) {
+ writel_relaxed(enablemask, base + GIC_DIST_ENABLE_CLEAR + enableoff);
+ if (sync_access)
+ sync_access();
+ enabled = true;
+ }
+
+ /*
+ * Write back the new configuration, and possibly re-enable
+ * the interrupt.
+ */
+ writel_relaxed(val, base + GIC_DIST_CONFIG + confoff);
+
+ if (enabled)
+ writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
+
+ if (sync_access)
+ sync_access();
+}
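
The mask and offset arithmetic above packs 16 interrupts into each 32-bit config register (two bits per interrupt) and 32 into each enable register (one bit each). Worked through for irq = 50 (hedged arithmetic, not part of the patch):

/* For irq = 50:
 *   enablemask = 1 << (50 % 32)         = 1 << 18
 *   enableoff  = (50 / 32) * 4          = 4    (second GICD_ISENABLERn)
 *   confmask   = 0x2 << ((50 % 16) * 2) = 0x2 << 4
 *   confoff    = (50 / 16) * 4          = 12   (fourth GICD_ICFGRn)
 */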
+
+void __init gic_dist_config(void __iomem *base, int gic_irqs,
+ void (*sync_access)(void))
+{
+ unsigned int i;
+
+ /*
+ * Set all global interrupts to be level triggered, active low.
+ */
+ for (i = 32; i < gic_irqs; i += 16)
+ writel_relaxed(0, base + GIC_DIST_CONFIG + i / 4);
+
+ /*
+ * Set priority on all global interrupts.
+ */
+ for (i = 32; i < gic_irqs; i += 4)
+ writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i);
+
+ /*
+ * Disable all interrupts. Leave the PPIs and SGIs alone
+ * as they are enabled by redistributor registers.
+ */
+ for (i = 32; i < gic_irqs; i += 32)
+ writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i / 8);
+
+ if (sync_access)
+ sync_access();
+}
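+
+/*
+ * Register density recap for the loops above (illustrative):
+ * GIC_DIST_CONFIG uses 2 bits per interrupt (16 per 32-bit word,
+ * hence the "i / 4" offset), GIC_DIST_PRI one byte per interrupt,
+ * and GIC_DIST_ENABLE_CLEAR one bit per interrupt (32 per word,
+ * hence "i / 8").
+ */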
+
+void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
+{
+ int i;
+
+ /*
+ * Deal with the banked PPI and SGI interrupts - disable all
+ * PPI interrupts, ensure all SGI interrupts are enabled.
+ */
+ writel_relaxed(0xffff0000, base + GIC_DIST_ENABLE_CLEAR);
+ writel_relaxed(0x0000ffff, base + GIC_DIST_ENABLE_SET);
+
+ /*
+ * Set priority on PPI and SGI interrupts
+ */
+ for (i = 0; i < 32; i += 4)
+ writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
+
+ if (sync_access)
+ sync_access();
+}
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
new file mode 100644
index 000000000000..b41f02481c3a
--- /dev/null
+++ b/drivers/irqchip/irq-gic-common.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _IRQ_GIC_COMMON_H
+#define _IRQ_GIC_COMMON_H
+
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+
+void gic_configure_irq(unsigned int irq, unsigned int type,
+ void __iomem *base, void (*sync_access)(void));
+void gic_dist_config(void __iomem *base, int gic_irqs,
+ void (*sync_access)(void));
+void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
+
+#endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
new file mode 100644
index 000000000000..57eaa5a0b1e3
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -0,0 +1,692 @@
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/cputype.h>
+#include <asm/exception.h>
+#include <asm/smp_plat.h>
+
+#include "irq-gic-common.h"
+#include "irqchip.h"
+
+struct gic_chip_data {
+ void __iomem *dist_base;
+ void __iomem **redist_base;
+ void __percpu __iomem **rdist;
+ struct irq_domain *domain;
+ u64 redist_stride;
+ u32 redist_regions;
+ unsigned int irq_nr;
+};
+
+static struct gic_chip_data gic_data __read_mostly;
+
+#define gic_data_rdist() (this_cpu_ptr(gic_data.rdist))
+#define gic_data_rdist_rd_base() (*gic_data_rdist())
+#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K)
+
+/* Our default, arbitrary priority value. Linux only uses one anyway. */
+#define DEFAULT_PMR_VALUE 0xf0
+
+static inline unsigned int gic_irq(struct irq_data *d)
+{
+ return d->hwirq;
+}
+
+static inline int gic_irq_in_rdist(struct irq_data *d)
+{
+ return gic_irq(d) < 32;
+}
+
+static inline void __iomem *gic_dist_base(struct irq_data *d)
+{
+ if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */
+ return gic_data_rdist_sgi_base();
+
+ if (d->hwirq <= 1023) /* SPI -> dist_base */
+ return gic_data.dist_base;
+
+ if (d->hwirq >= 8192)
+ BUG(); /* LPI Detected!!! */
+
+ return NULL;
+}
+
+static void gic_do_wait_for_rwp(void __iomem *base)
+{
+ u32 count = 1000000; /* 1s! */
+
+ while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) {
+ count--;
+ if (!count) {
+ pr_err_ratelimited("RWP timeout, gone fishing\n");
+ return;
+ }
+ cpu_relax();
+ udelay(1);
+ }
+}
+
+/* Wait for completion of a distributor change */
+static void gic_dist_wait_for_rwp(void)
+{
+ gic_do_wait_for_rwp(gic_data.dist_base);
+}
+
+/* Wait for completion of a redistributor change */
+static void gic_redist_wait_for_rwp(void)
+{
+ gic_do_wait_for_rwp(gic_data_rdist_rd_base());
+}
+
+/* Low level accessors */
+static u64 gic_read_iar(void)
+{
+ u64 irqstat;
+
+ asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+ return irqstat;
+}
+
+static void gic_write_pmr(u64 val)
+{
+ asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val));
+}
+
+static void gic_write_ctlr(u64 val)
+{
+ asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val));
+ isb();
+}
+
+static void gic_write_grpen1(u64 val)
+{
+ asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val));
+ isb();
+}
+
+static void gic_write_sgi1r(u64 val)
+{
+ asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val));
+}
+
+static void gic_enable_sre(void)
+{
+ u64 val;
+
+ asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
+ val |= ICC_SRE_EL1_SRE;
+ asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val));
+ isb();
+
+ /*
+ * Need to check that the SRE bit has actually been set. If
+ * not, it means that SRE is disabled at EL2. We're going to
+ * die painfully, and there is nothing we can do about it.
+ *
+ * Kindly inform the luser.
+ */
+ asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
+ if (!(val & ICC_SRE_EL1_SRE))
+ pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n");
+}
+
+static void gic_enable_redist(void)
+{
+ void __iomem *rbase;
+ u32 count = 1000000; /* 1s! */
+ u32 val;
+
+ rbase = gic_data_rdist_rd_base();
+
+ /* Wake up this CPU redistributor */
+ val = readl_relaxed(rbase + GICR_WAKER);
+ val &= ~GICR_WAKER_ProcessorSleep;
+ writel_relaxed(val, rbase + GICR_WAKER);
+
+ while (readl_relaxed(rbase + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+ count--;
+ if (!count) {
+ pr_err_ratelimited("redist didn't wake up...\n");
+ return;
+ }
+ cpu_relax();
+ udelay(1);
+ }
+}
+
+/*
+ * Routines to disable, enable, EOI and route interrupts
+ */
+static void gic_poke_irq(struct irq_data *d, u32 offset)
+{
+ u32 mask = 1 << (gic_irq(d) % 32);
+ void (*rwp_wait)(void);
+ void __iomem *base;
+
+ if (gic_irq_in_rdist(d)) {
+ base = gic_data_rdist_sgi_base();
+ rwp_wait = gic_redist_wait_for_rwp;
+ } else {
+ base = gic_data.dist_base;
+ rwp_wait = gic_dist_wait_for_rwp;
+ }
+
+ writel_relaxed(mask, base + offset + (gic_irq(d) / 32) * 4);
+ rwp_wait();
+}
+
+static int gic_peek_irq(struct irq_data *d, u32 offset)
+{
+ u32 mask = 1 << (gic_irq(d) % 32);
+ void __iomem *base;
+
+ if (gic_irq_in_rdist(d))
+ base = gic_data_rdist_sgi_base();
+ else
+ base = gic_data.dist_base;
+
+ return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask);
+}
+
+static void gic_mask_irq(struct irq_data *d)
+{
+ gic_poke_irq(d, GICD_ICENABLER);
+}
+
+static void gic_unmask_irq(struct irq_data *d)
+{
+ gic_poke_irq(d, GICD_ISENABLER);
+}
+
+static void gic_eoi_irq(struct irq_data *d)
+{
+ gic_write_eoir(gic_irq(d));
+}
+
+static int gic_set_type(struct irq_data *d, unsigned int type)
+{
+ unsigned int irq = gic_irq(d);
+ void (*rwp_wait)(void);
+ void __iomem *base;
+
+ /* Interrupt configuration for SGIs can't be changed */
+ if (irq < 16)
+ return -EINVAL;
+
+ if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
+ return -EINVAL;
+
+ if (gic_irq_in_rdist(d)) {
+ base = gic_data_rdist_sgi_base();
+ rwp_wait = gic_redist_wait_for_rwp;
+ } else {
+ base = gic_data.dist_base;
+ rwp_wait = gic_dist_wait_for_rwp;
+ }
+
+ gic_configure_irq(irq, type, base, rwp_wait);
+
+ return 0;
+}
+
+static u64 gic_mpidr_to_affinity(u64 mpidr)
+{
+ u64 aff;
+
+ aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 0));
+
+ return aff;
+}
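+
+/*
+ * Worked example (illustrative): an MPIDR of 0x0201 (Aff1 = 2,
+ * Aff0 = 1) yields the GICD_IROUTER value 0x201; Aff2 and Aff3
+ * would land in bits [23:16] and [39:32] respectively.
+ */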
+
+static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+{
+ u64 irqnr;
+
+ do {
+ irqnr = gic_read_iar();
+
+ if (likely(irqnr > 15 && irqnr < 1020)) {
+ u64 irq = irq_find_mapping(gic_data.domain, irqnr);
+ if (likely(irq)) {
+ handle_IRQ(irq, regs);
+ continue;
+ }
+
+ WARN_ONCE(true, "Unexpected SPI received!\n");
+ gic_write_eoir(irqnr);
+ }
+ if (irqnr < 16) {
+ gic_write_eoir(irqnr);
+#ifdef CONFIG_SMP
+ handle_IPI(irqnr, regs);
+#else
+ WARN_ONCE(true, "Unexpected SGI received!\n");
+#endif
+ continue;
+ }
+ } while (irqnr != ICC_IAR1_EL1_SPURIOUS);
+}
+
+static void __init gic_dist_init(void)
+{
+ unsigned int i;
+ u64 affinity;
+ void __iomem *base = gic_data.dist_base;
+
+ /* Disable the distributor */
+ writel_relaxed(0, base + GICD_CTLR);
+ gic_dist_wait_for_rwp();
+
+ gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp);
+
+ /* Enable distributor with ARE, Group1 */
+ writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1,
+ base + GICD_CTLR);
+
+ /*
+ * Set all global interrupts to the boot CPU only. ARE must be
+ * enabled.
+ */
+ affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id()));
+ for (i = 32; i < gic_data.irq_nr; i++)
+ writeq_relaxed(affinity, base + GICD_IROUTER + i * 8);
+}
+
+static int gic_populate_rdist(void)
+{
+ u64 mpidr = cpu_logical_map(smp_processor_id());
+ u64 typer;
+ u32 aff;
+ int i;
+
+ /*
+ * Convert affinity to a 32bit value that can be matched to
+ * GICR_TYPER bits [63:32].
+ */
+ aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
+ MPIDR_AFFINITY_LEVEL(mpidr, 0));
+
+ for (i = 0; i < gic_data.redist_regions; i++) {
+ void __iomem *ptr = gic_data.redist_base[i];
+ u32 reg;
+
+ reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK;
+ if (reg != GIC_PIDR2_ARCH_GICv3 &&
+ reg != GIC_PIDR2_ARCH_GICv4) { /* We're in trouble... */
+ pr_warn("No redistributor present @%p\n", ptr);
+ break;
+ }
+
+ do {
+ typer = readq_relaxed(ptr + GICR_TYPER);
+ if ((typer >> 32) == aff) {
+ gic_data_rdist_rd_base() = ptr;
+ pr_info("CPU%d: found redistributor %llx @%p\n",
+ smp_processor_id(),
+ (unsigned long long)mpidr, ptr);
+ return 0;
+ }
+
+ if (gic_data.redist_stride) {
+ ptr += gic_data.redist_stride;
+ } else {
+ ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */
+ if (typer & GICR_TYPER_VLPIS)
+ ptr += SZ_64K * 2; /* Skip VLPI_base + reserved page */
+ }
+ } while (!(typer & GICR_TYPER_LAST));
+ }
+
+ /* We couldn't even deal with ourselves... */
+ WARN(true, "CPU%d: mpidr %llx has no re-distributor!\n",
+ smp_processor_id(), (unsigned long long)mpidr);
+ return -ENODEV;
+}
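+
+/*
+ * Redistributor layout assumed by the walk above (illustrative):
+ * each CPU owns a 64K RD_base frame followed by a 64K SGI_base
+ * frame, and GICv4 parts (GICR_TYPER.VLPIS set) add two further
+ * 64K frames (VLPI_base plus a reserved page). That is why the
+ * stride-less walk advances by either 128K or 256K per CPU.
+ */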
+
+static void gic_cpu_init(void)
+{
+ void __iomem *rbase;
+
+ /* Register ourselves with the rest of the world */
+ if (gic_populate_rdist())
+ return;
+
+ gic_enable_redist();
+
+ rbase = gic_data_rdist_sgi_base();
+
+ gic_cpu_config(rbase, gic_redist_wait_for_rwp);
+
+ /* Enable system registers */
+ gic_enable_sre();
+
+ /* Set priority mask register */
+ gic_write_pmr(DEFAULT_PMR_VALUE);
+
+ /* EOI deactivates interrupt too (mode 0) */
+ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+
+ /* ... and let's hit the road... */
+ gic_write_grpen1(1);
+}
+
+#ifdef CONFIG_SMP
+static int gic_secondary_init(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
+ gic_cpu_init();
+ return NOTIFY_OK;
+}
+
+/*
+ * Notifier for enabling the GIC CPU interface. Set an arbitrarily high
+ * priority because the GIC needs to be up before the ARM generic timers.
+ */
+static struct notifier_block gic_cpu_notifier = {
+ .notifier_call = gic_secondary_init,
+ .priority = 100,
+};
+
+static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
+ u64 cluster_id)
+{
+ int cpu = *base_cpu;
+ u64 mpidr = cpu_logical_map(cpu);
+ u16 tlist = 0;
+
+ while (cpu < nr_cpu_ids) {
+ /*
+ * If we ever get a cluster of more than 16 CPUs, just
+ * scream and skip that CPU.
+ */
+ if (WARN_ON((mpidr & 0xff) >= 16))
+ goto out;
+
+ tlist |= 1 << (mpidr & 0xf);
+
+ cpu = cpumask_next(cpu, mask);
+ if (cpu == nr_cpu_ids)
+ goto out;
+
+ mpidr = cpu_logical_map(cpu);
+
+ if (cluster_id != (mpidr & ~0xffUL)) {
+ cpu--;
+ goto out;
+ }
+ }
+out:
+ *base_cpu = cpu;
+ return tlist;
+}
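+
+/*
+ * Worked example (illustrative): for a mask covering CPUs with
+ * MPIDRs 0x100, 0x101 and 0x103 (one cluster, Aff0 = 0, 1 and 3),
+ * a single call returns tlist = 0b1011 and advances *base_cpu past
+ * the cluster, so the caller issues one SGI register write per
+ * cluster instead of one per CPU.
+ */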
+
+static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
+{
+ u64 val;
+
+ val = (MPIDR_AFFINITY_LEVEL(cluster_id, 3) << 48 |
+ MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32 |
+ irq << 24 |
+ MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16 |
+ tlist);
+
+ pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
+ gic_write_sgi1r(val);
+}
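+
+/*
+ * Worked example (illustrative): cluster_id = 0x100 (Aff1 = 1),
+ * tlist = 0x3 and irq = 5 pack into the ICC_SGI1R_EL1 value
+ * (5 << 24) | (1 << 16) | 0x3, i.e. SGI 5 sent to Aff0 targets
+ * 0 and 1 of cluster 1.
+ */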
+
+static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
+{
+ int cpu;
+
+ if (WARN_ON(irq >= 16))
+ return;
+
+ /*
+ * Ensure that stores to Normal memory are visible to the
+ * other CPUs before issuing the IPI.
+ */
+ smp_wmb();
+
+ for_each_cpu_mask(cpu, *mask) {
+ u64 cluster_id = cpu_logical_map(cpu) & ~0xffUL;
+ u16 tlist;
+
+ tlist = gic_compute_target_list(&cpu, mask, cluster_id);
+ gic_send_sgi(cluster_id, tlist, irq);
+ }
+
+ /* Force the above writes to ICC_SGI1R_EL1 to be executed */
+ isb();
+}
+
+static void gic_smp_init(void)
+{
+ set_smp_cross_call(gic_raise_softirq);
+ register_cpu_notifier(&gic_cpu_notifier);
+}
+
+static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
+ bool force)
+{
+ unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask);
+ void __iomem *reg;
+ int enabled;
+ u64 val;
+
+ if (gic_irq_in_rdist(d))
+ return -EINVAL;
+
+ /* If interrupt was enabled, disable it first */
+ enabled = gic_peek_irq(d, GICD_ISENABLER);
+ if (enabled)
+ gic_mask_irq(d);
+
+ reg = gic_dist_base(d) + GICD_IROUTER + (gic_irq(d) * 8);
+ val = gic_mpidr_to_affinity(cpu_logical_map(cpu));
+
+ writeq_relaxed(val, reg);
+
+ /*
+ * If the interrupt was enabled, enable it again. Otherwise,
+ * just wait for the distributor to have digested our changes.
+ */
+ if (enabled)
+ gic_unmask_irq(d);
+ else
+ gic_dist_wait_for_rwp();
+
+ return IRQ_SET_MASK_OK;
+}
+#else
+#define gic_set_affinity NULL
+#define gic_smp_init() do { } while (0)
+#endif
+
+static struct irq_chip gic_chip = {
+ .name = "GICv3",
+ .irq_mask = gic_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_eoi = gic_eoi_irq,
+ .irq_set_type = gic_set_type,
+ .irq_set_affinity = gic_set_affinity,
+};
+
+static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hw)
+{
+ /* SGIs are private to the core kernel */
+ if (hw < 16)
+ return -EPERM;
+ /* PPIs */
+ if (hw < 32) {
+ irq_set_percpu_devid(irq);
+ irq_set_chip_and_handler(irq, &gic_chip,
+ handle_percpu_devid_irq);
+ set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
+ }
+ /* SPIs */
+ if (hw >= 32 && hw < gic_data.irq_nr) {
+ irq_set_chip_and_handler(irq, &gic_chip,
+ handle_fasteoi_irq);
+ set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+ }
+ irq_set_chip_data(irq, d->host_data);
+ return 0;
+}
+
+static int gic_irq_domain_xlate(struct irq_domain *d,
+ struct device_node *controller,
+ const u32 *intspec, unsigned int intsize,
+ unsigned long *out_hwirq, unsigned int *out_type)
+{
+ if (d->of_node != controller)
+ return -EINVAL;
+ if (intsize < 3)
+ return -EINVAL;
+
+ switch(intspec[0]) {
+ case 0: /* SPI */
+ *out_hwirq = intspec[1] + 32;
+ break;
+ case 1: /* PPI */
+ *out_hwirq = intspec[1] + 16;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *out_type = intspec[2] & IRQ_TYPE_SENSE_MASK;
+ return 0;
+}
+
+static const struct irq_domain_ops gic_irq_domain_ops = {
+ .map = gic_irq_domain_map,
+ .xlate = gic_irq_domain_xlate,
+};
+
+static int __init gic_of_init(struct device_node *node, struct device_node *parent)
+{
+ void __iomem *dist_base;
+ void __iomem **redist_base;
+ u64 redist_stride;
+ u32 redist_regions;
+ u32 reg;
+ int gic_irqs;
+ int err;
+ int i;
+
+ dist_base = of_iomap(node, 0);
+ if (!dist_base) {
+ pr_err("%s: unable to map gic dist registers\n",
+ node->full_name);
+ return -ENXIO;
+ }
+
+ reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
+ if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) {
+ pr_err("%s: no distributor detected, giving up\n",
+ node->full_name);
+ err = -ENODEV;
+ goto out_unmap_dist;
+ }
+
+ if (of_property_read_u32(node, "#redistributor-regions", &redist_regions))
+ redist_regions = 1;
+
+ redist_base = kzalloc(sizeof(*redist_base) * redist_regions, GFP_KERNEL);
+ if (!redist_base) {
+ err = -ENOMEM;
+ goto out_unmap_dist;
+ }
+
+ for (i = 0; i < redist_regions; i++) {
+ redist_base[i] = of_iomap(node, 1 + i);
+ if (!redist_base[i]) {
+ pr_err("%s: couldn't map region %d\n",
+ node->full_name, i);
+ err = -ENODEV;
+ goto out_unmap_rdist;
+ }
+ }
+
+ if (of_property_read_u64(node, "redistributor-stride", &redist_stride))
+ redist_stride = 0;
+
+ gic_data.dist_base = dist_base;
+ gic_data.redist_base = redist_base;
+ gic_data.redist_regions = redist_regions;
+ gic_data.redist_stride = redist_stride;
+
+ /*
+ * Find out how many interrupts are supported.
+ * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
+ */
+ gic_irqs = readl_relaxed(gic_data.dist_base + GICD_TYPER) & 0x1f;
+ gic_irqs = (gic_irqs + 1) * 32;
+ if (gic_irqs > 1020)
+ gic_irqs = 1020;
+ gic_data.irq_nr = gic_irqs;
+
+ gic_data.domain = irq_domain_add_tree(node, &gic_irq_domain_ops,
+ &gic_data);
+ gic_data.rdist = alloc_percpu(typeof(*gic_data.rdist));
+
+ if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdist)) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ set_handle_irq(gic_handle_irq);
+
+ gic_smp_init();
+ gic_dist_init();
+ gic_cpu_init();
+
+ return 0;
+
+out_free:
+ if (gic_data.domain)
+ irq_domain_remove(gic_data.domain);
+ free_percpu(gic_data.rdist);
+out_unmap_rdist:
+ for (i = 0; i < redist_regions; i++)
+ if (redist_base[i])
+ iounmap(redist_base[i]);
+ kfree(redist_base);
+out_unmap_dist:
+ iounmap(dist_base);
+ return err;
+}
+
+IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init);
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index c8ee1cb023b8..4cb670e61707 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1,6 +1,4 @@
/*
- * linux/arch/arm/common/gic.c
- *
* Copyright (C) 2002 ARM Limited, All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
@@ -41,17 +39,19 @@
#include <linux/slab.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqchip/arm-gic.h>
+#include <trace/events/arm-ipi.h>
#include <asm/cputype.h>
#include <asm/irq.h>
#include <asm/exception.h>
#include <asm/smp_plat.h>
+#include "irq-gic-common.h"
#include "irqchip.h"
union gic_base {
void __iomem *common_base;
- void __percpu __iomem **percpu_base;
+ void __percpu * __iomem *percpu_base;
};
struct gic_chip_data {
@@ -189,12 +189,6 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
{
void __iomem *base = gic_dist_base(d);
unsigned int gicirq = gic_irq(d);
- u32 enablemask = 1 << (gicirq % 32);
- u32 enableoff = (gicirq / 32) * 4;
- u32 confmask = 0x2 << ((gicirq % 16) * 2);
- u32 confoff = (gicirq / 16) * 4;
- bool enabled = false;
- u32 val;
/* Interrupt configuration for SGIs can't be changed */
if (gicirq < 16)
@@ -208,25 +202,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
if (gic_arch_extn.irq_set_type)
gic_arch_extn.irq_set_type(d, type);
- val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
- if (type == IRQ_TYPE_LEVEL_HIGH)
- val &= ~confmask;
- else if (type == IRQ_TYPE_EDGE_RISING)
- val |= confmask;
-
- /*
- * As recommended by the spec, disable the interrupt before changing
- * the configuration
- */
- if (readl_relaxed(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) {
- writel_relaxed(enablemask, base + GIC_DIST_ENABLE_CLEAR + enableoff);
- enabled = true;
- }
-
- writel_relaxed(val, base + GIC_DIST_CONFIG + confoff);
-
- if (enabled)
- writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
+ gic_configure_irq(gicirq, type, base, NULL);
raw_spin_unlock(&irq_controller_lock);
@@ -247,21 +223,16 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
{
void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
- unsigned int cpu, shift = (gic_irq(d) % 4) * 8;
+ unsigned int shift = (gic_irq(d) % 4) * 8;
+ unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask);
u32 val, mask, bit;
- if (!force)
- cpu = cpumask_any_and(mask_val, cpu_online_mask);
- else
- cpu = cpumask_first(mask_val);
-
if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
return -EINVAL;
+ raw_spin_lock(&irq_controller_lock);
mask = 0xff << shift;
bit = gic_cpu_map[cpu] << shift;
-
- raw_spin_lock(&irq_controller_lock);
val = readl_relaxed(reg) & ~mask;
writel_relaxed(val | bit, reg);
raw_spin_unlock(&irq_controller_lock);
@@ -285,7 +256,7 @@ static int gic_set_wake(struct irq_data *d, unsigned int on)
#define gic_set_wake NULL
#endif
-static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
{
u32 irqstat, irqnr;
struct gic_chip_data *gic = &gic_data[0];
@@ -293,7 +264,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
do {
irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
- irqnr = irqstat & ~0x1c00;
+ irqnr = irqstat & GICC_IAR_INT_ID_MASK;
if (likely(irqnr > 15 && irqnr < 1021)) {
irqnr = irq_find_mapping(gic->domain, irqnr);
@@ -389,12 +360,6 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
writel_relaxed(0, base + GIC_DIST_CTRL);
/*
- * Set all global interrupts to be level triggered, active low.
- */
- for (i = 32; i < gic_irqs; i += 16)
- writel_relaxed(0, base + GIC_DIST_CONFIG + i * 4 / 16);
-
- /*
* Set all global interrupts to this CPU only.
*/
cpumask = gic_get_cpumask(gic);
@@ -403,18 +368,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
for (i = 32; i < gic_irqs; i += 4)
writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4);
- /*
- * Set priority on all global interrupts.
- */
- for (i = 32; i < gic_irqs; i += 4)
- writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
-
- /*
- * Disable all interrupts. Leave the PPI and SGIs alone
- * as these enables are banked registers.
- */
- for (i = 32; i < gic_irqs; i += 32)
- writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32);
+ gic_dist_config(base, gic_irqs, NULL);
writel_relaxed(1, base + GIC_DIST_CTRL);
}
@@ -441,23 +395,18 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
if (i != cpu)
gic_cpu_map[i] &= ~cpu_mask;
- /*
- * Deal with the banked PPI and SGI interrupts - disable all
- * PPI interrupts, ensure all SGI interrupts are enabled.
- */
- writel_relaxed(0xffff0000, dist_base + GIC_DIST_ENABLE_CLEAR);
- writel_relaxed(0x0000ffff, dist_base + GIC_DIST_ENABLE_SET);
-
- /*
- * Set priority on PPI and SGI interrupts
- */
- for (i = 0; i < 32; i += 4)
- writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4 / 4);
+ gic_cpu_config(dist_base, NULL);
writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
writel_relaxed(1, base + GIC_CPU_CTRL);
}
+void gic_cpu_if_down(void)
+{
+ void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
+ writel_relaxed(0, cpu_base + GIC_CPU_CTRL);
+}
+
#ifdef CONFIG_CPU_PM
/*
* Saves the GIC distributor registers during suspend or idle. Must be called
@@ -648,26 +597,172 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
#endif
#ifdef CONFIG_SMP
-void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
+static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
{
int cpu;
- unsigned long map = 0;
+ unsigned long flags, map = 0;
+
+ raw_spin_lock_irqsave(&irq_controller_lock, flags);
/* Convert our logical CPU mask into a physical one. */
- for_each_cpu(cpu, mask)
+ for_each_cpu(cpu, mask) {
+ trace_arm_ipi_send(irq, cpu);
map |= gic_cpu_map[cpu];
+ }
/*
* Ensure that stores to Normal memory are visible to the
- * other CPUs before issuing the IPI.
+ * other CPUs before they observe us issuing the IPI.
*/
- dsb();
+ dmb(ishst);
/* this always happens on GIC0 */
writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+
+ raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
}
#endif
+#ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_send_sgi - send an SGI directly to a given CPU interface number
+ *
+ * @cpu_id: the ID for the destination CPU interface
+ * @irq: the IPI number to send an SGI for
+ */
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq)
+{
+ BUG_ON(cpu_id >= NR_GIC_CPU_IF);
+ cpu_id = 1 << cpu_id;
+ /* this always happens on GIC0 */
+ writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+}
+
+/*
+ * gic_get_cpu_id - get the CPU interface ID for the specified CPU
+ *
+ * @cpu: the logical CPU number to get the GIC ID for.
+ *
+ * Return the CPU interface ID for the given logical CPU number,
+ * or -1 if the CPU number is too large or the interface ID is
+ * unknown (more than one bit set).
+ */
+int gic_get_cpu_id(unsigned int cpu)
+{
+ unsigned int cpu_bit;
+
+ if (cpu >= NR_GIC_CPU_IF)
+ return -1;
+ cpu_bit = gic_cpu_map[cpu];
+ if (cpu_bit & (cpu_bit - 1))
+ return -1;
+ return __ffs(cpu_bit);
+}
+
+/*
+ * gic_migrate_target - migrate IRQs to another CPU interface
+ *
+ * @new_cpu_id: the CPU target ID to migrate IRQs to
+ *
+ * Migrate all peripheral interrupts with a target matching the current CPU
+ * to the interface corresponding to @new_cpu_id. The CPU interface mapping
+ * is also updated. Targets to other CPU interfaces are unchanged.
+ * This must be called with IRQs locally disabled.
+ */
+void gic_migrate_target(unsigned int new_cpu_id)
+{
+ unsigned int cur_cpu_id, gic_irqs, gic_nr = 0;
+ void __iomem *dist_base;
+ int i, ror_val, cpu = smp_processor_id();
+ u32 val, cur_target_mask, active_mask;
+
+ if (gic_nr >= MAX_GIC_NR)
+ BUG();
+
+ dist_base = gic_data_dist_base(&gic_data[gic_nr]);
+ if (!dist_base)
+ return;
+ gic_irqs = gic_data[gic_nr].gic_irqs;
+
+ cur_cpu_id = __ffs(gic_cpu_map[cpu]);
+ cur_target_mask = 0x01010101 << cur_cpu_id;
+ ror_val = (cur_cpu_id - new_cpu_id) & 31;
+
+ raw_spin_lock(&irq_controller_lock);
+
+ /* Update the target interface for this logical CPU */
+ gic_cpu_map[cpu] = 1 << new_cpu_id;
+
+ /*
+ * Find all the peripheral interrupts targeting the current
+ * CPU interface and migrate them to the new CPU interface.
+ * We skip DIST_TARGET 0 to 7 as they are read-only.
+ */
+ for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
+ val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+ active_mask = val & cur_target_mask;
+ if (active_mask) {
+ val &= ~active_mask;
+ val |= ror32(active_mask, ror_val);
+ writel_relaxed(val, dist_base + GIC_DIST_TARGET + i*4);
+ }
+ }
+
+ raw_spin_unlock(&irq_controller_lock);
+
+ /*
+ * Now let's migrate and clear any potential SGIs that might be
+ * pending for us (cur_cpu_id). Since GIC_DIST_SGI_PENDING_SET
+ * is a banked register, we can only forward the SGI using
+ * GIC_DIST_SOFTINT. The original SGI source is lost but Linux
+ * doesn't use that information anyway.
+ *
+ * For the same reason we do not adjust SGI source information
+ * for previously sent SGIs by us to other CPUs either.
+ */
+ for (i = 0; i < 16; i += 4) {
+ int j;
+ val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i);
+ if (!val)
+ continue;
+ writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i);
+ for (j = i; j < i + 4; j++) {
+ if (val & 0xff)
+ writel_relaxed((1 << (new_cpu_id + 16)) | j,
+ dist_base + GIC_DIST_SOFTINT);
+ val >>= 8;
+ }
+ }
+}
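+
+/*
+ * Worked example for the ror32() trick above (illustrative): moving
+ * from CPU interface 0 to interface 1 gives ror_val = (0 - 1) & 31
+ * = 31, so rotating right by 31 (i.e. left by 1) turns an
+ * ITARGETSR byte of 0x01 into 0x02. Each matching byte's single
+ * target bit moves up one interface while bytes aimed at other
+ * interfaces are left untouched.
+ */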
+
+/*
+ * gic_get_sgir_physaddr - get the physical address for the SGI register
+ *
+ * Return the physical address of the SGI register to be used
+ * by some early assembly code when the kernel is not yet available.
+ */
+static unsigned long gic_dist_physaddr;
+
+unsigned long gic_get_sgir_physaddr(void)
+{
+ if (!gic_dist_physaddr)
+ return 0;
+ return gic_dist_physaddr + GIC_DIST_SOFTINT;
+}
+
+void __init gic_init_physaddr(struct device_node *node)
+{
+ struct resource res;
+ if (of_address_to_resource(node, 0, &res) == 0) {
+ gic_dist_physaddr = res.start;
+ pr_info("GIC physical location is %#lx\n", gic_dist_physaddr);
+ }
+}
+
+#else
+#define gic_init_physaddr(node) do { } while (0)
+#endif
+
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
@@ -680,16 +775,25 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_set_chip_and_handler(irq, &gic_chip,
handle_fasteoi_irq);
set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+
+ gic_routable_irq_domain_ops->map(d, irq, hw);
}
irq_set_chip_data(irq, d->host_data);
return 0;
}
+static void gic_irq_domain_unmap(struct irq_domain *d, unsigned int irq)
+{
+ gic_routable_irq_domain_ops->unmap(d, irq);
+}
+
static int gic_irq_domain_xlate(struct irq_domain *d,
struct device_node *controller,
const u32 *intspec, unsigned int intsize,
unsigned long *out_hwirq, unsigned int *out_type)
{
+ unsigned long ret = 0;
+
if (d->of_node != controller)
return -EINVAL;
if (intsize < 3)
@@ -699,11 +803,20 @@ static int gic_irq_domain_xlate(struct irq_domain *d,
*out_hwirq = intspec[1] + 16;
/* For SPIs, we need to add 16 more to get the GIC irq ID number */
- if (!intspec[0])
- *out_hwirq += 16;
+ if (!intspec[0]) {
+ ret = gic_routable_irq_domain_ops->xlate(d, controller,
+ intspec,
+ intsize,
+ out_hwirq,
+ out_type);
+
+ if (IS_ERR_VALUE(ret))
+ return ret;
+ }
*out_type = intspec[2] & IRQ_TYPE_SENSE_MASK;
- return 0;
+
+ return ret;
}
#ifdef CONFIG_SMP
@@ -725,11 +838,43 @@ static struct notifier_block __cpuinitdata gic_cpu_notifier = {
};
#endif
-const struct irq_domain_ops gic_irq_domain_ops = {
+static const struct irq_domain_ops gic_irq_domain_ops = {
.map = gic_irq_domain_map,
+ .unmap = gic_irq_domain_unmap,
.xlate = gic_irq_domain_xlate,
};
+/* Default functions for routable irq domain */
+static int gic_routable_irq_domain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hw)
+{
+ return 0;
+}
+
+static void gic_routable_irq_domain_unmap(struct irq_domain *d,
+ unsigned int irq)
+{
+}
+
+static int gic_routable_irq_domain_xlate(struct irq_domain *d,
+ struct device_node *controller,
+ const u32 *intspec, unsigned int intsize,
+ unsigned long *out_hwirq,
+ unsigned int *out_type)
+{
+ *out_hwirq += 16;
+ return 0;
+}
+
+const struct irq_domain_ops gic_default_routable_irq_domain_ops = {
+ .map = gic_routable_irq_domain_map,
+ .unmap = gic_routable_irq_domain_unmap,
+ .xlate = gic_routable_irq_domain_xlate,
+};
+
+const struct irq_domain_ops *gic_routable_irq_domain_ops =
+ &gic_default_routable_irq_domain_ops;
+
void __init gic_init_bases(unsigned int gic_nr, int irq_start,
void __iomem *dist_base, void __iomem *cpu_base,
u32 percpu_offset, struct device_node *node)
@@ -737,6 +882,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
irq_hw_number_t hwirq_base;
struct gic_chip_data *gic;
int gic_irqs, irq_base, i;
+ int nr_routable_irqs;
BUG_ON(gic_nr >= MAX_GIC_NR);
@@ -804,23 +950,35 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
gic->gic_irqs = gic_irqs;
gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
- irq_base = irq_alloc_descs(irq_start, 16, gic_irqs, numa_node_id());
- if (IS_ERR_VALUE(irq_base)) {
- WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
- irq_start);
- irq_base = irq_start;
+
+ if (of_property_read_u32(node, "arm,routable-irqs",
+ &nr_routable_irqs)) {
+ irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
+ numa_node_id());
+ if (IS_ERR_VALUE(irq_base)) {
+ WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
+ irq_start);
+ irq_base = irq_start;
+ }
+
+ gic->domain = irq_domain_add_legacy(node, gic_irqs, irq_base,
+ hwirq_base, &gic_irq_domain_ops, gic);
+ } else {
+ gic->domain = irq_domain_add_linear(node, nr_routable_irqs,
+ &gic_irq_domain_ops,
+ gic);
}
- gic->domain = irq_domain_add_legacy(node, gic_irqs, irq_base,
- hwirq_base, &gic_irq_domain_ops, gic);
+
if (WARN_ON(!gic->domain))
return;
+ if (gic_nr == 0) {
#ifdef CONFIG_SMP
- set_smp_cross_call(gic_raise_softirq);
- register_cpu_notifier(&gic_cpu_notifier);
+ set_smp_cross_call(gic_raise_softirq);
+ register_cpu_notifier(&gic_cpu_notifier);
#endif
-
- set_handle_irq(gic_handle_irq);
+ set_handle_irq(gic_handle_irq);
+ }
gic_chip.flags |= gic_arch_extn.flags;
gic_dist_init(gic);
@@ -831,7 +989,8 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
#ifdef CONFIG_OF
static int gic_cnt __initdata;
-int __init gic_of_init(struct device_node *node, struct device_node *parent)
+static int __init
+gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *cpu_base;
void __iomem *dist_base;
@@ -851,6 +1010,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
percpu_offset = 0;
gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node);
+ if (!gic_cnt)
+ gic_init_physaddr(node);
if (parent) {
irq = irq_of_parse_and_map(node, 0);
@@ -859,6 +1020,7 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
gic_cnt++;
return 0;
}
+IRQCHIP_DECLARE(gic_400, "arm,gic-400", gic_of_init);
IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init);
IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init);
IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init);
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 543ad6a79505..fefef7ebcbec 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -1 +1,5 @@
+# Generic MAILBOX API
+
+obj-$(CONFIG_MAILBOX) += mailbox.o
+
obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
new file mode 100644
index 000000000000..9a937ef35068
--- /dev/null
+++ b/drivers/mailbox/mailbox.c
@@ -0,0 +1,467 @@
+/*
+ * Mailbox: Common code for Mailbox controllers and users
+ *
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox_controller.h>
+
+#define TXDONE_BY_IRQ BIT(0) /* controller has remote RTR irq */
+#define TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */
+#define TXDONE_BY_ACK BIT(2) /* S/W ACK received by Client ticks the TX */
+
+static LIST_HEAD(mbox_cons);
+static DEFINE_MUTEX(con_mutex);
+
+static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
+{
+ int idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ /* See if there is any space left */
+ if (chan->msg_count == MBOX_TX_QUEUE_LEN) {
+ spin_unlock_irqrestore(&chan->lock, flags);
+ return -ENOBUFS;
+ }
+
+ idx = chan->msg_free;
+ chan->msg_data[idx] = mssg;
+ chan->msg_count++;
+
+ if (idx == MBOX_TX_QUEUE_LEN - 1)
+ chan->msg_free = 0;
+ else
+ chan->msg_free++;
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return idx;
+}
+
+static void msg_submit(struct mbox_chan *chan)
+{
+ unsigned count, idx;
+ unsigned long flags;
+ void *data;
+ int err;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ if (!chan->msg_count || chan->active_req)
+ goto exit;
+
+ count = chan->msg_count;
+ idx = chan->msg_free;
+ if (idx >= count)
+ idx -= count;
+ else
+ idx += MBOX_TX_QUEUE_LEN - count;
+
+ data = chan->msg_data[idx];
+
+ /* Try to submit a message to the MBOX controller */
+ err = chan->mbox->ops->send_data(chan, data);
+ if (!err) {
+ chan->active_req = data;
+ chan->msg_count--;
+ }
+exit:
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
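+
+/*
+ * Index maths recap (illustrative): msg_free points at the next
+ * free slot and msg_count is the number of queued messages, so the
+ * oldest pending message lives at (msg_free - msg_count) modulo
+ * MBOX_TX_QUEUE_LEN, which is what the branch above computes
+ * without a modulo operation.
+ */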
+
+static void tx_tick(struct mbox_chan *chan, int r)
+{
+ unsigned long flags;
+ void *mssg;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ mssg = chan->active_req;
+ chan->active_req = NULL;
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ /* Submit next message */
+ msg_submit(chan);
+
+ /* Notify the client */
+ if (mssg && chan->cl->tx_done)
+ chan->cl->tx_done(chan->cl, mssg, r);
+
+ if (chan->cl->tx_block)
+ complete(&chan->tx_complete);
+}
+
+static void poll_txdone(unsigned long data)
+{
+ struct mbox_controller *mbox = (struct mbox_controller *)data;
+ bool txdone, resched = false;
+ int i;
+
+ for (i = 0; i < mbox->num_chans; i++) {
+ struct mbox_chan *chan = &mbox->chans[i];
+
+ if (chan->active_req && chan->cl) {
+ resched = true;
+ txdone = chan->mbox->ops->last_tx_done(chan);
+ if (txdone)
+ tx_tick(chan, 0);
+ }
+ }
+
+ if (resched)
+ mod_timer(&mbox->poll, jiffies +
+ msecs_to_jiffies(mbox->period));
+}
+
+/**
+ * mbox_chan_received_data - A way for controller driver to push data
+ * received from remote to the upper layer.
+ * @chan: Pointer to the mailbox channel on which RX happened.
+ * @mssg: Client specific message typecasted as void *
+ *
+ * After startup and before shutdown any data received on the chan
+ * is passed on to the API via atomic mbox_chan_received_data().
+ * The controller should ACK the RX only after this call returns.
+ */
+void mbox_chan_received_data(struct mbox_chan *chan, void *mssg)
+{
+ /* No buffering of the received data */
+ if (chan->cl->rx_callback)
+ chan->cl->rx_callback(chan->cl, mssg);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_received_data);
+
+/**
+ * mbox_chan_txdone - A way for controller driver to notify the
+ * framework that the last TX has completed.
+ * @chan: Pointer to the mailbox chan on which TX happened.
+ * @r: Status of last TX - OK or ERROR
+ *
+ * The controller that has IRQ for TX ACK calls this atomic API
+ * to tick the TX state machine. It works only if txdone_irq
+ * is set by the controller.
+ */
+void mbox_chan_txdone(struct mbox_chan *chan, int r)
+{
+ if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) {
+ dev_err(chan->mbox->dev,
+ "Controller can't run the TX ticker\n");
+ return;
+ }
+
+ tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_txdone);
+
+/**
+ * mbox_client_txdone - The way for a client to run the TX state machine.
+ * @chan: Mailbox channel assigned to this client.
+ * @r: Success status of last transmission.
+ *
+ * The client/protocol has received an 'ACK' packet and notifies
+ * the API that the last packet was sent successfully. This only works
+ * if the controller can't sense TX-Done itself.
+ */
+void mbox_client_txdone(struct mbox_chan *chan, int r)
+{
+ if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) {
+ dev_err(chan->mbox->dev, "Client can't run the TX ticker\n");
+ return;
+ }
+
+ tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_client_txdone);
+
+/**
+ * mbox_client_peek_data - A way for client driver to pull data
+ * received from remote by the controller.
+ * @chan: Mailbox channel assigned to this client.
+ *
+ * A poke to the controller driver to check for any received data.
+ * The data is actually passed on to the client via
+ * mbox_chan_received_data().
+ * The call can be made from atomic context, so the controller's
+ * implementation of peek_data() must not sleep.
+ *
+ * Return: True, if the controller has data and will push it to the
+ * client after this call returns.
+ * False, if the controller has no data to be read.
+ */
+bool mbox_client_peek_data(struct mbox_chan *chan)
+{
+ if (chan->mbox->ops->peek_data)
+ return chan->mbox->ops->peek_data(chan);
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(mbox_client_peek_data);
+
+/**
+ * mbox_send_message - For client to submit a message to be
+ * sent to the remote.
+ * @chan: Mailbox channel assigned to this client.
+ * @mssg: Client specific message typecast as void *.
+ *
+ * For client to submit data to the controller destined for a remote
+ * processor. If the client had set 'tx_block', the call will return
+ * either when the remote receives the data or when 'tx_tout' millisecs
+ * run out.
+ * In non-blocking mode, the requests are buffered by the API and a
+ * non-negative token is returned for each queued request. If the request
+ * is not queued, a negative token is returned. Upon failure or successful
+ * TX, the API calls 'tx_done' from atomic context, from which the client
+ * could submit yet another request.
+ * The pointer to message should be preserved until it is sent
+ * over the chan, i.e, tx_done() is made.
+ * This function could be called from atomic context as it simply
+ * queues the data and returns a token against the request.
+ *
+ * Return: Non-negative integer for successful submission (non-blocking mode)
+ * or transmission over chan (blocking mode).
+ * Negative value denotes failure.
+ */
+int mbox_send_message(struct mbox_chan *chan, void *mssg)
+{
+ int t;
+
+ if (!chan || !chan->cl)
+ return -EINVAL;
+
+ t = add_to_rbuf(chan, mssg);
+ if (t < 0) {
+ dev_err(chan->mbox->dev, "Try increasing MBOX_TX_QUEUE_LEN\n");
+ return t;
+ }
+
+ msg_submit(chan);
+
+ INIT_COMPLETION(chan->tx_complete);
+
+ if (chan->txdone_method == TXDONE_BY_POLL)
+ poll_txdone((unsigned long)chan->mbox);
+
+ if (chan->cl->tx_block && chan->active_req) {
+ unsigned long wait;
+ int ret;
+
+ if (!chan->cl->tx_tout) /* wait forever */
+ wait = msecs_to_jiffies(3600000);
+ else
+ wait = msecs_to_jiffies(chan->cl->tx_tout);
+
+ ret = wait_for_completion_timeout(&chan->tx_complete, wait);
+ if (ret == 0) {
+ t = -EIO;
+ tx_tick(chan, -EIO);
+ }
+ }
+
+ return t;
+}
+EXPORT_SYMBOL_GPL(mbox_send_message);
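+
+/*
+ * Minimal client usage sketch (hypothetical driver; "demo_dev" and
+ * "demo_msg" are illustrative names, not part of this API).
+ * tx_tout is in milliseconds:
+ *
+ *	struct mbox_client cl = {
+ *		.dev = demo_dev,
+ *		.tx_block = true,
+ *		.tx_tout = 500,
+ *	};
+ *	struct mbox_chan *chan = mbox_request_channel(&cl, 0);
+ *
+ *	if (!IS_ERR(chan)) {
+ *		int ret = mbox_send_message(chan, &demo_msg);
+ *		if (ret < 0)
+ *			dev_err(demo_dev, "TX failed: %d\n", ret);
+ *		mbox_free_channel(chan);
+ *	}
+ */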
+
+/**
+ * mbox_request_channel - Request a mailbox channel.
+ * @cl: Identity of the client requesting the channel.
+ * @index: Index of mailbox specifier in 'mboxes' property.
+ *
+ * The Client specifies its requirements and capabilities while asking for
+ * a mailbox channel. This function can't be called from atomic context.
+ * The channel is exclusively allocated and can't be used by another
+ * client before the owner calls mbox_free_channel.
+ * After assignment, any packet received on this channel will be
+ * handed over to the client via the 'rx_callback'.
+ * The framework holds reference to the client, so the mbox_client
+ * structure shouldn't be modified until the mbox_free_channel returns.
+ *
+ * Return: Pointer to the channel assigned to the client if successful.
+ * ERR_PTR for request failure.
+ */
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
+{
+ struct device *dev = cl->dev;
+ struct mbox_controller *mbox;
+ struct of_phandle_args spec;
+ struct mbox_chan *chan;
+ unsigned long flags;
+ int ret;
+
+ if (!dev || !dev->of_node) {
+ pr_debug("%s: No owner device node\n", __func__);
+ return ERR_PTR(-ENODEV);
+ }
+
+ mutex_lock(&con_mutex);
+
+ if (of_parse_phandle_with_args(dev->of_node, "mboxes",
+ "#mbox-cells", index, &spec)) {
+ dev_dbg(dev, "%s: can't parse \"mboxes\" property\n", __func__);
+ mutex_unlock(&con_mutex);
+ return ERR_PTR(-ENODEV);
+ }
+
+ chan = NULL;
+ list_for_each_entry(mbox, &mbox_cons, node)
+ if (mbox->dev->of_node == spec.np) {
+ chan = mbox->of_xlate(mbox, &spec);
+ break;
+ }
+
+ of_node_put(spec.np);
+
+ if (!chan || chan->cl || !try_module_get(mbox->dev->driver->owner)) {
+ dev_dbg(dev, "%s: mailbox not free\n", __func__);
+ mutex_unlock(&con_mutex);
+ return ERR_PTR(-EBUSY);
+ }
+
+ spin_lock_irqsave(&chan->lock, flags);
+ chan->msg_free = 0;
+ chan->msg_count = 0;
+ chan->active_req = NULL;
+ chan->cl = cl;
+ init_completion(&chan->tx_complete);
+
+ if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
+ chan->txdone_method |= TXDONE_BY_ACK;
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ ret = chan->mbox->ops->startup(chan);
+ if (ret) {
+ dev_err(dev, "Unable to startup the chan (%d)\n", ret);
+ mbox_free_channel(chan);
+ chan = ERR_PTR(ret);
+ }
+
+ mutex_unlock(&con_mutex);
+ return chan;
+}
+EXPORT_SYMBOL_GPL(mbox_request_channel);
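+
+/*
+ * Device tree sketch for the "mboxes" lookup above (hypothetical
+ * node names, for illustration only):
+ *
+ *	mbox: mailbox@2b1f0000 {
+ *		#mbox-cells = <1>;
+ *	};
+ *
+ *	client {
+ *		mboxes = <&mbox 0>;
+ *	};
+ *
+ * mbox_request_channel(cl, 0) resolves <&mbox 0> through the
+ * controller's of_xlate callback; the default of_mbox_index_xlate
+ * treats the single cell as a channel index.
+ */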
+
+/**
+ * mbox_free_channel - The client relinquishes control of a mailbox
+ * channel by this call.
+ * @chan: The mailbox channel to be freed.
+ */
+void mbox_free_channel(struct mbox_chan *chan)
+{
+ unsigned long flags;
+
+ if (!chan || !chan->cl)
+ return;
+
+ chan->mbox->ops->shutdown(chan);
+
+ /* The queued TX requests are simply aborted, no callbacks are made */
+ spin_lock_irqsave(&chan->lock, flags);
+ chan->cl = NULL;
+ chan->active_req = NULL;
+ if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
+ chan->txdone_method = TXDONE_BY_POLL;
+
+ module_put(chan->mbox->dev->driver->owner);
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mbox_free_channel);
+
+static struct mbox_chan *
+of_mbox_index_xlate(struct mbox_controller *mbox,
+ const struct of_phandle_args *sp)
+{
+ int ind = sp->args[0];
+
+ if (ind >= mbox->num_chans)
+ return NULL;
+
+ return &mbox->chans[ind];
+}
+
+/**
+ * mbox_controller_register - Register the mailbox controller
+ * @mbox: Pointer to the mailbox controller.
+ *
+ * The controller driver registers its communication channels.
+ */
+int mbox_controller_register(struct mbox_controller *mbox)
+{
+ int i, txdone;
+
+ /* Sanity check */
+ if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans)
+ return -EINVAL;
+
+ if (mbox->txdone_irq)
+ txdone = TXDONE_BY_IRQ;
+ else if (mbox->txdone_poll)
+ txdone = TXDONE_BY_POLL;
+ else /* It has to be ACK then */
+ txdone = TXDONE_BY_ACK;
+
+ if (txdone == TXDONE_BY_POLL) {
+ mbox->poll.function = &poll_txdone;
+ mbox->poll.data = (unsigned long)mbox;
+ init_timer(&mbox->poll);
+ }
+
+ for (i = 0; i < mbox->num_chans; i++) {
+ struct mbox_chan *chan = &mbox->chans[i];
+
+ chan->cl = NULL;
+ chan->mbox = mbox;
+ chan->txdone_method = txdone;
+ spin_lock_init(&chan->lock);
+ }
+
+ if (!mbox->of_xlate)
+ mbox->of_xlate = of_mbox_index_xlate;
+
+ mutex_lock(&con_mutex);
+ list_add_tail(&mbox->node, &mbox_cons);
+ mutex_unlock(&con_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mbox_controller_register);
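+
+/*
+ * Minimal controller registration sketch (hypothetical names, for
+ * illustration; assumes a platform driver probe context). Exactly
+ * one TX-done method should be selected:
+ *
+ *	static struct mbox_chan demo_chans[4];
+ *
+ *	static struct mbox_controller demo_mbox = {
+ *		.ops = &demo_chan_ops,
+ *		.chans = demo_chans,
+ *		.num_chans = ARRAY_SIZE(demo_chans),
+ *		.txdone_poll = true,
+ *		.period = 10,
+ *	};
+ *
+ *	demo_mbox.dev = &pdev->dev;
+ *	return mbox_controller_register(&demo_mbox);
+ */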
+
+/**
+ * mbox_controller_unregister - Unregister the mailbox controller
+ * @mbox: Pointer to the mailbox controller.
+ */
+void mbox_controller_unregister(struct mbox_controller *mbox)
+{
+ int i;
+
+ if (!mbox)
+ return;
+
+ mutex_lock(&con_mutex);
+
+ list_del(&mbox->node);
+
+ for (i = 0; i < mbox->num_chans; i++)
+ mbox_free_channel(&mbox->chans[i]);
+
+ if (mbox->txdone_poll)
+ del_timer_sync(&mbox->poll);
+
+ mutex_unlock(&con_mutex);
+}
+EXPORT_SYMBOL_GPL(mbox_controller_unregister);
diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c
index d873cbae2fbb..f3755e0aa935 100644
--- a/drivers/mailbox/pl320-ipc.c
+++ b/drivers/mailbox/pl320-ipc.c
@@ -26,7 +26,7 @@
#include <linux/device.h>
#include <linux/amba/bus.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
#define IPCMxSOURCE(m) ((m) * 0x40)
#define IPCMxDSET(m) (((m) * 0x40) + 0x004)
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index d54e985748b7..a5e54f0d6a73 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1144,7 +1144,15 @@ config MCP_UCB1200_TS
endmenu
config VEXPRESS_CONFIG
- bool
+ bool "ARM Versatile Express platform infrastructure"
+ depends on ARM || ARM64
help
Platform configuration infrastructure for the ARM Ltd.
Versatile Express.
+
+config VEXPRESS_SPC
+ bool "Versatile Express SPC driver support"
+ depends on ARM
+ depends on VEXPRESS_CONFIG
+ help
+ Serial Power Controller driver for ARM Ltd. test chips.
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 718e94a2a9a7..3a0120315aa3 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -153,5 +153,6 @@ obj-$(CONFIG_MFD_SEC_CORE) += sec-core.o sec-irq.o
obj-$(CONFIG_MFD_SYSCON) += syscon.o
obj-$(CONFIG_MFD_LM3533) += lm3533-core.o lm3533-ctrlbank.o
obj-$(CONFIG_VEXPRESS_CONFIG) += vexpress-config.o vexpress-sysreg.o
+obj-$(CONFIG_VEXPRESS_SPC) += vexpress-spc.o
obj-$(CONFIG_MFD_RETU) += retu-mfd.o
obj-$(CONFIG_MFD_AS3711) += as3711.o
diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c
index 84ce6b9daa3d..1af2b0e0182f 100644
--- a/drivers/mfd/vexpress-config.c
+++ b/drivers/mfd/vexpress-config.c
@@ -86,29 +86,13 @@ void vexpress_config_bridge_unregister(struct vexpress_config_bridge *bridge)
}
EXPORT_SYMBOL(vexpress_config_bridge_unregister);
-
-struct vexpress_config_func {
- struct vexpress_config_bridge *bridge;
- void *func;
-};
-
-struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
- struct device_node *node)
+static struct vexpress_config_bridge *
+ vexpress_config_bridge_find(struct device_node *node)
{
- struct device_node *bridge_node;
- struct vexpress_config_func *func;
int i;
+ struct vexpress_config_bridge *res = NULL;
+ struct device_node *bridge_node = of_node_get(node);
- if (WARN_ON(dev && node && dev->of_node != node))
- return NULL;
- if (dev && !node)
- node = dev->of_node;
-
- func = kzalloc(sizeof(*func), GFP_KERNEL);
- if (!func)
- return NULL;
-
- bridge_node = of_node_get(node);
while (bridge_node) {
const __be32 *prop = of_get_property(bridge_node,
"arm,vexpress,config-bridge", NULL);
@@ -129,13 +113,46 @@ struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
if (test_bit(i, vexpress_config_bridges_map) &&
bridge->node == bridge_node) {
- func->bridge = bridge;
- func->func = bridge->info->func_get(dev, node);
+ res = bridge;
break;
}
}
mutex_unlock(&vexpress_config_bridges_mutex);
+ return res;
+}
+
+
+struct vexpress_config_func {
+ struct vexpress_config_bridge *bridge;
+ void *func;
+};
+
+struct vexpress_config_func *__vexpress_config_func_get(
+ struct vexpress_config_bridge *bridge,
+ struct device *dev,
+ struct device_node *node,
+ const char *id)
+{
+ struct vexpress_config_func *func;
+
+ if (WARN_ON(dev && node && dev->of_node != node))
+ return NULL;
+ if (dev && !node)
+ node = dev->of_node;
+
+ if (!bridge)
+ bridge = vexpress_config_bridge_find(node);
+ if (!bridge)
+ return NULL;
+
+ func = kzalloc(sizeof(*func), GFP_KERNEL);
+ if (!func)
+ return NULL;
+
+ func->bridge = bridge;
+ func->func = bridge->info->func_get(dev, node, id);
+
if (!func->func) {
of_node_put(node);
kfree(func);
diff --git a/drivers/mfd/vexpress-spc.c b/drivers/mfd/vexpress-spc.c
new file mode 100644
index 000000000000..0c6718abf1ba
--- /dev/null
+++ b/drivers/mfd/vexpress-spc.c
@@ -0,0 +1,633 @@
+/*
+ * Versatile Express Serial Power Controller (SPC) support
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * Authors: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
+ * Achin Gupta <achin.gupta@arm.com>
+ * Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/vexpress.h>
+
+#include <asm/cacheflush.h>
+
+#define SCC_CFGREG19 0x120
+#define SCC_CFGREG20 0x124
+#define A15_CONF 0x400
+#define A7_CONF 0x500
+#define SYS_INFO 0x700
+#define PERF_LVL_A15 0xB00
+#define PERF_REQ_A15 0xB04
+#define PERF_LVL_A7 0xB08
+#define PERF_REQ_A7 0xB0c
+#define SYS_CFGCTRL 0xB10
+#define SYS_CFGCTRL_REQ 0xB14
+#define PWC_STATUS 0xB18
+#define PWC_FLAG 0xB1c
+#define WAKE_INT_MASK 0xB24
+#define WAKE_INT_RAW 0xB28
+#define WAKE_INT_STAT 0xB2c
+#define A15_PWRDN_EN 0xB30
+#define A7_PWRDN_EN 0xB34
+#define A7_PWRDNACK 0xB54
+#define A15_BX_ADDR0 0xB68
+#define SYS_CFG_WDATA 0xB70
+#define SYS_CFG_RDATA 0xB74
+#define A7_BX_ADDR0 0xB78
+
+#define GBL_WAKEUP_INT_MSK (0x3 << 10)
+
+#define CLKF_SHIFT 16
+#define CLKF_MASK 0x1FFF
+#define CLKR_SHIFT 0
+#define CLKR_MASK 0x3F
+#define CLKOD_SHIFT 8
+#define CLKOD_MASK 0xF
+
+#define OPP_FUNCTION 6
+#define OPP_BASE_DEVICE 0x300
+#define OPP_A15_OFFSET 0x4
+#define OPP_A7_OFFSET 0xc
+
+#define SYS_CFGCTRL_START (1 << 31)
+#define SYS_CFGCTRL_WRITE (1 << 30)
+#define SYS_CFGCTRL_FUNC(n) (((n) & 0x3f) << 20)
+#define SYS_CFGCTRL_DEVICE(n) (((n) & 0xfff) << 0)
+
+#define MAX_OPPS 8
+#define MAX_CLUSTERS 2
+
+enum {
+ A15_OPP_TYPE = 0,
+ A7_OPP_TYPE = 1,
+ SYS_CFGCTRL_TYPE = 2,
+ INVALID_TYPE
+};
+
+#define STAT_COMPLETE(type) ((1 << 0) << (type << 2))
+#define STAT_ERR(type) ((1 << 1) << (type << 2))
+#define RESPONSE_MASK(type) (STAT_COMPLETE(type) | STAT_ERR(type))
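+
+/*
+ * Worked example (illustrative): each request type owns a nibble of
+ * PWC_STATUS, so for SYS_CFGCTRL_TYPE (2) the completion bit is
+ * 1 << 8, the error bit is 1 << 9, and RESPONSE_MASK is 0x300.
+ */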
+
+struct vexpress_spc_drvdata {
+ void __iomem *baseaddr;
+ u32 a15_clusid;
+ int irq;
+ u32 cur_req_type;
+ u32 freqs[MAX_CLUSTERS][MAX_OPPS];
+ int freqs_cnt[MAX_CLUSTERS];
+};
+
+enum spc_func_type {
+ CONFIG_FUNC = 0,
+ PERF_FUNC = 1,
+};
+
+struct vexpress_spc_func {
+ enum spc_func_type type;
+ u32 function;
+ u32 device;
+};
+
+static struct vexpress_spc_drvdata *info;
+static u32 *vexpress_spc_config_data;
+static struct vexpress_config_bridge *vexpress_spc_config_bridge;
+static struct vexpress_config_func *opp_func, *perf_func;
+
+static int vexpress_spc_load_result = -EAGAIN;
+
+static bool vexpress_spc_initialized(void)
+{
+ return vexpress_spc_load_result == 0;
+}
+
+/**
+ * vexpress_spc_write_resume_reg() - set the jump address used for warm boot
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @cpu: mpidr[7:0] bitfield describing cpu affinity level
+ * @addr: physical resume address
+ */
+void vexpress_spc_write_resume_reg(u32 cluster, u32 cpu, u32 addr)
+{
+ void __iomem *baseaddr;
+
+ if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS))
+ return;
+
+ if (cluster != info->a15_clusid)
+ baseaddr = info->baseaddr + A7_BX_ADDR0 + (cpu << 2);
+ else
+ baseaddr = info->baseaddr + A15_BX_ADDR0 + (cpu << 2);
+
+ writel_relaxed(addr, baseaddr);
+}
+
+/**
+ * vexpress_spc_get_nb_cpus() - get number of cpus in a cluster
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ *
+ * Return: number of cpus in the cluster
+ * -EINVAL if cluster number invalid
+ */
+int vexpress_spc_get_nb_cpus(u32 cluster)
+{
+ u32 val;
+
+ if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS))
+ return -EINVAL;
+
+ val = readl_relaxed(info->baseaddr + SYS_INFO);
+ val = (cluster != info->a15_clusid) ? (val >> 20) : (val >> 16);
+ return val & 0xf;
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_get_nb_cpus);
+
+/**
+ * vexpress_spc_get_performance - get current performance level of cluster
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @freq: pointer to the performance level to be assigned
+ *
+ * Return: 0 on success
+ * < 0 on read error
+ */
+int vexpress_spc_get_performance(u32 cluster, u32 *freq)
+{
+ u32 perf_cfg_reg;
+ int perf, ret;
+
+ if (!vexpress_spc_initialized() || (cluster >= MAX_CLUSTERS))
+ return -EINVAL;
+
+ perf_cfg_reg = cluster != info->a15_clusid ? PERF_LVL_A7 : PERF_LVL_A15;
+ ret = vexpress_config_read(perf_func, perf_cfg_reg, &perf);
+
+ if (!ret)
+ *freq = info->freqs[cluster][perf];
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_get_performance);
+
+/**
+ * vexpress_spc_find_perf_index - get performance level corresponding to
+ * a frequency
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @freq: frequency to be looked-up
+ *
+ * Return: perf level index on success
+ * -EINVAL on error
+ */
+static int vexpress_spc_find_perf_index(u32 cluster, u32 freq)
+{
+ int idx;
+
+ for (idx = 0; idx < info->freqs_cnt[cluster]; idx++)
+ if (info->freqs[cluster][idx] == freq)
+ break;
+ return (idx == info->freqs_cnt[cluster]) ? -EINVAL : idx;
+}
+
+/**
+ * vexpress_spc_set_performance - set current performance level of cluster
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @freq: frequency (operating point) to be programmed
+ *
+ * Return: 0 on success
+ * < 0 on write error
+ */
+int vexpress_spc_set_performance(u32 cluster, u32 freq)
+{
+ int ret, perf, offset;
+
+ if (!vexpress_spc_initialized() || (cluster >= MAX_CLUSTERS))
+ return -EINVAL;
+
+ offset = (cluster != info->a15_clusid) ? PERF_LVL_A7 : PERF_LVL_A15;
+
+ perf = vexpress_spc_find_perf_index(cluster, freq);
+
+ if (perf < 0 || perf >= MAX_OPPS)
+ return -EINVAL;
+
+ ret = vexpress_config_write(perf_func, offset, perf);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_set_performance);
+
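+/*
+ * Minimal usage sketch for the two accessors above; the "example_" name
+ * is illustrative and not part of this interface:
+ */
+static int __maybe_unused example_spc_scale(u32 cluster, u32 target_freq)
+{
+ u32 cur_freq;
+
+ if (vexpress_spc_set_performance(cluster, target_freq))
+ return -EINVAL;
+ /* read back the operating point that is now in effect */
+ return vexpress_spc_get_performance(cluster, &cur_freq);
+}
+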
+static void vexpress_spc_set_wake_intr(u32 mask)
+{
+ writel_relaxed(mask & VEXPRESS_SPC_WAKE_INTR_MASK,
+ info->baseaddr + WAKE_INT_MASK);
+}
+
+static inline void reg_bitmask(u32 *reg, u32 mask, bool set)
+{
+ if (set)
+ *reg |= mask;
+ else
+ *reg &= ~mask;
+}
+
+/**
+ * vexpress_spc_set_global_wakeup_intr()
+ *
+ * Function to set/clear global wakeup IRQs. Not protected by locking since
+ * it might be used in code paths where normal cacheable locks are not
+ * working. Locking must be provided by the caller to ensure atomicity.
+ *
+ * @set: if true, global wake-up IRQs are set, if false they are cleared
+ */
+void vexpress_spc_set_global_wakeup_intr(bool set)
+{
+ u32 wake_int_mask_reg = 0;
+
+ wake_int_mask_reg = readl_relaxed(info->baseaddr + WAKE_INT_MASK);
+ reg_bitmask(&wake_int_mask_reg, GBL_WAKEUP_INT_MSK, set);
+ vexpress_spc_set_wake_intr(wake_int_mask_reg);
+}
+
+/**
+ * vexpress_spc_set_cpu_wakeup_irq()
+ *
+ * Function to set/clear per-CPU wake-up IRQs. Not protected by locking since
+ * it might be used in code paths where normal cacheable locks are not
+ * working. Locking must be provided by the caller to ensure atomicity.
+ *
+ * @cpu: mpidr[7:0] bitfield describing cpu affinity level
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @set: if true, wake-up IRQs are set, if false they are cleared
+ */
+void vexpress_spc_set_cpu_wakeup_irq(u32 cpu, u32 cluster, bool set)
+{
+ u32 mask = 0;
+ u32 wake_int_mask_reg = 0;
+
+ mask = 1 << cpu;
+ if (info->a15_clusid != cluster)
+ mask <<= 4;
+
+ wake_int_mask_reg = readl_relaxed(info->baseaddr + WAKE_INT_MASK);
+ reg_bitmask(&wake_int_mask_reg, mask, set);
+ vexpress_spc_set_wake_intr(wake_int_mask_reg);
+}
+
+/**
+ * vexpress_spc_powerdown_enable()
+ *
+ * Function to enable/disable cluster powerdown. Not protected by locking
+ * since it might be used in code paths where normal cacheable locks are not
+ * working. Locking must be provided by the caller to ensure atomicity.
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @enable: if true enables powerdown, if false disables it
+ */
+void vexpress_spc_powerdown_enable(u32 cluster, bool enable)
+{
+ u32 pwdrn_reg = 0;
+
+ if (cluster >= MAX_CLUSTERS)
+ return;
+ pwdrn_reg = cluster != info->a15_clusid ? A7_PWRDN_EN : A15_PWRDN_EN;
+ writel_relaxed(enable, info->baseaddr + pwdrn_reg);
+}
+
+irqreturn_t vexpress_spc_irq_handler(int irq, void *data)
+{
+ int ret;
+ u32 status = readl_relaxed(info->baseaddr + PWC_STATUS);
+
+ if (!(status & RESPONSE_MASK(info->cur_req_type)))
+ return IRQ_NONE;
+
+ if ((status == STAT_COMPLETE(SYS_CFGCTRL_TYPE))
+ && vexpress_spc_config_data) {
+ *vexpress_spc_config_data =
+ readl_relaxed(info->baseaddr + SYS_CFG_RDATA);
+ vexpress_spc_config_data = NULL;
+ }
+
+ ret = STAT_COMPLETE(info->cur_req_type) ? 0 : -EIO;
+ info->cur_req_type = INVALID_TYPE;
+ vexpress_config_complete(vexpress_spc_config_bridge, ret);
+ return IRQ_HANDLED;
+}
+
+/*
+ * Based on the firmware documentation, this is always fixed to 20.
+ * All four OSCs (A15 PLL0/1 and A7 PLL0/1) must be programmed with the
+ * same values for both the control and value registers.
+ * This function uses the A15 PLL0 registers to compute the
+ * multiplication factor:
+ * F out = F in * (CLKF + 1) / ((CLKOD + 1) * (CLKR + 1))
+ */
+static inline int __get_mult_factor(void)
+{
+ int i_div, o_div, f_div;
+ u32 tmp;
+
+ tmp = readl(info->baseaddr + SCC_CFGREG19);
+ f_div = (tmp >> CLKF_SHIFT) & CLKF_MASK;
+
+ tmp = readl(info->baseaddr + SCC_CFGREG20);
+ o_div = (tmp >> CLKOD_SHIFT) & CLKOD_MASK;
+ i_div = (tmp >> CLKR_SHIFT) & CLKR_MASK;
+
+ return (f_div + 1) / ((o_div + 1) * (i_div + 1));
+}
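+/*
+ * Example with made-up register values: CLKF = 99, CLKOD = 1, CLKR = 0
+ * gives (99 + 1) / ((1 + 1) * (0 + 1)) = 50, so each raw OPP value read
+ * back from the M3 microcontroller would be multiplied by 50.
+ */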
+
+/**
+ * vexpress_spc_populate_opps() - initialize opp tables from microcontroller
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ *
+ * Return: 0 on success
+ * < 0 on error
+ */
+static int vexpress_spc_populate_opps(u32 cluster)
+{
+ u32 data = 0, ret, i, offset;
+ int mult_fact = __get_mult_factor();
+
+ if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS))
+ return -EINVAL;
+
+ offset = cluster != info->a15_clusid ? OPP_A7_OFFSET : OPP_A15_OFFSET;
+ for (i = 0; i < MAX_OPPS; i++) {
+ ret = vexpress_config_read(opp_func, i + offset, &data);
+ if (!ret)
+ info->freqs[cluster][i] = (data & 0xFFFFF) * mult_fact;
+ else
+ break;
+ }
+
+ info->freqs_cnt[cluster] = i;
+ return ret;
+}
+
+/**
+ * vexpress_spc_get_freq_table() - Retrieve a pointer to the frequency
+ * table for a given cluster
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @fptr: pointer to be initialized
+ * Return: operating points count on success
+ * -EINVAL on pointer error
+ */
+int vexpress_spc_get_freq_table(u32 cluster, u32 **fptr)
+{
+ if (WARN_ON_ONCE(!fptr || cluster >= MAX_CLUSTERS))
+ return -EINVAL;
+ *fptr = info->freqs[cluster];
+ return info->freqs_cnt[cluster];
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_get_freq_table);
+
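+/*
+ * Sketch of a consumer (e.g. a cpufreq back-end) walking the table
+ * returned above; the function name is illustrative:
+ */
+static void __maybe_unused example_dump_opps(u32 cluster)
+{
+ u32 *freqs;
+ int i, cnt;
+
+ cnt = vexpress_spc_get_freq_table(cluster, &freqs);
+ for (i = 0; i < cnt; i++)
+ pr_debug("cluster %u opp %d: %u\n", cluster, i, freqs[i]);
+}
+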
+static void *vexpress_spc_func_get(struct device *dev,
+ struct device_node *node, const char *id)
+{
+ struct vexpress_spc_func *spc_func;
+ u32 func_device[2];
+ int err = 0;
+
+ spc_func = kzalloc(sizeof(*spc_func), GFP_KERNEL);
+ if (!spc_func)
+ return NULL;
+
+ if (strcmp(id, "opp") == 0) {
+ spc_func->type = CONFIG_FUNC;
+ spc_func->function = OPP_FUNCTION;
+ spc_func->device = OPP_BASE_DEVICE;
+ } else if (strcmp(id, "perf") == 0) {
+ spc_func->type = PERF_FUNC;
+ } else if (node) {
+ of_node_get(node);
+ err = of_property_read_u32_array(node,
+ "arm,vexpress-sysreg,func", func_device,
+ ARRAY_SIZE(func_device));
+ of_node_put(node);
+ spc_func->type = CONFIG_FUNC;
+ spc_func->function = func_device[0];
+ spc_func->device = func_device[1];
+ }
+
+ if (WARN_ON(err)) {
+ kfree(spc_func);
+ return NULL;
+ }
+
+ pr_debug("func 0x%p = 0x%x, %d %d\n", spc_func,
+ spc_func->function,
+ spc_func->device,
+ spc_func->type);
+
+ return spc_func;
+}
+
+static void vexpress_spc_func_put(void *func)
+{
+ kfree(func);
+}
+
+static int vexpress_spc_func_exec(void *func, int offset, bool write,
+ u32 *data)
+{
+ struct vexpress_spc_func *spc_func = func;
+ u32 command;
+
+ if (!data)
+ return -EINVAL;
+ /*
+ * Setting and retrieval of operating points is not part of the
+ * DCC config interface. It was made to go through the same
+ * code path so that requests to the M3 can be serialized
+ * properly with config reads/writes through the common
+ * vexpress config interface.
+ */
+ switch (spc_func->type) {
+ case PERF_FUNC:
+ if (write) {
+ info->cur_req_type = (offset == PERF_LVL_A15) ?
+ A15_OPP_TYPE : A7_OPP_TYPE;
+ writel_relaxed(*data, info->baseaddr + offset);
+ return VEXPRESS_CONFIG_STATUS_WAIT;
+ } else {
+ *data = readl_relaxed(info->baseaddr + offset);
+ return VEXPRESS_CONFIG_STATUS_DONE;
+ }
+ case CONFIG_FUNC:
+ info->cur_req_type = SYS_CFGCTRL_TYPE;
+
+ command = SYS_CFGCTRL_START;
+ command |= write ? SYS_CFGCTRL_WRITE : 0;
+ command |= SYS_CFGCTRL_FUNC(spc_func->function);
+ command |= SYS_CFGCTRL_DEVICE(spc_func->device + offset);
+
+ pr_debug("command %x\n", command);
+
+ if (!write)
+ vexpress_spc_config_data = data;
+ else
+ writel_relaxed(*data, info->baseaddr + SYS_CFG_WDATA);
+ writel_relaxed(command, info->baseaddr + SYS_CFGCTRL);
+
+ return VEXPRESS_CONFIG_STATUS_WAIT;
+ default:
+ return -EINVAL;
+ }
+}
+
+struct vexpress_config_bridge_info vexpress_spc_config_bridge_info = {
+ .name = "vexpress-spc",
+ .func_get = vexpress_spc_func_get,
+ .func_put = vexpress_spc_func_put,
+ .func_exec = vexpress_spc_func_exec,
+};
+
+static const struct of_device_id vexpress_spc_ids[] __initconst = {
+ { .compatible = "arm,vexpress-spc,v2p-ca15_a7" },
+ { .compatible = "arm,vexpress-spc" },
+ {},
+};
+
+static int __init vexpress_spc_init(void)
+{
+ int ret;
+ struct device_node *node = of_find_matching_node(NULL,
+ vexpress_spc_ids);
+
+ if (!node)
+ return -ENODEV;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ pr_err("%s: unable to allocate mem\n", __func__);
+ return -ENOMEM;
+ }
+ info->cur_req_type = INVALID_TYPE;
+
+ info->baseaddr = of_iomap(node, 0);
+ if (WARN_ON(!info->baseaddr)) {
+ ret = -ENXIO;
+ goto mem_free;
+ }
+
+ info->irq = irq_of_parse_and_map(node, 0);
+
+ if (WARN_ON(!info->irq)) {
+ ret = -ENXIO;
+ goto unmap;
+ }
+
+ readl_relaxed(info->baseaddr + PWC_STATUS);
+
+ ret = request_irq(info->irq, vexpress_spc_irq_handler,
+ IRQF_DISABLED | IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+ "arm-spc", info);
+
+ if (ret) {
+ pr_err("IRQ %d request failed\n", info->irq);
+ ret = -ENODEV;
+ goto unmap;
+ }
+
+ info->a15_clusid = readl_relaxed(info->baseaddr + A15_CONF) & 0xf;
+
+ vexpress_spc_config_bridge = vexpress_config_bridge_register(
+ node, &vexpress_spc_config_bridge_info);
+
+ if (WARN_ON(!vexpress_spc_config_bridge)) {
+ ret = -ENODEV;
+ goto unmap;
+ }
+
+ opp_func = vexpress_config_func_get(vexpress_spc_config_bridge, "opp");
+ perf_func =
+ vexpress_config_func_get(vexpress_spc_config_bridge, "perf");
+
+ if (!opp_func || !perf_func) {
+ ret = -ENODEV;
+ goto unmap;
+ }
+
+ if (vexpress_spc_populate_opps(0) || vexpress_spc_populate_opps(1)) {
+ if (info->irq)
+ free_irq(info->irq, info);
+ pr_err("failed to build OPP table\n");
+ ret = -ENODEV;
+ goto unmap;
+ }
+ /*
+ * Multi-cluster systems may need this data when non-coherent, during
+ * cluster power-up/power-down. Make sure it reaches main memory:
+ */
+ sync_cache_w(info);
+ sync_cache_w(&info);
+ pr_info("vexpress-spc loaded at %p\n", info->baseaddr);
+ return 0;
+
+unmap:
+ iounmap(info->baseaddr);
+
+mem_free:
+ kfree(info);
+ return ret;
+}
+
+static bool __init __vexpress_spc_check_loaded(void);
+/*
+ * The spc_check_loaded pointer is swapped after init, hence it is safe
+ * to initialize it to a function in the __init section.
+ */
+static bool (*spc_check_loaded)(void) __refdata = &__vexpress_spc_check_loaded;
+
+static bool __init __vexpress_spc_check_loaded(void)
+{
+ if (vexpress_spc_load_result == -EAGAIN)
+ vexpress_spc_load_result = vexpress_spc_init();
+ spc_check_loaded = &vexpress_spc_initialized;
+ return vexpress_spc_initialized();
+}
+
+/*
+ * Function exported to manage early_initcall ordering.
+ * SPC code is needed very early in the boot process to bring CPUs
+ * out of reset and to initialize the power management back-end.
+ * After boot, the pointer is swapped so that the functionality check
+ * remains available to loadable modules, once the early boot init
+ * functions have already been freed from the kernel address space.
+ */
+bool vexpress_spc_check_loaded(void)
+{
+ return spc_check_loaded();
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_check_loaded);
+
+static int __init vexpress_spc_early_init(void)
+{
+ __vexpress_spc_check_loaded();
+ return vexpress_spc_load_result;
+}
+early_initcall(vexpress_spc_early_init);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Serial Power Controller (SPC) support");
diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index 96a020b1dcd1..7f429afce112 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -165,7 +165,7 @@ static u32 *vexpress_sysreg_config_data;
static int vexpress_sysreg_config_tries;
static void *vexpress_sysreg_config_func_get(struct device *dev,
- struct device_node *node)
+ struct device_node *node, const char *id)
{
struct vexpress_sysreg_config_func *config_func;
u32 site;
@@ -351,6 +351,8 @@ void __init vexpress_sysreg_of_early_init(void)
}
+#ifdef CONFIG_GPIOLIB
+
#define VEXPRESS_SYSREG_GPIO(_name, _reg, _value) \
[VEXPRESS_GPIO_##_name] = { \
.reg = _reg, \
@@ -445,6 +447,8 @@ struct gpio_led_platform_data vexpress_sysreg_leds_pdata = {
.leds = vexpress_sysreg_leds,
};
+#endif
+
static ssize_t vexpress_sysreg_sys_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -480,6 +484,9 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
setup_timer(&vexpress_sysreg_config_timer,
vexpress_sysreg_config_complete, 0);
+ vexpress_sysreg_dev = &pdev->dev;
+
+#ifdef CONFIG_GPIOLIB
vexpress_sysreg_gpio_chip.dev = &pdev->dev;
err = gpiochip_add(&vexpress_sysreg_gpio_chip);
if (err) {
@@ -490,11 +497,10 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
return err;
}
- vexpress_sysreg_dev = &pdev->dev;
-
platform_device_register_data(vexpress_sysreg_dev, "leds-gpio",
PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata,
sizeof(vexpress_sysreg_leds_pdata));
+#endif
device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id);
diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig
index bb4c1674ff99..ff9e99474039 100644
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig
@@ -97,7 +97,7 @@ config SMC911X
config SMSC911X
tristate "SMSC LAN911x/LAN921x families embedded ethernet support"
- depends on (ARM || SUPERH || BLACKFIN || MIPS || MN10300)
+ depends on HAS_IOMEM
select CRC32
select NET_CORE
select MII
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index dfbf978315df..bdd703c6bf16 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -1896,7 +1896,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr,
SMC_SELECT_BANK(lp, 1);
val = SMC_GET_BASE(lp);
val = ((val & 0x1F00) >> 3) << SMC_IO_SHIFT;
- if (((unsigned int)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) {
+ if (((unsigned long)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) {
printk("%s: IOADDR %p doesn't match configuration (%x).\n",
CARDNAME, ioaddr, val);
}
diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h
index fcb94806f4e9..51285872481c 100644
--- a/drivers/net/ethernet/smsc/smc91x.h
+++ b/drivers/net/ethernet/smsc/smc91x.h
@@ -1110,8 +1110,7 @@ static const char * chip_ids[ 16 ] = {
void __iomem *__ioaddr = ioaddr; \
if (__len >= 2 && (unsigned long)__ptr & 2) { \
__len -= 2; \
- SMC_outw(*(u16 *)__ptr, ioaddr, \
- DATA_REG(lp)); \
+ SMC_outsw(ioaddr, DATA_REG(lp), __ptr, 1); \
__ptr += 2; \
} \
if (SMC_CAN_USE_DATACS && lp->datacs) \
@@ -1119,8 +1118,7 @@ static const char * chip_ids[ 16 ] = {
SMC_outsl(__ioaddr, DATA_REG(lp), __ptr, __len>>2); \
if (__len & 2) { \
__ptr += (__len & ~3); \
- SMC_outw(*((u16 *)__ptr), ioaddr, \
- DATA_REG(lp)); \
+ SMC_outsw(ioaddr, DATA_REG(lp), __ptr, 1); \
} \
} else if (SMC_16BIT(lp)) \
SMC_outsw(ioaddr, DATA_REG(lp), p, (l) >> 1); \
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 38f0b312ff85..663d2d0448b7 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -439,7 +439,7 @@ void phy_start_machine(struct phy_device *phydev,
{
phydev->adjust_state = handler;
- schedule_delayed_work(&phydev->state_queue, HZ);
+ queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
}
/**
@@ -500,7 +500,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
disable_irq_nosync(irq);
atomic_inc(&phydev->irq_disable);
- schedule_work(&phydev->phy_queue);
+ queue_work(system_power_efficient_wq, &phydev->phy_queue);
return IRQ_HANDLED;
}
@@ -655,7 +655,7 @@ static void phy_change(struct work_struct *work)
/* reschedule state queue work to run as soon as possible */
cancel_delayed_work_sync(&phydev->state_queue);
- schedule_delayed_work(&phydev->state_queue, 0);
+ queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0);
return;
@@ -918,7 +918,8 @@ void phy_state_machine(struct work_struct *work)
if (err < 0)
phy_error(phydev);
- schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ);
+ queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+ PHY_STATE_TIME * HZ);
}
static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index d37bfcf5a3a2..a7bb5da6a96b 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -27,6 +27,7 @@ config OF_SELFTEST
config OF_FLATTREE
bool
select DTC
+ select LIBFDT
config OF_EARLY_FLATTREE
bool
@@ -83,4 +84,10 @@ config OF_MTD
depends on MTD
def_bool y
+config OF_RESERVED_MEM
+ depends on OF_EARLY_FLATTREE
+ bool
+ help
+ Helpers to allow for reservation of memory regions
+
endmenu # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index e027f444d10c..2aaa7b90fc14 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -11,3 +11,6 @@ obj-$(CONFIG_OF_MDIO) += of_mdio.o
obj-$(CONFIG_OF_PCI) += of_pci.o
obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o
obj-$(CONFIG_OF_MTD) += of_mtd.o
+obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
+
+CFLAGS_fdt.o = -I$(src)/../../scripts/dtc/libfdt
diff --git a/drivers/of/base.c b/drivers/of/base.c
index b60f9a77ab03..eda24f54b863 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -18,6 +18,7 @@
* 2 of the License, or (at your option) any later version.
*/
#include <linux/ctype.h>
+#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/spinlock.h>
@@ -32,6 +33,7 @@ struct device_node *of_allnodes;
EXPORT_SYMBOL(of_allnodes);
struct device_node *of_chosen;
struct device_node *of_aliases;
+static struct device_node *of_stdout;
DEFINE_MUTEX(of_aliases_mutex);
@@ -230,6 +232,100 @@ const void *of_get_property(const struct device_node *np, const char *name,
}
EXPORT_SYMBOL(of_get_property);
+/*
+ * arch_match_cpu_phys_id - Match the given logical CPU and physical id
+ *
+ * @cpu: logical cpu index of a core/thread
+ * @phys_id: physical identifier of a core/thread
+ *
+ * CPU logical to physical index mapping is architecture specific.
+ * However, this __weak function provides a default match of physical
+ * id to logical cpu index. The phys_id provided here is usually the
+ * value read from the device tree, which must match the hardware
+ * internal registers.
+ *
+ * Returns true if the physical identifier and the logical cpu index
+ * correspond to the same core/thread, false otherwise.
+ */
+bool __weak arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+ return (u32)phys_id == cpu;
+}
+
+/**
+ * Checks if the given "prop_name" property holds the physical id of the
+ * core/thread corresponding to the logical cpu 'cpu'. If 'thread' is not
+ * NULL, the local thread number within the core is returned in it.
+ */
+static bool __of_find_n_match_cpu_property(struct device_node *cpun,
+ const char *prop_name, int cpu, unsigned int *thread)
+{
+ const __be32 *cell;
+ int ac, prop_len, tid;
+ u64 hwid;
+
+ ac = of_n_addr_cells(cpun);
+ cell = of_get_property(cpun, prop_name, &prop_len);
+ if (!cell)
+ return false;
+ prop_len /= sizeof(*cell);
+ for (tid = 0; tid < prop_len; tid++) {
+ hwid = of_read_number(cell, ac);
+ if (arch_match_cpu_phys_id(cpu, hwid)) {
+ if (thread)
+ *thread = tid;
+ return true;
+ }
+ cell += ac;
+ }
+ return false;
+}
+
+/**
+ * of_get_cpu_node - Get device node associated with the given logical CPU
+ *
+ * @cpu: CPU number(logical index) for which device node is required
+ * @thread: if not NULL, local thread number within the physical core is
+ * returned
+ *
+ * The main purpose of this function is to retrieve the device node for the
+ * given logical CPU index. It should be used to initialize the of_node in
+ * the cpu device. Once the of_node in the cpu device is populated, all
+ * further references can use that instead.
+ *
+ * CPU logical to physical index mapping is architecture specific and is built
+ * before booting secondary cores. This function uses arch_match_cpu_phys_id
+ * which can be overridden by an architecture specific implementation.
+ *
+ * Returns a node pointer for the logical cpu if found, else NULL.
+ */
+struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
+{
+ struct device_node *cpun, *cpus;
+
+ cpus = of_find_node_by_path("/cpus");
+ if (!cpus) {
+ pr_warn("Missing cpus node, bailing out\n");
+ return NULL;
+ }
+
+ for_each_child_of_node(cpus, cpun) {
+ if (of_node_cmp(cpun->type, "cpu"))
+ continue;
+ /* Check for non-standard "ibm,ppc-interrupt-server#s" property
+ * for thread ids on PowerPC. If it doesn't exist, fall back to the
+ * standard "reg" property.
+ */
+ if (IS_ENABLED(CONFIG_PPC) &&
+ __of_find_n_match_cpu_property(cpun,
+ "ibm,ppc-interrupt-server#s", cpu, thread))
+ return cpun;
+ if (__of_find_n_match_cpu_property(cpun, "reg", cpu, thread))
+ return cpun;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(of_get_cpu_node);
+
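+/*
+ * Sketch: architecture code could use of_get_cpu_node() to populate the
+ * of_node of each cpu device (assumes the generic cpu device layer;
+ * illustrative only):
+ */
+static void __maybe_unused example_init_cpu_of_nodes(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct device *dev = get_cpu_device(cpu);
+
+ if (dev && !dev->of_node)
+ dev->of_node = of_get_cpu_node(cpu, NULL);
+ }
+}
+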
/** Checks if the given "compat" string matches one of the strings in
* the device's "compatible" property
*/
@@ -1551,6 +1647,15 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
of_chosen = of_find_node_by_path("/chosen");
if (of_chosen == NULL)
of_chosen = of_find_node_by_path("/chosen@0");
+
+ if (of_chosen) {
+ const char *name;
+
+ name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (name)
+ of_stdout = of_find_node_by_path(name);
+ }
+
of_aliases = of_find_node_by_path("/aliases");
if (!of_aliases)
return;
@@ -1660,3 +1765,19 @@ const char *of_prop_next_string(struct property *prop, const char *cur)
return curv;
}
EXPORT_SYMBOL_GPL(of_prop_next_string);
+
+/**
+ * of_device_is_stdout_path - check if a device node matches the
+ * linux,stdout-path property
+ *
+ * Check if this device node matches the linux,stdout-path property
+ * in the chosen node. Return true if yes, false otherwise.
+ */
+int of_device_is_stdout_path(struct device_node *dn)
+{
+ if (!of_stdout)
+ return false;
+
+ return of_stdout == dn;
+}
+EXPORT_SYMBOL_GPL(of_device_is_stdout_path);
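+
+/*
+ * Sketch of a serial driver preferring the firmware-chosen console via
+ * the helper above (illustrative, error handling omitted):
+ *
+ * if (of_device_is_stdout_path(pdev->dev.of_node))
+ * add_preferred_console(...);
+ */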
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 118773751ea4..4911158cba8a 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -14,9 +14,13 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/sizes.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
#ifdef CONFIG_PPC
@@ -25,54 +29,6 @@
#include <asm/page.h>
-char *of_fdt_get_string(struct boot_param_header *blob, u32 offset)
-{
- return ((char *)blob) +
- be32_to_cpu(blob->off_dt_strings) + offset;
-}
-
-/**
- * of_fdt_get_property - Given a node in the given flat blob, return
- * the property ptr
- */
-void *of_fdt_get_property(struct boot_param_header *blob,
- unsigned long node, const char *name,
- unsigned long *size)
-{
- unsigned long p = node;
-
- do {
- u32 tag = be32_to_cpup((__be32 *)p);
- u32 sz, noff;
- const char *nstr;
-
- p += 4;
- if (tag == OF_DT_NOP)
- continue;
- if (tag != OF_DT_PROP)
- return NULL;
-
- sz = be32_to_cpup((__be32 *)p);
- noff = be32_to_cpup((__be32 *)(p + 4));
- p += 8;
- if (be32_to_cpu(blob->version) < 0x10)
- p = ALIGN(p, sz >= 8 ? 8 : 4);
-
- nstr = of_fdt_get_string(blob, noff);
- if (nstr == NULL) {
- pr_warning("Can't find property index name !\n");
- return NULL;
- }
- if (strcmp(name, nstr) == 0) {
- if (size)
- *size = sz;
- return (void *)p;
- }
- p += sz;
- p = ALIGN(p, 4);
- } while (1);
-}
-
/**
* of_fdt_is_compatible - Return true if given node from the given blob has
* compat in its compatible list
@@ -87,9 +43,10 @@ int of_fdt_is_compatible(struct boot_param_header *blob,
unsigned long node, const char *compat)
{
const char *cp;
- unsigned long cplen, l, score = 0;
+ int cplen;
+ unsigned long l, score = 0;
- cp = of_fdt_get_property(blob, node, "compatible", &cplen);
+ cp = fdt_getprop(blob, node, "compatible", &cplen);
if (cp == NULL)
return 0;
while (cplen > 0) {
@@ -125,12 +82,12 @@ int of_fdt_match(struct boot_param_header *blob, unsigned long node,
return score;
}
-static void *unflatten_dt_alloc(unsigned long *mem, unsigned long size,
+static void *unflatten_dt_alloc(void **mem, unsigned long size,
unsigned long align)
{
void *res;
- *mem = ALIGN(*mem, align);
+ *mem = PTR_ALIGN(*mem, align);
res = (void *)*mem;
*mem += size;
@@ -146,30 +103,29 @@ static void *unflatten_dt_alloc(unsigned long *mem, unsigned long size,
* @allnextpp: pointer to ->allnext from last allocated device_node
* @fpsize: Size of the node path up at the current depth.
*/
-static unsigned long unflatten_dt_node(struct boot_param_header *blob,
- unsigned long mem,
- unsigned long *p,
+static void * unflatten_dt_node(struct boot_param_header *blob,
+ void *mem,
+ int *poffset,
struct device_node *dad,
struct device_node ***allnextpp,
unsigned long fpsize)
{
+ const __be32 *p;
struct device_node *np;
struct property *pp, **prev_pp = NULL;
- char *pathp;
- u32 tag;
+ const char *pathp;
unsigned int l, allocl;
+ static int depth = 0;
+ int old_depth;
+ int offset;
int has_name = 0;
int new_format = 0;
- tag = be32_to_cpup((__be32 *)(*p));
- if (tag != OF_DT_BEGIN_NODE) {
- pr_err("Weird tag at start of node: %x\n", tag);
+ pathp = fdt_get_name(blob, *poffset, &l);
+ if (!pathp)
return mem;
- }
- *p += 4;
- pathp = (char *)*p;
- l = allocl = strlen(pathp) + 1;
- *p = ALIGN(*p + l, 4);
+
+ allocl = l++;
/* version 0x10 has a more compact unit name here instead of the full
* path. we accumulate the full path size using "fpsize", we'll rebuild
@@ -187,7 +143,7 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob,
fpsize = 1;
allocl = 2;
l = 1;
- *pathp = '\0';
+ pathp = "";
} else {
/* account for '/' and path size minus terminal 0
* already in 'l'
@@ -235,32 +191,23 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob,
kref_init(&np->kref);
}
/* process properties */
- while (1) {
- u32 sz, noff;
- char *pname;
-
- tag = be32_to_cpup((__be32 *)(*p));
- if (tag == OF_DT_NOP) {
- *p += 4;
- continue;
- }
- if (tag != OF_DT_PROP)
+ for (offset = fdt_first_property_offset(blob, *poffset);
+ (offset >= 0);
+ (offset = fdt_next_property_offset(blob, offset))) {
+ const char *pname;
+ u32 sz;
+
+ if (!(p = fdt_getprop_by_offset(blob, offset, &pname, &sz))) {
+ offset = -FDT_ERR_INTERNAL;
break;
- *p += 4;
- sz = be32_to_cpup((__be32 *)(*p));
- noff = be32_to_cpup((__be32 *)((*p) + 4));
- *p += 8;
- if (be32_to_cpu(blob->version) < 0x10)
- *p = ALIGN(*p, sz >= 8 ? 8 : 4);
-
- pname = of_fdt_get_string(blob, noff);
+ }
+
if (pname == NULL) {
pr_info("Can't find property name in list !\n");
break;
}
if (strcmp(pname, "name") == 0)
has_name = 1;
- l = strlen(pname) + 1;
pp = unflatten_dt_alloc(&mem, sizeof(struct property),
__alignof__(struct property));
if (allnextpp) {
@@ -272,26 +219,25 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob,
if ((strcmp(pname, "phandle") == 0) ||
(strcmp(pname, "linux,phandle") == 0)) {
if (np->phandle == 0)
- np->phandle = be32_to_cpup((__be32*)*p);
+ np->phandle = be32_to_cpup(p);
}
/* And we process the "ibm,phandle" property
* used in pSeries dynamic device tree
* stuff */
if (strcmp(pname, "ibm,phandle") == 0)
- np->phandle = be32_to_cpup((__be32 *)*p);
- pp->name = pname;
+ np->phandle = be32_to_cpup(p);
+ pp->name = (char *)pname;
pp->length = sz;
- pp->value = (void *)*p;
+ pp->value = (__be32 *)p;
*prev_pp = pp;
prev_pp = &pp->next;
}
- *p = ALIGN((*p) + sz, 4);
}
/* with version 0x10 we may not have the name property, recreate
* it here from the unit name if absent
*/
if (!has_name) {
- char *p1 = pathp, *ps = pathp, *pa = NULL;
+ const char *p1 = pathp, *ps = pathp, *pa = NULL;
int sz;
while (*p1) {
@@ -328,19 +274,18 @@ static unsigned long unflatten_dt_node(struct boot_param_header *blob,
if (!np->type)
np->type = "<NULL>";
}
- while (tag == OF_DT_BEGIN_NODE || tag == OF_DT_NOP) {
- if (tag == OF_DT_NOP)
- *p += 4;
- else
- mem = unflatten_dt_node(blob, mem, p, np, allnextpp,
- fpsize);
- tag = be32_to_cpup((__be32 *)(*p));
- }
- if (tag != OF_DT_END_NODE) {
- pr_err("Weird tag at end of node: %x\n", tag);
- return mem;
- }
- *p += 4;
+
+ old_depth = depth;
+ *poffset = fdt_next_node(blob, *poffset, &depth);
+ if (depth < 0)
+ depth = 0;
+ while (*poffset > 0 && depth > old_depth)
+ mem = unflatten_dt_node(blob, mem, poffset, np, allnextpp,
+ fpsize);
+
+ if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
+ pr_err("unflatten: error %d processing FDT\n", *poffset);
+
return mem;
}
@@ -360,7 +305,9 @@ static void __unflatten_device_tree(struct boot_param_header *blob,
struct device_node **mynodes,
void * (*dt_alloc)(u64 size, u64 align))
{
- unsigned long start, mem, size;
+ unsigned long size;
+ int start;
+ void *mem;
struct device_node **allnextp = mynodes;
pr_debug(" -> unflatten_device_tree()\n");
@@ -381,30 +328,25 @@ static void __unflatten_device_tree(struct boot_param_header *blob,
}
/* First pass, scan for size */
- start = ((unsigned long)blob) +
- be32_to_cpu(blob->off_dt_struct);
- size = unflatten_dt_node(blob, 0, &start, NULL, NULL, 0);
- size = (size | 3) + 1;
+ start = 0;
+ size = (unsigned long)unflatten_dt_node(blob, 0, &start, NULL, NULL, 0);
+ size = ALIGN(size, 4);
pr_debug(" size is %lx, allocating...\n", size);
/* Allocate memory for the expanded device tree */
- mem = (unsigned long)
- dt_alloc(size + 4, __alignof__(struct device_node));
+ mem = dt_alloc(size + 4, __alignof__(struct device_node));
memset((void *)mem, 0, size);
((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef);
- pr_debug(" unflattening %lx...\n", mem);
+ pr_debug(" unflattening %p...\n", mem);
/* Second pass, do actual unflattening */
- start = ((unsigned long)blob) +
- be32_to_cpu(blob->off_dt_struct);
+ start = 0;
unflatten_dt_node(blob, mem, &start, NULL, &allnextp, 0);
- if (be32_to_cpup((__be32 *)start) != OF_DT_END)
- pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start));
- if (be32_to_cpu(((__be32 *)mem)[size / 4]) != 0xdeadbeef)
+ if (be32_to_cpup(mem + size) != 0xdeadbeef)
pr_warning("End of tree marker overwritten: %08x\n",
be32_to_cpu(((__be32 *)mem)[size / 4]));
*allnextp = NULL;
@@ -443,6 +385,129 @@ struct boot_param_header *initial_boot_params;
#ifdef CONFIG_OF_EARLY_FLATTREE
/**
+ * __reserved_mem_reserve_reg() - reserve all memory described in 'reg' property
+ */
+static int __init __reserved_mem_reserve_reg(unsigned long node,
+ const char *uname)
+{
+ int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
+ phys_addr_t base, size;
+ int len;
+ const __be32 *prop;
+ int nomap, first = 1;
+
+ prop = of_get_flat_dt_prop(node, "reg", &len);
+ if (!prop)
+ return -ENOENT;
+
+ if (len && len % t_len != 0) {
+ pr_err("Reserved memory: invalid reg property in '%s', skipping node.\n",
+ uname);
+ return -EINVAL;
+ }
+
+ nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
+
+ while (len >= t_len) {
+ base = dt_mem_next_cell(dt_root_addr_cells, &prop);
+ size = dt_mem_next_cell(dt_root_size_cells, &prop);
+
+ if (base && size &&
+ early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
+ pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %ld MiB\n",
+ uname, &base, (unsigned long)size / SZ_1M);
+ else
+ pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %ld MiB\n",
+ uname, &base, (unsigned long)size / SZ_1M);
+
+ len -= t_len;
+ if (first) {
+ fdt_reserved_mem_save_node(node, uname, base, size);
+ first = 0;
+ }
+ }
+ return 0;
+}
+
+/**
+ * __reserved_mem_check_root() - check if #size-cells, #address-cells provided
+ * in /reserved-memory match the values supported by the current
+ * implementation, and that a 'ranges' property has been provided
+ */
+static int __init __reserved_mem_check_root(unsigned long node)
+{
+ const __be32 *prop;
+
+ prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
+ if (!prop || be32_to_cpup(prop) != dt_root_size_cells)
+ return -EINVAL;
+
+ prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
+ if (!prop || be32_to_cpup(prop) != dt_root_addr_cells)
+ return -EINVAL;
+
+ prop = of_get_flat_dt_prop(node, "ranges", NULL);
+ if (!prop)
+ return -EINVAL;
+ return 0;
+}
+
+/**
+ * __fdt_scan_reserved_mem() - scan a single FDT node for reserved memory
+ */
+static int __init __fdt_scan_reserved_mem(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ static int found;
+ const char *status;
+ int err;
+
+ if (!found && depth == 1 && strcmp(uname, "reserved-memory") == 0) {
+ if (__reserved_mem_check_root(node) != 0) {
+ pr_err("Reserved memory: unsupported node format, ignoring\n");
+ /* break scan */
+ return 1;
+ }
+ found = 1;
+ /* scan next node */
+ return 0;
+ } else if (!found) {
+ /* scan next node */
+ return 0;
+ } else if (found && depth < 2) {
+ /* scanning of /reserved-memory has been finished */
+ return 1;
+ }
+
+ status = of_get_flat_dt_prop(node, "status", NULL);
+ if (status && strcmp(status, "okay") != 0 && strcmp(status, "ok") != 0)
+ return 0;
+
+ err = __reserved_mem_reserve_reg(node, uname);
+ if (err == -ENOENT && of_get_flat_dt_prop(node, "size", NULL))
+ fdt_reserved_mem_save_node(node, uname, 0, 0);
+
+ /* scan next node */
+ return 0;
+}
+
+/**
+ * early_init_fdt_scan_reserved_mem() - create reserved memory regions
+ *
+ * This function grabs memory from the early allocator for device exclusive use
+ * defined in device tree structures. It should be called by arch specific code
+ * once the early allocator (i.e. memblock) has been fully activated.
+ */
+void __init early_init_fdt_scan_reserved_mem(void)
+{
+ if (!initial_boot_params)
+ return;
+
+ of_scan_flat_dt(__fdt_scan_reserved_mem, NULL);
+ fdt_init_reserved_mem();
+}
+
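+/*
+ * Sketch of the expected call site in architecture setup code
+ * (illustrative ordering; run only once memblock is usable):
+ *
+ * early_init_dt_scan(fdt);
+ * ...memblock initialization...
+ * early_init_fdt_scan_reserved_mem();
+ */
+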
+/**
* of_scan_flat_dt - scan flattened tree blob and call callback on each.
* @it: callback function
* @data: context data pointer
@@ -456,47 +521,19 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
void *data),
void *data)
{
- unsigned long p = ((unsigned long)initial_boot_params) +
- be32_to_cpu(initial_boot_params->off_dt_struct);
- int rc = 0;
- int depth = -1;
-
- do {
- u32 tag = be32_to_cpup((__be32 *)p);
- const char *pathp;
-
- p += 4;
- if (tag == OF_DT_END_NODE) {
- depth--;
- continue;
- }
- if (tag == OF_DT_NOP)
- continue;
- if (tag == OF_DT_END)
- break;
- if (tag == OF_DT_PROP) {
- u32 sz = be32_to_cpup((__be32 *)p);
- p += 8;
- if (be32_to_cpu(initial_boot_params->version) < 0x10)
- p = ALIGN(p, sz >= 8 ? 8 : 4);
- p += sz;
- p = ALIGN(p, 4);
- continue;
- }
- if (tag != OF_DT_BEGIN_NODE) {
- pr_err("Invalid tag %x in flat device tree!\n", tag);
- return -EINVAL;
- }
- depth++;
- pathp = (char *)p;
- p = ALIGN(p + strlen(pathp) + 1, 4);
+ const void *blob = initial_boot_params;
+ const char *pathp;
+ int offset, rc = 0, depth = -1;
+
+ for (offset = fdt_next_node(blob, -1, &depth);
+ offset >= 0 && depth >= 0 && !rc;
+ offset = fdt_next_node(blob, offset, &depth)) {
+
+ pathp = fdt_get_name(blob, offset, NULL);
if (*pathp == '/')
pathp = kbasename(pathp);
- rc = it(p, pathp, depth, data);
- if (rc != 0)
- break;
- } while (1);
-
+ rc = it(offset, pathp, depth, data);
+ }
return rc;
}
@@ -505,14 +542,7 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
*/
unsigned long __init of_get_flat_dt_root(void)
{
- unsigned long p = ((unsigned long)initial_boot_params) +
- be32_to_cpu(initial_boot_params->off_dt_struct);
-
- while (be32_to_cpup((__be32 *)p) == OF_DT_NOP)
- p += 4;
- BUG_ON(be32_to_cpup((__be32 *)p) != OF_DT_BEGIN_NODE);
- p += 4;
- return ALIGN(p + strlen((char *)p) + 1, 4);
+ return 0;
}
/**
@@ -521,10 +551,10 @@ unsigned long __init of_get_flat_dt_root(void)
* This function can be used within scan_flattened_dt callback to get
* access to properties
*/
-void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size)
+const void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
+ int *size)
{
- return of_fdt_get_property(initial_boot_params, node, name, size);
+ return fdt_getprop(initial_boot_params, node, name, size);
}
/**
@@ -545,6 +575,15 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat)
return of_fdt_match(initial_boot_params, node, compat);
}
+struct fdt_scan_status {
+ const char *name;
+ int namelen;
+ int depth;
+ int found;
+ int (*iterator)(unsigned long node, const char *uname, int depth, void *data);
+ void *data;
+};
+
#ifdef CONFIG_BLK_DEV_INITRD
/**
* early_init_dt_check_for_initrd - Decode initrd location from flat tree
@@ -552,23 +591,25 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat)
*/
void __init early_init_dt_check_for_initrd(unsigned long node)
{
- unsigned long start, end, len;
- __be32 *prop;
+ u64 start, end;
+ int len;
+ const __be32 *prop;
pr_debug("Looking for initrd properties... ");
prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len);
if (!prop)
return;
- start = of_read_ulong(prop, len/4);
+ start = of_read_number(prop, len/4);
prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len);
if (!prop)
return;
- end = of_read_ulong(prop, len/4);
+ end = of_read_number(prop, len/4);
early_init_dt_setup_initrd_arch(start, end);
- pr_debug("initrd_start=0x%lx initrd_end=0x%lx\n", start, end);
+ pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n",
+ (unsigned long long)start, (unsigned long long)end);
}
#else
inline void early_init_dt_check_for_initrd(unsigned long node)
@@ -582,7 +623,7 @@ inline void early_init_dt_check_for_initrd(unsigned long node)
int __init early_init_dt_scan_root(unsigned long node, const char *uname,
int depth, void *data)
{
- __be32 *prop;
+ const __be32 *prop;
if (depth != 0)
return 0;
@@ -604,9 +645,9 @@ int __init early_init_dt_scan_root(unsigned long node, const char *uname,
return 1;
}
-u64 __init dt_mem_next_cell(int s, __be32 **cellp)
+u64 __init dt_mem_next_cell(int s, const __be32 **cellp)
{
- __be32 *p = *cellp;
+ const __be32 *p = *cellp;
*cellp = p + s;
return of_read_number(p, s);
@@ -618,9 +659,9 @@ u64 __init dt_mem_next_cell(int s, __be32 **cellp)
int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
int depth, void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- __be32 *reg, *endp;
- unsigned long l;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *reg, *endp;
+ int l;
/* We are scanning "memory" nodes only */
if (type == NULL) {
@@ -641,7 +682,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
endp = reg + (l / sizeof(__be32));
- pr_debug("memory scan node %s, reg size %ld, data: %x %x %x %x,\n",
+ pr_debug("memory scan node %s, reg size %d, data: %x %x %x %x,\n",
uname, l, reg[0], reg[1], reg[2], reg[3]);
while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
@@ -664,8 +705,8 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
int depth, void *data)
{
- unsigned long l;
- char *p;
+ int l;
+ const char *p;
pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
@@ -698,6 +739,80 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
return 1;
}
+#ifdef CONFIG_HAVE_MEMBLOCK
+void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+ const u64 phys_offset = __pa(PAGE_OFFSET);
+ base &= PAGE_MASK;
+ size &= PAGE_MASK;
+ if (base + size < phys_offset) {
+ pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
+ base, base + size);
+ return;
+ }
+ if (base < phys_offset) {
+ pr_warning("Ignoring memory range 0x%llx - 0x%llx\n",
+ base, phys_offset);
+ size -= phys_offset - base;
+ base = phys_offset;
+ }
+ memblock_add(base, size);
+}
+
+int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
+ phys_addr_t size, bool nomap)
+{
+ if (memblock_is_region_reserved(base, size))
+ return -EBUSY;
+ if (nomap)
+ return memblock_remove(base, size);
+ return memblock_reserve(base, size);
+}
+
+/*
+ * called from unflatten_device_tree() to bootstrap devicetree itself
+ * Architectures can override this definition if memblock isn't used
+ */
+void * __init __weak early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+ return __va(memblock_alloc(size, align));
+}
+#else
+int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
+ phys_addr_t size, bool nomap)
+{
+ pr_err("Reserved memory not supported, ignoring range 0x%llx - 0x%llx%s\n",
+ base, size, nomap ? " (nomap)" : "");
+ return -ENOSYS;
+}
+#endif
+
+bool __init early_init_dt_scan(void *params)
+{
+ if (!params)
+ return false;
+
+ /* Setup flat device-tree pointer */
+ initial_boot_params = params;
+
+ /* check device tree validity */
+ if (be32_to_cpu(initial_boot_params->magic) != OF_DT_HEADER) {
+ initial_boot_params = NULL;
+ return false;
+ }
+
+ /* Retrieve various information from the /chosen node */
+ of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
+
+ /* Initialize {size,address}-cells info */
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+
+ /* Setup memory, calling early_init_dt_add_memory_arch */
+ of_scan_flat_dt(early_init_dt_scan_memory, NULL);
+
+ return true;
+}
+
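+/*
+ * Usage sketch ("dt_phys" is an assumed physical address handed over by
+ * the boot loader):
+ *
+ * if (!early_init_dt_scan(phys_to_virt(dt_phys)))
+ * panic("bad device tree blob");
+ */
+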
/**
* unflatten_device_tree - create tree of device_nodes from flat blob
*
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
new file mode 100644
index 000000000000..e420eb52e5c9
--- /dev/null
+++ b/drivers/of/of_reserved_mem.c
@@ -0,0 +1,217 @@
+/*
+ * Device tree based initialization code for reserved memory.
+ *
+ * Copyright (c) 2013, The Linux Foundation. All Rights Reserved.
+ * Copyright (c) 2013,2014 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ * Author: Marek Szyprowski <m.szyprowski@samsung.com>
+ * Author: Josh Cartwright <joshc@codeaurora.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your option) any later version of the license.
+ */
+
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/mm.h>
+#include <linux/sizes.h>
+#include <linux/of_reserved_mem.h>
+
+#define MAX_RESERVED_REGIONS 16
+static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
+static int reserved_mem_count;
+
+#if defined(CONFIG_HAVE_MEMBLOCK)
+#include <linux/memblock.h>
+int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
+ phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
+ phys_addr_t *res_base)
+{
+ /*
+ * We use __memblock_alloc_base() because memblock_alloc_base()
+ * panic()s on allocation failure.
+ */
+ phys_addr_t base = __memblock_alloc_base(size, align, end);
+ if (!base)
+ return -ENOMEM;
+
+ /*
+ * Check if the allocated region fits into the start..end window
+ */
+ if (base < start) {
+ memblock_free(base, size);
+ return -ENOMEM;
+ }
+
+ *res_base = base;
+ if (nomap)
+ return memblock_remove(base, size);
+ return 0;
+}
+#else
+int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
+ phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
+ phys_addr_t *res_base)
+{
+ pr_err("Reserved memory not supported, ignoring region 0x%llx%s\n",
+ size, nomap ? " (nomap)" : "");
+ return -ENOSYS;
+}
+#endif
+
+/**
+ * res_mem_save_node() - save fdt node for second pass initialization
+ */
+void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
+ phys_addr_t base, phys_addr_t size)
+{
+ struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
+
+ if (reserved_mem_count == ARRAY_SIZE(reserved_mem)) {
+ pr_err("Reserved memory: not enough space all defined regions.\n");
+ return;
+ }
+
+ rmem->fdt_node = node;
+ rmem->name = uname;
+ rmem->base = base;
+ rmem->size = size;
+
+ reserved_mem_count++;
+ return;
+}
+
+/**
+ * __reserved_mem_alloc_size() - allocate reserved memory described by 'size', 'align'
+ * and 'alloc-ranges' properties
+ */
+static int __init __reserved_mem_alloc_size(unsigned long node,
+ const char *uname, phys_addr_t *res_base, phys_addr_t *res_size)
+{
+ int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
+ phys_addr_t start = 0, end = 0;
+ phys_addr_t base = 0, align = 0, size;
+ int len;
+ const __be32 *prop;
+ int nomap;
+ int ret;
+
+ prop = of_get_flat_dt_prop(node, "size", &len);
+ if (!prop)
+ return -EINVAL;
+
+ if (len != dt_root_size_cells * sizeof(__be32)) {
+ pr_err("Reserved memory: invalid size property in '%s' node.\n",
+ uname);
+ return -EINVAL;
+ }
+ size = dt_mem_next_cell(dt_root_size_cells, &prop);
+
+ nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
+
+ prop = of_get_flat_dt_prop(node, "alignment", &len);
+ if (prop) {
+ if (len != dt_root_addr_cells * sizeof(__be32)) {
+ pr_err("Reserved memory: invalid alignment property in '%s' node.\n",
+ uname);
+ return -EINVAL;
+ }
+ align = dt_mem_next_cell(dt_root_addr_cells, &prop);
+ }
+
+ prop = of_get_flat_dt_prop(node, "alloc-ranges", &len);
+ if (prop) {
+
+ if (len % t_len != 0) {
+ pr_err("Reserved memory: invalid alloc-ranges property in '%s', skipping node.\n",
+ uname);
+ return -EINVAL;
+ }
+
+ base = 0;
+
+ while (len > 0) {
+ start = dt_mem_next_cell(dt_root_addr_cells, &prop);
+ end = start + dt_mem_next_cell(dt_root_size_cells,
+ &prop);
+
+ ret = early_init_dt_alloc_reserved_memory_arch(size,
+ align, start, end, nomap, &base);
+ if (ret == 0) {
+ pr_debug("Reserved memory: allocated memory for '%s' node: base %pa, size %ld MiB\n",
+ uname, &base,
+ (unsigned long)size / SZ_1M);
+ break;
+ }
+ len -= t_len;
+ }
+
+ } else {
+ ret = early_init_dt_alloc_reserved_memory_arch(size, align,
+ 0, 0, nomap, &base);
+ if (ret == 0)
+ pr_debug("Reserved memory: allocated memory for '%s' node: base %pa, size %ld MiB\n",
+ uname, &base, (unsigned long)size / SZ_1M);
+ }
+
+ if (base == 0) {
+ pr_info("Reserved memory: failed to allocate memory for node '%s'\n",
+ uname);
+ return -ENOMEM;
+ }
+
+ *res_base = base;
+ *res_size = size;
+
+ return 0;
+}
+
+static const struct of_device_id __rmem_of_table_sentinel
+ __used __section(__reservedmem_of_table_end);
+
+/**
+ * __reserved_mem_init_node() - call region specific reserved memory init code
+ */
+static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
+{
+ extern const struct of_device_id __reservedmem_of_table[];
+ const struct of_device_id *i;
+
+ for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) {
+ reservedmem_of_init_fn initfn = i->data;
+ const char *compat = i->compatible;
+
+ if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
+ continue;
+
+ if (initfn(rmem, rmem->fdt_node, rmem->name) == 0) {
+ pr_info("Reserved memory: initialized node %s, compatible id %s\n",
+ rmem->name, compat);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/**
+ * fdt_init_reserved_mem - allocate and init all saved reserved memory regions
+ */
+void __init fdt_init_reserved_mem(void)
+{
+ int i;
+ for (i = 0; i < reserved_mem_count; i++) {
+ struct reserved_mem *rmem = &reserved_mem[i];
+ unsigned long node = rmem->fdt_node;
+ int err = 0;
+
+ if (rmem->size == 0)
+ err = __reserved_mem_alloc_size(node, rmem->name,
+ &rmem->base, &rmem->size);
+ if (err == 0)
+ __reserved_mem_init_node(rmem);
+ }
+}
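+
+/*
+ * Sketch of a region-specific init hook as matched by
+ * __reserved_mem_init_node() above; it assumes a helper macro
+ * (a hypothetical RESERVEDMEM_OF_DECLARE()) that places an of_device_id
+ * entry into __reservedmem_of_table:
+ */
+static int __init __maybe_unused example_rmem_init(struct reserved_mem *rmem,
+ unsigned long node, const char *uname)
+{
+ pr_info("example: region '%s' at %pa, size %pa\n", uname,
+ &rmem->base, &rmem->size);
+ return 0;
+}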
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index 9bdaeb8785ce..b32d6c798743 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -2,7 +2,7 @@
ccflags-$(CONFIG_DEBUG_PINCTRL) += -DDEBUG
-obj-$(CONFIG_PINCTRL) += core.o
+obj-$(CONFIG_PINCTRL) += core.o pinctrl-utils.o
obj-$(CONFIG_PINMUX) += pinmux.o
obj-$(CONFIG_PINCONF) += pinconf.o
ifeq ($(CONFIG_OF),y)
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 2ad5a8d337b5..872e74a23788 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -21,8 +21,10 @@
#include <linux/pinctrl/pinctrl.h>
#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/of.h>
#include "core.h"
#include "pinconf.h"
+#include "pinctrl-utils.h"
#ifdef CONFIG_DEBUG_FS
@@ -37,14 +39,19 @@ struct pin_config_item {
static struct pin_config_item conf_items[] = {
PCONFDUMP(PIN_CONFIG_BIAS_DISABLE, "input bias disabled", NULL),
PCONFDUMP(PIN_CONFIG_BIAS_HIGH_IMPEDANCE, "input bias high impedance", NULL),
+ PCONFDUMP(PIN_CONFIG_BIAS_BUS_HOLD, "input bias bus hold", NULL),
PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL),
PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", NULL),
+ PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
+ "input bias pull to pin specific state", NULL),
PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL),
PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL),
PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL),
+ PCONFDUMP(PIN_CONFIG_DRIVE_STRENGTH, "output drive strength", "mA"),
+ PCONFDUMP(PIN_CONFIG_INPUT_ENABLE, "input enabled", NULL),
PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_ENABLE, "input schmitt enabled", NULL),
PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL),
- PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "time units"),
+ PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "usec"),
PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector"),
PCONFDUMP(PIN_CONFIG_SLEW_RATE, "slew rate", NULL),
PCONFDUMP(PIN_CONFIG_LOW_POWER_MODE, "pin low power", "mode"),
@@ -135,3 +142,198 @@ void pinconf_generic_dump_config(struct pinctrl_dev *pctldev,
}
EXPORT_SYMBOL_GPL(pinconf_generic_dump_config);
#endif
+
+#ifdef CONFIG_OF
+struct pinconf_generic_dt_params {
+ const char * const property;
+ enum pin_config_param param;
+ u32 default_value;
+};
+
+static struct pinconf_generic_dt_params dt_params[] = {
+ { "bias-disable", PIN_CONFIG_BIAS_DISABLE, 0 },
+ { "bias-high-impedance", PIN_CONFIG_BIAS_HIGH_IMPEDANCE, 0 },
+ { "bias-bus-hold", PIN_CONFIG_BIAS_BUS_HOLD, 0 },
+ { "bias-pull-up", PIN_CONFIG_BIAS_PULL_UP, 1 },
+ { "bias-pull-down", PIN_CONFIG_BIAS_PULL_DOWN, 1 },
+ { "bias-pull-pin-default", PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, 1 },
+ { "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 },
+ { "drive-open-drain", PIN_CONFIG_DRIVE_OPEN_DRAIN, 0 },
+ { "drive-open-source", PIN_CONFIG_DRIVE_OPEN_SOURCE, 0 },
+ { "drive-strength", PIN_CONFIG_DRIVE_STRENGTH, 0 },
+ { "input-enable", PIN_CONFIG_INPUT_ENABLE, 1 },
+ { "input-disable", PIN_CONFIG_INPUT_ENABLE, 0 },
+ { "input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 },
+ { "input-schmitt-disable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 0 },
+ { "input-debounce", PIN_CONFIG_INPUT_DEBOUNCE, 0 },
+ { "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 },
+ { "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 },
+ { "output-low", PIN_CONFIG_OUTPUT, 0, },
+ { "output-high", PIN_CONFIG_OUTPUT, 1, },
+ { "slew-rate", PIN_CONFIG_SLEW_RATE, 0},
+};
+
+/**
+ * pinconf_generic_parse_dt_config() - parse the config properties into
+ * generic pinconfig values
+ * @np: node containing the pinconfig properties
+ * @configs: array with nconfigs entries containing the generic pinconf values
+ * @nconfigs: number of configurations
+ */
+int pinconf_generic_parse_dt_config(struct device_node *np,
+ unsigned long **configs,
+ unsigned int *nconfigs)
+{
+ unsigned long *cfg;
+ unsigned int ncfg = 0;
+ int ret;
+ int i;
+ u32 val;
+
+ if (!np)
+ return -EINVAL;
+
+ /* allocate a temporary array big enough to hold one of each option */
+ cfg = kzalloc(sizeof(*cfg) * ARRAY_SIZE(dt_params), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(dt_params); i++) {
+ struct pinconf_generic_dt_params *par = &dt_params[i];
+ ret = of_property_read_u32(np, par->property, &val);
+
+ /* property not found */
+ if (ret == -EINVAL)
+ continue;
+
+ /* use default value, when no value is specified */
+ if (ret)
+ val = par->default_value;
+
+ pr_debug("found %s with value %u\n", par->property, val);
+ cfg[ncfg] = pinconf_to_config_packed(par->param, val);
+ ncfg++;
+ }
+
+ ret = 0;
+
+ /* no configs found at all */
+ if (ncfg == 0) {
+ *configs = NULL;
+ *nconfigs = 0;
+ goto out;
+ }
+
+ /*
+ * Now limit the number of configs to the real number of
+ * found properties.
+ */
+ *configs = kzalloc(ncfg * sizeof(unsigned long), GFP_KERNEL);
+ if (!*configs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(*configs, cfg, ncfg * sizeof(unsigned long));
+ *nconfigs = ncfg;
+
+out:
+ kfree(cfg);
+ return ret;
+}
+
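+/*
+ * Minimal caller sketch for the parser above; "np" would typically be a
+ * pin configuration subnode, error handling is trimmed:
+ */
+static void __maybe_unused example_parse(struct device_node *np)
+{
+ unsigned long *configs;
+ unsigned int nconfigs;
+
+ if (!pinconf_generic_parse_dt_config(np, &configs, &nconfigs)) {
+ /* hand configs/nconfigs to the pinctrl core here */
+ kfree(configs);
+ }
+}
+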
+int pinconf_generic_dt_subnode_to_map_new(struct pinctrl_dev *pctldev,
+ struct device_node *np, struct pinctrl_map **map,
+ unsigned *reserved_maps, unsigned *num_maps,
+ enum pinctrl_map_type type)
+{
+ int ret;
+ const char *function;
+ struct device *dev = pctldev->dev;
+ unsigned long *configs = NULL;
+ unsigned num_configs = 0;
+ unsigned reserve;
+ struct property *prop;
+ const char *group;
+
+ ret = of_property_read_string(np, "function", &function);
+ if (ret < 0) {
+ /* EINVAL=missing, which is fine since it's optional */
+ if (ret != -EINVAL)
+ dev_err(dev, "could not parse property function\n");
+ function = NULL;
+ }
+
+ ret = pinconf_generic_parse_dt_config(np, &configs, &num_configs);
+ if (ret < 0) {
+ dev_err(dev, "could not parse node property\n");
+ return ret;
+ }
+
+ reserve = 0;
+ if (function != NULL)
+ reserve++;
+ if (num_configs)
+ reserve++;
+ ret = of_property_count_strings(np, "pins");
+ if (ret < 0) {
+ dev_err(dev, "could not parse property pins\n");
+ goto exit;
+ }
+ reserve *= ret;
+
+ ret = pinctrl_utils_reserve_map(pctldev, map, reserved_maps,
+ num_maps, reserve);
+ if (ret < 0)
+ goto exit;
+
+ of_property_for_each_string(np, "pins", prop, group) {
+ if (function) {
+ ret = pinctrl_utils_add_map_mux(pctldev, map,
+ reserved_maps, num_maps, group,
+ function);
+ if (ret < 0)
+ goto exit;
+ }
+
+ if (num_configs) {
+ ret = pinctrl_utils_add_map_configs(pctldev, map,
+ reserved_maps, num_maps, group, configs,
+ num_configs, type);
+ if (ret < 0)
+ goto exit;
+ }
+ }
+ ret = 0;
+
+exit:
+ kfree(configs);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pinconf_generic_dt_subnode_to_map_new);
+
+int pinconf_generic_dt_node_to_map_new(struct pinctrl_dev *pctldev,
+ struct device_node *np_config, struct pinctrl_map **map,
+ unsigned *num_maps, enum pinctrl_map_type type)
+{
+ unsigned reserved_maps;
+ struct device_node *np;
+ int ret;
+
+ reserved_maps = 0;
+ *map = NULL;
+ *num_maps = 0;
+
+ for_each_child_of_node(np_config, np) {
+ ret = pinconf_generic_dt_subnode_to_map_new(pctldev, np, map,
+ &reserved_maps, num_maps, type);
+ if (ret < 0) {
+ pinctrl_utils_dt_free_map(pctldev, *map, *num_maps);
+ return ret;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pinconf_generic_dt_node_to_map_new);
+
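+/*
+ * Sketch: a pin controller driver can route its .dt_node_to_map through
+ * the generic helper above (wrapper name and map type chosen for
+ * illustration):
+ */
+static int __maybe_unused example_dt_node_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np_config, struct pinctrl_map **map,
+ unsigned *num_maps)
+{
+ return pinconf_generic_dt_node_to_map_new(pctldev, np_config, map,
+ num_maps, PIN_MAP_TYPE_CONFIGS_PIN);
+}
+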
+#endif
diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c
index 694c3ace4520..596a2522a6b1 100644
--- a/drivers/pinctrl/pinconf.c
+++ b/drivers/pinctrl/pinconf.c
@@ -35,7 +35,9 @@ int pinconf_check_ops(struct pinctrl_dev *pctldev)
return -EINVAL;
}
/* We have to be able to config the pins in SOME way */
- if (!ops->pin_config_set && !ops->pin_config_group_set) {
+ if (!ops->pin_config_set && !ops->pin_config_group_set
+ && !ops->pin_config_set_bulk
+ && !ops->pin_config_group_set_bulk) {
dev_err(pctldev->dev,
"pinconf has to be able to set a pins config\n");
return -EINVAL;
@@ -75,98 +77,6 @@ int pin_config_get_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
return ops->pin_config_get(pctldev, pin, config);
}
-/**
- * pin_config_get() - get the configuration of a single pin parameter
- * @dev_name: name of the pin controller device for this pin
- * @name: name of the pin to get the config for
- * @config: the config pointed to by this argument will be filled in with the
- * current pin state, it can be used directly by drivers as a numeral, or
- * it can be dereferenced to any struct.
- */
-int pin_config_get(const char *dev_name, const char *name,
- unsigned long *config)
-{
- struct pinctrl_dev *pctldev;
- int pin;
-
- pctldev = get_pinctrl_dev_from_devname(dev_name);
- if (!pctldev) {
- pin = -EINVAL;
- return pin;
- }
-
- mutex_lock(&pctldev->mutex);
-
- pin = pin_get_from_name(pctldev, name);
- if (pin < 0)
- goto unlock;
-
- pin = pin_config_get_for_pin(pctldev, pin, config);
-
-unlock:
- mutex_unlock(&pctldev->mutex);
- return pin;
-}
-EXPORT_SYMBOL(pin_config_get);
-
-static int pin_config_set_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
- unsigned long config)
-{
- const struct pinconf_ops *ops = pctldev->desc->confops;
- int ret;
-
- if (!ops || !ops->pin_config_set) {
- dev_err(pctldev->dev, "cannot configure pin, missing "
- "config function in driver\n");
- return -EINVAL;
- }
-
- ret = ops->pin_config_set(pctldev, pin, config);
- if (ret) {
- dev_err(pctldev->dev,
- "unable to set pin configuration on pin %d\n", pin);
- return ret;
- }
-
- return 0;
-}
-
-/**
- * pin_config_set() - set the configuration of a single pin parameter
- * @dev_name: name of pin controller device for this pin
- * @name: name of the pin to set the config for
- * @config: the config in this argument will contain the desired pin state, it
- * can be used directly by drivers as a numeral, or it can be dereferenced
- * to any struct.
- */
-int pin_config_set(const char *dev_name, const char *name,
- unsigned long config)
-{
- struct pinctrl_dev *pctldev;
- int pin, ret;
-
- pctldev = get_pinctrl_dev_from_devname(dev_name);
- if (!pctldev) {
- ret = -EINVAL;
- return ret;
- }
-
- mutex_lock(&pctldev->mutex);
-
- pin = pin_get_from_name(pctldev, name);
- if (pin < 0) {
- ret = pin;
- goto unlock;
- }
-
- ret = pin_config_set_for_pin(pctldev, pin, config);
-
-unlock:
- mutex_unlock(&pctldev->mutex);
- return ret;
-}
-EXPORT_SYMBOL(pin_config_set);
-
int pin_config_group_get(const char *dev_name, const char *pin_group,
unsigned long *config)
{
@@ -204,88 +114,6 @@ unlock:
mutex_unlock(&pctldev->mutex);
return ret;
}
-EXPORT_SYMBOL(pin_config_group_get);
-
-int pin_config_group_set(const char *dev_name, const char *pin_group,
- unsigned long config)
-{
- struct pinctrl_dev *pctldev;
- const struct pinconf_ops *ops;
- const struct pinctrl_ops *pctlops;
- int selector;
- const unsigned *pins;
- unsigned num_pins;
- int ret;
- int i;
-
- pctldev = get_pinctrl_dev_from_devname(dev_name);
- if (!pctldev) {
- ret = -EINVAL;
- return ret;
- }
-
- mutex_lock(&pctldev->mutex);
-
- ops = pctldev->desc->confops;
- pctlops = pctldev->desc->pctlops;
-
- if (!ops || (!ops->pin_config_group_set && !ops->pin_config_set)) {
- dev_err(pctldev->dev, "cannot configure pin group, missing "
- "config function in driver\n");
- ret = -EINVAL;
- goto unlock;
- }
-
- selector = pinctrl_get_group_selector(pctldev, pin_group);
- if (selector < 0) {
- ret = selector;
- goto unlock;
- }
-
- ret = pctlops->get_group_pins(pctldev, selector, &pins, &num_pins);
- if (ret) {
- dev_err(pctldev->dev, "cannot configure pin group, error "
- "getting pins\n");
- goto unlock;
- }
-
- /*
- * If the pin controller supports handling entire groups we use that
- * capability.
- */
- if (ops->pin_config_group_set) {
- ret = ops->pin_config_group_set(pctldev, selector, config);
- /*
- * If the pin controller prefer that a certain group be handled
- * pin-by-pin as well, it returns -EAGAIN.
- */
- if (ret != -EAGAIN)
- goto unlock;
- }
-
- /*
- * If the controller cannot handle entire groups, we configure each pin
- * individually.
- */
- if (!ops->pin_config_set) {
- ret = 0;
- goto unlock;
- }
-
- for (i = 0; i < num_pins; i++) {
- ret = ops->pin_config_set(pctldev, pins[i], config);
- if (ret < 0)
- goto unlock;
- }
-
- ret = 0;
-
-unlock:
- mutex_unlock(&pctldev->mutex);
-
- return ret;
-}
-EXPORT_SYMBOL(pin_config_group_set);
int pinconf_map_to_setting(struct pinctrl_map const *map,
struct pinctrl_setting *setting)
@@ -332,7 +160,7 @@ int pinconf_apply_setting(struct pinctrl_setting const *setting)
{
struct pinctrl_dev *pctldev = setting->pctldev;
const struct pinconf_ops *ops = pctldev->desc->confops;
- int i, ret;
+ int ret, i;
if (!ops) {
dev_err(pctldev->dev, "missing confops\n");
@@ -341,39 +169,66 @@ int pinconf_apply_setting(struct pinctrl_setting const *setting)
switch (setting->type) {
case PIN_MAP_TYPE_CONFIGS_PIN:
- if (!ops->pin_config_set) {
+ if (!ops->pin_config_set && !ops->pin_config_set_bulk) {
dev_err(pctldev->dev, "missing pin_config_set op\n");
return -EINVAL;
}
- for (i = 0; i < setting->data.configs.num_configs; i++) {
- ret = ops->pin_config_set(pctldev,
+ if (ops->pin_config_set_bulk) {
+ ret = ops->pin_config_set_bulk(pctldev,
setting->data.configs.group_or_pin,
- setting->data.configs.configs[i]);
+ setting->data.configs.configs,
+ setting->data.configs.num_configs);
if (ret < 0) {
dev_err(pctldev->dev,
- "pin_config_set op failed for pin %d config %08lx\n",
- setting->data.configs.group_or_pin,
- setting->data.configs.configs[i]);
+ "pin_config_set_bulk op failed for pin %d\n",
+ setting->data.configs.group_or_pin);
return ret;
}
+ } else if (ops->pin_config_set) {
+ for (i = 0; i < setting->data.configs.num_configs; i++) {
+ ret = ops->pin_config_set(pctldev,
+ setting->data.configs.group_or_pin,
+ setting->data.configs.configs[i]);
+ if (ret < 0) {
+ dev_err(pctldev->dev,
+ "pin_config_set op failed for pin %d config %08lx\n",
+ setting->data.configs.group_or_pin,
+ setting->data.configs.configs[i]);
+ return ret;
+ }
+ }
}
break;
case PIN_MAP_TYPE_CONFIGS_GROUP:
- if (!ops->pin_config_group_set) {
+ if (!ops->pin_config_group_set &&
+ !ops->pin_config_group_set_bulk) {
dev_err(pctldev->dev,
"missing pin_config_group_set op\n");
return -EINVAL;
}
- for (i = 0; i < setting->data.configs.num_configs; i++) {
- ret = ops->pin_config_group_set(pctldev,
+ if (ops->pin_config_group_set_bulk) {
+ ret = ops->pin_config_group_set_bulk(pctldev,
setting->data.configs.group_or_pin,
- setting->data.configs.configs[i]);
+ setting->data.configs.configs,
+ setting->data.configs.num_configs);
if (ret < 0) {
dev_err(pctldev->dev,
- "pin_config_group_set op failed for group %d config %08lx\n",
+ "pin_config_group_set_bulk op failed for group %d\n",
+ setting->data.configs.group_or_pin);
+ return ret;
+ }
+ } else if (ops->pin_config_group_set) {
+ for (i = 0; i < setting->data.configs.num_configs; i++) {
+ ret = ops->pin_config_group_set(pctldev,
setting->data.configs.group_or_pin,
setting->data.configs.configs[i]);
- return ret;
+ if (ret < 0) {
+ dev_err(pctldev->dev,
+ "pin_config_group_set op failed for group %d config %08lx\n",
+ setting->data.configs.group_or_pin,
+ setting->data.configs.configs[i]);
+ return ret;
+ }
}
}
break;
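
To illustrate the new bulk callbacks exercised above, a driver able to program every config for a pin in one register access might provide something like the following sketch (the foo_* helpers are invented; the signature matches the calls in pinconf_apply_setting()):

/* Hypothetical bulk op: fold all configs into one register write
 * instead of taking one callback per config entry. */
static int foo_pin_config_set_bulk(struct pinctrl_dev *pctldev,
				   unsigned pin, unsigned long *configs,
				   unsigned num_configs)
{
	u32 val = 0;
	unsigned i;

	for (i = 0; i < num_configs; i++)
		val |= foo_config_to_bits(configs[i]);	/* invented helper */

	foo_write_pin_reg(pctldev, pin, val);		/* invented helper */
	return 0;
}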
diff --git a/drivers/pinctrl/pinconf.h b/drivers/pinctrl/pinconf.h
index 92c7267244d2..a4a5417e1413 100644
--- a/drivers/pinctrl/pinconf.h
+++ b/drivers/pinctrl/pinconf.h
@@ -123,3 +123,9 @@ static inline void pinconf_generic_dump_config(struct pinctrl_dev *pctldev,
return;
}
#endif
+
+#if defined(CONFIG_GENERIC_PINCONF) && defined(CONFIG_OF)
+int pinconf_generic_parse_dt_config(struct device_node *np,
+ unsigned long **configs,
+ unsigned int *nconfigs);
+#endif
diff --git a/drivers/pinctrl/pinctrl-utils.c b/drivers/pinctrl/pinctrl-utils.c
new file mode 100644
index 000000000000..48277e025f84
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-utils.c
@@ -0,0 +1,141 @@
+/*
+ * Utility functions for implementing pinctrl drivers.
+ *
+ * Copyright (c) 2013, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include "core.h"
+#include "pinctrl-utils.h"
+
+int pinctrl_utils_reserve_map(struct pinctrl_dev *pctldev,
+ struct pinctrl_map **map, unsigned *reserved_maps,
+ unsigned *num_maps, unsigned reserve)
+{
+ unsigned old_num = *reserved_maps;
+ unsigned new_num = *num_maps + reserve;
+ struct pinctrl_map *new_map;
+
+ if (old_num >= new_num)
+ return 0;
+
+ new_map = krealloc(*map, sizeof(*new_map) * new_num, GFP_KERNEL);
+ if (!new_map) {
+ dev_err(pctldev->dev, "krealloc(map) failed\n");
+ return -ENOMEM;
+ }
+
+ memset(new_map + old_num, 0, (new_num - old_num) * sizeof(*new_map));
+
+ *map = new_map;
+ *reserved_maps = new_num;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pinctrl_utils_reserve_map);
+
+int pinctrl_utils_add_map_mux(struct pinctrl_dev *pctldev,
+ struct pinctrl_map **map, unsigned *reserved_maps,
+ unsigned *num_maps, const char *group,
+ const char *function)
+{
+ if (WARN_ON(*num_maps == *reserved_maps))
+ return -ENOSPC;
+
+ (*map)[*num_maps].type = PIN_MAP_TYPE_MUX_GROUP;
+ (*map)[*num_maps].data.mux.group = group;
+ (*map)[*num_maps].data.mux.function = function;
+ (*num_maps)++;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pinctrl_utils_add_map_mux);
+
+int pinctrl_utils_add_map_configs(struct pinctrl_dev *pctldev,
+ struct pinctrl_map **map, unsigned *reserved_maps,
+ unsigned *num_maps, const char *group,
+ unsigned long *configs, unsigned num_configs,
+ enum pinctrl_map_type type)
+{
+ unsigned long *dup_configs;
+
+ if (WARN_ON(*num_maps == *reserved_maps))
+ return -ENOSPC;
+
+ dup_configs = kmemdup(configs, num_configs * sizeof(*dup_configs),
+ GFP_KERNEL);
+ if (!dup_configs) {
+ dev_err(pctldev->dev, "kmemdup(configs) failed\n");
+ return -ENOMEM;
+ }
+
+ (*map)[*num_maps].type = type;
+ (*map)[*num_maps].data.configs.group_or_pin = group;
+ (*map)[*num_maps].data.configs.configs = dup_configs;
+ (*map)[*num_maps].data.configs.num_configs = num_configs;
+ (*num_maps)++;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pinctrl_utils_add_map_configs);
+
+int pinctrl_utils_add_config(struct pinctrl_dev *pctldev,
+ unsigned long **configs, unsigned *num_configs,
+ unsigned long config)
+{
+ unsigned old_num = *num_configs;
+ unsigned new_num = old_num + 1;
+ unsigned long *new_configs;
+
+ new_configs = krealloc(*configs, sizeof(*new_configs) * new_num,
+ GFP_KERNEL);
+ if (!new_configs) {
+ dev_err(pctldev->dev, "krealloc(configs) failed\n");
+ return -ENOMEM;
+ }
+
+ new_configs[old_num] = config;
+
+ *configs = new_configs;
+ *num_configs = new_num;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pinctrl_utils_add_config);
+
+void pinctrl_utils_dt_free_map(struct pinctrl_dev *pctldev,
+ struct pinctrl_map *map, unsigned num_maps)
+{
+ int i;
+
+ for (i = 0; i < num_maps; i++) {
+ switch (map[i].type) {
+ case PIN_MAP_TYPE_CONFIGS_GROUP:
+ case PIN_MAP_TYPE_CONFIGS_PIN:
+ kfree(map[i].data.configs.configs);
+ break;
+ default:
+ break;
+ }
+ }
+ kfree(map);
+}
+EXPORT_SYMBOL_GPL(pinctrl_utils_dt_free_map);
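
As a usage note, pinctrl_utils_add_config() grows the array with krealloc(), so a minimal (hypothetical) caller parsing driver-specific properties looks like the fragment below; the caller owns and eventually frees the array:

/* Sketch: accumulate packed config values one at a time, inside a
 * driver's own property-parsing routine. */
unsigned long *configs = NULL;
unsigned num_configs = 0;
int ret;

ret = pinctrl_utils_add_config(pctldev, &configs, &num_configs,
			       pinconf_to_config_packed(PIN_CONFIG_BIAS_PULL_UP, 1));
if (ret)
	return ret;
/* ... hand configs/num_configs to pinctrl_utils_add_map_configs() ... */
kfree(configs);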
diff --git a/drivers/pinctrl/pinctrl-utils.h b/drivers/pinctrl/pinctrl-utils.h
new file mode 100644
index 000000000000..d0ffe1ce200f
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-utils.h
@@ -0,0 +1,43 @@
+/*
+ * Utility functions for implementing pinctrl drivers.
+ *
+ * Copyright (c) 2013, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+#ifndef __PINCTRL_UTILS_H__
+#define __PINCTRL_UTILS_H__
+
+int pinctrl_utils_reserve_map(struct pinctrl_dev *pctldev,
+ struct pinctrl_map **map, unsigned *reserved_maps,
+ unsigned *num_maps, unsigned reserve);
+int pinctrl_utils_add_map_mux(struct pinctrl_dev *pctldev,
+ struct pinctrl_map **map, unsigned *reserved_maps,
+ unsigned *num_maps, const char *group,
+ const char *function);
+int pinctrl_utils_add_map_configs(struct pinctrl_dev *pctldev,
+ struct pinctrl_map **map, unsigned *reserved_maps,
+ unsigned *num_maps, const char *group,
+ unsigned long *configs, unsigned num_configs,
+ enum pinctrl_map_type type);
+int pinctrl_utils_add_config(struct pinctrl_dev *pctldev,
+ unsigned long **configs, unsigned *num_configs,
+ unsigned long config);
+void pinctrl_utils_dt_free_map(struct pinctrl_dev *pctldev,
+ struct pinctrl_map *map, unsigned num_maps);
+
+#endif /* __PINCTRL_UTILS_H__ */
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 349e9ae8090a..ee039dcead04 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -32,7 +32,8 @@ config POWER_RESET_RESTART
user presses a key. u-boot then boots into Linux.
config POWER_RESET_VEXPRESS
- bool
+ bool "ARM Versatile Express power-off and reset driver"
+ depends on ARM || ARM64
depends on POWER_RESET
help
Power off and reset support for the ARM Ltd. Versatile
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a86d12326137..874dd261b248 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1887,8 +1887,9 @@ int regulator_disable_deferred(struct regulator *regulator, int ms)
rdev->deferred_disables++;
mutex_unlock(&rdev->mutex);
- ret = schedule_delayed_work(&rdev->disable_work,
- msecs_to_jiffies(ms));
+ ret = queue_delayed_work(system_power_efficient_wq,
+ &rdev->disable_work,
+ msecs_to_jiffies(ms));
if (ret < 0)
return ret;
else
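
The regulator hunk above moves the deferred disable from the default system workqueue to system_power_efficient_wq, which (with CONFIG_WQ_POWER_EFFICIENT) lets the scheduler place the work on an already-awake CPU. The same pattern in isolation, as a hedged sketch with invented names:

/* Hypothetical fragment mirroring the change above. */
static void my_work_fn(struct work_struct *work)
{
	pr_info("deferred work ran\n");
}
static DECLARE_DELAYED_WORK(my_work, my_work_fn);

static void my_defer_work(void)
{
	queue_delayed_work(system_power_efficient_wq, &my_work,
			   msecs_to_jiffies(100));
}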
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 5e3c02554d99..e92e1dcb6e3f 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -17,8 +17,30 @@ if THERMAL
config THERMAL_HWMON
bool
+ prompt "Expose thermal sensors as hwmon device"
depends on HWMON=y || HWMON=THERMAL
default y
+ help
+ If a sensor is registered with the thermal
+ framework, this option also registers it as
+ a hwmon device. The sensor will then have the
+ common hwmon sysfs interface.
+
+ Say 'Y' here if you want all thermal sensors to
+ have hwmon sysfs interface too.
+
+config THERMAL_OF
+ bool
+ prompt "APIs to parse thermal data out of device tree"
+ depends on OF
+ default y
+ help
+ This option provides helpers to read and parse
+ thermal data definitions out of the device tree
+ blob.
+
+ Say 'Y' here if you need to build thermal infrastructure
+ based on device tree.
choice
prompt "Default Thermal governor"
@@ -69,6 +91,7 @@ config THERMAL_GOV_USER_SPACE
config CPU_THERMAL
bool "generic cpu cooling support"
depends on CPU_FREQ
+ depends on THERMAL_OF
select CPU_FREQ_TABLE
help
This implements the generic cpu cooling mechanism through frequency
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index c054d410ac3f..71f8eef49c1c 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -5,6 +5,10 @@
obj-$(CONFIG_THERMAL) += thermal_sys.o
thermal_sys-y += thermal_core.o
+# interface to/from other layers providing sensors
+thermal_sys-$(CONFIG_THERMAL_HWMON) += thermal_hwmon.o
+thermal_sys-$(CONFIG_THERMAL_OF) += of-thermal.o
+
# governors
thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o
thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index c94bf2e5de62..4246262c4bd2 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -167,13 +167,20 @@ static int get_property(unsigned int cpu, unsigned long input,
continue;
/* get the frequency order */
- if (freq != CPUFREQ_ENTRY_INVALID && descend != -1)
+ if (freq != CPUFREQ_ENTRY_INVALID && descend == -1)
descend = !!(freq > table[i].frequency);
freq = table[i].frequency;
max_level++;
}
+ /* No valid cpu frequency entry */
+ if (max_level == 0)
+ return -EINVAL;
+
+ /* max_level is an index, not a counter */
+ max_level--;
+
/* get max level */
if (property == GET_MAXL) {
*output = (unsigned int)max_level;
@@ -181,7 +188,7 @@ static int get_property(unsigned int cpu, unsigned long input,
}
if (property == GET_FREQ)
- level = descend ? input : (max_level - input - 1);
+ level = descend ? input : (max_level - input);
for (i = 0, j = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
/* ignore invalid entry */
@@ -197,7 +204,7 @@ static int get_property(unsigned int cpu, unsigned long input,
if (property == GET_LEVEL && (unsigned int)input == freq) {
/* get level by frequency */
- *output = descend ? j : (max_level - j - 1);
+ *output = descend ? j : (max_level - j);
return 0;
}
if (property == GET_FREQ && level == j) {
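
A quick worked example of the corrected mapping (frequencies invented), assuming descend starts at -1 as in the surrounding function: the old test 'descend != -1' could never fire, so descend stayed -1 and, being non-zero, made ascending tables behave as descending; the '== -1' test latches the direction exactly once, on the first pair of valid entries.

/* Worked example (values invented):
 *   table[] = { 200 MHz, 400 MHz, 800 MHz }, all entries valid
 *   -> descend latched to 0, max_level = 3 - 1 = 2 (an index)
 *   GET_FREQ: state 0 -> table[2 - 0] = 800 MHz (mildest throttling)
 *             state 2 -> table[2 - 2] = 200 MHz (deepest throttling)
 */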
@@ -322,6 +329,8 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
if (cpumask_test_cpu(policy->cpu, &notify_device->allowed_cpus))
max_freq = notify_device->cpufreq_val;
+ else
+ return 0;
/* Never exceed user_policy.max */
if (max_freq > policy->user_policy.max)
@@ -415,18 +424,21 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
};
/**
- * cpufreq_cooling_register - function to create cpufreq cooling device.
+ * __cpufreq_cooling_register - helper function to create cpufreq cooling device
+ * @np: a valid struct device_node to the cooling device device tree node
* @clip_cpus: cpumask of cpus where the frequency constraints will happen.
*
* This interface function registers the cpufreq cooling device with the name
* "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
- * cooling devices.
+ * cooling devices. It also gives the opportunity to link the cooling device
+ * with a device tree node, in order to bind it via the thermal DT code.
*
* Return: a valid struct thermal_cooling_device pointer on success,
* on failure, it returns a corresponding ERR_PTR().
*/
-struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+static struct thermal_cooling_device *
+__cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus)
{
struct thermal_cooling_device *cool_dev;
struct cpufreq_cooling_device *cpufreq_dev = NULL;
@@ -465,12 +477,12 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus)
snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
cpufreq_dev->id);
- cool_dev = thermal_cooling_device_register(dev_name, cpufreq_dev,
- &cpufreq_cooling_ops);
- if (!cool_dev) {
+ cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
+ &cpufreq_cooling_ops);
+ if (IS_ERR(cool_dev)) {
release_idr(&cpufreq_idr, cpufreq_dev->id);
kfree(cpufreq_dev);
- return ERR_PTR(-EINVAL);
+ return cool_dev;
}
cpufreq_dev->cool_dev = cool_dev;
cpufreq_dev->cpufreq_state = 0;
@@ -486,9 +498,50 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus)
return cool_dev;
}
+
+/**
+ * cpufreq_cooling_register - function to create cpufreq cooling device.
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ *
+ * This interface function registers the cpufreq cooling device with the name
+ * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
+ * cooling devices.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+cpufreq_cooling_register(const struct cpumask *clip_cpus)
+{
+ return __cpufreq_cooling_register(NULL, clip_cpus);
+}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
/**
+ * of_cpufreq_cooling_register - function to create cpufreq cooling device.
+ * @np: a valid struct device_node to the cooling device device tree node
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ *
+ * This interface function registers the cpufreq cooling device with the name
+ * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
+ * cooling devices. Using this API, the cpufreq cooling device will be
+ * linked to the device tree node provided.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus)
+{
+ if (!np)
+ return ERR_PTR(-EINVAL);
+
+ return __cpufreq_cooling_register(np, clip_cpus);
+}
+EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
+
+/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
* @cdev: thermal cooling device pointer.
*
@@ -496,8 +549,12 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
*/
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
- struct cpufreq_cooling_device *cpufreq_dev = cdev->devdata;
+ struct cpufreq_cooling_device *cpufreq_dev;
+
+ if (!cdev)
+ return;
+ cpufreq_dev = cdev->devdata;
mutex_lock(&cooling_cpufreq_lock);
cpufreq_dev_count--;
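
A platform driver consuming the new OF-aware registration might do so roughly as follows in its probe path (a sketch; error handling is abbreviated and the DT node is assumed to carry #cooling-cells):

/* Hypothetical probe fragment: link the cpufreq cooling device to its
 * DT node so of-thermal can bind it through cooling-maps. */
struct thermal_cooling_device *cdev;

cdev = of_cpufreq_cooling_register(pdev->dev.of_node, cpu_present_mask);
if (IS_ERR(cdev))
	return PTR_ERR(cdev);

/* on teardown: cpufreq_cooling_unregister(cdev); now NULL-safe */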
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
new file mode 100644
index 000000000000..4b2b999b7611
--- /dev/null
+++ b/drivers/thermal/of-thermal.c
@@ -0,0 +1,850 @@
+/*
+ * of-thermal.c - Generic Thermal Management device tree support.
+ *
+ * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com>
+ *
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <linux/thermal.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/string.h>
+
+#include "thermal_core.h"
+
+/*** Private data structures to represent thermal device tree data ***/
+
+/**
+ * struct __thermal_trip - representation of a point in temperature domain
+ * @np: pointer to struct device_node that this trip point was created from
+ * @temperature: temperature value in miliCelsius
+ * @hysteresis: relative hysteresis in miliCelsius
+ * @type: trip point type
+ */
+
+struct __thermal_trip {
+ struct device_node *np;
+ unsigned long int temperature;
+ unsigned long int hysteresis;
+ enum thermal_trip_type type;
+};
+
+/**
+ * struct __thermal_bind_params - a match between trip and cooling device
+ * @cooling_device: a pointer to identify the referred cooling device
+ * @trip_id: the trip point index
+ * @usage: the percentage (from 0 to 100) of cooling contribution
+ * @min: minimum cooling state used at this trip point
+ * @max: maximum cooling state used at this trip point
+ */
+
+struct __thermal_bind_params {
+ struct device_node *cooling_device;
+ unsigned int trip_id;
+ unsigned int usage;
+ unsigned long min;
+ unsigned long max;
+};
+
+/**
+ * struct __thermal_zone - internal representation of a thermal zone
+ * @mode: current thermal zone device mode (enabled/disabled)
+ * @passive_delay: polling interval while passive cooling is activated
+ * @polling_delay: zone polling interval
+ * @ntrips: number of trip points
+ * @trips: an array of trip points (0..ntrips - 1)
+ * @num_tbps: number of thermal bind params
+ * @tbps: an array of thermal bind params (0..num_tbps - 1)
+ * @sensor_data: sensor private data used while reading temperature and trend
+ * @get_temp: sensor callback to read temperature
+ * @get_trend: sensor callback to read temperature trend
+ */
+
+struct __thermal_zone {
+ enum thermal_device_mode mode;
+ int passive_delay;
+ int polling_delay;
+
+ /* trip data */
+ int ntrips;
+ struct __thermal_trip *trips;
+
+ /* cooling binding data */
+ int num_tbps;
+ struct __thermal_bind_params *tbps;
+
+ /* sensor interface */
+ void *sensor_data;
+ int (*get_temp)(void *, long *);
+ int (*get_trend)(void *, long *);
+};
+
+/*** DT thermal zone device callbacks ***/
+
+static int of_thermal_get_temp(struct thermal_zone_device *tz,
+ unsigned long *temp)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (!data->get_temp)
+ return -EINVAL;
+
+ return data->get_temp(data->sensor_data, temp);
+}
+
+static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
+ enum thermal_trend *trend)
+{
+ struct __thermal_zone *data = tz->devdata;
+ long dev_trend;
+ int r;
+
+ if (!data->get_trend)
+ return -EINVAL;
+
+ r = data->get_trend(data->sensor_data, &dev_trend);
+ if (r)
+ return r;
+
+ /* TODO: These intervals might have some thresholds, but in core code */
+ if (dev_trend > 0)
+ *trend = THERMAL_TREND_RAISING;
+ else if (dev_trend < 0)
+ *trend = THERMAL_TREND_DROPPING;
+ else
+ *trend = THERMAL_TREND_STABLE;
+
+ return 0;
+}
+
+static int of_thermal_bind(struct thermal_zone_device *thermal,
+ struct thermal_cooling_device *cdev)
+{
+ struct __thermal_zone *data = thermal->devdata;
+ int i;
+
+ if (!data || IS_ERR(data))
+ return -ENODEV;
+
+ /* find where to bind */
+ for (i = 0; i < data->num_tbps; i++) {
+ struct __thermal_bind_params *tbp = data->tbps + i;
+
+ if (tbp->cooling_device == cdev->np) {
+ int ret;
+
+ ret = thermal_zone_bind_cooling_device(thermal,
+ tbp->trip_id, cdev,
+ tbp->max,
+ tbp->min);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int of_thermal_unbind(struct thermal_zone_device *thermal,
+ struct thermal_cooling_device *cdev)
+{
+ struct __thermal_zone *data = thermal->devdata;
+ int i;
+
+ if (!data || IS_ERR(data))
+ return -ENODEV;
+
+ /* find where to unbind */
+ for (i = 0; i < data->num_tbps; i++) {
+ struct __thermal_bind_params *tbp = data->tbps + i;
+
+ if (tbp->cooling_device == cdev->np) {
+ int ret;
+
+ ret = thermal_zone_unbind_cooling_device(thermal,
+ tbp->trip_id, cdev);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int of_thermal_get_mode(struct thermal_zone_device *tz,
+ enum thermal_device_mode *mode)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ *mode = data->mode;
+
+ return 0;
+}
+
+static int of_thermal_set_mode(struct thermal_zone_device *tz,
+ enum thermal_device_mode mode)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ mutex_lock(&tz->lock);
+
+ if (mode == THERMAL_DEVICE_ENABLED)
+ tz->polling_delay = data->polling_delay;
+ else
+ tz->polling_delay = 0;
+
+ mutex_unlock(&tz->lock);
+
+ data->mode = mode;
+ thermal_zone_device_update(tz);
+
+ return 0;
+}
+
+static int of_thermal_get_trip_type(struct thermal_zone_device *tz, int trip,
+ enum thermal_trip_type *type)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ *type = data->trips[trip].type;
+
+ return 0;
+}
+
+static int of_thermal_get_trip_temp(struct thermal_zone_device *tz, int trip,
+ unsigned long *temp)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ *temp = data->trips[trip].temperature;
+
+ return 0;
+}
+
+static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip,
+ unsigned long temp)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ /* thermal framework should take care of data->mask & (1 << trip) */
+ data->trips[trip].temperature = temp;
+
+ return 0;
+}
+
+static int of_thermal_get_trip_hyst(struct thermal_zone_device *tz, int trip,
+ unsigned long *hyst)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ *hyst = data->trips[trip].hysteresis;
+
+ return 0;
+}
+
+static int of_thermal_set_trip_hyst(struct thermal_zone_device *tz, int trip,
+ unsigned long hyst)
+{
+ struct __thermal_zone *data = tz->devdata;
+
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+ /* thermal framework should take care of data->mask & (1 << trip) */
+ data->trips[trip].hysteresis = hyst;
+
+ return 0;
+}
+
+static int of_thermal_get_crit_temp(struct thermal_zone_device *tz,
+ unsigned long *temp)
+{
+ struct __thermal_zone *data = tz->devdata;
+ int i;
+
+ for (i = 0; i < data->ntrips; i++)
+ if (data->trips[i].type == THERMAL_TRIP_CRITICAL) {
+ *temp = data->trips[i].temperature;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static struct thermal_zone_device_ops of_thermal_ops = {
+ .get_mode = of_thermal_get_mode,
+ .set_mode = of_thermal_set_mode,
+
+ .get_trip_type = of_thermal_get_trip_type,
+ .get_trip_temp = of_thermal_get_trip_temp,
+ .set_trip_temp = of_thermal_set_trip_temp,
+ .get_trip_hyst = of_thermal_get_trip_hyst,
+ .set_trip_hyst = of_thermal_set_trip_hyst,
+ .get_crit_temp = of_thermal_get_crit_temp,
+
+ .bind = of_thermal_bind,
+ .unbind = of_thermal_unbind,
+};
+
+/*** sensor API ***/
+
+static struct thermal_zone_device *
+thermal_zone_of_add_sensor(struct device_node *zone,
+ struct device_node *sensor, void *data,
+ int (*get_temp)(void *, long *),
+ int (*get_trend)(void *, long *))
+{
+ struct thermal_zone_device *tzd;
+ struct __thermal_zone *tz;
+
+ tzd = thermal_zone_get_zone_by_name(zone->name);
+ if (IS_ERR(tzd))
+ return ERR_PTR(-EPROBE_DEFER);
+
+ tz = tzd->devdata;
+
+ mutex_lock(&tzd->lock);
+ tz->get_temp = get_temp;
+ tz->get_trend = get_trend;
+ tz->sensor_data = data;
+
+ tzd->ops->get_temp = of_thermal_get_temp;
+ tzd->ops->get_trend = of_thermal_get_trend;
+ mutex_unlock(&tzd->lock);
+
+ return tzd;
+}
+
+/**
+ * thermal_zone_of_sensor_register - registers a sensor to a DT thermal zone
+ * @dev: a valid struct device pointer of a sensor device. Must contain
+ * a valid .of_node, for the sensor node.
+ * @sensor_id: a sensor identifier, in case the sensor IP has more
+ * than one sensor
+ * @data: a private pointer (owned by the caller) that will be passed
+ * back, when a temperature reading is needed.
+ * @get_temp: a pointer to a function that reads the sensor temperature.
+ * @get_trend: a pointer to a function that reads the sensor temperature trend.
+ *
+ * This function will search the list of thermal zones described in device
+ * tree and look for the zone that refers to the sensor device pointed to
+ * by @dev->of_node as its temperature provider. For the zone pointing to
+ * the sensor node, the sensor will be added to the DT thermal zone device.
+ *
+ * The thermal zone temperature is provided by the @get_temp function
+ * pointer. When called, it will have the private pointer @data back.
+ *
+ * The thermal zone temperature trend is provided by the @get_trend function
+ * pointer. When called, it will have the private pointer @data back.
+ *
+ * TODO:
+ * 01 - This function must enqueue the new sensor instead of using
+ * it as the only source of temperature values.
+ *
+ * 02 - There must be a way to match the sensor with all thermal zones
+ * that refer to it.
+ *
+ * Return: On success returns a valid struct thermal_zone_device,
+ * otherwise, it returns a corresponding ERR_PTR(). Caller must
+ * check the return value with the IS_ERR() helper.
+ */
+struct thermal_zone_device *
+thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
+ void *data, int (*get_temp)(void *, long *),
+ int (*get_trend)(void *, long *))
+{
+ struct device_node *np, *child, *sensor_np;
+
+ np = of_find_node_by_name(NULL, "thermal-zones");
+ if (!np)
+ return ERR_PTR(-ENODEV);
+
+ if (!dev || !dev->of_node)
+ return ERR_PTR(-EINVAL);
+
+ sensor_np = dev->of_node;
+
+ for_each_child_of_node(np, child) {
+ struct of_phandle_args sensor_specs;
+ int ret, id;
+
+ /* For now, thermal framework supports only 1 sensor per zone */
+ ret = of_parse_phandle_with_args(child, "thermal-sensors",
+ "#thermal-sensor-cells",
+ 0, &sensor_specs);
+ if (ret)
+ continue;
+
+ if (sensor_specs.args_count >= 1) {
+ id = sensor_specs.args[0];
+ WARN(sensor_specs.args_count > 1,
+ "%s: too many cells in sensor specifier %d\n",
+ sensor_specs.np->name, sensor_specs.args_count);
+ } else {
+ id = 0;
+ }
+
+ if (sensor_specs.np == sensor_np && id == sensor_id) {
+ of_node_put(np);
+ return thermal_zone_of_add_sensor(child, sensor_np,
+ data,
+ get_temp,
+ get_trend);
+ }
+ }
+ of_node_put(np);
+
+ return ERR_PTR(-ENODEV);
+}
+EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_register);
+
+/**
+ * thermal_zone_of_sensor_unregister - unregisters a sensor from a DT thermal zone
+ * @dev: a valid struct device pointer of a sensor device. Must contain
+ * a valid .of_node, for the sensor node.
+ * @tzd: a pointer to struct thermal_zone_device where the sensor is registered.
+ *
+ * This function removes the sensor callbacks and private data from the
+ * thermal zone device registered with thermal_zone_of_sensor_register()
+ * API. It also silences the zone by removing the .get_temp() and .get_trend()
+ * thermal zone device callbacks.
+ *
+ * TODO: When the support to several sensors per zone is added, this
+ * function must search the sensor list based on @dev parameter.
+ *
+ */
+void thermal_zone_of_sensor_unregister(struct device *dev,
+ struct thermal_zone_device *tzd)
+{
+ struct __thermal_zone *tz;
+
+ if (!dev || !tzd || !tzd->devdata)
+ return;
+
+ tz = tzd->devdata;
+
+ /* no __thermal_zone, nothing to be done */
+ if (!tz)
+ return;
+
+ mutex_lock(&tzd->lock);
+ tzd->ops->get_temp = NULL;
+ tzd->ops->get_trend = NULL;
+
+ tz->get_temp = NULL;
+ tz->get_trend = NULL;
+ tz->sensor_data = NULL;
+ mutex_unlock(&tzd->lock);
+}
+EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister);
+
+/*** functions parsing device tree nodes ***/
+
+/**
+ * thermal_of_populate_bind_params - parse and fill cooling map data
+ * @np: DT node containing a cooling-map node
+ * @__tbp: data structure to be filled with cooling map info
+ * @trips: array of thermal zone trip points
+ * @ntrips: number of trip points inside trips.
+ *
+ * This function parses a cooling-map type of node represented by
+ * @np parameter and fills the read data into @__tbp data structure.
+ * It needs the already parsed array of trip points of the thermal zone
+ * under consideration.
+ *
+ * Return: 0 on success, proper error code otherwise
+ */
+static int thermal_of_populate_bind_params(struct device_node *np,
+ struct __thermal_bind_params *__tbp,
+ struct __thermal_trip *trips,
+ int ntrips)
+{
+ struct of_phandle_args cooling_spec;
+ struct device_node *trip;
+ int ret, i;
+ u32 prop;
+
+ /* Default weight. Usage is optional */
+ __tbp->usage = 0;
+ ret = of_property_read_u32(np, "contribution", &prop);
+ if (ret == 0)
+ __tbp->usage = prop;
+
+ trip = of_parse_phandle(np, "trip", 0);
+ if (!trip) {
+ pr_err("missing trip property\n");
+ return -ENODEV;
+ }
+
+ /* match using device_node */
+ for (i = 0; i < ntrips; i++)
+ if (trip == trips[i].np) {
+ __tbp->trip_id = i;
+ break;
+ }
+
+ if (i == ntrips) {
+ ret = -ENODEV;
+ goto end;
+ }
+
+ ret = of_parse_phandle_with_args(np, "cooling-device", "#cooling-cells",
+ 0, &cooling_spec);
+ if (ret < 0) {
+ pr_err("missing cooling_device property\n");
+ goto end;
+ }
+ __tbp->cooling_device = cooling_spec.np;
+ if (cooling_spec.args_count >= 2) { /* at least min and max */
+ __tbp->min = cooling_spec.args[0];
+ __tbp->max = cooling_spec.args[1];
+ } else {
+ pr_err("wrong reference to cooling device, missing limits\n");
+ }
+
+end:
+ of_node_put(trip);
+
+ return ret;
+}
+
+/**
+ * It maps 'enum thermal_trip_type' found in include/linux/thermal.h
+ * into the device tree binding of 'trip', property type.
+ */
+static const char * const trip_types[] = {
+ [THERMAL_TRIP_ACTIVE] = "active",
+ [THERMAL_TRIP_PASSIVE] = "passive",
+ [THERMAL_TRIP_HOT] = "hot",
+ [THERMAL_TRIP_CRITICAL] = "critical",
+};
+
+/**
+ * thermal_of_get_trip_type - Get trip point type for a given device_node
+ * @np: Pointer to the given device_node
+ * @type: Pointer to resulting trip type
+ *
+ * The function reads the trip type string from the 'type' property
+ * and stores the matching trip_types table index in @type.
+ *
+ * Return: 0 on success, or errno in error case.
+ */
+static int thermal_of_get_trip_type(struct device_node *np,
+ enum thermal_trip_type *type)
+{
+ const char *t;
+ int err, i;
+
+ err = of_property_read_string(np, "type", &t);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(trip_types); i++)
+ if (!strcasecmp(t, trip_types[i])) {
+ *type = i;
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+/**
+ * thermal_of_populate_trip - parse and fill one trip point data
+ * @np: DT node containing a trip point node
+ * @trip: trip point data structure to be filled up
+ *
+ * This function parses a trip point type of node represented by
+ * @np parameter and fills the read data into @trip data structure.
+ *
+ * Return: 0 on success, proper error code otherwise
+ */
+static int thermal_of_populate_trip(struct device_node *np,
+ struct __thermal_trip *trip)
+{
+ int prop;
+ int ret;
+
+ ret = of_property_read_u32(np, "temperature", &prop);
+ if (ret < 0) {
+ pr_err("missing temperature property\n");
+ return ret;
+ }
+ trip->temperature = prop;
+
+ ret = of_property_read_u32(np, "hysteresis", &prop);
+ if (ret < 0) {
+ pr_err("missing hysteresis property\n");
+ return ret;
+ }
+ trip->hysteresis = prop;
+
+ ret = thermal_of_get_trip_type(np, &trip->type);
+ if (ret < 0) {
+ pr_err("wrong trip type property\n");
+ return ret;
+ }
+
+ /* Required for cooling map matching */
+ trip->np = np;
+
+ return 0;
+}
+
+/**
+ * thermal_of_build_thermal_zone - parse and fill one thermal zone data
+ * @np: DT node containing a thermal zone node
+ *
+ * This function parses a thermal zone type of node represented by
+ * @np parameter and fills the read data into a __thermal_zone data structure
+ * and returns a pointer to it.
+ *
+ * TODO: Missing properties to parse: thermal-sensor-names and coefficients
+ *
+ * Return: On success returns a valid struct __thermal_zone,
+ * otherwise, it returns a corresponding ERR_PTR(). Caller must
+ * check the return value with the IS_ERR() helper.
+ */
+static struct __thermal_zone *
+thermal_of_build_thermal_zone(struct device_node *np)
+{
+ struct device_node *child = NULL, *gchild;
+ struct __thermal_zone *tz;
+ int ret, i;
+ u32 prop;
+
+ if (!np) {
+ pr_err("no thermal zone np\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ tz = kzalloc(sizeof(*tz), GFP_KERNEL);
+ if (!tz)
+ return ERR_PTR(-ENOMEM);
+
+ ret = of_property_read_u32(np, "polling-delay-passive", &prop);
+ if (ret < 0) {
+ pr_err("missing polling-delay-passive property\n");
+ goto free_tz;
+ }
+ tz->passive_delay = prop;
+
+ ret = of_property_read_u32(np, "polling-delay", &prop);
+ if (ret < 0) {
+ pr_err("missing polling-delay property\n");
+ goto free_tz;
+ }
+ tz->polling_delay = prop;
+
+ /* trips */
+ child = of_get_child_by_name(np, "trips");
+
+ /* No trips provided */
+ if (!child)
+ goto finish;
+
+ tz->ntrips = of_get_child_count(child);
+ if (tz->ntrips == 0) /* must have at least one child */
+ goto finish;
+
+ tz->trips = kzalloc(tz->ntrips * sizeof(*tz->trips), GFP_KERNEL);
+ if (!tz->trips) {
+ ret = -ENOMEM;
+ goto free_tz;
+ }
+
+ i = 0;
+ for_each_child_of_node(child, gchild) {
+ ret = thermal_of_populate_trip(gchild, &tz->trips[i++]);
+ if (ret)
+ goto free_trips;
+ }
+
+ of_node_put(child);
+
+ /* cooling-maps */
+ child = of_get_child_by_name(np, "cooling-maps");
+
+ /* cooling-maps not provided */
+ if (!child)
+ goto finish;
+
+ tz->num_tbps = of_get_child_count(child);
+ if (tz->num_tbps == 0)
+ goto finish;
+
+ tz->tbps = kzalloc(tz->num_tbps * sizeof(*tz->tbps), GFP_KERNEL);
+ if (!tz->tbps) {
+ ret = -ENOMEM;
+ goto free_trips;
+ }
+
+ i = 0;
+ for_each_child_of_node(child, gchild) {
+ ret = thermal_of_populate_bind_params(gchild, &tz->tbps[i++],
+ tz->trips, tz->ntrips);
+ if (ret)
+ goto free_tbps;
+ }
+
+finish:
+ of_node_put(child);
+ tz->mode = THERMAL_DEVICE_DISABLED;
+
+ return tz;
+
+free_tbps:
+ kfree(tz->tbps);
+free_trips:
+ kfree(tz->trips);
+free_tz:
+ kfree(tz);
+ of_node_put(child);
+
+ return ERR_PTR(ret);
+}
+
+static inline void of_thermal_free_zone(struct __thermal_zone *tz)
+{
+ kfree(tz->tbps);
+ kfree(tz->trips);
+ kfree(tz);
+}
+
+/**
+ * of_parse_thermal_zones - parse device tree thermal data
+ *
+ * Initialization function that can be called by machine initialization
+ * code to parse thermal data and populate the thermal framework
+ * with hardware thermal zones info. This function only parses thermal zones.
+ * Cooling device and sensor device nodes are expected to be parsed
+ * by their respective drivers.
+ *
+ * Return: 0 on success, proper error code otherwise
+ *
+ */
+int __init of_parse_thermal_zones(void)
+{
+ struct device_node *np, *child;
+ struct __thermal_zone *tz;
+ struct thermal_zone_device_ops *ops;
+
+ np = of_find_node_by_name(NULL, "thermal-zones");
+ if (!np) {
+ pr_debug("unable to find thermal zones\n");
+ return 0; /* Run successfully on systems without thermal DT */
+ }
+
+ for_each_child_of_node(np, child) {
+ struct thermal_zone_device *zone;
+ struct thermal_zone_params *tzp;
+
+ tz = thermal_of_build_thermal_zone(child);
+ if (IS_ERR(tz)) {
+ pr_err("failed to build thermal zone %s: %ld\n",
+ child->name,
+ PTR_ERR(tz));
+ continue;
+ }
+
+ ops = kmemdup(&of_thermal_ops, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ goto exit_free;
+
+ tzp = kzalloc(sizeof(*tzp), GFP_KERNEL);
+ if (!tzp) {
+ kfree(ops);
+ goto exit_free;
+ }
+
+ /* No hwmon because there might be hwmon drivers registering */
+ tzp->no_hwmon = true;
+
+ zone = thermal_zone_device_register(child->name, tz->ntrips,
+ 0, tz,
+ ops, tzp,
+ tz->passive_delay,
+ tz->polling_delay);
+ if (IS_ERR(zone)) {
+ pr_err("Failed to build %s zone %ld\n", child->name,
+ PTR_ERR(zone));
+ kfree(tzp);
+ kfree(ops);
+ of_thermal_free_zone(tz);
+ /* attempt to build the remaining zones anyway */
+ }
+ }
+
+ return 0;
+
+exit_free:
+ of_thermal_free_zone(tz);
+
+ /* no memory available, so free what we have built */
+ of_thermal_destroy_zones();
+
+ return -ENOMEM;
+}
+
+/**
+ * of_thermal_destroy_zones - remove all zones parsed and allocated resources
+ *
+ * Finds all zones parsed and added to the thermal framework and remove them
+ * from the system, together with their resources.
+ *
+ */
+void of_thermal_destroy_zones(void)
+{
+ struct device_node *np, *child;
+
+ np = of_find_node_by_name(NULL, "thermal-zones");
+ if (!np) {
+ pr_err("unable to find thermal zones\n");
+ return;
+ }
+
+ for_each_child_of_node(np, child) {
+ struct thermal_zone_device *zone;
+
+ zone = thermal_zone_get_zone_by_name(child->name);
+ if (IS_ERR(zone))
+ continue;
+
+ thermal_zone_device_unregister(zone);
+ kfree(zone->tzp);
+ kfree(zone->ops);
+ of_thermal_free_zone(zone->devdata);
+ }
+}
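
Finally, to show the sensor API from the driver side: a hypothetical sensor driver registers a read callback at probe time and defers until the matching thermal zone exists (the foo_* names and types are invented):

/* Hypothetical sensor driver fragment using the of-thermal sensor API. */
static int foo_get_temp(void *data, long *temp)
{
	struct foo_sensor *s = data;		/* invented type */

	*temp = foo_read_millicelsius(s);	/* invented helper */
	return 0;
}

static int foo_probe(struct platform_device *pdev)
{
	struct foo_sensor *s;

	s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	s->tzd = thermal_zone_of_sensor_register(&pdev->dev, 0, s,
						 foo_get_temp, NULL);
	if (IS_ERR(s->tzd))
		return PTR_ERR(s->tzd);	/* may be -EPROBE_DEFER */

	platform_set_drvdata(pdev, s);
	return 0;
}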
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index 4d4ddae1a991..c83a3097ef90 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -53,6 +53,7 @@ static unsigned long get_target_state(struct thermal_instance *instance,
unsigned long cur_state;
cdev->ops->get_cur_state(cdev, &cur_state);
+ dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
switch (trend) {
case THERMAL_TREND_RAISING:
@@ -124,6 +125,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
if (tz->temperature >= trip_temp)
throttle = true;
+ dev_dbg(&tz->device, "Trip%d[type=%d,temp=%ld]:trend=%d,throttle=%d\n",
+ trip, trip_type, trip_temp, trend, throttle);
+
mutex_lock(&tz->lock);
list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
@@ -132,6 +136,8 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
old_target = instance->target;
instance->target = get_target_state(instance, trend, throttle);
+ dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
+ old_target, (int)instance->target);
/* Activate a passive thermal instance */
if (old_target == THERMAL_NO_TARGET &&
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index d755440791b7..edc0cb88f1d0 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -33,10 +33,13 @@
#include <linux/idr.h>
#include <linux/thermal.h>
#include <linux/reboot.h>
+#include <linux/string.h>
+#include <linux/of.h>
#include <net/netlink.h>
#include <net/genetlink.h>
#include "thermal_core.h"
+#include "thermal_hwmon.h"
MODULE_AUTHOR("Zhang Rui");
MODULE_DESCRIPTION("Generic thermal management sysfs support");
@@ -53,10 +56,15 @@ static LIST_HEAD(thermal_governor_list);
static DEFINE_MUTEX(thermal_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
+static struct thermal_governor *def_governor;
+
static struct thermal_governor *__find_governor(const char *name)
{
struct thermal_governor *pos;
+ if (!name || !name[0])
+ return def_governor;
+
list_for_each_entry(pos, &thermal_governor_list, governor_list)
if (!strnicmp(name, pos->name, THERMAL_NAME_LENGTH))
return pos;
@@ -79,17 +87,23 @@ int thermal_register_governor(struct thermal_governor *governor)
if (__find_governor(governor->name) == NULL) {
err = 0;
list_add(&governor->governor_list, &thermal_governor_list);
+ if (!def_governor && !strncmp(governor->name,
+ DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH))
+ def_governor = governor;
}
mutex_lock(&thermal_list_lock);
list_for_each_entry(pos, &thermal_tz_list, node) {
+ /*
+ * only thermal zones with specified tz->tzp->governor_name
+ * may run with tz->govenor unset
+ */
if (pos->governor)
continue;
- if (pos->tzp)
- name = pos->tzp->governor_name;
- else
- name = DEFAULT_THERMAL_GOVERNOR;
+
+ name = pos->tzp->governor_name;
+
if (!strnicmp(name, governor->name, THERMAL_NAME_LENGTH))
pos->governor = governor;
}
@@ -235,10 +249,11 @@ static void bind_cdev(struct thermal_cooling_device *cdev)
if (!pos->tzp && !pos->ops->bind)
continue;
- if (!pos->tzp && pos->ops->bind) {
+ if (pos->ops->bind) {
ret = pos->ops->bind(pos, cdev);
if (ret)
print_bind_err_msg(pos, cdev, ret);
+ continue;
}
tzp = pos->tzp;
@@ -269,8 +284,8 @@ static void bind_tz(struct thermal_zone_device *tz)
mutex_lock(&thermal_list_lock);
- /* If there is no platform data, try to use ops->bind */
- if (!tzp && tz->ops->bind) {
+ /* If there is ops->bind, try to use ops->bind */
+ if (tz->ops->bind) {
list_for_each_entry(pos, &thermal_cdev_list, node) {
ret = tz->ops->bind(tz, pos);
if (ret)
@@ -326,8 +341,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
static void handle_non_critical_trips(struct thermal_zone_device *tz,
int trip, enum thermal_trip_type trip_type)
{
- if (tz->governor)
- tz->governor->throttle(tz, trip);
+ tz->governor ? tz->governor->throttle(tz, trip) :
+ def_governor->throttle(tz, trip);
}
static void handle_critical_trips(struct thermal_zone_device *tz,
@@ -388,7 +403,7 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp)
enum thermal_trip_type type;
#endif
- if (!tz || IS_ERR(tz))
+ if (!tz || IS_ERR(tz) || !tz->ops->get_temp)
goto exit;
mutex_lock(&tz->lock);
@@ -435,12 +450,18 @@ static void update_temperature(struct thermal_zone_device *tz)
tz->last_temperature = tz->temperature;
tz->temperature = temp;
mutex_unlock(&tz->lock);
+
+ dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+ tz->last_temperature, tz->temperature);
}
void thermal_zone_device_update(struct thermal_zone_device *tz)
{
int count;
+ if (!tz->ops->get_temp)
+ return;
+
update_temperature(tz);
for (count = 0; count < tz->trips; count++)
@@ -756,6 +777,9 @@ emul_temp_store(struct device *dev, struct device_attribute *attr,
ret = tz->ops->set_emul_temp(tz, temperature);
}
+ if (!ret)
+ thermal_zone_device_update(tz);
+
return ret ? ret : count;
}
static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
@@ -854,260 +878,6 @@ thermal_cooling_device_trip_point_show(struct device *dev,
/* Device management */
-#if defined(CONFIG_THERMAL_HWMON)
-
-/* hwmon sys I/F */
-#include <linux/hwmon.h>
-
-/* thermal zone devices with the same type share one hwmon device */
-struct thermal_hwmon_device {
- char type[THERMAL_NAME_LENGTH];
- struct device *device;
- int count;
- struct list_head tz_list;
- struct list_head node;
-};
-
-struct thermal_hwmon_attr {
- struct device_attribute attr;
- char name[16];
-};
-
-/* one temperature input for each thermal zone */
-struct thermal_hwmon_temp {
- struct list_head hwmon_node;
- struct thermal_zone_device *tz;
- struct thermal_hwmon_attr temp_input; /* hwmon sys attr */
- struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */
-};
-
-static LIST_HEAD(thermal_hwmon_list);
-
-static ssize_t
-name_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev);
- return sprintf(buf, "%s\n", hwmon->type);
-}
-static DEVICE_ATTR(name, 0444, name_show, NULL);
-
-static ssize_t
-temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- long temperature;
- int ret;
- struct thermal_hwmon_attr *hwmon_attr
- = container_of(attr, struct thermal_hwmon_attr, attr);
- struct thermal_hwmon_temp *temp
- = container_of(hwmon_attr, struct thermal_hwmon_temp,
- temp_input);
- struct thermal_zone_device *tz = temp->tz;
-
- ret = thermal_zone_get_temp(tz, &temperature);
-
- if (ret)
- return ret;
-
- return sprintf(buf, "%ld\n", temperature);
-}
-
-static ssize_t
-temp_crit_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct thermal_hwmon_attr *hwmon_attr
- = container_of(attr, struct thermal_hwmon_attr, attr);
- struct thermal_hwmon_temp *temp
- = container_of(hwmon_attr, struct thermal_hwmon_temp,
- temp_crit);
- struct thermal_zone_device *tz = temp->tz;
- long temperature;
- int ret;
-
- ret = tz->ops->get_trip_temp(tz, 0, &temperature);
- if (ret)
- return ret;
-
- return sprintf(buf, "%ld\n", temperature);
-}
-
-
-static struct thermal_hwmon_device *
-thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
-{
- struct thermal_hwmon_device *hwmon;
-
- mutex_lock(&thermal_list_lock);
- list_for_each_entry(hwmon, &thermal_hwmon_list, node)
- if (!strcmp(hwmon->type, tz->type)) {
- mutex_unlock(&thermal_list_lock);
- return hwmon;
- }
- mutex_unlock(&thermal_list_lock);
-
- return NULL;
-}
-
-/* Find the temperature input matching a given thermal zone */
-static struct thermal_hwmon_temp *
-thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon,
- const struct thermal_zone_device *tz)
-{
- struct thermal_hwmon_temp *temp;
-
- mutex_lock(&thermal_list_lock);
- list_for_each_entry(temp, &hwmon->tz_list, hwmon_node)
- if (temp->tz == tz) {
- mutex_unlock(&thermal_list_lock);
- return temp;
- }
- mutex_unlock(&thermal_list_lock);
-
- return NULL;
-}
-
-static int
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
-{
- struct thermal_hwmon_device *hwmon;
- struct thermal_hwmon_temp *temp;
- int new_hwmon_device = 1;
- int result;
-
- hwmon = thermal_hwmon_lookup_by_type(tz);
- if (hwmon) {
- new_hwmon_device = 0;
- goto register_sys_interface;
- }
-
- hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL);
- if (!hwmon)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&hwmon->tz_list);
- strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH);
- hwmon->device = hwmon_device_register(NULL);
- if (IS_ERR(hwmon->device)) {
- result = PTR_ERR(hwmon->device);
- goto free_mem;
- }
- dev_set_drvdata(hwmon->device, hwmon);
- result = device_create_file(hwmon->device, &dev_attr_name);
- if (result)
- goto free_mem;
-
- register_sys_interface:
- temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL);
- if (!temp) {
- result = -ENOMEM;
- goto unregister_name;
- }
-
- temp->tz = tz;
- hwmon->count++;
-
- snprintf(temp->temp_input.name, sizeof(temp->temp_input.name),
- "temp%d_input", hwmon->count);
- temp->temp_input.attr.attr.name = temp->temp_input.name;
- temp->temp_input.attr.attr.mode = 0444;
- temp->temp_input.attr.show = temp_input_show;
- sysfs_attr_init(&temp->temp_input.attr.attr);
- result = device_create_file(hwmon->device, &temp->temp_input.attr);
- if (result)
- goto free_temp_mem;
-
- if (tz->ops->get_crit_temp) {
- unsigned long temperature;
- if (!tz->ops->get_crit_temp(tz, &temperature)) {
- snprintf(temp->temp_crit.name,
- sizeof(temp->temp_crit.name),
- "temp%d_crit", hwmon->count);
- temp->temp_crit.attr.attr.name = temp->temp_crit.name;
- temp->temp_crit.attr.attr.mode = 0444;
- temp->temp_crit.attr.show = temp_crit_show;
- sysfs_attr_init(&temp->temp_crit.attr.attr);
- result = device_create_file(hwmon->device,
- &temp->temp_crit.attr);
- if (result)
- goto unregister_input;
- }
- }
-
- mutex_lock(&thermal_list_lock);
- if (new_hwmon_device)
- list_add_tail(&hwmon->node, &thermal_hwmon_list);
- list_add_tail(&temp->hwmon_node, &hwmon->tz_list);
- mutex_unlock(&thermal_list_lock);
-
- return 0;
-
- unregister_input:
- device_remove_file(hwmon->device, &temp->temp_input.attr);
- free_temp_mem:
- kfree(temp);
- unregister_name:
- if (new_hwmon_device) {
- device_remove_file(hwmon->device, &dev_attr_name);
- hwmon_device_unregister(hwmon->device);
- }
- free_mem:
- if (new_hwmon_device)
- kfree(hwmon);
-
- return result;
-}
-
-static void
-thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
-{
- struct thermal_hwmon_device *hwmon;
- struct thermal_hwmon_temp *temp;
-
- hwmon = thermal_hwmon_lookup_by_type(tz);
- if (unlikely(!hwmon)) {
- /* Should never happen... */
- dev_dbg(&tz->device, "hwmon device lookup failed!\n");
- return;
- }
-
- temp = thermal_hwmon_lookup_temp(hwmon, tz);
- if (unlikely(!temp)) {
- /* Should never happen... */
- dev_dbg(&tz->device, "temperature input lookup failed!\n");
- return;
- }
-
- device_remove_file(hwmon->device, &temp->temp_input.attr);
- if (tz->ops->get_crit_temp)
- device_remove_file(hwmon->device, &temp->temp_crit.attr);
-
- mutex_lock(&thermal_list_lock);
- list_del(&temp->hwmon_node);
- kfree(temp);
- if (!list_empty(&hwmon->tz_list)) {
- mutex_unlock(&thermal_list_lock);
- return;
- }
- list_del(&hwmon->node);
- mutex_unlock(&thermal_list_lock);
-
- device_remove_file(hwmon->device, &dev_attr_name);
- hwmon_device_unregister(hwmon->device);
- kfree(hwmon);
-}
-#else
-static int
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
-{
- return 0;
-}
-
-static void
-thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
-{
-}
-#endif
-
/**
* thermal_zone_bind_cooling_device() - bind a cooling device to a thermal zone
* @tz: pointer to struct thermal_zone_device
@@ -1287,7 +1057,8 @@ static struct class thermal_class = {
};
/**
- * thermal_cooling_device_register() - register a new thermal cooling device
+ * __thermal_cooling_device_register() - register a new thermal cooling device
+ * @np: a pointer to a device tree node.
* @type: the thermal cooling device type.
* @devdata: device private data.
* @ops: standard thermal cooling devices callbacks.
@@ -1295,13 +1066,16 @@ static struct class thermal_class = {
* This interface function adds a new thermal cooling device (fan/processor/...)
* to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
* to all the thermal zone devices registered at the same time.
+ * It also gives the opportunity to link the cooling device to a device tree
+ * node, so that it can be bound to a thermal zone created out of device tree.
*
* Return: a pointer to the created struct thermal_cooling_device or an
* ERR_PTR. Caller must check return value with IS_ERR*() helpers.
*/
-struct thermal_cooling_device *
-thermal_cooling_device_register(char *type, void *devdata,
- const struct thermal_cooling_device_ops *ops)
+static struct thermal_cooling_device *
+__thermal_cooling_device_register(struct device_node *np,
+ char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
{
struct thermal_cooling_device *cdev;
int result;
@@ -1326,8 +1100,9 @@ thermal_cooling_device_register(char *type, void *devdata,
strlcpy(cdev->type, type ? : "", sizeof(cdev->type));
mutex_init(&cdev->lock);
INIT_LIST_HEAD(&cdev->thermal_instances);
+ cdev->np = np;
cdev->ops = ops;
- cdev->updated = true;
+ cdev->updated = false;
cdev->device.class = &thermal_class;
cdev->devdata = devdata;
dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
@@ -1368,9 +1143,53 @@ unregister:
device_unregister(&cdev->device);
return ERR_PTR(result);
}
+
+/**
+ * thermal_cooling_device_register() - register a new thermal cooling device
+ * @type: the thermal cooling device type.
+ * @devdata: device private data.
+ * @ops: standard thermal cooling devices callbacks.
+ *
+ * This interface function adds a new thermal cooling device (fan/processor/...)
+ * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
+ * to all the thermal zone devices registered at the same time.
+ *
+ * Return: a pointer to the created struct thermal_cooling_device or an
+ * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
+ */
+struct thermal_cooling_device *
+thermal_cooling_device_register(char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
+{
+ return __thermal_cooling_device_register(NULL, type, devdata, ops);
+}
EXPORT_SYMBOL_GPL(thermal_cooling_device_register);
/**
+ * thermal_of_cooling_device_register() - register an OF thermal cooling device
+ * @np: a pointer to a device tree node.
+ * @type: the thermal cooling device type.
+ * @devdata: device private data.
+ * @ops: standard thermal cooling devices callbacks.
+ *
+ * This function will register a cooling device with device tree node reference.
+ * This interface function adds a new thermal cooling device (fan/processor/...)
+ * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
+ * to all the thermal zone devices registered at the same time.
+ *
+ * Return: a pointer to the created struct thermal_cooling_device or an
+ * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
+ */
+struct thermal_cooling_device *
+thermal_of_cooling_device_register(struct device_node *np,
+ char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
+{
+ return __thermal_cooling_device_register(np, type, devdata, ops);
+}
+EXPORT_SYMBOL_GPL(thermal_of_cooling_device_register);
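For orientation, a minimal driver-side sketch of the two registration paths above; the ops table, the devdata pointer and the "my-fan" type are hypothetical, not part of this patch:

	static const struct thermal_cooling_device_ops my_cdev_ops = {
		.get_max_state = my_get_max_state,	/* hypothetical callbacks */
		.get_cur_state = my_get_cur_state,
		.set_cur_state = my_set_cur_state,
	};

	struct thermal_cooling_device *cdev;

	/* legacy path: no device tree linkage */
	cdev = thermal_cooling_device_register("my-fan", my_data, &my_cdev_ops);
	if (IS_ERR(cdev))
		return PTR_ERR(cdev);

	/* OF path: cdev->np lets zones built from device tree find this device */
	cdev = thermal_of_cooling_device_register(pdev->dev.of_node, "my-fan",
						  my_data, &my_cdev_ops);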
+
+/**
* thermal_cooling_device_unregister - removes the registered thermal cooling device
* @cdev: the thermal cooling device to remove.
*
@@ -1442,6 +1261,8 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
mutex_lock(&cdev->lock);
/* Make sure cdev enters the deepest cooling state */
list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
+ dev_dbg(&cdev->device, "zone%d->target=%lu\n",
+ instance->tz->id, instance->target);
if (instance->target == THERMAL_NO_TARGET)
continue;
if (instance->target > target)
@@ -1450,6 +1271,7 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
mutex_unlock(&cdev->lock);
cdev->ops->set_cur_state(cdev, target);
cdev->updated = true;
+ dev_dbg(&cdev->device, "set to state %lu\n", target);
}
EXPORT_SYMBOL(thermal_cdev_update);
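As a worked example of the aggregation in thermal_cdev_update() above: if three bound instances request targets 1, THERMAL_NO_TARGET and 3, the NO_TARGET entry is skipped and set_cur_state() is called with 3, the deepest cooling state any thermal zone asked for.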
@@ -1605,7 +1427,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
*/
struct thermal_zone_device *thermal_zone_device_register(const char *type,
int trips, int mask, void *devdata,
- const struct thermal_zone_device_ops *ops,
+ struct thermal_zone_device_ops *ops,
const struct thermal_zone_params *tzp,
int passive_delay, int polling_delay)
{
@@ -1621,7 +1443,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips)
return ERR_PTR(-EINVAL);
- if (!ops || !ops->get_temp)
+ if (!ops)
return ERR_PTR(-EINVAL);
if (trips > 0 && !ops->get_trip_type)
@@ -1706,13 +1528,15 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
if (tz->tzp)
tz->governor = __find_governor(tz->tzp->governor_name);
else
- tz->governor = __find_governor(DEFAULT_THERMAL_GOVERNOR);
+ tz->governor = def_governor;
mutex_unlock(&thermal_governor_lock);
- result = thermal_add_hwmon_sysfs(tz);
- if (result)
- goto unregister;
+ if (!tz->tzp || !tz->tzp->no_hwmon) {
+ result = thermal_add_hwmon_sysfs(tz);
+ if (result)
+ goto unregister;
+ }
mutex_lock(&thermal_list_lock);
list_add_tail(&tz->node, &thermal_tz_list);
@@ -1723,6 +1547,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
+ if (!tz->ops->get_temp)
+ thermal_zone_device_set_polling(tz, 0);
+
thermal_zone_device_update(tz);
if (!result)
@@ -1980,8 +1807,14 @@ static int __init thermal_init(void)
if (result)
goto unregister_class;
+ result = of_parse_thermal_zones();
+ if (result)
+ goto exit_netlink;
+
return 0;
+exit_netlink:
+ genetlink_exit();
unregister_governors:
thermal_unregister_governors();
unregister_class:
@@ -1997,6 +1830,7 @@ error:
static void __exit thermal_exit(void)
{
+ of_thermal_destroy_zones();
genetlink_exit();
class_unregister(&thermal_class);
thermal_unregister_governors();
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 7cf2f6626251..3db339fb636f 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -77,4 +77,13 @@ static inline int thermal_gov_user_space_register(void) { return 0; }
static inline void thermal_gov_user_space_unregister(void) {}
#endif /* CONFIG_THERMAL_GOV_USER_SPACE */
+/* device tree support */
+#ifdef CONFIG_THERMAL_OF
+int of_parse_thermal_zones(void);
+void of_thermal_destroy_zones(void);
+#else
+static inline int of_parse_thermal_zones(void) { return 0; }
+static inline void of_thermal_destroy_zones(void) { }
+#endif
+
#endif /* __THERMAL_CORE_H__ */
diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c
new file mode 100644
index 000000000000..fdb07199d9c2
--- /dev/null
+++ b/drivers/thermal/thermal_hwmon.c
@@ -0,0 +1,269 @@
+/*
+ * thermal_hwmon.c - Generic Thermal Management hwmon support.
+ *
+ * Code based on Intel thermal_core.c. Copyrights of the original code:
+ * Copyright (C) 2008 Intel Corp
+ * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
+ * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
+ *
+ * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com>
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <linux/hwmon.h>
+#include <linux/thermal.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include "thermal_hwmon.h"
+
+/* hwmon sys I/F */
+/* thermal zone devices with the same type share one hwmon device */
+struct thermal_hwmon_device {
+ char type[THERMAL_NAME_LENGTH];
+ struct device *device;
+ int count;
+ struct list_head tz_list;
+ struct list_head node;
+};
+
+struct thermal_hwmon_attr {
+ struct device_attribute attr;
+ char name[16];
+};
+
+/* one temperature input for each thermal zone */
+struct thermal_hwmon_temp {
+ struct list_head hwmon_node;
+ struct thermal_zone_device *tz;
+ struct thermal_hwmon_attr temp_input; /* hwmon sys attr */
+ struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */
+};
+
+static LIST_HEAD(thermal_hwmon_list);
+
+static DEFINE_MUTEX(thermal_hwmon_list_lock);
+
+static ssize_t
+name_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev);
+ return sprintf(buf, "%s\n", hwmon->type);
+}
+static DEVICE_ATTR(name, 0444, name_show, NULL);
+
+static ssize_t
+temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ long temperature;
+ int ret;
+ struct thermal_hwmon_attr *hwmon_attr
+ = container_of(attr, struct thermal_hwmon_attr, attr);
+ struct thermal_hwmon_temp *temp
+ = container_of(hwmon_attr, struct thermal_hwmon_temp,
+ temp_input);
+ struct thermal_zone_device *tz = temp->tz;
+
+ ret = thermal_zone_get_temp(tz, &temperature);
+
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%ld\n", temperature);
+}
+
+static ssize_t
+temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct thermal_hwmon_attr *hwmon_attr
+ = container_of(attr, struct thermal_hwmon_attr, attr);
+ struct thermal_hwmon_temp *temp
+ = container_of(hwmon_attr, struct thermal_hwmon_temp,
+ temp_crit);
+ struct thermal_zone_device *tz = temp->tz;
+ long temperature;
+ int ret;
+
+ ret = tz->ops->get_trip_temp(tz, 0, &temperature);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%ld\n", temperature);
+}
+
+
+static struct thermal_hwmon_device *
+thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
+{
+ struct thermal_hwmon_device *hwmon;
+
+ mutex_lock(&thermal_hwmon_list_lock);
+ list_for_each_entry(hwmon, &thermal_hwmon_list, node)
+ if (!strcmp(hwmon->type, tz->type)) {
+ mutex_unlock(&thermal_hwmon_list_lock);
+ return hwmon;
+ }
+ mutex_unlock(&thermal_hwmon_list_lock);
+
+ return NULL;
+}
+
+/* Find the temperature input matching a given thermal zone */
+static struct thermal_hwmon_temp *
+thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon,
+ const struct thermal_zone_device *tz)
+{
+ struct thermal_hwmon_temp *temp;
+
+ mutex_lock(&thermal_hwmon_list_lock);
+ list_for_each_entry(temp, &hwmon->tz_list, hwmon_node)
+ if (temp->tz == tz) {
+ mutex_unlock(&thermal_hwmon_list_lock);
+ return temp;
+ }
+ mutex_unlock(&thermal_hwmon_list_lock);
+
+ return NULL;
+}
+
+int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+ struct thermal_hwmon_device *hwmon;
+ struct thermal_hwmon_temp *temp;
+ int new_hwmon_device = 1;
+ int result;
+
+ hwmon = thermal_hwmon_lookup_by_type(tz);
+ if (hwmon) {
+ new_hwmon_device = 0;
+ goto register_sys_interface;
+ }
+
+ hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
+ if (!hwmon)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&hwmon->tz_list);
+ strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH);
+ hwmon->device = hwmon_device_register(NULL);
+ if (IS_ERR(hwmon->device)) {
+ result = PTR_ERR(hwmon->device);
+ goto free_mem;
+ }
+ dev_set_drvdata(hwmon->device, hwmon);
+ result = device_create_file(hwmon->device, &dev_attr_name);
+ if (result)
+ goto free_mem;
+
+ register_sys_interface:
+ temp = kzalloc(sizeof(*temp), GFP_KERNEL);
+ if (!temp) {
+ result = -ENOMEM;
+ goto unregister_name;
+ }
+
+ temp->tz = tz;
+ hwmon->count++;
+
+ snprintf(temp->temp_input.name, sizeof(temp->temp_input.name),
+ "temp%d_input", hwmon->count);
+ temp->temp_input.attr.attr.name = temp->temp_input.name;
+ temp->temp_input.attr.attr.mode = 0444;
+ temp->temp_input.attr.show = temp_input_show;
+ sysfs_attr_init(&temp->temp_input.attr.attr);
+ result = device_create_file(hwmon->device, &temp->temp_input.attr);
+ if (result)
+ goto free_temp_mem;
+
+ if (tz->ops->get_crit_temp) {
+ unsigned long temperature;
+ if (!tz->ops->get_crit_temp(tz, &temperature)) {
+ snprintf(temp->temp_crit.name,
+ sizeof(temp->temp_crit.name),
+ "temp%d_crit", hwmon->count);
+ temp->temp_crit.attr.attr.name = temp->temp_crit.name;
+ temp->temp_crit.attr.attr.mode = 0444;
+ temp->temp_crit.attr.show = temp_crit_show;
+ sysfs_attr_init(&temp->temp_crit.attr.attr);
+ result = device_create_file(hwmon->device,
+ &temp->temp_crit.attr);
+ if (result)
+ goto unregister_input;
+ }
+ }
+
+ mutex_lock(&thermal_hwmon_list_lock);
+ if (new_hwmon_device)
+ list_add_tail(&hwmon->node, &thermal_hwmon_list);
+ list_add_tail(&temp->hwmon_node, &hwmon->tz_list);
+ mutex_unlock(&thermal_hwmon_list_lock);
+
+ return 0;
+
+ unregister_input:
+ device_remove_file(hwmon->device, &temp->temp_input.attr);
+ free_temp_mem:
+ kfree(temp);
+ unregister_name:
+ if (new_hwmon_device) {
+ device_remove_file(hwmon->device, &dev_attr_name);
+ hwmon_device_unregister(hwmon->device);
+ }
+ free_mem:
+ if (new_hwmon_device)
+ kfree(hwmon);
+
+ return result;
+}
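On success the zone is exposed through a hwmon class device named after tz->type (zones of the same type share one device, per thermal_hwmon_lookup_by_type() above), carrying a temp%d_input attribute and, when the zone implements get_crit_temp(), a matching temp%d_crit attribute.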
+
+void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+ struct thermal_hwmon_device *hwmon;
+ struct thermal_hwmon_temp *temp;
+
+ hwmon = thermal_hwmon_lookup_by_type(tz);
+ if (unlikely(!hwmon)) {
+ /* Should never happen... */
+ dev_dbg(&tz->device, "hwmon device lookup failed!\n");
+ return;
+ }
+
+ temp = thermal_hwmon_lookup_temp(hwmon, tz);
+ if (unlikely(!temp)) {
+ /* Should never happen... */
+ dev_dbg(&tz->device, "temperature input lookup failed!\n");
+ return;
+ }
+
+ device_remove_file(hwmon->device, &temp->temp_input.attr);
+ if (tz->ops->get_crit_temp)
+ device_remove_file(hwmon->device, &temp->temp_crit.attr);
+
+ mutex_lock(&thermal_hwmon_list_lock);
+ list_del(&temp->hwmon_node);
+ kfree(temp);
+ if (!list_empty(&hwmon->tz_list)) {
+ mutex_unlock(&thermal_hwmon_list_lock);
+ return;
+ }
+ list_del(&hwmon->node);
+ mutex_unlock(&thermal_hwmon_list_lock);
+
+ device_remove_file(hwmon->device, &dev_attr_name);
+ hwmon_device_unregister(hwmon->device);
+ kfree(hwmon);
+}
diff --git a/drivers/thermal/thermal_hwmon.h b/drivers/thermal/thermal_hwmon.h
new file mode 100644
index 000000000000..c798fdb2ae43
--- /dev/null
+++ b/drivers/thermal/thermal_hwmon.h
@@ -0,0 +1,49 @@
+/*
+ * thermal_hwmon.h - Generic Thermal Management hwmon support.
+ *
+ * Code based on Intel thermal_core.c. Copyrights of the original code:
+ * Copyright (C) 2008 Intel Corp
+ * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
+ * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
+ *
+ * Copyright (C) 2013 Texas Instruments
+ * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com>
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef __THERMAL_HWMON_H__
+#define __THERMAL_HWMON_H__
+
+#include <linux/thermal.h>
+
+#ifdef CONFIG_THERMAL_HWMON
+int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz);
+void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz);
+#else
+static inline int
+thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+ return 0;
+}
+
+static inline void
+thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+}
+#endif
+
+#endif /* __THERMAL_HWMON_H__ */
diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index 721904f8efa9..4858b8a99d3b 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -35,18 +35,8 @@
#include <linux/serial_8250.h>
#include <asm/io.h>
#include <asm/serial.h>
-#ifdef CONFIG_FIX_EARLYCON_MEM
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#endif
-struct early_serial8250_device {
- struct uart_port port;
- char options[16]; /* e.g., 115200n8 */
- unsigned int baud;
-};
-
-static struct early_serial8250_device early_device;
+static struct earlycon_device *early_device;
unsigned int __weak __init serial8250_early_in(struct uart_port *port, int offset)
{
@@ -100,7 +90,7 @@ static void __init serial_putc(struct uart_port *port, int c)
static void __init early_serial8250_write(struct console *console,
const char *s, unsigned int count)
{
- struct uart_port *port = &early_device.port;
+ struct uart_port *port = &early_device->port;
unsigned int ier;
/* Save the IER and disable interrupts */
@@ -129,7 +119,7 @@ static unsigned int __init probe_baud(struct uart_port *port)
return (port->uartclk / 16) / quot;
}
-static void __init init_port(struct early_serial8250_device *device)
+static void __init init_port(struct earlycon_device *device)
{
struct uart_port *port = &device->port;
unsigned int divisor;
@@ -148,127 +138,45 @@ static void __init init_port(struct early_serial8250_device *device)
serial8250_early_out(port, UART_LCR, c & ~UART_LCR_DLAB);
}
-static int __init parse_options(struct early_serial8250_device *device,
- char *options)
+static int __init early_serial8250_setup(struct earlycon_device *device,
+ const char *options)
{
- struct uart_port *port = &device->port;
- int mmio, mmio32, length;
-
- if (!options)
- return -ENODEV;
-
- port->uartclk = BASE_BAUD * 16;
-
- mmio = !strncmp(options, "mmio,", 5);
- mmio32 = !strncmp(options, "mmio32,", 7);
- if (mmio || mmio32) {
- port->iotype = (mmio ? UPIO_MEM : UPIO_MEM32);
- port->mapbase = simple_strtoul(options + (mmio ? 5 : 7),
- &options, 0);
- if (mmio32)
- port->regshift = 2;
-#ifdef CONFIG_FIX_EARLYCON_MEM
- set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
- port->mapbase & PAGE_MASK);
- port->membase =
- (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
- port->membase += port->mapbase & ~PAGE_MASK;
-#else
- port->membase = ioremap_nocache(port->mapbase, 64);
- if (!port->membase) {
- printk(KERN_ERR "%s: Couldn't ioremap 0x%llx\n",
- __func__,
- (unsigned long long) port->mapbase);
- return -ENOMEM;
- }
-#endif
- } else if (!strncmp(options, "io,", 3)) {
- port->iotype = UPIO_PORT;
- port->iobase = simple_strtoul(options + 3, &options, 0);
- mmio = 0;
- } else
- return -EINVAL;
+ if (!(device->port.membase || device->port.iobase))
+ return 0;
- options = strchr(options, ',');
- if (options) {
- options++;
- device->baud = simple_strtoul(options, NULL, 0);
- length = min(strcspn(options, " "), sizeof(device->options));
- strlcpy(device->options, options, length);
- } else {
- device->baud = probe_baud(port);
+ if (!device->baud) {
+ device->baud = probe_baud(&device->port);
snprintf(device->options, sizeof(device->options), "%u",
- device->baud);
+ device->baud);
}
- if (mmio || mmio32)
- printk(KERN_INFO
- "Early serial console at MMIO%s 0x%llx (options '%s')\n",
- mmio32 ? "32" : "",
- (unsigned long long)port->mapbase,
- device->options);
- else
- printk(KERN_INFO
- "Early serial console at I/O port 0x%lx (options '%s')\n",
- port->iobase,
- device->options);
-
- return 0;
-}
-
-static struct console early_serial8250_console __initdata = {
- .name = "uart",
- .write = early_serial8250_write,
- .flags = CON_PRINTBUFFER | CON_BOOT,
- .index = -1,
-};
-
-static int __init early_serial8250_setup(char *options)
-{
- struct early_serial8250_device *device = &early_device;
- int err;
-
- if (device->port.membase || device->port.iobase)
- return 0;
-
- err = parse_options(device, options);
- if (err < 0)
- return err;
-
init_port(device);
+
+ early_device = device;
+ device->con->write = early_serial8250_write;
return 0;
}
+EARLYCON_DECLARE(uart8250, early_serial8250_setup);
+EARLYCON_DECLARE(uart, early_serial8250_setup);
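For comparison, a hypothetical non-8250 driver would hook in with the same pattern; every "myuart" name below is illustrative, not an existing driver:

	static void __init myuart_early_write(struct console *con, const char *s,
					      unsigned int n)
	{
		/* poll the TX-ready bit and emit bytes via port->membase */
	}

	static int __init myuart_early_setup(struct earlycon_device *device,
					     const char *options)
	{
		if (!device->port.membase)
			return -ENODEV;
		device->con->write = myuart_early_write;
		return 0;
	}
	EARLYCON_DECLARE(myuart, myuart_early_setup);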
int __init setup_early_serial8250_console(char *cmdline)
{
- char *options;
- int err;
+ char match[] = "uart8250";
- options = strstr(cmdline, "uart8250,");
- if (!options) {
- options = strstr(cmdline, "uart,");
- if (!options)
- return 0;
- }
-
- options = strchr(cmdline, ',') + 1;
- err = early_serial8250_setup(options);
- if (err < 0)
- return err;
-
- register_console(&early_serial8250_console);
+ /* "earlycon=uart," is accepted as an alias for "uart8250,", so
+  * shorten the match string when the short form is used */
+ if (cmdline && cmdline[4] == ',')
+ match[4] = '\0';
- return 0;
+ return setup_earlycon(cmdline, match, early_serial8250_setup);
}
int serial8250_find_port_for_earlycon(void)
{
- struct early_serial8250_device *device = &early_device;
- struct uart_port *port = &device->port;
+ struct earlycon_device *device = early_device;
+ struct uart_port *port = device ? &device->port : NULL;
int line;
int ret;
- if (!device->port.membase && !device->port.iobase)
+ if (!port || (!port->membase && !port->iobase))
return -ENODEV;
line = serial8250_find_port(port);
@@ -283,5 +191,3 @@ int serial8250_find_port_for_earlycon(void)
return ret;
}
-
-early_param("earlycon", setup_early_serial8250_console);
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index 80fe91e64a52..902c4bf15f0b 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -62,6 +62,7 @@ config SERIAL_8250_CONSOLE
bool "Console on 8250/16550 and compatible serial port"
depends on SERIAL_8250=y
select SERIAL_CORE_CONSOLE
+ select SERIAL_EARLYCON
---help---
If you say Y here, it will be possible to use a serial port as the
system console (the system console is the device which receives all
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 7e7006fd404e..1ad9aadc0055 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -7,6 +7,13 @@ if TTY
menu "Serial drivers"
depends on HAS_IOMEM && GENERIC_HARDIRQS
+config SERIAL_EARLYCON
+ bool
+ help
+ Support for early consoles with the earlycon parameter. This enables
+ the console before the standard serial driver is probed. The console
+ is enabled when early_param is processed.
+
source "drivers/tty/serial/8250/Kconfig"
comment "Non-8250 serial port support"
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index eedfec40e3dd..e51c680d6044 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -5,6 +5,8 @@
obj-$(CONFIG_SERIAL_CORE) += serial_core.o
obj-$(CONFIG_SERIAL_21285) += 21285.o
+obj-$(CONFIG_SERIAL_EARLYCON) += earlycon.o
+
# These Sparc drivers have to appear before others such as 8250
# which share ttySx minor node space. Otherwise console device
# names change and other unpleasantries.
diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
index 13471dd95793..d983370eb05b 100644
--- a/drivers/tty/serial/altera_uart.c
+++ b/drivers/tty/serial/altera_uart.c
@@ -231,7 +231,9 @@ static void altera_uart_rx_chars(struct altera_uart *pp)
flag);
}
+ spin_unlock(&port->lock);
tty_flip_buffer_push(&port->state->port);
+ spin_lock(&port->lock);
}
static void altera_uart_tx_chars(struct altera_uart *pp)
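The unlock/push/lock dance above is presumably needed because tty_flip_buffer_push() may call back into the driver and re-take port->lock; the samsung.c hunk further below applies the same pattern.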
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
new file mode 100644
index 000000000000..4f27f788ac6f
--- /dev/null
+++ b/drivers/tty/serial/earlycon.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Rob Herring <robh@kernel.org>
+ *
+ * Based on 8250 earlycon:
+ * (c) Copyright 2004 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/serial_core.h>
+
+#ifdef CONFIG_FIX_EARLYCON_MEM
+#include <asm/fixmap.h>
+#endif
+
+#include <asm/serial.h>
+
+static struct console early_con = {
+ .name = "uart", /* 8250 console switch requires this name */
+ .flags = CON_PRINTBUFFER | CON_BOOT,
+ .index = -1,
+};
+
+static struct earlycon_device early_console_dev = {
+ .con = &early_con,
+};
+
+static void __iomem * __init earlycon_map(unsigned long paddr, size_t size)
+{
+ void __iomem *base;
+#ifdef CONFIG_FIX_EARLYCON_MEM
+ set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK);
+ base = (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
+ base += paddr & ~PAGE_MASK;
+#else
+ base = ioremap(paddr, size);
+#endif
+ if (!base)
+ pr_err("%s: Couldn't map 0x%llx\n", __func__,
+ (unsigned long long)paddr);
+
+ return base;
+}
+
+static int __init parse_options(struct earlycon_device *device,
+ char *options)
+{
+ struct uart_port *port = &device->port;
+ int mmio, mmio32, length, ret;
+ unsigned long addr;
+
+ if (!options)
+ return -ENODEV;
+
+ mmio = !strncmp(options, "mmio,", 5);
+ mmio32 = !strncmp(options, "mmio32,", 7);
+ if (mmio || mmio32) {
+ port->iotype = (mmio ? UPIO_MEM : UPIO_MEM32);
+ options += mmio ? 5 : 7;
+ ret = kstrtoul(options, 0, &addr);
+ if (ret)
+ return ret;
+ port->mapbase = addr;
+ if (mmio32)
+ port->regshift = 2;
+ } else if (!strncmp(options, "io,", 3)) {
+ port->iotype = UPIO_PORT;
+ options += 3;
+ ret = kstrtoul(options, 0, &addr);
+ if (ret)
+ return ret;
+ port->iobase = addr;
+ mmio = 0;
+ } else if (!strncmp(options, "0x", 2)) {
+ port->iotype = UPIO_MEM;
+ ret = kstrtoul(options, 0, &addr);
+ if (ret)
+ return ret;
+ port->mapbase = addr;
+ } else {
+ return -EINVAL;
+ }
+
+ port->uartclk = BASE_BAUD * 16;
+
+ options = strchr(options, ',');
+ if (options) {
+ options++;
+ ret = kstrtouint(options, 0, &device->baud);
+ if (ret)
+ return ret;
+ length = min(strcspn(options, " ") + 1,
+ (size_t)(sizeof(device->options)));
+ strlcpy(device->options, options, length);
+ }
+
+ if (mmio || mmio32)
+ pr_info("Early serial console at MMIO%s 0x%llx (options '%s')\n",
+ mmio32 ? "32" : "",
+ (unsigned long long)port->mapbase,
+ device->options);
+ else
+ pr_info("Early serial console at I/O port 0x%lx (options '%s')\n",
+ port->iobase,
+ device->options);
+
+ return 0;
+}
+
+int __init setup_earlycon(char *buf, const char *match,
+ int (*setup)(struct earlycon_device *, const char *))
+{
+ int err;
+ size_t len;
+ struct uart_port *port = &early_console_dev.port;
+
+ if (!buf || !match || !setup)
+ return 0;
+
+ len = strlen(match);
+ if (strncmp(buf, match, len))
+ return 0;
+ if (buf[len] && (buf[len] != ','))
+ return 0;
+
+ buf += len + 1;
+
+ err = parse_options(&early_console_dev, buf);
+ /* On parsing error, pass the options buf to the setup function */
+ if (!err)
+ buf = NULL;
+
+ if (port->mapbase)
+ port->membase = earlycon_map(port->mapbase, 64);
+
+ early_console_dev.con->data = &early_console_dev;
+ err = setup(&early_console_dev, buf);
+ if (err < 0)
+ return err;
+ if (!early_console_dev.con->write)
+ return -ENODEV;
+
+ register_console(early_console_dev.con);
+ return 0;
+}
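Given parse_options() above, the earlycon= parameter accepts forms like the following; the addresses and baud rate are placeholders, and the baud field must be a plain number since it is parsed with kstrtouint():

	earlycon=uart8250,mmio,0x1c090000,115200
	earlycon=uart8250,mmio32,0x1c090000
	earlycon=uart,io,0x3f8
	earlycon=uart8250,0x1c021000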
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index 0c8a9fa2be6c..aaa0df1f2fc6 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -249,6 +249,8 @@ s3c24xx_serial_rx_chars(int irq, void *dev_id)
ufcon |= S3C2410_UFCON_RESETRX;
wr_regl(port, S3C2410_UFCON, ufcon);
rx_enabled(port) = 1;
+ spin_unlock_irqrestore(&port->lock,
+ flags);
goto out;
}
continue;
@@ -297,10 +299,11 @@ s3c24xx_serial_rx_chars(int irq, void *dev_id)
ignore_char:
continue;
}
+
+ spin_unlock_irqrestore(&port->lock, flags);
tty_flip_buffer_push(&port->state->port);
out:
- spin_unlock_irqrestore(&port->lock, flags);
return IRQ_HANDLED;
}
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index 92e1dc94ecc8..73f62caa8609 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -2,59 +2,15 @@
# USB device configuration
#
-# many non-PCI SOC chips embed OHCI
+# These are unused now, remove them once they are no longer selected
config USB_ARCH_HAS_OHCI
- boolean
- # ARM:
- default y if SA1111
- default y if ARCH_OMAP
- default y if ARCH_S3C24XX
- default y if PXA27x
- default y if PXA3xx
- default y if ARCH_EP93XX
- default y if ARCH_AT91
- default y if MFD_TC6393XB
- default y if ARCH_W90X900
- default y if ARCH_DAVINCI_DA8XX
- default y if ARCH_CNS3XXX
- default y if PLAT_SPEAR
- default y if ARCH_EXYNOS
- # PPC:
- default y if STB03xxx
- default y if PPC_MPC52xx
- # MIPS:
- default y if MIPS_ALCHEMY
- default y if MACH_JZ4740
- # more:
- default PCI
-
-# some non-PCI hcds implement EHCI
+ bool
+
config USB_ARCH_HAS_EHCI
- boolean
- default y if FSL_SOC
- default y if PPC_MPC512x
- default y if ARCH_IXP4XX
- default y if ARCH_W90X900
- default y if ARCH_AT91
- default y if ARCH_MXC
- default y if ARCH_MXS
- default y if ARCH_OMAP3
- default y if ARCH_CNS3XXX
- default y if ARCH_VT8500
- default y if PLAT_SPEAR
- default y if PLAT_S5P
- default y if ARCH_MSM
- default y if MICROBLAZE
- default y if SPARC_LEON
- default y if ARCH_MMP
- default y if MACH_LOONGSON1
- default y if PLAT_ORION
- default PCI
-
-# some non-PCI HCDs implement xHCI
+ bool
+
config USB_ARCH_HAS_XHCI
- boolean
- default PCI
+ bool
menuconfig USB_SUPPORT
bool "USB support"
@@ -71,19 +27,8 @@ config USB_COMMON
default y
depends on USB || USB_GADGET
-# Host-side USB depends on having a host controller
-# NOTE: dummy_hcd is always an option, but it's ignored here ...
-# NOTE: SL-811 option should be board-specific ...
config USB_ARCH_HAS_HCD
- boolean
- default y if USB_ARCH_HAS_OHCI
- default y if USB_ARCH_HAS_EHCI
- default y if USB_ARCH_HAS_XHCI
- default y if PCMCIA && !M32R # sl811_cs
- default y if ARM # SL-811
- default y if BLACKFIN # SL-811
- default y if SUPERH # r8a66597-hcd
- default PCI
+ def_bool y
# ARM SA1111 chips have a non-PCI based "OHCI-compatible" USB host interface.
config USB
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 344d5e2f87d7..8bc8e066c881 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -17,7 +17,6 @@ config USB_C67X00_HCD
config USB_XHCI_HCD
tristate "xHCI HCD (USB 3.0) support"
- depends on USB_ARCH_HAS_XHCI
---help---
The eXtensible Host Controller Interface (xHCI) is standard for USB 3.0
"SuperSpeed" host controller hardware.
@@ -43,7 +42,6 @@ endif # USB_XHCI_HCD
config USB_EHCI_HCD
tristate "EHCI HCD (USB 2.0) support"
- depends on USB_ARCH_HAS_EHCI
---help---
The Enhanced Host Controller Interface (EHCI) is standard for USB 2.0
"high speed" (480 Mbit/sec, 60 Mbyte/sec) host controller hardware.
@@ -177,6 +175,13 @@ config USB_EHCI_HCD_SPEAR
Enables support for the on-chip EHCI controller on
ST SPEAr chips.
+config USB_EHCI_HCD_SYNOPSYS
+ tristate "Support for Synopsys Host-AHB USB 2.0 controller"
+ depends on USB_EHCI_HCD
+ ---help---
+ Enable support for on-chip USB controllers based on DesignWare USB 2.0
+ Host-AHB Controller IP from Synopsys.
+
config USB_EHCI_HCD_AT91
tristate "Support for Atmel on-chip EHCI USB controller"
depends on USB_EHCI_HCD && ARCH_AT91
@@ -347,7 +352,6 @@ config USB_ISP1362_HCD
config USB_OHCI_HCD
tristate "OHCI HCD support"
- depends on USB_ARCH_HAS_OHCI
select ISP1301_OMAP if MACH_OMAP_H2 || MACH_OMAP_H3
depends on USB_ISP1301 || !ARCH_LPC32XX
---help---
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index 4fb73c156d72..70df7c8c6bcd 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_USB_EHCI_HCD_OMAP) += ehci-omap.o
obj-$(CONFIG_USB_EHCI_HCD_ORION) += ehci-orion.o
obj-$(CONFIG_USB_EHCI_HCD_SPEAR) += ehci-spear.o
obj-$(CONFIG_USB_EHCI_S5P) += ehci-s5p.o
+obj-$(CONFIG_USB_EHCI_HCD_SYNOPSYS) += ehci-h20ahb.o
obj-$(CONFIG_USB_EHCI_HCD_AT91) += ehci-atmel.o
obj-$(CONFIG_USB_EHCI_MSM) += ehci-msm.o
diff --git a/drivers/usb/host/ehci-h20ahb.c b/drivers/usb/host/ehci-h20ahb.c
new file mode 100644
index 000000000000..7724bab1828b
--- /dev/null
+++ b/drivers/usb/host/ehci-h20ahb.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (C) 2007-2013 Texas Instruments, Inc.
+ * Author: Vikram Pandita <vikram.pandita@ti.com>
+ * Author: Anand Gadiyar <gadiyar@ti.com>
+ * Author: Keshava Munegowda <keshava_mgowda@ti.com>
+ * Author: Roger Quadros <rogerq@ti.com>
+ *
+ * Copyright (C) 2009 Nokia Corporation
+ * Contact: Felipe Balbi <felipe.balbi@nokia.com>
+ *
+ * Based on ehci-omap.c - driver for USBHOST on OMAP3/4 processors
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/usb/ulpi.h>
+#include <linux/pm_runtime.h>
+#include <linux/gpio.h>
+#include <linux/clk.h>
+#include <linux/usb.h>
+#include <linux/usb/hcd.h>
+#include <linux/of.h>
+#include <linux/dma-mapping.h>
+
+#include "ehci.h"
+
+#define H20AHB_HS_USB_PORTS 1
+
+/* EHCI Synopsys-specific Register Set */
+#define EHCI_INSNREG04 (0xA0)
+#define EHCI_INSNREG04_DISABLE_UNSUSPEND (1 << 5)
+#define EHCI_INSNREG05_ULPI (0xA4)
+#define EHCI_INSNREG05_ULPI_CONTROL_SHIFT 31
+#define EHCI_INSNREG05_ULPI_PORTSEL_SHIFT 24
+#define EHCI_INSNREG05_ULPI_OPSEL_SHIFT 22
+#define EHCI_INSNREG05_ULPI_REGADD_SHIFT 16
+#define EHCI_INSNREG05_ULPI_EXTREGADD_SHIFT 8
+#define EHCI_INSNREG05_ULPI_WRDATA_SHIFT 0
+
+#define DRIVER_DESC "H20AHB-EHCI Host Controller driver"
+
+static const char hcd_name[] = "ehci-h20ahb";
+
+/*-------------------------------------------------------------------------*/
+
+struct h20ahb_hcd {
+ struct usb_phy *phy[H20AHB_HS_USB_PORTS]; /* one PHY for each port */
+ int nports;
+};
+
+static inline void ehci_write(void __iomem *base, u32 reg, u32 val)
+{
+ writel_relaxed(val, base + reg);
+}
+
+static inline u32 ehci_read(void __iomem *base, u32 reg)
+{
+ return readl_relaxed(base + reg);
+}
+
+/* configure so an HC device and id are always provided */
+/* always called with process context; sleeping is OK */
+
+static struct hc_driver __read_mostly ehci_h20ahb_hc_driver;
+
+static const struct ehci_driver_overrides ehci_h20ahb_overrides __initdata = {
+ .extra_priv_size = sizeof(struct h20ahb_hcd),
+};
+
+static int ehci_h20ahb_phy_read(struct usb_phy *x, u32 reg)
+{
+ /* Start a ULPI register read through the viewport, then spin until
+  * the controller clears the control bit; the low byte of the
+  * viewport then holds the data that was read. */
+ u32 val = (1 << EHCI_INSNREG05_ULPI_CONTROL_SHIFT) |
+ (1 << EHCI_INSNREG05_ULPI_PORTSEL_SHIFT) |
+ (3 << EHCI_INSNREG05_ULPI_OPSEL_SHIFT) |
+ (reg << EHCI_INSNREG05_ULPI_REGADD_SHIFT);
+ ehci_write(x->io_priv, 0, val);
+ while ((val = ehci_read(x->io_priv, 0)) &
+ (1 << EHCI_INSNREG05_ULPI_CONTROL_SHIFT))
+ ;
+ return val & 0xff;
+}
+
+static int ehci_h20ahb_phy_write(struct usb_phy *x, u32 val, u32 reg)
+{
+ /* Post a ULPI register write through the viewport and wait for the
+  * controller to complete it. */
+ u32 v = (1 << EHCI_INSNREG05_ULPI_CONTROL_SHIFT) |
+ (1 << EHCI_INSNREG05_ULPI_PORTSEL_SHIFT) |
+ (2 << EHCI_INSNREG05_ULPI_OPSEL_SHIFT) |
+ (reg << EHCI_INSNREG05_ULPI_REGADD_SHIFT) |
+ (val & 0xff);
+ ehci_write(x->io_priv, 0, v);
+ while ((v = ehci_read(x->io_priv, 0)) &
+ (1 << EHCI_INSNREG05_ULPI_CONTROL_SHIFT))
+ ;
+ return 0;
+}
+
+static struct usb_phy_io_ops ehci_h20ahb_phy_io_ops = {
+ .read = ehci_h20ahb_phy_read,
+ .write = ehci_h20ahb_phy_write,
+};
+
+
+/**
+ * ehci_hcd_h20ahb_probe - initialize Synopsys-based HCDs
+ * @pdev: USB Host Controller being probed
+ *
+ * Allocates basic resources for this USB host controller, and
+ * then invokes the start() method for the HCD associated with it
+ * through the hotplug entry's driver_data.
+ */
+static int ehci_hcd_h20ahb_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ struct usb_hcd *hcd;
+ void __iomem *regs;
+ int ret;
+ int irq;
+ int i;
+ struct h20ahb_hcd *h20ahb;
+
+ if (usb_disabled())
+ return -ENODEV;
+
+ /* if (!dev->parent) {
+ dev_err(dev, "Missing parent device\n");
+ return -ENODEV;
+ }*/
+
+ /* For DT boot, get platform data from parent. i.e. usbhshost */
+ /*if (dev->of_node) {
+ pdata = dev_get_platdata(dev->parent);
+ dev->platform_data = pdata;
+ }
+
+ if (!pdata) {
+ dev_err(dev, "Missing platform data\n");
+ return -ENODEV;
+ }*/
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(dev, "EHCI irq failed\n");
+ return -ENODEV;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
+
+ /*
+ * Right now device-tree probed devices don't get dma_mask set.
+ * Since shared usb code relies on it, set it here for now.
+ * Once we have dma capability bindings this can go away.
+ */
+ ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
+ if (ret)
+ return ret;
+
+ ret = -ENODEV;
+ hcd = usb_create_hcd(&ehci_h20ahb_hc_driver, dev,
+ dev_name(dev));
+ if (!hcd) {
+ dev_err(dev, "Failed to create HCD\n");
+ return -ENOMEM;
+ }
+
+ hcd->rsrc_start = res->start;
+ hcd->rsrc_len = resource_size(res);
+ hcd->regs = regs;
+ hcd_to_ehci(hcd)->caps = regs;
+
+ h20ahb = (struct h20ahb_hcd *)hcd_to_ehci(hcd)->priv;
+ h20ahb->nports = 1;
+
+ platform_set_drvdata(pdev, hcd);
+
+ /* get the PHY devices if needed */
+ for (i = 0 ; i < h20ahb->nports ; i++) {
+ struct usb_phy *phy;
+
+ /* get the PHY device */
+#if 0
+ if (dev->of_node)
+ phy = devm_usb_get_phy_by_phandle(dev, "phys", i);
+ else
+ phy = devm_usb_get_phy_dev(dev, i);
+#endif
+ phy = otg_ulpi_create(&ehci_h20ahb_phy_io_ops, 0);
+ if (IS_ERR(phy)) {
+ ret = PTR_ERR(phy);
+ dev_err(dev, "Can't get PHY device for port %d: %d\n",
+ i, ret);
+ goto err_phy;
+ }
+ phy->dev = dev;
+ usb_add_phy_dev(phy);
+
+ h20ahb->phy[i] = phy;
+ phy->io_priv = hcd->regs + EHCI_INSNREG05_ULPI;
+
+#if 0
+ usb_phy_init(h20ahb->phy[i]);
+ /* bring PHY out of suspend */
+ usb_phy_set_suspend(h20ahb->phy[i], 0);
+#endif
+ }
+
+ /* make the first port's phy the one used by hcd as well */
+ hcd->phy = h20ahb->phy[0];
+
+ pm_runtime_enable(dev);
+ pm_runtime_get_sync(dev);
+
+ /*
+ * An undocumented "feature" in the H20AHB EHCI controller
+ * causes suspended ports to be taken out of suspend when
+ * the USBCMD.Run/Stop bit is cleared (for example when
+ * we do ehci_bus_suspend).
+ * This breaks suspend-resume if the root-hub is allowed
+ * to suspend. Writing 1 to this undocumented register bit
+ * disables this feature and restores normal behavior.
+ */
+ ehci_write(regs, EHCI_INSNREG04,
+ EHCI_INSNREG04_DISABLE_UNSUSPEND);
+
+ ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
+ if (ret) {
+ dev_err(dev, "failed to add hcd with err %d\n", ret);
+ goto err_pm_runtime;
+ }
+ device_wakeup_enable(hcd->self.controller);
+
+ /*
+ * Bring PHYs out of reset for non PHY modes.
+ * Even though HSIC mode is a PHY-less mode, the reset
+ * line exists between the chips and can be modelled
+ * as a PHY device for reset control.
+ */
+ for (i = 0; i < h20ahb->nports; i++) {
+ usb_phy_init(h20ahb->phy[i]);
+ /* bring PHY out of suspend */
+ usb_phy_set_suspend(h20ahb->phy[i], 0);
+ }
+
+ return 0;
+
+err_pm_runtime:
+ pm_runtime_put_sync(dev);
+
+err_phy:
+ for (i = 0; i < h20ahb->nports; i++) {
+ if (h20ahb->phy[i])
+ usb_phy_shutdown(h20ahb->phy[i]);
+ }
+
+ usb_put_hcd(hcd);
+
+ return ret;
+}
+
+
+/**
+ * ehci_hcd_h20ahb_remove - shutdown processing for EHCI HCDs
+ * @pdev: USB Host Controller being removed
+ *
+ * Reverses the effect of ehci_hcd_h20ahb_probe(), first invoking
+ * the HCD's stop() method. It is always called from a thread
+ * context, normally "rmmod", "apmd", or something similar.
+ */
+static int ehci_hcd_h20ahb_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct usb_hcd *hcd = dev_get_drvdata(dev);
+ struct h20ahb_hcd *h20ahb = (struct h20ahb_hcd *)hcd_to_ehci(hcd)->priv;
+ int i;
+
+ usb_remove_hcd(hcd);
+
+ for (i = 0; i < h20ahb->nports; i++) {
+ if (h20ahb->phy[i])
+ usb_phy_shutdown(h20ahb->phy[i]);
+ }
+
+ usb_put_hcd(hcd);
+ pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
+
+ return 0;
+}
+
+static const struct of_device_id h20ahb_ehci_dt_ids[] = {
+ { .compatible = "snps,ehci-h20ahb" },
+ { }
+};
+
+MODULE_DEVICE_TABLE(of, h20ahb_ehci_dt_ids);
+
+static struct platform_driver ehci_hcd_h20ahb_driver = {
+ .probe = ehci_hcd_h20ahb_probe,
+ .remove = ehci_hcd_h20ahb_remove,
+ .shutdown = usb_hcd_platform_shutdown,
+ /*.suspend = ehci_hcd_h20ahb_suspend, */
+ /*.resume = ehci_hcd_h20ahb_resume, */
+ .driver = {
+ .name = hcd_name,
+ .of_match_table = h20ahb_ehci_dt_ids,
+ }
+};
+
+/*-------------------------------------------------------------------------*/
+
+static int __init ehci_h20ahb_init(void)
+{
+ if (usb_disabled())
+ return -ENODEV;
+
+ pr_info("%s: " DRIVER_DESC "\n", hcd_name);
+
+ ehci_init_driver(&ehci_h20ahb_hc_driver, &ehci_h20ahb_overrides);
+ return platform_driver_register(&ehci_hcd_h20ahb_driver);
+}
+module_init(ehci_h20ahb_init);
+
+static void __exit ehci_h20ahb_cleanup(void)
+{
+ platform_driver_unregister(&ehci_hcd_h20ahb_driver);
+}
+module_exit(ehci_h20ahb_cleanup);
+
+MODULE_ALIAS("platform:ehci-h20ahb");
+MODULE_AUTHOR("Liviu Dudau <Liviu.Dudau@arm.com>");
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 5a160063176d..f1c3ad69ca54 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -591,11 +591,16 @@ static int ehci_run (struct usb_hcd *hcd)
*/
hcc_params = ehci_readl(ehci, &ehci->caps->hcc_params);
if (HCC_64BIT_ADDR(hcc_params)) {
- ehci_writel(ehci, 0, &ehci->regs->segment);
-#if 0
-// this is deeply broken on almost all architectures
+#ifdef CONFIG_ARM64
+ ehci_writel(ehci, ehci->periodic_dma >> 32, &ehci->regs->segment);
+ /*
+ * this is deeply broken on almost all architectures
+ * but arm64 can use it so enable it
+ */
if (!dma_set_mask(hcd->self.controller, DMA_BIT_MASK(64)))
ehci_info(ehci, "enabled 64bit DMA\n");
+#else
+ ehci_writel(ehci, 0, &ehci->regs->segment);
#endif
}
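On arm64 the upper 32 address bits of the periodic schedule are programmed into the segment register before the 64-bit DMA mask is requested, so EHCI data-structure addresses stay consistent; all other architectures keep the old behaviour of forcing the segment register to zero.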
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 865946cde765..8c57e970bb98 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -1196,25 +1196,6 @@ MODULE_LICENSE ("GPL");
#define PLATFORM_DRIVER ohci_platform_driver
#endif
-#if !defined(PCI_DRIVER) && \
- !defined(PLATFORM_DRIVER) && \
- !defined(OMAP1_PLATFORM_DRIVER) && \
- !defined(OMAP3_PLATFORM_DRIVER) && \
- !defined(OF_PLATFORM_DRIVER) && \
- !defined(SA1111_DRIVER) && \
- !defined(PS3_SYSTEM_BUS_DRIVER) && \
- !defined(SM501_OHCI_DRIVER) && \
- !defined(TMIO_OHCI_DRIVER) && \
- !defined(S3C2410_PLATFORM_DRIVER) && \
- !defined(EXYNOS_PLATFORM_DRIVER) && \
- !defined(EP93XX_PLATFORM_DRIVER) && \
- !defined(AT91_PLATFORM_DRIVER) && \
- !defined(NXP_PLATFORM_DRIVER) && \
- !defined(DAVINCI_PLATFORM_DRIVER) && \
- !defined(SPEAR_PLATFORM_DRIVER)
-#error "missing bus glue for ohci-hcd"
-#endif
-
static int __init ohci_hcd_mod_init(void)
{
int retval = 0;
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 2311b1e4e43c..be41733a5eac 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -198,7 +198,7 @@ config USB_RCAR_PHY
config USB_ULPI
bool "Generic ULPI Transceiver Driver"
- depends on ARM
+ depends on ARM || ARM64
help
Enable this to support ULPI connected USB OTG transceivers which
are likely found on embedded boards.
diff --git a/drivers/usb/phy/phy-ulpi.c b/drivers/usb/phy/phy-ulpi.c
index 17ea3f271bd8..4e3877c329f2 100644
--- a/drivers/usb/phy/phy-ulpi.c
+++ b/drivers/usb/phy/phy-ulpi.c
@@ -48,6 +48,7 @@ static struct ulpi_info ulpi_ids[] = {
ULPI_INFO(ULPI_ID(0x04cc, 0x1504), "NXP ISP1504"),
ULPI_INFO(ULPI_ID(0x0424, 0x0006), "SMSC USB331x"),
ULPI_INFO(ULPI_ID(0x0424, 0x0007), "SMSC USB3320"),
+ ULPI_INFO(ULPI_ID(0x0424, 0x0009), "SMSC USB334x"),
ULPI_INFO(ULPI_ID(0x0451, 0x1507), "TI TUSB1210"),
};
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 2e937bdace6f..29a5121ce7fd 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -39,6 +39,11 @@ config VIDEOMODE_HELPERS
config HDMI
bool
+config VEXPRESS_DVI_CONTROL
+ bool "Versatile Express DVI control"
+ depends on FB && VEXPRESS_CONFIG
+ default y
+
menuconfig FB
tristate "Support for frame buffer devices"
---help---
@@ -312,7 +317,8 @@ config FB_PM2_FIFO_DISCONNECT
config FB_ARMCLCD
tristate "ARM PrimeCell PL110 support"
- depends on FB && ARM && ARM_AMBA
+ depends on ARM || ARM64 || COMPILE_TEST
+ depends on FB && ARM_AMBA
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
@@ -326,6 +332,21 @@ config FB_ARMCLCD
here and read <file:Documentation/kbuild/modules.txt>. The module
will be called amba-clcd.
+config FB_ARMHDLCD
+ tristate "ARM High Definition LCD support"
+ depends on FB && ARM
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This framebuffer device driver is for the ARM High Definition
+ Colour LCD controller.
+
+ If you want to compile this as a module (=code which can be
+ inserted into and removed from the running kernel), say M
+ here and read <file:Documentation/kbuild/modules.txt>. The module
+ will be called arm-hdlcd.
+
config FB_ACORN
bool "Acorn VIDC support"
depends on (FB = y) && ARM && ARCH_ACORN
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index e8bae8dd4804..33869eea4981 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_FB_ATMEL) += atmel_lcdfb.o
obj-$(CONFIG_FB_PVR2) += pvr2fb.o
obj-$(CONFIG_FB_VOODOO1) += sstfb.o
obj-$(CONFIG_FB_ARMCLCD) += amba-clcd.o
+obj-$(CONFIG_FB_ARMHDLCD) += arm-hdlcd.o
obj-$(CONFIG_FB_GOLDFISH) += goldfishfb.o
obj-$(CONFIG_FB_68328) += 68328fb.o
obj-$(CONFIG_FB_GBE) += gbefb.o
@@ -177,3 +178,6 @@ obj-$(CONFIG_VIDEOMODE_HELPERS) += display_timing.o videomode.o
ifeq ($(CONFIG_OF),y)
obj-$(CONFIG_VIDEOMODE_HELPERS) += of_display_timing.o of_videomode.o
endif
+
+# platform specific output drivers
+obj-$(CONFIG_VEXPRESS_DVI_CONTROL) += vexpress-dvi.o
diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index 0a2cce7285be..dba653f6badc 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -16,7 +16,10 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
+#include <linux/of.h>
#include <linux/fb.h>
#include <linux/init.h>
#include <linux/ioport.h>
@@ -30,8 +33,20 @@
#define to_clcd(info) container_of(info, struct clcd_fb, fb)
+#ifdef CONFIG_ARM
+#define clcdfb_dma_alloc dma_alloc_writecombine
+#define clcdfb_dma_free dma_free_writecombine
+#define clcdfb_dma_mmap dma_mmap_writecombine
+#else
+#define clcdfb_dma_alloc dma_alloc_coherent
+#define clcdfb_dma_free dma_free_coherent
+#define clcdfb_dma_mmap dma_mmap_coherent
+#endif
+
/* This is limited to 16 characters when displayed by X startup */
static const char *clcd_name = "CLCD FB";
+static char *def_mode;
+module_param_named(mode, def_mode, charp, 0);
/*
* Unfortunately, the enable/disable functions may be called either from
@@ -392,6 +407,44 @@ static int clcdfb_blank(int blank_mode, struct fb_info *info)
return 0;
}
+int clcdfb_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma)
+{
+ return clcdfb_dma_mmap(&fb->dev->dev, vma,
+ fb->fb.screen_base,
+ fb->fb.fix.smem_start,
+ fb->fb.fix.smem_len);
+}
+
+int clcdfb_mmap_io(struct clcd_fb *fb, struct vm_area_struct *vma)
+{
+ unsigned long user_count, count, pfn, off;
+
+ user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ count = PAGE_ALIGN(fb->fb.fix.smem_len) >> PAGE_SHIFT;
+ pfn = fb->fb.fix.smem_start >> PAGE_SHIFT;
+ off = vma->vm_pgoff;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (off < count && user_count <= (count - off))
+ return remap_pfn_range(vma, vma->vm_start, pfn + off,
+ user_count << PAGE_SHIFT,
+ vma->vm_page_prot);
+
+ return -ENXIO;
+}
+
+void clcdfb_remove_dma(struct clcd_fb *fb)
+{
+ clcdfb_dma_free(&fb->dev->dev, fb->fb.fix.smem_len,
+ fb->fb.screen_base, fb->fb.fix.smem_start);
+}
+
+void clcdfb_remove_io(struct clcd_fb *fb)
+{
+ iounmap(fb->fb.screen_base);
+}
+
static int clcdfb_mmap(struct fb_info *info,
struct vm_area_struct *vma)
{
@@ -542,14 +595,247 @@ static int clcdfb_register(struct clcd_fb *fb)
return ret;
}
+struct string_lookup {
+ const char *string;
+ const u32 val;
+};
+
+static struct string_lookup vmode_lookups[] = {
+ { "FB_VMODE_NONINTERLACED", FB_VMODE_NONINTERLACED},
+ { "FB_VMODE_INTERLACED", FB_VMODE_INTERLACED},
+ { "FB_VMODE_DOUBLE", FB_VMODE_DOUBLE},
+ { "FB_VMODE_ODD_FLD_FIRST", FB_VMODE_ODD_FLD_FIRST},
+ { NULL, 0 },
+};
+
+static struct string_lookup tim2_lookups[] = {
+ { "TIM2_CLKSEL", TIM2_CLKSEL},
+ { "TIM2_IVS", TIM2_IVS},
+ { "TIM2_IHS", TIM2_IHS},
+ { "TIM2_IPC", TIM2_IPC},
+ { "TIM2_IOE", TIM2_IOE},
+ { "TIM2_BCD", TIM2_BCD},
+ { NULL, 0},
+};
+static struct string_lookup cntl_lookups[] = {
+ {"CNTL_LCDEN", CNTL_LCDEN},
+ {"CNTL_LCDBPP1", CNTL_LCDBPP1},
+ {"CNTL_LCDBPP2", CNTL_LCDBPP2},
+ {"CNTL_LCDBPP4", CNTL_LCDBPP4},
+ {"CNTL_LCDBPP8", CNTL_LCDBPP8},
+ {"CNTL_LCDBPP16", CNTL_LCDBPP16},
+ {"CNTL_LCDBPP16_565", CNTL_LCDBPP16_565},
+ {"CNTL_LCDBPP16_444", CNTL_LCDBPP16_444},
+ {"CNTL_LCDBPP24", CNTL_LCDBPP24},
+ {"CNTL_LCDBW", CNTL_LCDBW},
+ {"CNTL_LCDTFT", CNTL_LCDTFT},
+ {"CNTL_LCDMONO8", CNTL_LCDMONO8},
+ {"CNTL_LCDDUAL", CNTL_LCDDUAL},
+ {"CNTL_BGR", CNTL_BGR},
+ {"CNTL_BEBO", CNTL_BEBO},
+ {"CNTL_BEPO", CNTL_BEPO},
+ {"CNTL_LCDPWR", CNTL_LCDPWR},
+ {"CNTL_LCDVCOMP(1)", CNTL_LCDVCOMP(1)},
+ {"CNTL_LCDVCOMP(2)", CNTL_LCDVCOMP(2)},
+ {"CNTL_LCDVCOMP(3)", CNTL_LCDVCOMP(3)},
+ {"CNTL_LCDVCOMP(4)", CNTL_LCDVCOMP(4)},
+ {"CNTL_LCDVCOMP(5)", CNTL_LCDVCOMP(5)},
+ {"CNTL_LCDVCOMP(6)", CNTL_LCDVCOMP(6)},
+ {"CNTL_LCDVCOMP(7)", CNTL_LCDVCOMP(7)},
+ {"CNTL_LDMAFIFOTIME", CNTL_LDMAFIFOTIME},
+ {"CNTL_WATERMARK", CNTL_WATERMARK},
+ { NULL, 0},
+};
+static struct string_lookup caps_lookups[] = {
+ {"CLCD_CAP_RGB444", CLCD_CAP_RGB444},
+ {"CLCD_CAP_RGB5551", CLCD_CAP_RGB5551},
+ {"CLCD_CAP_RGB565", CLCD_CAP_RGB565},
+ {"CLCD_CAP_RGB888", CLCD_CAP_RGB888},
+ {"CLCD_CAP_BGR444", CLCD_CAP_BGR444},
+ {"CLCD_CAP_BGR5551", CLCD_CAP_BGR5551},
+ {"CLCD_CAP_BGR565", CLCD_CAP_BGR565},
+ {"CLCD_CAP_BGR888", CLCD_CAP_BGR888},
+ {"CLCD_CAP_444", CLCD_CAP_444},
+ {"CLCD_CAP_5551", CLCD_CAP_5551},
+ {"CLCD_CAP_565", CLCD_CAP_565},
+ {"CLCD_CAP_888", CLCD_CAP_888},
+ {"CLCD_CAP_RGB", CLCD_CAP_RGB},
+ {"CLCD_CAP_BGR", CLCD_CAP_BGR},
+ {"CLCD_CAP_ALL", CLCD_CAP_ALL},
+ { NULL, 0},
+};
+
+u32 parse_setting(struct string_lookup *lookup, const char *name)
+{
+ int i = 0;
+ while (lookup[i].string != NULL) {
+ if (strcmp(lookup[i].string, name) == 0)
+ return lookup[i].val;
+ ++i;
+ }
+ return -EINVAL;
+}
+
+u32 get_string_lookup(struct device_node *node, const char *name,
+ struct string_lookup *lookup)
+{
+ const char *string;
+ int count, i, ret = 0;
+
+ count = of_property_count_strings(node, name);
+ if (count >= 0)
+ for (i = 0; i < count; i++)
+ if (of_property_read_string_index(node, name, i,
+ &string) == 0)
+ ret |= parse_setting(lookup, string);
+ return ret;
+}
+
+int get_val(struct device_node *node, const char *string)
+{
+ u32 ret = 0;
+
+ if (of_property_read_u32(node, string, &ret))
+ ret = -1;
+ return ret;
+}
+
+struct clcd_panel *getPanel(struct device_node *node)
+{
+ static struct clcd_panel panel;
+
+ panel.mode.refresh = get_val(node, "refresh");
+ panel.mode.xres = get_val(node, "xres");
+ panel.mode.yres = get_val(node, "yres");
+ panel.mode.pixclock = get_val(node, "pixclock");
+ panel.mode.left_margin = get_val(node, "left_margin");
+ panel.mode.right_margin = get_val(node, "right_margin");
+ panel.mode.upper_margin = get_val(node, "upper_margin");
+ panel.mode.lower_margin = get_val(node, "lower_margin");
+ panel.mode.hsync_len = get_val(node, "hsync_len");
+ panel.mode.vsync_len = get_val(node, "vsync_len");
+ panel.mode.sync = get_val(node, "sync");
+ panel.bpp = get_val(node, "bpp");
+ panel.width = (signed short) get_val(node, "width");
+ panel.height = (signed short) get_val(node, "height");
+
+ panel.mode.vmode = get_string_lookup(node, "vmode", vmode_lookups);
+ panel.tim2 = get_string_lookup(node, "tim2", tim2_lookups);
+ panel.cntl = get_string_lookup(node, "cntl", cntl_lookups);
+ panel.caps = get_string_lookup(node, "caps", caps_lookups);
+
+ return &panel;
+}
+
+struct clcd_panel *clcdfb_get_panel(const char *name)
+{
+ struct device_node *node = NULL;
+ const char *mode;
+ struct clcd_panel *panel = NULL;
+
+ do {
+ node = of_find_compatible_node(node, NULL, "panel");
+ if (node)
+ if (of_property_read_string(node, "mode", &mode) == 0)
+ if (strcmp(mode, name) == 0) {
+ panel = getPanel(node);
+ panel->mode.name = name;
+ }
+ } while (node != NULL);
+
+ return panel;
+}
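clcdfb_get_panel() walks every node with a "panel" compatible and matches its "mode" string against the requested name; getPanel() then reads the timing fields (refresh, xres, yres, pixclock, the four margins, sync lengths, bpp, width, height) as u32 properties and folds the vmode/tim2/cntl/caps string lists through the lookup tables above.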
+
+#ifdef CONFIG_OF
+static int clcdfb_dt_init(struct clcd_fb *fb)
+{
+ int err = 0;
+ struct device_node *node;
+ const char *mode;
+ dma_addr_t dma;
+ u32 use_dma;
+ const __be32 *prop;
+ int len, na, ns;
+ phys_addr_t fb_base, fb_size;
+
+ node = fb->dev->dev.of_node;
+ if (!node)
+ return -ENODEV;
+
+ na = of_n_addr_cells(node);
+ ns = of_n_size_cells(node);
+
+ if (def_mode && strlen(def_mode) > 0) {
+ fb->panel = clcdfb_get_panel(def_mode);
+ if (!fb->panel)
+ printk(KERN_ERR "CLCD: invalid mode specified on the command line (%s)\n", def_mode);
+ }
+
+ if (!fb->panel) {
+ if (WARN_ON(of_property_read_string(node, "mode", &mode)))
+ return -ENODEV;
+ fb->panel = clcdfb_get_panel(mode);
+ }
+
+ if (!fb->panel)
+ return -EINVAL;
+ fb->fb.fix.smem_len = fb->panel->mode.xres * fb->panel->mode.yres * 2;
+
+ fb->board->name = "Device Tree CLCD PL111";
+ fb->board->caps = CLCD_CAP_5551 | CLCD_CAP_565;
+ fb->board->check = clcdfb_check;
+ fb->board->decode = clcdfb_decode;
+
+ if (of_property_read_u32(node, "use_dma", &use_dma))
+ use_dma = 0;
+
+ if (use_dma) {
+ fb->fb.screen_base = clcdfb_dma_alloc(&fb->dev->dev,
+ fb->fb.fix.smem_len,
+ &dma, GFP_KERNEL);
+ if (!fb->fb.screen_base) {
+ pr_err("CLCD: unable to map framebuffer\n");
+ return -ENOMEM;
+ }
+
+ fb->fb.fix.smem_start = dma;
+ fb->board->mmap = clcdfb_mmap_dma;
+ fb->board->remove = clcdfb_remove_dma;
+ } else {
+ prop = of_get_property(node, "framebuffer", &len);
+ if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop)))
+ return -EINVAL;
+
+ fb_base = of_read_number(prop, na);
+ fb_size = of_read_number(prop + na, ns);
+
+ fb->fb.fix.smem_start = fb_base;
+ fb->fb.screen_base = ioremap_wc(fb_base, fb_size);
+ fb->board->mmap = clcdfb_mmap_io;
+ fb->board->remove = clcdfb_remove_io;
+ }
+
+ return err;
+}
+#endif /* CONFIG_OF */
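A board's CLCD node therefore supplies either a use_dma property, in which case the framebuffer is allocated via clcdfb_dma_alloc(), or a "framebuffer" property holding a fixed <base size> region that is then mapped write-combined; the property names follow directly from the code above.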
+
static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id)
{
struct clcd_board *board = dev->dev.platform_data;
struct clcd_fb *fb;
int ret;
- if (!board)
- return -EINVAL;
+ if (!board) {
+#ifdef CONFIG_OF
+ if (dev->dev.of_node) {
+ board = kzalloc(sizeof(struct clcd_board), GFP_KERNEL);
+ if (!board)
+ return -ENOMEM;
+ board->setup = clcdfb_dt_init;
+ } else
+#endif
+ return -EINVAL;
+ }
ret = amba_request_regions(dev, NULL);
if (ret) {
diff --git a/drivers/video/arm-hdlcd.c b/drivers/video/arm-hdlcd.c
new file mode 100644
index 000000000000..cfd631e3dc52
--- /dev/null
+++ b/drivers/video/arm-hdlcd.c
@@ -0,0 +1,844 @@
+/*
+ * drivers/video/arm-hdlcd.c
+ *
+ * Copyright (C) 2011 ARM Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * ARM HDLCD Controller
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/fb.h>
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/memblock.h>
+#include <linux/arm-hdlcd.h>
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif
+
+#include "edid.h"
+
+#ifdef CONFIG_SERIAL_AMBA_PCU_UART
+int get_edid(u8 *msgbuf);
+#endif
+
+#define to_hdlcd_device(info) container_of(info, struct hdlcd_device, fb)
+
+static struct of_device_id hdlcd_of_matches[] = {
+ { .compatible = "arm,hdlcd" },
+ {},
+};
+
+/* Framebuffer size. */
+static unsigned long framebuffer_size;
+
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+static unsigned long buffer_underrun_events;
+static DEFINE_SPINLOCK(hdlcd_underrun_lock);
+
+static void hdlcd_underrun_set(unsigned long val)
+{
+ spin_lock(&hdlcd_underrun_lock);
+ buffer_underrun_events = val;
+ spin_unlock(&hdlcd_underrun_lock);
+}
+
+static unsigned long hdlcd_underrun_get(void)
+{
+ unsigned long val;
+ spin_lock(&hdlcd_underrun_lock);
+ val = buffer_underrun_events;
+ spin_unlock(&hdlcd_underrun_lock);
+ return val;
+}
+
+#ifdef CONFIG_PROC_FS
+static int hdlcd_underrun_show(struct seq_file *m, void *v)
+{
+ unsigned char underrun_string[32];
+ snprintf(underrun_string, 32, "%lu\n", hdlcd_underrun_get());
+ seq_puts(m, underrun_string);
+ return 0;
+}
+
+static int proc_hdlcd_underrun_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, hdlcd_underrun_show, NULL);
+}
+
+static const struct file_operations proc_hdlcd_underrun_operations = {
+ .open = proc_hdlcd_underrun_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int hdlcd_underrun_init(void)
+{
+ hdlcd_underrun_set(0);
+ proc_create("hdlcd_underrun", 0, NULL, &proc_hdlcd_underrun_operations);
+ return 0;
+}
+static void hdlcd_underrun_close(void)
+{
+ remove_proc_entry("hdlcd_underrun", NULL);
+}
+#else
+static int hdlcd_underrun_init(void) { return 0; }
+static void hdlcd_underrun_close(void) { }
+#endif
+#endif
+
+static char *fb_mode = "1680x1050-32@60\0\0\0\0\0";
+
+static struct fb_var_screeninfo cached_var_screeninfo;
+
+static struct fb_videomode hdlcd_default_mode = {
+ .refresh = 60,
+ .xres = 1680,
+ .yres = 1050,
+ .pixclock = 8403,
+ .left_margin = 80,
+ .right_margin = 48,
+ .upper_margin = 21,
+ .lower_margin = 3,
+ .hsync_len = 32,
+ .vsync_len = 6,
+ .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+ .vmode = FB_VMODE_NONINTERLACED
+};
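+/* 8403 ps per pixel is roughly a 119 MHz pixel clock, consistent with the
+ * 1840x1080 total raster above (visible area plus porches and sync pulses)
+ * refreshing at 60 Hz */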
+
+static inline void hdlcd_enable(struct hdlcd_device *hdlcd)
+{
+ dev_dbg(hdlcd->dev, "HDLCD: output enabled\n");
+ writel(1, hdlcd->base + HDLCD_REG_COMMAND);
+}
+
+static inline void hdlcd_disable(struct hdlcd_device *hdlcd)
+{
+ dev_dbg(hdlcd->dev, "HDLCD: output disabled\n");
+ writel(0, hdlcd->base + HDLCD_REG_COMMAND);
+}
+
+static int hdlcd_set_bitfields(struct hdlcd_device *hdlcd,
+ struct fb_var_screeninfo *var)
+{
+ int ret = 0;
+
+ memset(&var->transp, 0, sizeof(var->transp));
+ var->red.msb_right = 0;
+ var->green.msb_right = 0;
+ var->blue.msb_right = 0;
+ var->blue.offset = 0;
+
+ switch (var->bits_per_pixel) {
+ case 8:
+ /* pseudocolor */
+ var->red.length = 8;
+ var->green.length = 8;
+ var->blue.length = 8;
+ break;
+ case 16:
+ /* 565 format */
+ var->red.length = 5;
+ var->green.length = 6;
+ var->blue.length = 5;
+ break;
+ case 32:
+ var->transp.length = 8;
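+		/* fall through - 24 and 32 bpp share the 8:8:8 colour widths */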
+ case 24:
+ var->red.length = 8;
+ var->green.length = 8;
+ var->blue.length = 8;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+	if (!ret) {
+		if (var->bits_per_pixel != 32) {
+			var->green.offset = var->blue.length;
+			var->red.offset = var->green.offset + var->green.length;
+		} else {
+			/* The byte ordering for 32-bit colour used to be
+			 * (msb)<alpha><red><green><blue>(lsb), but that does
+			 * not match what Android expects and produced odd
+			 * colours. Use <alpha><blue><green><red> instead;
+			 * since the layout is reported to the fb core,
+			 * console, X and directfb access should work fine.
+			 */
+			var->red.offset = 0;
+			var->green.offset = var->red.length;
+			var->blue.offset = var->green.offset + var->green.length;
+			var->transp.offset = var->blue.offset + var->blue.length;
+		}
+	}
+
+ return ret;
+}
+
+static int hdlcd_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+ int bytes_per_pixel = var->bits_per_pixel / 8;
+
+#ifdef HDLCD_NO_VIRTUAL_SCREEN
+ var->yres_virtual = var->yres;
+#else
+ var->yres_virtual = 2 * var->yres;
+#endif
+
+ if ((var->xres_virtual * bytes_per_pixel * var->yres_virtual) > hdlcd->fb.fix.smem_len)
+ return -ENOMEM;
+
+ if (var->xres > HDLCD_MAX_XRES || var->yres > HDLCD_MAX_YRES)
+ return -EINVAL;
+
+ /* make sure the bitfields are set appropriately */
+ return hdlcd_set_bitfields(hdlcd, var);
+}
+
+/* prototype */
+static int hdlcd_pan_display(struct fb_var_screeninfo *var,
+ struct fb_info *info);
+
+#define WRITE_HDLCD_REG(reg, value) writel((value), hdlcd->base + (reg))
+#define READ_HDLCD_REG(reg) readl(hdlcd->base + (reg))
+
+static int hdlcd_set_par(struct fb_info *info)
+{
+ struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+ int bytes_per_pixel = hdlcd->fb.var.bits_per_pixel / 8;
+ int polarities;
+ int old_yoffset;
+
+ /* check for shortcuts */
+ old_yoffset = cached_var_screeninfo.yoffset;
+ cached_var_screeninfo.yoffset = info->var.yoffset;
+ if (!memcmp(&info->var, &cached_var_screeninfo,
+ sizeof(struct fb_var_screeninfo))) {
+		if (old_yoffset != info->var.yoffset) {
+			/* only yoffset changed, and it was already
+			 * recorded a couple of lines above
+			 */
+			hdlcd_pan_display(&info->var, info);
+		}
+ /* or no change */
+ return 0;
+ }
+
+ hdlcd->fb.fix.line_length = hdlcd->fb.var.xres * bytes_per_pixel;
+
+ if (hdlcd->fb.var.bits_per_pixel >= 16)
+ hdlcd->fb.fix.visual = FB_VISUAL_TRUECOLOR;
+ else
+ hdlcd->fb.fix.visual = FB_VISUAL_PSEUDOCOLOR;
+
+ memcpy(&cached_var_screeninfo, &info->var, sizeof(struct fb_var_screeninfo));
+
+ polarities = HDLCD_POLARITY_DATAEN |
+#ifndef CONFIG_ARCH_TUSCAN
+ HDLCD_POLARITY_PIXELCLK |
+#endif
+ HDLCD_POLARITY_DATA;
+ polarities |= (hdlcd->fb.var.sync & FB_SYNC_HOR_HIGH_ACT) ? HDLCD_POLARITY_HSYNC : 0;
+ polarities |= (hdlcd->fb.var.sync & FB_SYNC_VERT_HIGH_ACT) ? HDLCD_POLARITY_VSYNC : 0;
+
+ hdlcd_disable(hdlcd);
+
+ WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_LENGTH, hdlcd->fb.var.xres * bytes_per_pixel);
+ WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_PITCH, hdlcd->fb.var.xres * bytes_per_pixel);
+ WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_COUNT, hdlcd->fb.var.yres - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_V_SYNC, hdlcd->fb.var.vsync_len - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_V_BACK_PORCH, hdlcd->fb.var.upper_margin - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_V_DATA, hdlcd->fb.var.yres - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_V_FRONT_PORCH, hdlcd->fb.var.lower_margin - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_H_SYNC, hdlcd->fb.var.hsync_len - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_H_BACK_PORCH, hdlcd->fb.var.left_margin - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_H_DATA, hdlcd->fb.var.xres - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_H_FRONT_PORCH, hdlcd->fb.var.right_margin - 1);
+ WRITE_HDLCD_REG(HDLCD_REG_POLARITIES, polarities);
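+	/* bits [4:3] of PIXEL_FORMAT hold bytes-per-pixel minus one,
+	 * cf. HDLCD_BYTES_PER_PIXEL_MASK (3 << 3) */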
+ WRITE_HDLCD_REG(HDLCD_REG_PIXEL_FORMAT, (bytes_per_pixel - 1) << 3);
+#ifdef HDLCD_RED_DEFAULT_COLOUR
+ WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, (0x00ff0000 | (hdlcd->fb.var.red.length & 0xf) << 8) \
+ | hdlcd->fb.var.red.offset);
+#else
+ WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, ((hdlcd->fb.var.red.length & 0xf) << 8) | hdlcd->fb.var.red.offset);
+#endif
+ WRITE_HDLCD_REG(HDLCD_REG_GREEN_SELECT, ((hdlcd->fb.var.green.length & 0xf) << 8) | hdlcd->fb.var.green.offset);
+ WRITE_HDLCD_REG(HDLCD_REG_BLUE_SELECT, ((hdlcd->fb.var.blue.length & 0xf) << 8) | hdlcd->fb.var.blue.offset);
+
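+	/* fb pixclock is the pixel period in picoseconds, so the expression
+	 * below computes 10^12 / pixclock, i.e. the pixel rate in Hz */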
+ clk_set_rate(hdlcd->clk, (1000000000 / hdlcd->fb.var.pixclock) * 1000);
+ clk_enable(hdlcd->clk);
+
+ hdlcd_enable(hdlcd);
+
+ return 0;
+}
+
+static int hdlcd_setcolreg(unsigned int regno, unsigned int red, unsigned int green,
+ unsigned int blue, unsigned int transp, struct fb_info *info)
+{
+ if (regno < 16) {
+ u32 *pal = info->pseudo_palette;
+
+ pal[regno] = ((red >> 8) << info->var.red.offset) |
+ ((green >> 8) << info->var.green.offset) |
+ ((blue >> 8) << info->var.blue.offset);
+ }
+
+ return 0;
+}
+
+static irqreturn_t hdlcd_irq(int irq, void *data)
+{
+ struct hdlcd_device *hdlcd = data;
+ unsigned long irq_mask, irq_status;
+
+ irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK);
+ irq_status = READ_HDLCD_REG(HDLCD_REG_INT_STATUS);
+
+ /* acknowledge interrupt(s) */
+ WRITE_HDLCD_REG(HDLCD_REG_INT_CLEAR, irq_status);
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+ if (irq_status & HDLCD_INTERRUPT_UNDERRUN) {
+ /* increment the count */
+ hdlcd_underrun_set(hdlcd_underrun_get() + 1);
+ }
+#endif
+ if (irq_status & HDLCD_INTERRUPT_VSYNC) {
+ /* disable future VSYNC interrupts */
+ WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask & ~HDLCD_INTERRUPT_VSYNC);
+
+ complete(&hdlcd->vsync_completion);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int hdlcd_wait_for_vsync(struct fb_info *info)
+{
+ struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+ unsigned long irq_mask;
+ int err;
+
+ /* enable VSYNC interrupt */
+ irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK);
+ WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask | HDLCD_INTERRUPT_VSYNC);
+
+ err = wait_for_completion_interruptible_timeout(&hdlcd->vsync_completion,
+ msecs_to_jiffies(100));
+
+	if (!err)
+		return -ETIMEDOUT;
+	/* interrupted by a signal */
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int hdlcd_blank(int blank_mode, struct fb_info *info)
+{
+ struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+
+ switch (blank_mode) {
+ case FB_BLANK_POWERDOWN:
+ clk_disable(hdlcd->clk);
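+		/* fall through - powering down also disables the controller */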
+ case FB_BLANK_NORMAL:
+ hdlcd_disable(hdlcd);
+ break;
+ case FB_BLANK_UNBLANK:
+ clk_enable(hdlcd->clk);
+ hdlcd_enable(hdlcd);
+ break;
+ case FB_BLANK_VSYNC_SUSPEND:
+ case FB_BLANK_HSYNC_SUSPEND:
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+static void hdlcd_mmap_open(struct vm_area_struct *vma)
+{
+}
+
+static void hdlcd_mmap_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct hdlcd_mmap_ops = {
+ .open = hdlcd_mmap_open,
+ .close = hdlcd_mmap_close,
+};
+
+static int hdlcd_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+ struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+ unsigned long off;
+ unsigned long start;
+ unsigned long len = hdlcd->fb.fix.smem_len;
+
+ if (vma->vm_end - vma->vm_start == 0)
+ return 0;
+ if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
+ return -EINVAL;
+
+ off = vma->vm_pgoff << PAGE_SHIFT;
+ if ((off >= len) || (vma->vm_end - vma->vm_start + off) > len)
+ return -EINVAL;
+
+ start = hdlcd->fb.fix.smem_start;
+ off += start;
+
+ vma->vm_pgoff = off >> PAGE_SHIFT;
+ vma->vm_flags |= VM_IO;
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ vma->vm_ops = &hdlcd_mmap_ops;
+ if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+static int hdlcd_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+
+ hdlcd->fb.var.yoffset = var->yoffset;
+ WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start +
+ (var->yoffset * hdlcd->fb.fix.line_length));
+
+ hdlcd_wait_for_vsync(info);
+
+ return 0;
+}
+
+static int hdlcd_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
+{
+ int err;
+
+ switch (cmd) {
+ case FBIO_WAITFORVSYNC:
+ err = hdlcd_wait_for_vsync(info);
+ break;
+ default:
+ err = -ENOIOCTLCMD;
+ break;
+ }
+
+ return err;
+}
+
+static struct fb_ops hdlcd_ops = {
+ .owner = THIS_MODULE,
+ .fb_check_var = hdlcd_check_var,
+ .fb_set_par = hdlcd_set_par,
+ .fb_setcolreg = hdlcd_setcolreg,
+ .fb_blank = hdlcd_blank,
+ .fb_fillrect = cfb_fillrect,
+ .fb_copyarea = cfb_copyarea,
+ .fb_imageblit = cfb_imageblit,
+ .fb_mmap = hdlcd_mmap,
+ .fb_pan_display = hdlcd_pan_display,
+ .fb_ioctl = hdlcd_ioctl,
+ .fb_compat_ioctl = hdlcd_ioctl
+};
+
+static int hdlcd_setup(struct hdlcd_device *hdlcd)
+{
+ u32 version;
+ int err = -EFAULT;
+
+ hdlcd->fb.device = hdlcd->dev;
+
+ hdlcd->clk = clk_get(hdlcd->dev, NULL);
+ if (IS_ERR(hdlcd->clk)) {
+ dev_err(hdlcd->dev, "HDLCD: unable to find clock data\n");
+ return PTR_ERR(hdlcd->clk);
+ }
+
+ err = clk_prepare(hdlcd->clk);
+ if (err)
+ goto clk_prepare_err;
+
+ hdlcd->base = ioremap_nocache(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len);
+	if (!hdlcd->base) {
+		dev_err(hdlcd->dev, "HDLCD: unable to map registers\n");
+		err = -ENOMEM;
+		goto remap_err;
+	}
+
+ hdlcd->fb.pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL);
+ if (!hdlcd->fb.pseudo_palette) {
+ dev_err(hdlcd->dev, "HDLCD: unable to allocate pseudo_palette memory\n");
+ err = -ENOMEM;
+ goto kmalloc_err;
+ }
+
+ version = readl(hdlcd->base + HDLCD_REG_VERSION);
+ if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) {
+ dev_err(hdlcd->dev, "HDLCD: unknown product id: 0x%x\n", version);
+ err = -EINVAL;
+		goto setup_err;
+ }
+ dev_info(hdlcd->dev, "HDLCD: found ARM HDLCD version r%dp%d\n",
+ (version & HDLCD_VERSION_MAJOR_MASK) >> 8,
+ version & HDLCD_VERSION_MINOR_MASK);
+
+ strcpy(hdlcd->fb.fix.id, "hdlcd");
+ hdlcd->fb.fbops = &hdlcd_ops;
+ hdlcd->fb.flags = FBINFO_FLAG_DEFAULT/* | FBINFO_VIRTFB*/;
+
+ hdlcd->fb.fix.type = FB_TYPE_PACKED_PIXELS;
+ hdlcd->fb.fix.type_aux = 0;
+ hdlcd->fb.fix.xpanstep = 0;
+ hdlcd->fb.fix.ypanstep = 1;
+ hdlcd->fb.fix.ywrapstep = 0;
+ hdlcd->fb.fix.accel = FB_ACCEL_NONE;
+
+ hdlcd->fb.var.nonstd = 0;
+ hdlcd->fb.var.activate = FB_ACTIVATE_NOW;
+ hdlcd->fb.var.height = -1;
+ hdlcd->fb.var.width = -1;
+ hdlcd->fb.var.accel_flags = 0;
+
+ init_completion(&hdlcd->vsync_completion);
+
+ if (hdlcd->edid) {
+ /* build modedb from EDID */
+ fb_edid_to_monspecs(hdlcd->edid, &hdlcd->fb.monspecs);
+ fb_videomode_to_modelist(hdlcd->fb.monspecs.modedb,
+ hdlcd->fb.monspecs.modedb_len,
+ &hdlcd->fb.modelist);
+ fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode,
+ hdlcd->fb.monspecs.modedb,
+ hdlcd->fb.monspecs.modedb_len,
+ &hdlcd_default_mode, 32);
+ } else {
+ hdlcd->fb.monspecs.hfmin = 0;
+ hdlcd->fb.monspecs.hfmax = 100000;
+ hdlcd->fb.monspecs.vfmin = 0;
+ hdlcd->fb.monspecs.vfmax = 400;
+ hdlcd->fb.monspecs.dclkmin = 1000000;
+ hdlcd->fb.monspecs.dclkmax = 100000000;
+ fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode, NULL, 0, &hdlcd_default_mode, 32);
+ }
+
+ dev_info(hdlcd->dev, "using %dx%d-%d@%d mode\n", hdlcd->fb.var.xres,
+ hdlcd->fb.var.yres, hdlcd->fb.var.bits_per_pixel,
+ hdlcd->fb.mode ? hdlcd->fb.mode->refresh : 60);
+ hdlcd->fb.var.xres_virtual = hdlcd->fb.var.xres;
+#ifdef HDLCD_NO_VIRTUAL_SCREEN
+ hdlcd->fb.var.yres_virtual = hdlcd->fb.var.yres;
+#else
+ hdlcd->fb.var.yres_virtual = hdlcd->fb.var.yres * 2;
+#endif
+
+ /* initialise and set the palette */
+ if (fb_alloc_cmap(&hdlcd->fb.cmap, NR_PALETTE, 0)) {
+ dev_err(hdlcd->dev, "failed to allocate cmap memory\n");
+ err = -ENOMEM;
+ goto setup_err;
+ }
+ fb_set_cmap(&hdlcd->fb.cmap, &hdlcd->fb);
+
+ /* Allow max number of outstanding requests with the largest beat burst */
+ WRITE_HDLCD_REG(HDLCD_REG_BUS_OPTIONS, HDLCD_BUS_MAX_OUTSTAND | HDLCD_BUS_BURST_16);
+ /* Set the framebuffer base to start of allocated memory */
+ WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start);
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+ /* turn on underrun interrupt for counting */
+ WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, HDLCD_INTERRUPT_UNDERRUN);
+#else
+ /* Ensure interrupts are disabled */
+ WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, 0);
+#endif
+ fb_set_var(&hdlcd->fb, &hdlcd->fb.var);
+
+	err = register_framebuffer(&hdlcd->fb);
+	if (!err)
+		return 0;
+
+ dev_err(hdlcd->dev, "HDLCD: cannot register framebuffer\n");
+
+ fb_dealloc_cmap(&hdlcd->fb.cmap);
+setup_err:
+	kfree(hdlcd->fb.pseudo_palette);
+kmalloc_err:
+	iounmap(hdlcd->base);
+remap_err:
+ clk_unprepare(hdlcd->clk);
+clk_prepare_err:
+ clk_put(hdlcd->clk);
+ return err;
+}
+
+static inline unsigned char atohex(u8 data)
+{
+ if (!isxdigit(data))
+ return 0;
+ /* truncate the upper nibble and add 9 to non-digit values */
+ return (data > 0x39) ? ((data & 0xf) + 9) : (data & 0xf);
+}
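+/* e.g. atohex('7') == 0x7 and atohex('b') == atohex('B') == 0xb,
+ * while non-hex input yields 0 */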
+
+/*
+ * EDID data is passed from the device tree as a literal string holding a
+ * hexadecimal dump of the raw bytes; the string may contain whitespace.
+ */
+static int parse_edid_data(struct hdlcd_device *hdlcd, const u8 *edid_data, int data_len)
+{
+ int i, j;
+
+ if (!edid_data)
+ return -EINVAL;
+
+ hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL);
+ if (!hdlcd->edid)
+ return -ENOMEM;
+
+	/* two hex digits form each EDID byte; whitespace is skipped */
+	for (i = 0, j = 0; i < data_len && j < EDID_LENGTH * 2; i++) {
+		if (isspace(edid_data[i]))
+			continue;
+		if (j & 1)
+			hdlcd->edid[j >> 1] |= atohex(edid_data[i]);
+		else
+			hdlcd->edid[j >> 1] = atohex(edid_data[i]) << 4;
+		j++;
+	}
+
+	if (j < EDID_LENGTH * 2) {
+		kfree(hdlcd->edid);
+		hdlcd->edid = NULL;
+		return -EINVAL;
+	}
+
+ return 0;
+}
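+/*
+ * Example (illustrative): an "edid" property starting "00 FF FF FF FF FF FF 00"
+ * yields edid[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 },
+ * the standard EDID header.
+ */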
+
+static int hdlcd_probe(struct platform_device *pdev)
+{
+ int err = 0, i;
+ struct hdlcd_device *hdlcd;
+ struct resource *mem;
+#ifdef CONFIG_OF
+ struct device_node *of_node;
+#endif
+
+ memset(&cached_var_screeninfo, 0, sizeof(struct fb_var_screeninfo));
+
+ dev_dbg(&pdev->dev, "HDLCD: probing\n");
+
+ hdlcd = kzalloc(sizeof(*hdlcd), GFP_KERNEL);
+ if (!hdlcd)
+ return -ENOMEM;
+
+#ifdef CONFIG_OF
+ of_node = pdev->dev.of_node;
+ if (of_node) {
+ int len;
+ const u8 *edid;
+ const __be32 *prop = of_get_property(of_node, "mode", &len);
+		if (prop)
+			strlcpy(fb_mode, (const char *)prop, sizeof(fb_mode));
+ prop = of_get_property(of_node, "framebuffer", &len);
+ if (prop) {
+ hdlcd->fb.fix.smem_start = of_read_ulong(prop,
+ of_n_addr_cells(of_node));
+ prop += of_n_addr_cells(of_node);
+ framebuffer_size = of_read_ulong(prop,
+ of_n_size_cells(of_node));
+ if (framebuffer_size > HDLCD_MAX_FRAMEBUFFER_SIZE)
+ framebuffer_size = HDLCD_MAX_FRAMEBUFFER_SIZE;
+ dev_dbg(&pdev->dev, "HDLCD: phys_addr = 0x%lx, size = 0x%lx\n",
+ hdlcd->fb.fix.smem_start, framebuffer_size);
+ }
+ edid = of_get_property(of_node, "edid", &len);
+ if (edid) {
+ err = parse_edid_data(hdlcd, edid, len);
+#ifdef CONFIG_SERIAL_AMBA_PCU_UART
+ } else {
+ /* ask the firmware to fetch the EDID */
+ dev_dbg(&pdev->dev, "HDLCD: Requesting EDID data\n");
+			hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL);
+			if (!hdlcd->edid) {
+				err = -ENOMEM;
+				goto resource_err;
+			}
+ err = get_edid(hdlcd->edid);
+#endif /* CONFIG_SERIAL_AMBA_PCU_UART */
+ }
+ if (err)
+ dev_info(&pdev->dev, "HDLCD: Failed to parse EDID data\n");
+ }
+#endif /* CONFIG_OF */
+
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!mem) {
+ dev_err(&pdev->dev, "HDLCD: cannot get platform resources\n");
+ err = -EINVAL;
+ goto resource_err;
+ }
+
+ i = platform_get_irq(pdev, 0);
+ if (i < 0) {
+ dev_err(&pdev->dev, "HDLCD: no irq defined for vsync\n");
+ err = -ENOENT;
+ goto resource_err;
+ } else {
+ err = request_irq(i, hdlcd_irq, 0, dev_name(&pdev->dev), hdlcd);
+ if (err) {
+ dev_err(&pdev->dev, "HDLCD: unable to request irq\n");
+ goto resource_err;
+ }
+ hdlcd->irq = i;
+ }
+
+ if (!request_mem_region(mem->start, resource_size(mem), dev_name(&pdev->dev))) {
+ err = -ENXIO;
+ goto request_err;
+ }
+
+ if (!hdlcd->fb.fix.smem_start) {
+ dev_err(&pdev->dev, "platform did not allocate frame buffer memory\n");
+ err = -ENOMEM;
+ goto memalloc_err;
+ }
+ hdlcd->fb.screen_base = ioremap_wc(hdlcd->fb.fix.smem_start, framebuffer_size);
+ if (!hdlcd->fb.screen_base) {
+ dev_err(&pdev->dev, "unable to ioremap framebuffer\n");
+ err = -ENOMEM;
+ goto probe_err;
+ }
+
+ hdlcd->fb.screen_size = framebuffer_size;
+ hdlcd->fb.fix.smem_len = framebuffer_size;
+ hdlcd->fb.fix.mmio_start = mem->start;
+ hdlcd->fb.fix.mmio_len = resource_size(mem);
+
+ /* Clear the framebuffer */
+ memset(hdlcd->fb.screen_base, 0, framebuffer_size);
+
+ hdlcd->dev = &pdev->dev;
+
+ dev_dbg(&pdev->dev, "HDLCD: framebuffer virt base %p, phys base 0x%lX\n",
+ hdlcd->fb.screen_base, (unsigned long)hdlcd->fb.fix.smem_start);
+
+ err = hdlcd_setup(hdlcd);
+
+ if (err)
+ goto probe_err;
+
+ platform_set_drvdata(pdev, hdlcd);
+ return 0;
+
+probe_err:
+ iounmap(hdlcd->fb.screen_base);
+	memblock_free(hdlcd->fb.fix.smem_start, framebuffer_size);
+
+memalloc_err:
+ release_mem_region(mem->start, resource_size(mem));
+
+request_err:
+ free_irq(hdlcd->irq, hdlcd);
+
+resource_err:
+ kfree(hdlcd);
+
+ return err;
+}
+
+static int hdlcd_remove(struct platform_device *pdev)
+{
+ struct hdlcd_device *hdlcd = platform_get_drvdata(pdev);
+
+	unregister_framebuffer(&hdlcd->fb);
+
+ clk_disable(hdlcd->clk);
+ clk_unprepare(hdlcd->clk);
+ clk_put(hdlcd->clk);
+
+ /* unmap memory */
+ iounmap(hdlcd->fb.screen_base);
+ iounmap(hdlcd->base);
+
+ /* deallocate fb memory */
+ fb_dealloc_cmap(&hdlcd->fb.cmap);
+ kfree(hdlcd->fb.pseudo_palette);
+	memblock_free(hdlcd->fb.fix.smem_start, framebuffer_size);
+ release_mem_region(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len);
+
+	free_irq(hdlcd->irq, hdlcd);
+ kfree(hdlcd);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int hdlcd_suspend(struct platform_device *pdev, pm_message_t state)
+{
+ /* not implemented yet */
+ return 0;
+}
+
+static int hdlcd_resume(struct platform_device *pdev)
+{
+ /* not implemented yet */
+ return 0;
+}
+#else
+#define hdlcd_suspend NULL
+#define hdlcd_resume NULL
+#endif
+
+static struct platform_driver hdlcd_driver = {
+ .probe = hdlcd_probe,
+ .remove = hdlcd_remove,
+ .suspend = hdlcd_suspend,
+ .resume = hdlcd_resume,
+ .driver = {
+ .name = "hdlcd",
+ .owner = THIS_MODULE,
+ .of_match_table = hdlcd_of_matches,
+ },
+};
+
+static int __init hdlcd_init(void)
+{
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+ int err = platform_driver_register(&hdlcd_driver);
+ if (!err)
+ hdlcd_underrun_init();
+ return err;
+#else
+ return platform_driver_register(&hdlcd_driver);
+#endif
+}
+
+void __exit hdlcd_exit(void)
+{
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+ hdlcd_underrun_close();
+#endif
+ platform_driver_unregister(&hdlcd_driver);
+}
+
+module_init(hdlcd_init);
+module_exit(hdlcd_exit);
+
+MODULE_AUTHOR("Liviu Dudau");
+MODULE_DESCRIPTION("ARM HDLCD core driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index bc922c47d046..d5fa5f3fe6d1 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -6,7 +6,7 @@ menu "Console display driver support"
config VGA_CONSOLE
bool "VGA text console" if EXPERT || !X86
- depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER)
+ depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && !ARM64
default y
help
Saying Y here will allow you to use Linux in text mode through a
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index a92783e480e6..0d8f98c79a6c 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -404,7 +404,7 @@ static void cursor_timer_handler(unsigned long dev_addr)
struct fb_info *info = (struct fb_info *) dev_addr;
struct fbcon_ops *ops = info->fbcon_par;
- schedule_work(&info->queue);
+ queue_work(system_power_efficient_wq, &info->queue);
mod_timer(&ops->cursor_timer, jiffies + HZ/5);
}
diff --git a/drivers/video/vexpress-dvi.c b/drivers/video/vexpress-dvi.c
new file mode 100644
index 000000000000..f08753450ee4
--- /dev/null
+++ b/drivers/video/vexpress-dvi.c
@@ -0,0 +1,220 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#define pr_fmt(fmt) "vexpress-dvi: " fmt
+
+#include <linux/fb.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/vexpress.h>
+
+
+static struct vexpress_config_func *vexpress_dvimode_func;
+
+static struct {
+ u32 xres, yres, mode;
+} vexpress_dvi_dvimodes[] = {
+ { 640, 480, 0 }, /* VGA */
+ { 800, 600, 1 }, /* SVGA */
+ { 1024, 768, 2 }, /* XGA */
+ { 1280, 1024, 3 }, /* SXGA */
+ { 1600, 1200, 4 }, /* UXGA */
+ { 1920, 1080, 5 }, /* HD1080 */
+};
+
+static void vexpress_dvi_mode_set(struct fb_info *info, u32 xres, u32 yres)
+{
+ int err = -ENOENT;
+ int i;
+
+ if (!vexpress_dvimode_func)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(vexpress_dvi_dvimodes); i++) {
+ if (vexpress_dvi_dvimodes[i].xres == xres &&
+ vexpress_dvi_dvimodes[i].yres == yres) {
+ pr_debug("mode: %ux%u = %d\n", xres, yres,
+ vexpress_dvi_dvimodes[i].mode);
+ err = vexpress_config_write(vexpress_dvimode_func, 0,
+ vexpress_dvi_dvimodes[i].mode);
+ break;
+ }
+ }
+
+ if (err)
+ pr_warn("Failed to set %ux%u mode! (%d)\n", xres, yres, err);
+}
+
+
+static struct vexpress_config_func *vexpress_muxfpga_func;
+static int vexpress_dvi_fb = -1;
+
+static int vexpress_dvi_mux_set(struct fb_info *info)
+{
+ int err;
+ u32 site = vexpress_get_site_by_dev(info->device);
+
+ if (!vexpress_muxfpga_func)
+ return -ENXIO;
+
+ err = vexpress_config_write(vexpress_muxfpga_func, 0, site);
+ if (!err) {
+ pr_debug("Selected MUXFPGA input %d (fb%d)\n", site,
+ info->node);
+ vexpress_dvi_fb = info->node;
+ vexpress_dvi_mode_set(info, info->var.xres,
+ info->var.yres);
+ } else {
+ pr_warn("Failed to select MUXFPGA input %d (fb%d)! (%d)\n",
+ site, info->node, err);
+ }
+
+ return err;
+}
+
+static int vexpress_dvi_fb_select(int fb)
+{
+ int err;
+ struct fb_info *info;
+
+ /* fb0 is the default */
+	if (fb < 0)
+		fb = 0;
+	if (fb >= FB_MAX)
+		return -ENODEV;
+
+	info = registered_fb[fb];
+ if (!info || !lock_fb_info(info))
+ return -ENODEV;
+
+ err = vexpress_dvi_mux_set(info);
+
+ unlock_fb_info(info);
+
+ return err;
+}
+
+static ssize_t vexpress_dvi_fb_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", vexpress_dvi_fb);
+}
+
+static ssize_t vexpress_dvi_fb_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ long value;
+ int err = kstrtol(buf, 0, &value);
+
+ if (!err)
+ err = vexpress_dvi_fb_select(value);
+
+ return err ? err : count;
+}
+
+static DEVICE_ATTR(fb, S_IRUGO | S_IWUSR, vexpress_dvi_fb_show,
+		vexpress_dvi_fb_store);
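+/* Illustrative usage (the sysfs path depends on the platform device name):
+ *   echo 1 > /sys/devices/.../fb
+ * routes the board's DVI output to the display served by fb1 */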
+
+
+static int vexpress_dvi_fb_event_notify(struct notifier_block *self,
+ unsigned long action, void *data)
+{
+ struct fb_event *event = data;
+ struct fb_info *info = event->info;
+ struct fb_videomode *mode = event->data;
+
+ switch (action) {
+ case FB_EVENT_FB_REGISTERED:
+ if (vexpress_dvi_fb < 0)
+ vexpress_dvi_mux_set(info);
+ break;
+ case FB_EVENT_MODE_CHANGE:
+ case FB_EVENT_MODE_CHANGE_ALL:
+ if (info->node == vexpress_dvi_fb)
+ vexpress_dvi_mode_set(info, mode->xres, mode->yres);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block vexpress_dvi_fb_notifier = {
+ .notifier_call = vexpress_dvi_fb_event_notify,
+};
+static bool vexpress_dvi_fb_notifier_registered;
+
+
+enum vexpress_dvi_func { FUNC_MUXFPGA, FUNC_DVIMODE };
+
+static const struct of_device_id vexpress_dvi_of_match[] = {
+ {
+ .compatible = "arm,vexpress-muxfpga",
+ .data = (void *)FUNC_MUXFPGA,
+ }, {
+ .compatible = "arm,vexpress-dvimode",
+ .data = (void *)FUNC_DVIMODE,
+ },
+ {}
+};
+
+static int vexpress_dvi_probe(struct platform_device *pdev)
+{
+ enum vexpress_dvi_func func;
+ const struct of_device_id *match =
+ of_match_device(vexpress_dvi_of_match, &pdev->dev);
+
+ if (match)
+		func = (enum vexpress_dvi_func)(unsigned long)match->data;
+ else
+ func = pdev->id_entry->driver_data;
+
+ switch (func) {
+ case FUNC_MUXFPGA:
+ vexpress_muxfpga_func =
+ vexpress_config_func_get_by_dev(&pdev->dev);
+ device_create_file(&pdev->dev, &dev_attr_fb);
+ break;
+ case FUNC_DVIMODE:
+ vexpress_dvimode_func =
+ vexpress_config_func_get_by_dev(&pdev->dev);
+ break;
+ }
+
+ if (!vexpress_dvi_fb_notifier_registered) {
+ fb_register_client(&vexpress_dvi_fb_notifier);
+ vexpress_dvi_fb_notifier_registered = true;
+ }
+
+ vexpress_dvi_fb_select(vexpress_dvi_fb);
+
+ return 0;
+}
+
+static const struct platform_device_id vexpress_dvi_id_table[] = {
+ { .name = "vexpress-muxfpga", .driver_data = FUNC_MUXFPGA, },
+ { .name = "vexpress-dvimode", .driver_data = FUNC_DVIMODE, },
+ {}
+};
+
+static struct platform_driver vexpress_dvi_driver = {
+ .probe = vexpress_dvi_probe,
+ .driver = {
+ .name = "vexpress-dvi",
+ .of_match_table = vexpress_dvi_of_match,
+ },
+ .id_table = vexpress_dvi_id_table,
+};
+
+static int __init vexpress_dvi_init(void)
+{
+ return platform_driver_register(&vexpress_dvi_driver);
+}
+device_initcall(vexpress_dvi_init);
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index a81147e2e4ef..4d24d17bcfc1 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime,
#define ELF_HWCAP COMPAT_ELF_HWCAP
#endif
+#ifdef COMPAT_ELF_HWCAP2
+#undef ELF_HWCAP2
+#define ELF_HWCAP2 COMPAT_ELF_HWCAP2
+#endif
+
#ifdef COMPAT_ARCH_DLINFO
#undef ARCH_DLINFO
#define ARCH_DLINFO COMPAT_ARCH_DLINFO
diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h
deleted file mode 100644
index 294b1e755ab2..000000000000
--- a/include/asm-generic/dma-contiguous.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef ASM_DMA_CONTIGUOUS_H
-#define ASM_DMA_CONTIGUOUS_H
-
-#ifdef __KERNEL__
-#ifdef CONFIG_CMA
-
-#include <linux/device.h>
-#include <linux/dma-contiguous.h>
-
-static inline struct cma *dev_get_cma_area(struct device *dev)
-{
- if (dev && dev->cma_area)
- return dev->cma_area;
- return dma_contiguous_default_area;
-}
-
-static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
-{
- if (dev)
- dev->cma_area = cma;
- if (!dev && !dma_contiguous_default_area)
- dma_contiguous_default_area = cma;
-}
-
-#endif
-#endif
-
-#endif
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h
new file mode 100644
index 000000000000..a5de55c04fb2
--- /dev/null
+++ b/include/asm-generic/early_ioremap.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_EARLY_IOREMAP_H_
+#define _ASM_EARLY_IOREMAP_H_
+
+#include <linux/types.h>
+
+/*
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
+ * mappings, before the real ioremap() is functional.
+ */
+extern void __iomem *early_ioremap(resource_size_t phys_addr,
+ unsigned long size);
+extern void *early_memremap(resource_size_t phys_addr,
+ unsigned long size);
+extern void early_iounmap(void __iomem *addr, unsigned long size);
+extern void early_memunmap(void *addr, unsigned long size);
+
+/*
+ * Weak function called by early_ioremap_reset(). It does nothing, but
+ * architectures may provide their own version to do any needed cleanups.
+ */
+extern void early_ioremap_shutdown(void);
+
+#if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU)
+/* Arch-specific initialization */
+extern void early_ioremap_init(void);
+
+/* Generic initialization called by architecture code */
+extern void early_ioremap_setup(void);
+
+/*
+ * Called as the last step in paging_init() so the library can adjust
+ * its behaviour for subsequent map/unmap requests.
+ */
+extern void early_ioremap_reset(void);
+
+#else
+static inline void early_ioremap_init(void) { }
+static inline void early_ioremap_setup(void) { }
+static inline void early_ioremap_reset(void) { }
+#endif
+
+#endif /* _ASM_EARLY_IOREMAP_H_ */
diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h
new file mode 100644
index 000000000000..5a64ca4621f3
--- /dev/null
+++ b/include/asm-generic/fixmap.h
@@ -0,0 +1,97 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009
+ * Break out common bits to asm-generic by Mark Salter, November 2013
+ */
+
+#ifndef __ASM_GENERIC_FIXMAP_H
+#define __ASM_GENERIC_FIXMAP_H
+
+#include <linux/bug.h>
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
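+/* e.g. with 4 KiB pages (PAGE_SHIFT == 12), __fix_to_virt(2) == FIXADDR_TOP - 8192 */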
+
+#ifndef __ASSEMBLY__
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+ return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+ BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+ return __virt_to_fix(vaddr);
+}
+
+/*
+ * Provide some reasonable defaults for page flags.
+ * Not all architectures use all of these different types and some
+ * architectures use different names.
+ */
+#ifndef FIXMAP_PAGE_NORMAL
+#define FIXMAP_PAGE_NORMAL PAGE_KERNEL
+#endif
+#ifndef FIXMAP_PAGE_NOCACHE
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE
+#endif
+#ifndef FIXMAP_PAGE_IO
+#define FIXMAP_PAGE_IO PAGE_KERNEL_IO
+#endif
+#ifndef FIXMAP_PAGE_CLEAR
+#define FIXMAP_PAGE_CLEAR __pgprot(0)
+#endif
+
+#ifndef set_fixmap
+#define set_fixmap(idx, phys) \
+ __set_fixmap(idx, phys, FIXMAP_PAGE_NORMAL)
+#endif
+
+#ifndef clear_fixmap
+#define clear_fixmap(idx) \
+ __set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR)
+#endif
+
+/* Return a pointer with offset calculated */
+#define __set_fixmap_offset(idx, phys, flags) \
+({ \
+ unsigned long addr; \
+ __set_fixmap(idx, phys, flags); \
+ addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \
+ addr; \
+})
+
+#define set_fixmap_offset(idx, phys) \
+ __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NORMAL)
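+/* e.g. set_fixmap_offset(idx, 0x12345678) maps the page at 0x12345000
+ * and evaluates to fix_to_virt(idx) + 0x678 */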
+
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+ __set_fixmap(idx, phys, FIXMAP_PAGE_NOCACHE)
+
+#define set_fixmap_offset_nocache(idx, phys) \
+ __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NOCACHE)
+
+/*
+ * Some fixmaps are for IO
+ */
+#define set_fixmap_io(idx, phys) \
+ __set_fixmap(idx, phys, FIXMAP_PAGE_IO)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_GENERIC_FIXMAP_H */
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index bb1e2cdeb9bf..d48bf5a95cc1 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_POWERPC_RWSEM_H
-#define _ASM_POWERPC_RWSEM_H
+#ifndef _ASM_GENERIC_RWSEM_H
+#define _ASM_GENERIC_RWSEM_H
#ifndef _LINUX_RWSEM_H
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
@@ -8,7 +8,7 @@
#ifdef __KERNEL__
/*
- * R/W semaphores for PPC using the stuff in lib/rwsem.c.
+ * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
* Adapted largely from include/asm-i386/rwsem.h
* by Paul Mackerras <paulus@samba.org>.
*/
@@ -16,7 +16,7 @@
/*
* the semaphore definition
*/
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_64BIT
# define RWSEM_ACTIVE_MASK 0xffffffffL
#else
# define RWSEM_ACTIVE_MASK 0x0000ffffL
@@ -129,4 +129,4 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
}
#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_RWSEM_H */
+#endif /* _ASM_GENERIC_RWSEM_H */
diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h
new file mode 100644
index 000000000000..f57eb7b5c23b
--- /dev/null
+++ b/include/asm-generic/simd.h
@@ -0,0 +1,14 @@
+
+#include <linux/hardirq.h>
+
+/*
+ * may_use_simd - whether it is allowable at this time to issue SIMD
+ * instructions or access the SIMD register file
+ *
+ * As architectures typically don't preserve the SIMD register file when
+ * taking an interrupt, !in_interrupt() should be a reasonable default.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+ return !in_interrupt();
+}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index eb58d2d7d971..7414dd5132e5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -171,6 +171,16 @@
#define CLK_OF_TABLES()
#endif
+#ifdef CONFIG_OF_RESERVED_MEM
+#define RESERVEDMEM_OF_TABLES() \
+ . = ALIGN(8); \
+ VMLINUX_SYMBOL(__reservedmem_of_table) = .; \
+ *(__reservedmem_of_table) \
+ *(__reservedmem_of_table_end)
+#else
+#define RESERVEDMEM_OF_TABLES()
+#endif
+
#define KERNEL_DTB() \
STRUCT_ALIGN(); \
VMLINUX_SYMBOL(__dtb_start) = .; \
@@ -515,6 +525,7 @@
CPU_DISCARD(init.rodata) \
MEM_DISCARD(init.rodata) \
CLK_OF_TABLES() \
+ RESERVEDMEM_OF_TABLES() \
CLKSRC_OF_TABLES() \
KERNEL_DTB() \
IRQCHIP_OF_MATCH_TABLE()
diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
index 3f21f1b72e45..94f9ea8abcae 100644
--- a/include/asm-generic/word-at-a-time.h
+++ b/include/asm-generic/word-at-a-time.h
@@ -49,4 +49,8 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
return (val + c->high_bits) & ~rhs;
}
+#ifndef zero_bytemask
+#define zero_bytemask(mask) (~1ul << __fls(mask))
+#endif
+
#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index c463ce990c48..6d26b40cbf5d 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -23,16 +23,30 @@
#define ARCH_TIMER_CTRL_IT_MASK (1 << 1)
#define ARCH_TIMER_CTRL_IT_STAT (1 << 2)
-#define ARCH_TIMER_REG_CTRL 0
-#define ARCH_TIMER_REG_TVAL 1
+enum arch_timer_reg {
+ ARCH_TIMER_REG_CTRL,
+ ARCH_TIMER_REG_TVAL,
+};
#define ARCH_TIMER_PHYS_ACCESS 0
#define ARCH_TIMER_VIRT_ACCESS 1
+#define ARCH_TIMER_MEM_PHYS_ACCESS 2
+#define ARCH_TIMER_MEM_VIRT_ACCESS 3
+
+#define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */
+#define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */
+#define ARCH_TIMER_VIRT_EVT_EN (1 << 2)
+#define ARCH_TIMER_EVT_TRIGGER_SHIFT (4)
+#define ARCH_TIMER_EVT_TRIGGER_MASK (0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT)
+#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */
+#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */
+
+#define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */
#ifdef CONFIG_ARM_ARCH_TIMER
extern u32 arch_timer_get_rate(void);
-extern u64 arch_timer_read_counter(void);
+extern u64 (*arch_timer_read_counter)(void);
extern struct timecounter *arch_timer_get_timecounter(void);
#else
diff --git a/include/crypto/ablk_helper.h b/include/crypto/ablk_helper.h
new file mode 100644
index 000000000000..4f93df50c23e
--- /dev/null
+++ b/include/crypto/ablk_helper.h
@@ -0,0 +1,31 @@
+/*
+ * Shared async block cipher helpers
+ */
+
+#ifndef _CRYPTO_ABLK_HELPER_H
+#define _CRYPTO_ABLK_HELPER_H
+
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <crypto/cryptd.h>
+
+struct async_helper_ctx {
+ struct cryptd_ablkcipher *cryptd_tfm;
+};
+
+extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int key_len);
+
+extern int __ablk_encrypt(struct ablkcipher_request *req);
+
+extern int ablk_encrypt(struct ablkcipher_request *req);
+
+extern int ablk_decrypt(struct ablkcipher_request *req);
+
+extern void ablk_exit(struct crypto_tfm *tfm);
+
+extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);
+
+extern int ablk_init(struct crypto_tfm *tfm);
+
+#endif /* _CRYPTO_ABLK_HELPER_H */
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 418d270e1806..063f8ef49301 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -100,9 +100,12 @@ struct blkcipher_walk {
void *page;
u8 *buffer;
u8 *iv;
+ unsigned int ivsize;
int flags;
- unsigned int blocksize;
+ unsigned int walk_blocksize;
+ unsigned int cipher_blocksize;
+ unsigned int alignmask;
};
struct ablkcipher_walk {
@@ -192,6 +195,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc,
int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
struct blkcipher_walk *walk,
unsigned int blocksize);
+int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk,
+ struct crypto_aead *tfm,
+ unsigned int blocksize);
int ablkcipher_walk_done(struct ablkcipher_request *req,
struct ablkcipher_walk *walk, int err);
diff --git a/include/dt-bindings/thermal/thermal.h b/include/dt-bindings/thermal/thermal.h
new file mode 100644
index 000000000000..59822a995858
--- /dev/null
+++ b/include/dt-bindings/thermal/thermal.h
@@ -0,0 +1,17 @@
+/*
+ * This header provides constants for most thermal bindings.
+ *
+ * Copyright (C) 2013 Texas Instruments
+ * Eduardo Valentin <eduardo.valentin@ti.com>
+ *
+ * GPLv2 only
+ */
+
+#ifndef _DT_BINDINGS_THERMAL_THERMAL_H
+#define _DT_BINDINGS_THERMAL_THERMAL_H
+
+/* On cooling devices upper and lower limits */
+#define THERMAL_NO_LIMIT (-1UL)
+
+#endif
diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 68cb9e1dfb81..ad9db6045b2f 100644
--- a/arch/arm/include/asm/kvm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -61,10 +61,16 @@ struct arch_timer_cpu {
#ifdef CONFIG_KVM_ARM_TIMER
int kvm_timer_hyp_init(void);
int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq);
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
#else
static inline int kvm_timer_hyp_init(void)
{
@@ -76,10 +82,22 @@ static inline int kvm_timer_init(struct kvm *kvm)
return 0;
}
+static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq) {}
static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
+
+static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+ return 0;
+}
+
+static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+ return 0;
+}
#endif
#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
new file mode 100644
index 000000000000..2f2aac8448a4
--- /dev/null
+++ b/include/kvm/arm_vgic.h
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARM_KVM_VGIC_H
+#define __ASM_ARM_KVM_VGIC_H
+
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include <linux/irqreturn.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define VGIC_NR_IRQS_LEGACY 256
+#define VGIC_NR_SGIS 16
+#define VGIC_NR_PPIS 16
+#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
+
+#define VGIC_V2_MAX_LRS (1 << 6)
+#define VGIC_V3_MAX_LRS 16
+#define VGIC_MAX_IRQS 1024
+
+/* Sanity checks... */
+#if (KVM_MAX_VCPUS > 8)
+#error Invalid number of CPU interfaces
+#endif
+
+#if (VGIC_NR_IRQS_LEGACY & 31)
+#error "VGIC_NR_IRQS must be a multiple of 32"
+#endif
+
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
+#error "VGIC_NR_IRQS must be <= 1024"
+#endif
+
+/*
+ * The GIC distributor registers describing interrupts have two parts:
+ * - 32 per-CPU interrupts (SGI + PPI)
+ * - a bunch of shared interrupts (SPI)
+ */
+struct vgic_bitmap {
+ /*
+ * - One UL per VCPU for private interrupts (assumes UL is at
+ * least 32 bits)
+ * - As many UL as necessary for shared interrupts.
+ *
+ * The private interrupts are accessed via the "private"
+ * field, one UL per vcpu (the state for vcpu n is in
+ * private[n]). The shared interrupts are accessed via the
+ * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
+ */
+ unsigned long *private;
+ unsigned long *shared;
+};
+
+struct vgic_bytemap {
+ /*
+ * - 8 u32 per VCPU for private interrupts
+ * - As many u32 as necessary for shared interrupts.
+ *
+ * The private interrupts are accessed via the "private"
+ * field, (the state for vcpu n is in private[n*8] to
+ * private[n*8 + 7]). The shared interrupts are accessed via
+ * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
+ * shared[(n-32)/4] word).
+ */
+ u32 *private;
+ u32 *shared;
+};
+
+struct kvm_vcpu;
+
+enum vgic_type {
+ VGIC_V2, /* Good ol' GICv2 */
+ VGIC_V3, /* New fancy GICv3 */
+};
+
+#define LR_STATE_PENDING (1 << 0)
+#define LR_STATE_ACTIVE (1 << 1)
+#define LR_STATE_MASK (3 << 0)
+#define LR_EOI_INT (1 << 2)
+
+struct vgic_lr {
+ u16 irq;
+ u8 source;
+ u8 state;
+};
+
+struct vgic_vmcr {
+ u32 ctlr;
+ u32 abpr;
+ u32 bpr;
+ u32 pmr;
+};
+
+struct vgic_ops {
+ struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int);
+ void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+ void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
+ u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
+ u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
+ u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
+ void (*enable_underflow)(struct kvm_vcpu *vcpu);
+ void (*disable_underflow)(struct kvm_vcpu *vcpu);
+ void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void (*enable)(struct kvm_vcpu *vcpu);
+};
+
+struct vgic_params {
+ /* vgic type */
+ enum vgic_type type;
+ /* Physical address of vgic virtual cpu interface */
+ phys_addr_t vcpu_base;
+ /* Number of list registers */
+ u32 nr_lr;
+ /* Interrupt number */
+ unsigned int maint_irq;
+ /* Virtual control interface base address */
+ void __iomem *vctrl_base;
+};
+
+struct vgic_dist {
+#ifdef CONFIG_KVM_ARM_VGIC
+ spinlock_t lock;
+ bool in_kernel;
+ bool ready;
+
+ int nr_cpus;
+ int nr_irqs;
+
+ /* Virtual control interface mapping */
+ void __iomem *vctrl_base;
+
+ /* Distributor and vcpu interface mapping in the guest */
+ phys_addr_t vgic_dist_base;
+ phys_addr_t vgic_cpu_base;
+
+ /* Distributor enabled */
+ u32 enabled;
+
+ /* Interrupt enabled (one bit per IRQ) */
+ struct vgic_bitmap irq_enabled;
+
+ /* Level-triggered interrupt external input is asserted */
+ struct vgic_bitmap irq_level;
+
+ /*
+ * Interrupt state is pending on the distributor
+ */
+ struct vgic_bitmap irq_pending;
+
+ /*
+ * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered
+ * interrupts. Essentially holds the state of the flip-flop in
+ * Figure 4-10 on page 4-101 in ARM IHI 0048B.b.
+ * Once set, it is only cleared for level-triggered interrupts on
+ * guest ACKs (when we queue it) or writes to GICD_ICPENDRn.
+ */
+ struct vgic_bitmap irq_soft_pend;
+
+ /* Level-triggered interrupt queued on VCPU interface */
+ struct vgic_bitmap irq_queued;
+
+ /* Interrupt priority. Not used yet. */
+ struct vgic_bytemap irq_priority;
+
+ /* Level/edge triggered */
+ struct vgic_bitmap irq_cfg;
+
+ /*
+ * Source CPU per SGI and target CPU:
+ *
+	 * Each byte represents an SGI observable on a VCPU, each bit of
+	 * the byte indicating whether the corresponding VCPU has
+	 * generated the interrupt. This is a GICv2-only feature.
+ *
+ * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
+ * the SGIs observable on VCPUn.
+ */
+ u8 *irq_sgi_sources;
+
+ /*
+ * Target CPU for each SPI:
+ *
+	 * Array of available SPIs, each byte indicating the target
+	 * VCPU for that SPI. IRQn (n >= 32) is at irq_spi_cpu[n-32].
+ */
+ u8 *irq_spi_cpu;
+
+ /*
+	 * Reverse lookup of irq_spi_cpu for faster pending-IRQ computation:
+ *
+ * Array of bitmaps, one per VCPU, describing if IRQn is
+ * routed to a particular VCPU.
+ */
+ struct vgic_bitmap *irq_spi_target;
+
+ /* Bitmap indicating which CPU has something pending */
+ unsigned long *irq_pending_on_cpu;
+#endif
+};
+
+struct vgic_v2_cpu_if {
+ u32 vgic_hcr;
+ u32 vgic_vmcr;
+ u32 vgic_misr; /* Saved only */
+ u32 vgic_eisr[2]; /* Saved only */
+ u32 vgic_elrsr[2]; /* Saved only */
+ u32 vgic_apr;
+ u32 vgic_lr[VGIC_V2_MAX_LRS];
+};
+
+struct vgic_v3_cpu_if {
+#ifdef CONFIG_ARM_GIC_V3
+ u32 vgic_hcr;
+ u32 vgic_vmcr;
+ u32 vgic_misr; /* Saved only */
+ u32 vgic_eisr; /* Saved only */
+ u32 vgic_elrsr; /* Saved only */
+ u32 vgic_ap0r[4];
+ u32 vgic_ap1r[4];
+ u64 vgic_lr[VGIC_V3_MAX_LRS];
+#endif
+};
+
+struct vgic_cpu {
+#ifdef CONFIG_KVM_ARM_VGIC
+ /* per IRQ to LR mapping */
+ u8 *vgic_irq_lr_map;
+
+ /* Pending interrupts on this VCPU */
+	DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS);
+ unsigned long *pending_shared;
+
+ /* Bitmap of used/free list registers */
+	DECLARE_BITMAP(lr_used, VGIC_V2_MAX_LRS);
+
+ /* Number of list registers on this CPU */
+ int nr_lr;
+
+ /* CPU vif control registers for world switch */
+ union {
+ struct vgic_v2_cpu_if vgic_v2;
+ struct vgic_v3_cpu_if vgic_v3;
+ };
+#endif
+};
+
+#define LR_EMPTY 0xff
+
+#define INT_STATUS_EOI (1 << 0)
+#define INT_STATUS_UNDERFLOW (1 << 1)
+
+struct kvm;
+struct kvm_vcpu;
+struct kvm_run;
+struct kvm_exit_mmio;
+
+#ifdef CONFIG_KVM_ARM_VGIC
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
+int kvm_vgic_hyp_init(void);
+int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_create(struct kvm *kvm);
+void kvm_vgic_destroy(struct kvm *kvm);
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
+void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
+void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+ bool level);
+int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_exit_mmio *mmio);
+
+#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
+#define vgic_initialized(k) ((k)->arch.vgic.ready)
+
+int vgic_v2_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params);
+#ifdef CONFIG_ARM_GIC_V3
+int vgic_v3_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params);
+#else
+static inline int vgic_v3_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+ return -ENODEV;
+}
+#endif
+
+#else
+static inline int kvm_vgic_hyp_init(void)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
+{
+ return -ENXIO;
+}
+
+static inline int kvm_vgic_init(struct kvm *kvm)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_create(struct kvm *kvm)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
+
+static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
+ unsigned int irq_num, bool level)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_exit_mmio *mmio)
+{
+ return false;
+}
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ return 0;
+}
+
+static inline bool vgic_initialized(struct kvm *kvm)
+{
+ return true;
+}
+#endif
+
+#endif
diff --git a/include/linux/arm-cci.h b/include/linux/arm-cci.h
new file mode 100644
index 000000000000..79d6edf446d5
--- /dev/null
+++ b/include/linux/arm-cci.h
@@ -0,0 +1,61 @@
+/*
+ * CCI cache coherent interconnect support
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __LINUX_ARM_CCI_H
+#define __LINUX_ARM_CCI_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct device_node;
+
+#ifdef CONFIG_ARM_CCI
+extern bool cci_probed(void);
+extern int cci_ace_get_port(struct device_node *dn);
+extern int cci_disable_port_by_cpu(u64 mpidr);
+extern int __cci_control_port_by_device(struct device_node *dn, bool enable);
+extern int __cci_control_port_by_index(u32 port, bool enable);
+#else
+static inline bool cci_probed(void) { return false; }
+static inline int cci_ace_get_port(struct device_node *dn)
+{
+ return -ENODEV;
+}
+static inline int cci_disable_port_by_cpu(u64 mpidr) { return -ENODEV; }
+static inline int __cci_control_port_by_device(struct device_node *dn,
+ bool enable)
+{
+ return -ENODEV;
+}
+static inline int __cci_control_port_by_index(u32 port, bool enable)
+{
+ return -ENODEV;
+}
+#endif
+#define cci_disable_port_by_device(dev) \
+ __cci_control_port_by_device(dev, false)
+#define cci_enable_port_by_device(dev) \
+ __cci_control_port_by_device(dev, true)
+#define cci_disable_port_by_index(dev) \
+ __cci_control_port_by_index(dev, false)
+#define cci_enable_port_by_index(dev) \
+ __cci_control_port_by_index(dev, true)
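+/* Note: despite the parameter name, the *_by_device helpers take the
+ * struct device_node describing the port, e.g. cci_enable_port_by_device(np) */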
+
+#endif
diff --git a/include/linux/arm-hdlcd.h b/include/linux/arm-hdlcd.h
new file mode 100644
index 000000000000..939f3a81d56b
--- /dev/null
+++ b/include/linux/arm-hdlcd.h
@@ -0,0 +1,122 @@
+/*
+ * include/linux/arm-hdlcd.h
+ *
+ * Copyright (C) 2011 ARM Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * ARM HDLCD Controller register definition
+ */
+
+#include <linux/fb.h>
+#include <linux/completion.h>
+
+/* register offsets */
+#define HDLCD_REG_VERSION 0x0000 /* ro */
+#define HDLCD_REG_INT_RAWSTAT 0x0010 /* rw */
+#define HDLCD_REG_INT_CLEAR 0x0014 /* wo */
+#define HDLCD_REG_INT_MASK 0x0018 /* rw */
+#define HDLCD_REG_INT_STATUS 0x001c /* ro */
+#define HDLCD_REG_USER_OUT 0x0020 /* rw */
+#define HDLCD_REG_FB_BASE 0x0100 /* rw */
+#define HDLCD_REG_FB_LINE_LENGTH 0x0104 /* rw */
+#define HDLCD_REG_FB_LINE_COUNT 0x0108 /* rw */
+#define HDLCD_REG_FB_LINE_PITCH 0x010c /* rw */
+#define HDLCD_REG_BUS_OPTIONS 0x0110 /* rw */
+#define HDLCD_REG_V_SYNC 0x0200 /* rw */
+#define HDLCD_REG_V_BACK_PORCH 0x0204 /* rw */
+#define HDLCD_REG_V_DATA 0x0208 /* rw */
+#define HDLCD_REG_V_FRONT_PORCH 0x020c /* rw */
+#define HDLCD_REG_H_SYNC 0x0210 /* rw */
+#define HDLCD_REG_H_BACK_PORCH 0x0214 /* rw */
+#define HDLCD_REG_H_DATA 0x0218 /* rw */
+#define HDLCD_REG_H_FRONT_PORCH 0x021c /* rw */
+#define HDLCD_REG_POLARITIES 0x0220 /* rw */
+#define HDLCD_REG_COMMAND 0x0230 /* rw */
+#define HDLCD_REG_PIXEL_FORMAT 0x0240 /* rw */
+#define HDLCD_REG_BLUE_SELECT 0x0244 /* rw */
+#define HDLCD_REG_GREEN_SELECT 0x0248 /* rw */
+#define HDLCD_REG_RED_SELECT 0x024c /* rw */
+
+/* version */
+#define HDLCD_PRODUCT_ID 0x1CDC0000
+#define HDLCD_PRODUCT_MASK 0xFFFF0000
+#define HDLCD_VERSION_MAJOR_MASK 0x0000FF00
+#define HDLCD_VERSION_MINOR_MASK 0x000000FF
+
+/* interrupts */
+#define HDLCD_INTERRUPT_DMA_END (1 << 0)
+#define HDLCD_INTERRUPT_BUS_ERROR (1 << 1)
+#define HDLCD_INTERRUPT_VSYNC (1 << 2)
+#define HDLCD_INTERRUPT_UNDERRUN (1 << 3)
+
+/* polarity */
+#define HDLCD_POLARITY_VSYNC (1 << 0)
+#define HDLCD_POLARITY_HSYNC (1 << 1)
+#define HDLCD_POLARITY_DATAEN (1 << 2)
+#define HDLCD_POLARITY_DATA (1 << 3)
+#define HDLCD_POLARITY_PIXELCLK (1 << 4)
+
+/* commands */
+#define HDLCD_COMMAND_DISABLE (0 << 0)
+#define HDLCD_COMMAND_ENABLE (1 << 0)
+
+/* pixel format */
+#define HDLCD_PIXEL_FMT_LITTLE_ENDIAN (0 << 31)
+#define HDLCD_PIXEL_FMT_BIG_ENDIAN (1 << 31)
+#define HDLCD_BYTES_PER_PIXEL_MASK (3 << 3)
+
+/* bus options */
+#define HDLCD_BUS_BURST_MASK 0x01f
+#define HDLCD_BUS_MAX_OUTSTAND 0xf00
+#define HDLCD_BUS_BURST_NONE (0 << 0)
+#define HDLCD_BUS_BURST_1 (1 << 0)
+#define HDLCD_BUS_BURST_2 (1 << 1)
+#define HDLCD_BUS_BURST_4 (1 << 2)
+#define HDLCD_BUS_BURST_8 (1 << 3)
+#define HDLCD_BUS_BURST_16 (1 << 4)
+
+/*
+ * The maximum resolution supported by the controller is 4096x4096 with
+ * 8 bits per colour component plus 8 bits of alpha, but we settle for
+ * the usual hardware default (2048x2048, 32 bpp) and enable double
+ * buffering.
+ */
+#define HDLCD_MAX_XRES 2048
+#define HDLCD_MAX_YRES 2048
+#define HDLCD_MAX_FRAMEBUFFER_SIZE (HDLCD_MAX_XRES * HDLCD_MAX_YRES << 2)
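+/* i.e. a 16 MiB cap: 2048 * 2048 pixels at 4 bytes each */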
+
+#define HDLCD_MEM_BASE (CONFIG_PAGE_OFFSET - 0x1000000)
+
+#define NR_PALETTE 256
+
+/* OEMs using HDLCD may wish to enable these settings if
+ * display disruption is apparent and you suspect HDLCD
+ * access to RAM may be starved.
+ */
+/* Turn HDLCD default color red instead of black so
+ * that it's easy to see pixel clock data underruns
+ * (compared to other visual disruption)
+ */
+//#define HDLCD_RED_DEFAULT_COLOUR
+/* Add a counter in the IRQ handler to count buffer underruns
+ * and /proc/hdlcd_underrun to read the counter
+ */
+//#define HDLCD_COUNT_BUFFERUNDERRUNS
+/* Restrict the virtual screen height to 1x the visible size */
+//#define HDLCD_NO_VIRTUAL_SCREEN
+
+#ifdef CONFIG_ANDROID
+#define HDLCD_NO_VIRTUAL_SCREEN
+#endif
+
+struct hdlcd_device {
+ struct fb_info fb;
+ struct device *dev;
+ struct clk *clk;
+ void __iomem *base;
+ int irq;
+ struct completion vsync_completion;
+ unsigned char *edid;
+};
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 11860985fecb..4c544c814d26 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -435,6 +435,7 @@ struct clk_onecell_data {
unsigned int clk_num;
};
struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data);
+int of_clk_get_parent_count(struct device_node *np);
const char *of_clk_get_parent_name(struct device_node *np, int index);
void of_clk_init(const struct of_device_id *matches);
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 9f3c7e81270a..322e0afc8634 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -29,6 +29,7 @@ struct cpu {
extern int register_cpu(struct cpu *cpu, int num);
extern struct device *get_cpu_device(unsigned cpu);
extern bool cpu_is_hotpluggable(unsigned cpu);
+extern bool arch_match_cpu_phys_id(int cpu, u64 phys_id);
extern int cpu_add_dev_attr(struct device_attribute *attr);
extern void cpu_remove_dev_attr(struct device_attribute *attr);
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index a5d52eea8232..c303d383def1 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -24,6 +24,7 @@
#ifndef __CPU_COOLING_H__
#define __CPU_COOLING_H__
+#include <linux/of.h>
#include <linux/thermal.h>
#include <linux/cpumask.h>
@@ -36,6 +37,24 @@ struct thermal_cooling_device *
cpufreq_cooling_register(const struct cpumask *clip_cpus);
/**
+ * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
+ * @np: a valid struct device_node to the cooling device device tree node.
+ * @clip_cpus: cpumask of cpus where the frequency constraints will apply
+ */
+#ifdef CONFIG_THERMAL_OF
+struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus);
+#else
+static inline struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus)
+{
+ return NULL;
+}
+#endif
+
+/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
* @cdev: thermal cooling device pointer.
*/
@@ -48,6 +67,12 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus)
{
return NULL;
}
+static inline struct thermal_cooling_device *
+of_cpufreq_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus)
+{
+ return NULL;
+}
static inline
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
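/*
 * A minimal sketch, assuming a driver probe path, of registering a
 * DT-based cpufreq cooling device with the helper declared above; the
 * function name and cpumask choice are illustrative. On builds without
 * CONFIG_THERMAL_OF the stub simply returns NULL.
 */
static struct thermal_cooling_device *
example_register_cooling(struct device_node *np)
{
	return of_cpufreq_cooling_register(np, cpu_online_mask);
}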
diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h
new file mode 100644
index 000000000000..c4d4eb8ac9fe
--- /dev/null
+++ b/include/linux/cpufeature.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_CPUFEATURE_H
+#define __LINUX_CPUFEATURE_H
+
+#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+
+#include <linux/mod_devicetable.h>
+#include <asm/cpufeature.h>
+
+/*
+ * Macros imported from <asm/cpufeature.h>:
+ * - cpu_feature(x) ordinal value of feature called 'x'
+ * - cpu_have_feature(u32 n) whether feature #n is available
+ * - MAX_CPU_FEATURES upper bound for feature ordinal values
+ * Optional:
+ * - CPU_FEATURE_TYPEFMT format string fragment for printing the cpu type
+ * - CPU_FEATURE_TYPEVAL set of values matching the format string above
+ */
+
+#ifndef CPU_FEATURE_TYPEFMT
+#define CPU_FEATURE_TYPEFMT "%s"
+#endif
+
+#ifndef CPU_FEATURE_TYPEVAL
+#define CPU_FEATURE_TYPEVAL ELF_PLATFORM
+#endif
+
+/*
+ * Use module_cpu_feature_match(feature, module_init_function) to
+ * declare that
+ * a) the module shall be probed upon discovery of CPU feature 'feature'
+ * (typically at boot time using udev)
+ * b) the module must not be loaded if CPU feature 'feature' is not present
+ * (not even by manual insmod).
+ *
+ * For a list of legal values for 'feature', please consult the file
+ * 'asm/cpufeature.h' of your favorite architecture.
+ */
+#define module_cpu_feature_match(x, __init) \
+static struct cpu_feature const cpu_feature_match_ ## x[] = \
+ { { .feature = cpu_feature(x) }, { } }; \
+MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \
+ \
+static int cpu_feature_match_ ## x ## _init(void) \
+{ \
+ if (!cpu_have_feature(cpu_feature(x))) \
+ return -ENODEV; \
+ return __init(); \
+} \
+module_init(cpu_feature_match_ ## x ## _init)
+
+#endif
+#endif
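/*
 * A minimal sketch of module_cpu_feature_match(); the AES feature name
 * and init function are illustrative, valid names come from the
 * architecture's <asm/cpufeature.h>.
 */
static int __init example_aes_init(void)
{
	return 0;	/* real driver initialisation would go here */
}
module_cpu_feature_match(AES, example_aes_init);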
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1a81b7470bc4..69cb5cc81013 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -307,6 +307,18 @@ struct freq_attr {
static struct freq_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)
+#ifdef CONFIG_CPU_FREQ
+void cpufreq_suspend(void);
+void cpufreq_resume(void);
+#else
+static inline void cpufreq_suspend(void) {}
+static inline void cpufreq_resume(void) {}
+#endif
+
+/*********************************************************************
+ * CPUFREQ NOTIFIER INTERFACE *
+ *********************************************************************/
+
#define cpufreq_freq_attr_ro_perm(_name, _perm) \
static struct freq_attr _name = \
__ATTR(_name, _perm, show_##_name, NULL)
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 8f0406230a0a..1312cde68504 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -111,6 +111,7 @@ struct cpuidle_driver {
struct cpuidle_state states[CPUIDLE_STATE_MAX];
int state_count;
int safe_state_index;
+ struct cpumask *cpumask;
};
#ifdef CONFIG_CPU_IDLE
diff --git a/include/linux/device.h b/include/linux/device.h
index c0a126125325..c8709398a754 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -26,6 +26,7 @@
#include <linux/atomic.h>
#include <linux/ratelimit.h>
#include <linux/uidgid.h>
+#include <linux/gfp.h>
#include <asm/device.h>
struct device;
@@ -568,8 +569,24 @@ extern void devres_close_group(struct device *dev, void *id);
extern void devres_remove_group(struct device *dev, void *id);
extern int devres_release_group(struct device *dev, void *id);
-/* managed kzalloc/kfree for device drivers, no kmalloc, always use kzalloc */
-extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp);
+/* managed devm_k.alloc/kfree for device drivers */
+extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp);
+static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp)
+{
+ return devm_kmalloc(dev, size, gfp | __GFP_ZERO);
+}
+static inline void *devm_kmalloc_array(struct device *dev,
+ size_t n, size_t size, gfp_t flags)
+{
+ if (size != 0 && n > SIZE_MAX / size)
+ return NULL;
+ return devm_kmalloc(dev, n * size, flags);
+}
+static inline void *devm_kcalloc(struct device *dev,
+ size_t n, size_t size, gfp_t flags)
+{
+ return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO);
+}
extern void devm_kfree(struct device *dev, void *p);
void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
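/*
 * A minimal sketch of the overflow-checked helpers above; the element
 * count and type are illustrative. The allocation is zeroed and freed
 * automatically when the device detaches.
 */
static inline u32 *example_alloc_table(struct device *dev, size_t n)
{
	return devm_kcalloc(dev, n, sizeof(u32), GFP_KERNEL);
}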
@@ -698,7 +715,7 @@ struct device {
struct dma_coherent_mem *dma_mem; /* internal for coherent mem
override */
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
struct cma *cma_area; /* contiguous memory area for dma
allocations */
#endif
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 01b5c84be828..3b28f937d959 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -57,7 +57,7 @@ struct cma;
struct page;
struct device;
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
/*
* There is always at least global CMA area and a few optional device
@@ -67,9 +67,53 @@ struct device;
extern struct cma *dma_contiguous_default_area;
+static inline struct cma *dev_get_cma_area(struct device *dev)
+{
+ if (dev && dev->cma_area)
+ return dev->cma_area;
+ return dma_contiguous_default_area;
+}
+
+static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
+{
+ if (dev)
+ dev->cma_area = cma;
+}
+
+static inline void dma_contiguous_set_default(struct cma *cma)
+{
+ dma_contiguous_default_area = cma;
+}
+
void dma_contiguous_reserve(phys_addr_t addr_limit);
-int dma_declare_contiguous(struct device *dev, phys_addr_t size,
- phys_addr_t base, phys_addr_t limit);
+
+int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
+ phys_addr_t limit, struct cma **res_cma);
+
+/**
+ * dma_declare_contiguous() - reserve area for contiguous memory handling
+ * for particular device
+ * @dev: Pointer to device structure.
+ * @size: Size of the reserved memory.
+ * @base: Start address of the reserved memory (optional, 0 for any).
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ *
+ * This function reserves memory for a specified device. It should be
+ * called by board-specific code while the early allocator (memblock or
+ * bootmem) is still active.
+ */
+
+static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
+ phys_addr_t base, phys_addr_t limit)
+{
+ struct cma *cma;
+ int ret;
+ ret = dma_contiguous_reserve_area(size, base, limit, &cma);
+ if (ret == 0)
+ dev_set_cma_area(dev, cma);
+
+ return ret;
+}
struct page *dma_alloc_from_contiguous(struct device *dev, int count,
unsigned int order);
@@ -80,8 +124,22 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
#define MAX_CMA_AREAS (0)
+static inline struct cma *dev_get_cma_area(struct device *dev)
+{
+ return NULL;
+}
+
+static inline void dev_set_cma_area(struct device *dev, struct cma *cma) { }
+
+static inline void dma_contiguous_set_default(struct cma *cma) { }
+
static inline void dma_contiguous_reserve(phys_addr_t limit) { }
+static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
+ phys_addr_t limit, struct cma **res_cma) {
+ return -ENOSYS;
+}
+
static inline
int dma_declare_contiguous(struct device *dev, phys_addr_t size,
phys_addr_t base, phys_addr_t limit)
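/*
 * A minimal sketch, assuming early board code, of carving out a 16 MiB
 * default CMA area with the new dma_contiguous_reserve_area(); the size
 * (SZ_16M from <linux/sizes.h>) and placement are illustrative.
 */
static void __init example_reserve_cma(void)
{
	struct cma *cma;

	/* 16 MiB, anywhere below the 4 GiB boundary */
	if (dma_contiguous_reserve_area(SZ_16M, 0, DMA_BIT_MASK(32), &cma) == 0)
		dma_contiguous_set_default(cma);
}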
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 94af41858513..48ef6f50d86c 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -97,6 +97,30 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
}
#endif
+/*
+ * Set both the DMA mask and the coherent DMA mask to the same thing.
+ * Note that we don't check the return value from dma_set_coherent_mask()
+ * as the DMA API guarantees that the coherent DMA mask can be set to
+ * the same or smaller than the streaming DMA mask.
+ */
+static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask)
+{
+ int rc = dma_set_mask(dev, mask);
+ if (rc == 0)
+ dma_set_coherent_mask(dev, mask);
+ return rc;
+}
+
+/*
+ * Similar to the above, except it deals with the case where the device
+ * does not have dev->dma_mask appropriately setup.
+ */
+static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
+{
+ dev->dma_mask = &dev->coherent_dma_mask;
+ return dma_set_mask_and_coherent(dev, mask);
+}
+
extern u64 dma_get_required_mask(struct device *dev);
static inline unsigned int dma_get_max_seg_size(struct device *dev)
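/*
 * A minimal probe-time sketch of dma_set_mask_and_coherent(); the 32-bit
 * mask is illustrative. The coherent mask is set only if the streaming
 * mask was accepted.
 */
static inline int example_setup_dma(struct device *dev)
{
	return dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
}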
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2bc0ad78d058..ae3426a4879c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -39,6 +39,8 @@
typedef unsigned long efi_status_t;
typedef u8 efi_bool_t;
typedef u16 efi_char16_t; /* UNICODE character */
+typedef u64 efi_physical_addr_t;
+typedef void *efi_handle_t;
typedef struct {
@@ -96,6 +98,7 @@ typedef struct {
#define EFI_MEMORY_DESCRIPTOR_VERSION 1
#define EFI_PAGE_SHIFT 12
+#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
typedef struct {
u32 type;
@@ -150,6 +153,102 @@ typedef struct {
u8 sets_to_zero;
} efi_time_cap_t;
+typedef struct {
+ efi_table_hdr_t hdr;
+ u32 raise_tpl;
+ u32 restore_tpl;
+ u32 allocate_pages;
+ u32 free_pages;
+ u32 get_memory_map;
+ u32 allocate_pool;
+ u32 free_pool;
+ u32 create_event;
+ u32 set_timer;
+ u32 wait_for_event;
+ u32 signal_event;
+ u32 close_event;
+ u32 check_event;
+ u32 install_protocol_interface;
+ u32 reinstall_protocol_interface;
+ u32 uninstall_protocol_interface;
+ u32 handle_protocol;
+ u32 __reserved;
+ u32 register_protocol_notify;
+ u32 locate_handle;
+ u32 locate_device_path;
+ u32 install_configuration_table;
+ u32 load_image;
+ u32 start_image;
+ u32 exit;
+ u32 unload_image;
+ u32 exit_boot_services;
+ u32 get_next_monotonic_count;
+ u32 stall;
+ u32 set_watchdog_timer;
+ u32 connect_controller;
+ u32 disconnect_controller;
+ u32 open_protocol;
+ u32 close_protocol;
+ u32 open_protocol_information;
+ u32 protocols_per_handle;
+ u32 locate_handle_buffer;
+ u32 locate_protocol;
+ u32 install_multiple_protocol_interfaces;
+ u32 uninstall_multiple_protocol_interfaces;
+ u32 calculate_crc32;
+ u32 copy_mem;
+ u32 set_mem;
+ u32 create_event_ex;
+} __packed efi_boot_services_32_t;
+
+typedef struct {
+ efi_table_hdr_t hdr;
+ u64 raise_tpl;
+ u64 restore_tpl;
+ u64 allocate_pages;
+ u64 free_pages;
+ u64 get_memory_map;
+ u64 allocate_pool;
+ u64 free_pool;
+ u64 create_event;
+ u64 set_timer;
+ u64 wait_for_event;
+ u64 signal_event;
+ u64 close_event;
+ u64 check_event;
+ u64 install_protocol_interface;
+ u64 reinstall_protocol_interface;
+ u64 uninstall_protocol_interface;
+ u64 handle_protocol;
+ u64 __reserved;
+ u64 register_protocol_notify;
+ u64 locate_handle;
+ u64 locate_device_path;
+ u64 install_configuration_table;
+ u64 load_image;
+ u64 start_image;
+ u64 exit;
+ u64 unload_image;
+ u64 exit_boot_services;
+ u64 get_next_monotonic_count;
+ u64 stall;
+ u64 set_watchdog_timer;
+ u64 connect_controller;
+ u64 disconnect_controller;
+ u64 open_protocol;
+ u64 close_protocol;
+ u64 open_protocol_information;
+ u64 protocols_per_handle;
+ u64 locate_handle_buffer;
+ u64 locate_protocol;
+ u64 install_multiple_protocol_interfaces;
+ u64 uninstall_multiple_protocol_interfaces;
+ u64 calculate_crc32;
+ u64 copy_mem;
+ u64 set_mem;
+ u64 create_event_ex;
+} __packed efi_boot_services_64_t;
+
/*
* EFI Boot Services table
*/
@@ -157,11 +256,13 @@ typedef struct {
efi_table_hdr_t hdr;
void *raise_tpl;
void *restore_tpl;
- void *allocate_pages;
- void *free_pages;
- void *get_memory_map;
- void *allocate_pool;
- void *free_pool;
+ efi_status_t (*allocate_pages)(int, int, unsigned long,
+ efi_physical_addr_t *);
+ efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long);
+ efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *,
+ unsigned long *, u32 *);
+ efi_status_t (*allocate_pool)(int, unsigned long, void **);
+ efi_status_t (*free_pool)(void *);
void *create_event;
void *set_timer;
void *wait_for_event;
@@ -171,7 +272,7 @@ typedef struct {
void *install_protocol_interface;
void *reinstall_protocol_interface;
void *uninstall_protocol_interface;
- void *handle_protocol;
+ efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **);
void *__reserved;
void *register_protocol_notify;
void *locate_handle;
@@ -181,7 +282,7 @@ typedef struct {
void *start_image;
void *exit;
void *unload_image;
- void *exit_boot_services;
+ efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long);
void *get_next_monotonic_count;
void *stall;
void *set_watchdog_timer;
@@ -226,6 +327,15 @@ typedef enum {
EfiPciIoAttributeOperationMaximum
} EFI_PCI_IO_PROTOCOL_ATTRIBUTE_OPERATION;
+typedef struct {
+ u32 read;
+ u32 write;
+} efi_pci_io_protocol_access_32_t;
+
+typedef struct {
+ u64 read;
+ u64 write;
+} efi_pci_io_protocol_access_64_t;
typedef struct {
void *read;
@@ -233,6 +343,46 @@ typedef struct {
} efi_pci_io_protocol_access_t;
typedef struct {
+ u32 poll_mem;
+ u32 poll_io;
+ efi_pci_io_protocol_access_32_t mem;
+ efi_pci_io_protocol_access_32_t io;
+ efi_pci_io_protocol_access_32_t pci;
+ u32 copy_mem;
+ u32 map;
+ u32 unmap;
+ u32 allocate_buffer;
+ u32 free_buffer;
+ u32 flush;
+ u32 get_location;
+ u32 attributes;
+ u32 get_bar_attributes;
+ u32 set_bar_attributes;
+ uint64_t romsize;
+ void *romimage;
+} efi_pci_io_protocol_32;
+
+typedef struct {
+ u64 poll_mem;
+ u64 poll_io;
+ efi_pci_io_protocol_access_64_t mem;
+ efi_pci_io_protocol_access_64_t io;
+ efi_pci_io_protocol_access_64_t pci;
+ u64 copy_mem;
+ u64 map;
+ u64 unmap;
+ u64 allocate_buffer;
+ u64 free_buffer;
+ u64 flush;
+ u64 get_location;
+ u64 attributes;
+ u64 get_bar_attributes;
+ u64 set_bar_attributes;
+ uint64_t romsize;
+ void *romimage;
+} efi_pci_io_protocol_64;
+
+typedef struct {
void *poll_mem;
void *poll_io;
efi_pci_io_protocol_access_t mem;
@@ -287,20 +437,56 @@ typedef struct {
typedef struct {
efi_table_hdr_t hdr;
- unsigned long get_time;
- unsigned long set_time;
- unsigned long get_wakeup_time;
- unsigned long set_wakeup_time;
- unsigned long set_virtual_address_map;
- unsigned long convert_pointer;
- unsigned long get_variable;
- unsigned long get_next_variable;
- unsigned long set_variable;
- unsigned long get_next_high_mono_count;
- unsigned long reset_system;
- unsigned long update_capsule;
- unsigned long query_capsule_caps;
- unsigned long query_variable_info;
+ u32 get_time;
+ u32 set_time;
+ u32 get_wakeup_time;
+ u32 set_wakeup_time;
+ u32 set_virtual_address_map;
+ u32 convert_pointer;
+ u32 get_variable;
+ u32 get_next_variable;
+ u32 set_variable;
+ u32 get_next_high_mono_count;
+ u32 reset_system;
+ u32 update_capsule;
+ u32 query_capsule_caps;
+ u32 query_variable_info;
+} efi_runtime_services_32_t;
+
+typedef struct {
+ efi_table_hdr_t hdr;
+ u64 get_time;
+ u64 set_time;
+ u64 get_wakeup_time;
+ u64 set_wakeup_time;
+ u64 set_virtual_address_map;
+ u64 convert_pointer;
+ u64 get_variable;
+ u64 get_next_variable;
+ u64 set_variable;
+ u64 get_next_high_mono_count;
+ u64 reset_system;
+ u64 update_capsule;
+ u64 query_capsule_caps;
+ u64 query_variable_info;
+} efi_runtime_services_64_t;
+
+typedef struct {
+ efi_table_hdr_t hdr;
+ void *get_time;
+ void *set_time;
+ void *get_wakeup_time;
+ void *set_wakeup_time;
+ void *set_virtual_address_map;
+ void *convert_pointer;
+ void *get_variable;
+ void *get_next_variable;
+ void *set_variable;
+ void *get_next_high_mono_count;
+ void *reset_system;
+ void *update_capsule;
+ void *query_capsule_caps;
+ void *query_variable_info;
} efi_runtime_services_t;
typedef efi_status_t efi_get_time_t (efi_time_t *tm, efi_time_cap_t *tc);
@@ -389,6 +575,9 @@ typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long si
#define EFI_FILE_SYSTEM_GUID \
EFI_GUID( 0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
+#define DEVICE_TREE_GUID \
+ EFI_GUID( 0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 )
+
typedef struct {
efi_guid_t guid;
u64 table;
@@ -404,6 +593,12 @@ typedef struct {
unsigned long table;
} efi_config_table_t;
+typedef struct {
+ efi_guid_t guid;
+ const char *name;
+ unsigned long *ptr;
+} efi_config_table_type_t;
+
#define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
#define EFI_2_30_SYSTEM_TABLE_REVISION ((2 << 16) | (30))
@@ -472,6 +667,46 @@ struct efi_memory_map {
unsigned long desc_size;
};
+struct efi_fdt_params {
+ u64 system_table;
+ u64 mmap;
+ u32 mmap_size;
+ u32 desc_size;
+ u32 desc_ver;
+};
+
+typedef struct {
+ u32 revision;
+ u32 parent_handle;
+ u32 system_table;
+ u32 device_handle;
+ u32 file_path;
+ u32 reserved;
+ u32 load_options_size;
+ u32 load_options;
+ u32 image_base;
+ __aligned_u64 image_size;
+ unsigned int image_code_type;
+ unsigned int image_data_type;
+ unsigned long unload;
+} efi_loaded_image_32_t;
+
+typedef struct {
+ u32 revision;
+ u64 parent_handle;
+ u64 system_table;
+ u64 device_handle;
+ u64 file_path;
+ u64 reserved;
+ u32 load_options_size;
+ u64 load_options;
+ u64 image_base;
+ __aligned_u64 image_size;
+ unsigned int image_code_type;
+ unsigned int image_data_type;
+ unsigned long unload;
+} efi_loaded_image_64_t;
+
typedef struct {
u32 revision;
void *parent_handle;
@@ -488,10 +723,6 @@ typedef struct {
unsigned long unload;
} efi_loaded_image_t;
-typedef struct {
- u64 revision;
- void *open_volume;
-} efi_file_io_interface_t;
typedef struct {
u64 size;
@@ -506,18 +737,56 @@ typedef struct {
typedef struct {
u64 revision;
- void *open;
- void *close;
+ u32 open;
+ u32 close;
+ u32 delete;
+ u32 read;
+ u32 write;
+ u32 get_position;
+ u32 set_position;
+ u32 get_info;
+ u32 set_info;
+ u32 flush;
+} efi_file_handle_32_t;
+
+typedef struct {
+ u64 revision;
+ u64 open;
+ u64 close;
+ u64 delete;
+ u64 read;
+ u64 write;
+ u64 get_position;
+ u64 set_position;
+ u64 get_info;
+ u64 set_info;
+ u64 flush;
+} efi_file_handle_64_t;
+
+typedef struct _efi_file_handle {
+ u64 revision;
+ efi_status_t (*open)(struct _efi_file_handle *,
+ struct _efi_file_handle **,
+ efi_char16_t *, u64, u64);
+ efi_status_t (*close)(struct _efi_file_handle *);
void *delete;
- void *read;
+ efi_status_t (*read)(struct _efi_file_handle *, unsigned long *,
+ void *);
void *write;
void *get_position;
void *set_position;
- void *get_info;
+ efi_status_t (*get_info)(struct _efi_file_handle *, efi_guid_t *,
+ unsigned long *, void *);
void *set_info;
void *flush;
} efi_file_handle_t;
+typedef struct _efi_file_io_interface {
+ u64 revision;
+ int (*open_volume)(struct _efi_file_io_interface *,
+ efi_file_handle_t **);
+} efi_file_io_interface_t;
+
#define EFI_FILE_MODE_READ 0x0000000000000001
#define EFI_FILE_MODE_WRITE 0x0000000000000002
#define EFI_FILE_MODE_CREATE 0x8000000000000000
@@ -552,6 +821,8 @@ extern struct efi {
efi_get_next_high_mono_count_t *get_next_high_mono_count;
efi_reset_system_t *reset_system;
efi_set_virtual_address_map_t *set_virtual_address_map;
+ struct efi_memory_map *memmap;
+ unsigned long flags;
} efi;
static inline int
@@ -587,6 +858,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon
}
#endif
extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
+extern int efi_config_init(efi_config_table_type_t *arch_tables);
extern u64 efi_get_iobase (void);
extern u32 efi_mem_type (unsigned long phys_addr);
extern u64 efi_mem_attributes (unsigned long phys_addr);
@@ -597,8 +869,15 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource,
extern unsigned long efi_get_time(void);
extern int efi_set_rtc_mmss(unsigned long nowtime);
extern void efi_reserve_boot_services(void);
+extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose);
extern struct efi_memory_map memmap;
+/* Iterate through an efi_memory_map */
+#define for_each_efi_memory_desc(m, md) \
+ for ((md) = (m)->map; \
+ (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
+ (md) = (void *)(md) + (m)->desc_size)
+
/**
* efi_range_is_wc - check the WC bit on an address range
* @start: starting kvirt address
@@ -636,18 +915,17 @@ extern int __init efi_setup_pcdp_console(char *);
#define EFI_64BIT 5 /* Is the firmware 64-bit? */
#ifdef CONFIG_EFI
-# ifdef CONFIG_X86
-extern int efi_enabled(int facility);
-# else
-static inline int efi_enabled(int facility)
+/*
+ * Test whether the above EFI_* bits are enabled.
+ */
+static inline bool efi_enabled(int feature)
{
- return 1;
+ return test_bit(feature, &efi.flags) != 0;
}
-# endif
#else
-static inline int efi_enabled(int facility)
+static inline bool efi_enabled(int feature)
{
- return 0;
+ return false;
}
#endif
@@ -769,8 +1047,10 @@ struct efivars {
* and we use a page for reading/writing.
*/
+#define EFI_VAR_NAME_LEN 1024
+
struct efi_variable {
- efi_char16_t VariableName[1024/sizeof(efi_char16_t)];
+ efi_char16_t VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)];
efi_guid_t VendorGuid;
unsigned long DataSize;
__u8 Data[1024];
@@ -782,6 +1062,26 @@ struct efivar_entry {
struct efi_variable var;
struct list_head list;
struct kobject kobj;
+ bool scanning;
+ bool deleting;
+};
+
+struct efi_simple_text_output_protocol_32 {
+ u32 reset;
+ u32 output_string;
+ u32 test_string;
+};
+
+struct efi_simple_text_output_protocol_64 {
+ u64 reset;
+ u64 output_string;
+ u64 test_string;
+};
+
+struct efi_simple_text_output_protocol {
+ void *reset;
+ efi_status_t (*output_string)(void *, void *);
+ void *test_string;
};
extern struct list_head efivar_sysfs_list;
@@ -832,7 +1132,7 @@ int efivar_entry_iter(int (*func)(struct efivar_entry *, void *),
struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
struct list_head *head, bool remove);
-bool efivar_validate(struct efi_variable *var, u8 *data, unsigned long len);
+bool efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len);
extern struct work_struct efivar_work;
void efivar_run_worker(void);
@@ -840,6 +1140,8 @@ void efivar_run_worker(void);
#if defined(CONFIG_EFI_VARS) || defined(CONFIG_EFI_VARS_MODULE)
int efivars_sysfs_init(void);
+#define EFIVARS_DATA_SIZE_MAX 1024
+
#endif /* CONFIG_EFI_VARS */
#endif /* _LINUX_EFI_H */
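/*
 * A minimal sketch of the for_each_efi_memory_desc() iterator above; the
 * page-counting helper is illustrative and assumes a populated map.
 */
static inline u64 example_efi_total_pages(struct efi_memory_map *map)
{
	efi_memory_desc_t *md;
	u64 pages = 0;

	for_each_efi_memory_desc(map, md)
		pages += md->num_pages;
	return pages;
}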
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7a13848d635c..38eb5dbbf1cb 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -611,25 +611,27 @@ static inline void __ftrace_enabled_restore(int enabled)
#endif
}
-#ifndef HAVE_ARCH_CALLER_ADDR
+/* All archs should have this, but we define it for consistency */
+#ifndef ftrace_return_address0
+# define ftrace_return_address0 __builtin_return_address(0)
+#endif
+
+/* Archs may use other ways for ADDR1 and beyond */
+#ifndef ftrace_return_address
# ifdef CONFIG_FRAME_POINTER
-# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
-# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
-# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
-# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
-# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
-# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
+# define ftrace_return_address(n) __builtin_return_address(n)
# else
-# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-# define CALLER_ADDR1 0UL
-# define CALLER_ADDR2 0UL
-# define CALLER_ADDR3 0UL
-# define CALLER_ADDR4 0UL
-# define CALLER_ADDR5 0UL
-# define CALLER_ADDR6 0UL
+# define ftrace_return_address(n) 0UL
# endif
-#endif /* ifndef HAVE_ARCH_CALLER_ADDR */
+#endif
+
+#define CALLER_ADDR0 ((unsigned long)ftrace_return_address0)
+#define CALLER_ADDR1 ((unsigned long)ftrace_return_address(1))
+#define CALLER_ADDR2 ((unsigned long)ftrace_return_address(2))
+#define CALLER_ADDR3 ((unsigned long)ftrace_return_address(3))
+#define CALLER_ADDR4 ((unsigned long)ftrace_return_address(4))
+#define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
+#define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
#ifdef CONFIG_IRQSOFF_TRACER
extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
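/*
 * A minimal sketch of the rebuilt CALLER_ADDRn macros; the hook and
 * pr_debug() usage are illustrative. CALLER_ADDR0 always resolves to
 * __builtin_return_address(0); deeper frames fall back to 0UL when
 * frame pointers are disabled.
 */
static inline void example_report_caller(void)
{
	pr_debug("entered from %pS\n", (void *)CALLER_ADDR0);
}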
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a193bb3e4138..3d36eb0ff0d8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
} while (0)
extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
pmd_t *pmd);
-#if HPAGE_PMD_ORDER > MAX_ORDER
+#if HPAGE_PMD_ORDER >= MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
extern int hugepage_madvise(struct vm_area_struct *vma,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c8958f563116..dca09c0cd3cc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -70,6 +70,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
int dequeue_hwpoisoned_huge_page(struct page *page);
void copy_huge_page(struct page *dst, struct page *src);
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
+#endif
+
extern unsigned long hugepages_treat_as_movable;
extern const unsigned long hugetlb_zero, hugetlb_infinity;
extern int sysctl_hugetlb_shm_group;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
new file mode 100644
index 000000000000..03a4ea37ba86
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H
+#define __LINUX_IRQCHIP_ARM_GIC_V3_H
+
+#include <asm/sysreg.h>
+
+/*
+ * Distributor registers. We assume we're running non-secure, with ARE
+ * being set. Secure-only and non-ARE registers are not described.
+ */
+#define GICD_CTLR 0x0000
+#define GICD_TYPER 0x0004
+#define GICD_IIDR 0x0008
+#define GICD_STATUSR 0x0010
+#define GICD_SETSPI_NSR 0x0040
+#define GICD_CLRSPI_NSR 0x0048
+#define GICD_SETSPI_SR 0x0050
+#define GICD_CLRSPI_SR 0x0058
+#define GICD_SEIR 0x0068
+#define GICD_ISENABLER 0x0100
+#define GICD_ICENABLER 0x0180
+#define GICD_ISPENDR 0x0200
+#define GICD_ICPENDR 0x0280
+#define GICD_ISACTIVER 0x0300
+#define GICD_ICACTIVER 0x0380
+#define GICD_IPRIORITYR 0x0400
+#define GICD_ICFGR 0x0C00
+#define GICD_IROUTER 0x6000
+#define GICD_PIDR2 0xFFE8
+
+#define GICD_CTLR_RWP (1U << 31)
+#define GICD_CTLR_ARE_NS (1U << 4)
+#define GICD_CTLR_ENABLE_G1A (1U << 1)
+#define GICD_CTLR_ENABLE_G1 (1U << 0)
+
+#define GICD_IROUTER_SPI_MODE_ONE (0U << 31)
+#define GICD_IROUTER_SPI_MODE_ANY (1U << 31)
+
+#define GIC_PIDR2_ARCH_MASK 0xf0
+#define GIC_PIDR2_ARCH_GICv3 0x30
+#define GIC_PIDR2_ARCH_GICv4 0x40
+
+/*
+ * Re-Distributor registers, offsets from RD_base
+ */
+#define GICR_CTLR GICD_CTLR
+#define GICR_IIDR 0x0004
+#define GICR_TYPER 0x0008
+#define GICR_STATUSR GICD_STATUSR
+#define GICR_WAKER 0x0014
+#define GICR_SETLPIR 0x0040
+#define GICR_CLRLPIR 0x0048
+#define GICR_SEIR GICD_SEIR
+#define GICR_PROPBASER 0x0070
+#define GICR_PENDBASER 0x0078
+#define GICR_INVLPIR 0x00A0
+#define GICR_INVALLR 0x00B0
+#define GICR_SYNCR 0x00C0
+#define GICR_MOVLPIR 0x0100
+#define GICR_MOVALLR 0x0110
+#define GICR_PIDR2 GICD_PIDR2
+
+#define GICR_WAKER_ProcessorSleep (1U << 1)
+#define GICR_WAKER_ChildrenAsleep (1U << 2)
+
+/*
+ * Re-Distributor registers, offsets from SGI_base
+ */
+#define GICR_ISENABLER0 GICD_ISENABLER
+#define GICR_ICENABLER0 GICD_ICENABLER
+#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ICPENDR0 GICD_ICPENDR
+#define GICR_ISACTIVER0 GICD_ISACTIVER
+#define GICR_ICACTIVER0 GICD_ICACTIVER
+#define GICR_IPRIORITYR0 GICD_IPRIORITYR
+#define GICR_ICFGR0 GICD_ICFGR
+
+#define GICR_TYPER_VLPIS (1U << 1)
+#define GICR_TYPER_LAST (1U << 4)
+
+/*
+ * CPU interface registers
+ */
+#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1)
+#define ICC_CTLR_EL1_EOImode_drop (1U << 1)
+#define ICC_SRE_EL1_SRE (1U << 0)
+
+/*
+ * Hypervisor interface registers (SRE only)
+ */
+#define ICH_LR_VIRTUAL_ID_MASK ((1UL << 32) - 1)
+
+#define ICH_LR_EOI (1UL << 41)
+#define ICH_LR_GROUP (1UL << 60)
+#define ICH_LR_STATE (3UL << 62)
+#define ICH_LR_PENDING_BIT (1UL << 62)
+#define ICH_LR_ACTIVE_BIT (1UL << 63)
+
+#define ICH_MISR_EOI (1 << 0)
+#define ICH_MISR_U (1 << 1)
+
+#define ICH_HCR_EN (1 << 0)
+#define ICH_HCR_UIE (1 << 1)
+
+#define ICH_VMCR_CTLR_SHIFT 0
+#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT)
+#define ICH_VMCR_BPR1_SHIFT 18
+#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT)
+#define ICH_VMCR_BPR0_SHIFT 21
+#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT)
+#define ICH_VMCR_PMR_SHIFT 24
+#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
+
+#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
+#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
+#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
+#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
+#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+
+#define ICC_IAR1_EL1_SPURIOUS 0x3ff
+
+#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5)
+
+#define ICC_SRE_EL2_SRE (1 << 0)
+#define ICC_SRE_EL2_ENABLE (1 << 3)
+
+/*
+ * System register definitions
+ */
+#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4)
+#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0)
+#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1)
+#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2)
+#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3)
+#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5)
+#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7)
+
+#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x)
+#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x)
+
+#define ICH_LR0_EL2 __LR0_EL2(0)
+#define ICH_LR1_EL2 __LR0_EL2(1)
+#define ICH_LR2_EL2 __LR0_EL2(2)
+#define ICH_LR3_EL2 __LR0_EL2(3)
+#define ICH_LR4_EL2 __LR0_EL2(4)
+#define ICH_LR5_EL2 __LR0_EL2(5)
+#define ICH_LR6_EL2 __LR0_EL2(6)
+#define ICH_LR7_EL2 __LR0_EL2(7)
+#define ICH_LR8_EL2 __LR8_EL2(0)
+#define ICH_LR9_EL2 __LR8_EL2(1)
+#define ICH_LR10_EL2 __LR8_EL2(2)
+#define ICH_LR11_EL2 __LR8_EL2(3)
+#define ICH_LR12_EL2 __LR8_EL2(4)
+#define ICH_LR13_EL2 __LR8_EL2(5)
+#define ICH_LR14_EL2 __LR8_EL2(6)
+#define ICH_LR15_EL2 __LR8_EL2(7)
+
+#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
+#define ICH_AP0R0_EL2 __AP0Rx_EL2(0)
+#define ICH_AP0R1_EL2 __AP0Rx_EL2(1)
+#define ICH_AP0R2_EL2 __AP0Rx_EL2(2)
+#define ICH_AP0R3_EL2 __AP0Rx_EL2(3)
+
+#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x)
+#define ICH_AP1R0_EL2 __AP1Rx_EL2(0)
+#define ICH_AP1R1_EL2 __AP1Rx_EL2(1)
+#define ICH_AP1R2_EL2 __AP1Rx_EL2(2)
+#define ICH_AP1R3_EL2 __AP1Rx_EL2(3)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+static inline void gic_write_eoir(u64 irq)
+{
+ asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq));
+ isb();
+}
+
+#endif
+
+#endif
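/*
 * A minimal sketch pairing an acknowledged INTID with gic_write_eoir()
 * above; reading the INTID from ICC_IAR1_EL1 is assumed to happen
 * elsewhere.
 */
static inline void example_complete_irq(u64 irqnr)
{
	if (irqnr != ICC_IAR1_EL1_SPURIOUS)
		gic_write_eoir(irqnr);
}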
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 3e203eb23cc7..1f004b16641e 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -17,6 +17,11 @@
#define GIC_CPU_EOI 0x10
#define GIC_CPU_RUNNINGPRI 0x14
#define GIC_CPU_HIGHPRI 0x18
+#define GIC_CPU_ALIAS_BINPOINT 0x1c
+#define GIC_CPU_ACTIVEPRIO 0xd0
+#define GIC_CPU_IDENT 0xfc
+
+#define GICC_IAR_INT_ID_MASK 0x3ff
#define GIC_DIST_CTRL 0x000
#define GIC_DIST_CTR 0x004
@@ -31,6 +36,8 @@
#define GIC_DIST_TARGET 0x800
#define GIC_DIST_CONFIG 0xc00
#define GIC_DIST_SOFTINT 0xf00
+#define GIC_DIST_SGI_PENDING_CLEAR 0xf10
+#define GIC_DIST_SGI_PENDING_SET 0xf20
#define GICH_HCR 0x0
#define GICH_VTR 0x4
@@ -54,6 +61,15 @@
#define GICH_LR_ACTIVE_BIT (1 << 29)
#define GICH_LR_EOI (1 << 19)
+#define GICH_VMCR_CTRL_SHIFT 0
+#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_PRIMASK_SHIFT 27
+#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT)
+#define GICH_VMCR_BINPOINT_SHIFT 21
+#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT)
+#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18
+#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT)
+
#define GICH_MISR_EOI (1 << 0)
#define GICH_MISR_U (1 << 1)
@@ -66,6 +82,9 @@ extern struct irq_chip gic_arch_extn;
void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *,
u32 offset, struct device_node *);
void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
+void gic_cpu_if_down(void);
static inline void gic_init(unsigned int nr, int start,
void __iomem *dist , void __iomem *cpu)
@@ -73,6 +92,16 @@ static inline void gic_init(unsigned int nr, int start,
gic_init_bases(nr, start, dist, cpu, 0, NULL);
}
-#endif /* __ASSEMBLY */
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
+int gic_get_cpu_id(unsigned int cpu);
+void gic_migrate_target(unsigned int new_cpu_id);
+unsigned long gic_get_sgir_physaddr(void);
+extern const struct irq_domain_ops *gic_routable_irq_domain_ops;
+static inline void __init register_routable_domain_ops
+ (const struct irq_domain_ops *ops)
+{
+ gic_routable_irq_domain_ops = ops;
+}
+#endif /* __ASSEMBLY */
#endif
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 078bc2fc74ff..a8cc2fcffcaf 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -158,6 +158,14 @@ static inline int irq_balancing_disabled(unsigned int irq)
return desc->status_use_accessors & IRQ_NO_BALANCING_MASK;
}
+static inline int irq_is_percpu(unsigned int irq)
+{
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ return desc->status_use_accessors & IRQ_PER_CPU;
+}
+
static inline void
irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class)
{
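/*
 * A minimal sketch of irq_is_percpu() steering the request path; the
 * names and the percpu cookie are illustrative.
 */
static inline int example_request_irq(unsigned int irq, irq_handler_t handler,
				      void __percpu *pcpu_cookie, void *cookie)
{
	if (irq_is_percpu(irq))
		return request_percpu_irq(irq, handler, "example", pcpu_cookie);
	return request_irq(irq, handler, 0, "example", cookie);
}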
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8db53cfaccdb..f64e941a4213 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -129,11 +129,9 @@ static inline bool is_error_page(struct page *page)
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
-struct kvm;
-struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
-extern raw_spinlock_t kvm_lock;
+extern spinlock_t kvm_lock;
extern struct list_head vm_list;
struct kvm_io_range {
@@ -175,13 +173,12 @@ struct kvm_async_pf {
gva_t gva;
unsigned long addr;
struct kvm_arch_async_pf arch;
- struct page *page;
- bool done;
+ bool wakeup_all;
};
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch);
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
@@ -302,25 +299,6 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
};
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-
-struct kvm_irq_routing_table {
- int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
- struct kvm_kernel_irq_routing_entry *rt_entries;
- u32 nr_rt_entries;
- /*
- * Array indexed by gsi. Each entry contains list of irq chips
- * the gsi is connected to.
- */
- struct hlist_head map[0];
-};
-
-#else
-
-struct kvm_irq_routing_table {};
-
-#endif
-
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif
@@ -347,6 +325,7 @@ struct kvm {
struct mm_struct *mm; /* userspace tied to this vm */
struct kvm_memslots *memslots;
struct srcu_struct srcu;
+ struct srcu_struct irq_srcu;
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
u32 bsp_vcpu_id;
#endif
@@ -377,11 +356,12 @@ struct kvm {
struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
/*
- * Update side is protected by irq_lock and,
- * if configured, irqfds.lock.
+ * Update side is protected by irq_lock.
*/
struct kvm_irq_routing_table __rcu *irq_routing;
struct hlist_head mask_notifier_list;
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
struct hlist_head irq_ack_notifier_list;
#endif
@@ -431,7 +411,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
int __must_check vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
@@ -450,8 +430,6 @@ void kvm_exit(void);
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
- u64 last_generation);
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
{
@@ -494,9 +472,11 @@ int kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages);
+void kvm_arch_memslots_updated(struct kvm *kvm);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -518,10 +498,12 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
+ bool *writable);
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
-void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
@@ -533,7 +515,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
-void kvm_release_pfn_dirty(pfn_t pfn);
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
void kvm_set_pfn_accessed(pfn_t pfn);
@@ -560,14 +541,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
- gfn_t gfn);
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
-void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
@@ -582,15 +560,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
-int kvm_dev_ioctl_check_extension(long ext);
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
int kvm_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log, int *is_dirty);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log);
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem);
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status);
long kvm_arch_vm_ioctl(struct file *filp,
@@ -622,6 +598,8 @@ void kvm_arch_exit(void);
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
@@ -630,16 +608,14 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
-int kvm_arch_hardware_enable(void *garbage);
-void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
-void kvm_free_physmem(struct kvm *kvm);
-
void *kvm_kvzalloc(unsigned long size);
void kvm_kvfree(const void *addr);
@@ -717,6 +693,10 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask);
+int kvm_irq_map_gsi(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *entries, int gsi);
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
+
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status);
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
@@ -773,7 +753,7 @@ static inline void kvm_guest_enter(void)
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
- * is very similar to exiting to userspase from rcu point of view. In
+ * is very similar to exiting to userspace from rcu point of view. In
* addition CPU may stay in a guest mode for quite a long time (up to
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
@@ -826,13 +806,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
return gfn_to_memslot(kvm, gfn)->id;
}
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
- /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
- return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
- (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
static inline gfn_t
hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
{
@@ -856,6 +829,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
return (hpa_t)pfn << PAGE_SHIFT;
}
+static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+{
+ unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+ return kvm_is_error_hva(hva);
+}
+
static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
@@ -906,28 +886,27 @@ int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
- struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
void kvm_free_irq_routing(struct kvm *kvm);
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-
#else
static inline void kvm_free_irq_routing(struct kvm *kvm) {}
#endif
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+
#ifdef CONFIG_HAVE_KVM_EVENTFD
void kvm_eventfd_init(struct kvm *kvm);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef CONFIG_HAVE_KVM_IRQFD
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_irqfd_release(struct kvm *kvm);
-void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
+void kvm_irq_routing_update(struct kvm *);
#else
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
@@ -949,10 +928,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
static inline void kvm_irqfd_release(struct kvm *kvm) {}
#ifdef CONFIG_HAVE_KVM_IRQCHIP
-static inline void kvm_irq_routing_update(struct kvm *kvm,
- struct kvm_irq_routing_table *irq_rt)
+static inline void kvm_irq_routing_update(struct kvm *kvm)
{
- rcu_assign_pointer(kvm->irq_routing, irq_rt);
}
#endif
@@ -1013,8 +990,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
extern bool kvm_rebooting;
-struct kvm_device_ops;
-
struct kvm_device {
struct kvm_device_ops *ops;
struct kvm *kvm;
@@ -1047,6 +1022,7 @@ struct kvm_device_ops {
void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
@@ -1071,12 +1047,6 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}
-
-static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
-{
- return true;
-}
-
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
#endif
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b0bcce0ddc95..b606bb689a3e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -17,6 +17,20 @@
#ifndef __KVM_TYPES_H__
#define __KVM_TYPES_H__
+struct kvm;
+struct kvm_async_pf;
+struct kvm_device_ops;
+struct kvm_interrupt;
+struct kvm_irq_routing_table;
+struct kvm_memory_slot;
+struct kvm_one_reg;
+struct kvm_run;
+struct kvm_userspace_memory_region;
+struct kvm_vcpu;
+struct kvm_vcpu_init;
+
+enum kvm_mr_change;
+
#include <asm/types.h>
/*
diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
new file mode 100644
index 000000000000..307d9cab2026
--- /dev/null
+++ b/include/linux/mailbox_client.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CLIENT_H
+#define __MAILBOX_CLIENT_H
+
+#include <linux/of.h>
+#include <linux/device.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_client - User of a mailbox
+ * @dev: The client device
+ * @tx_block: If mbox_send_message() should block until the data is
+ * transmitted.
+ * @tx_tout: Max block period in ms before TX is assumed to have failed
+ * @knows_txdone: If the client can run the TX state machine, usually
+ * because it receives some ACK packet for each transmission.
+ * Unused if the controller already has a TX_Done/RTR IRQ.
+ * @rx_callback: Atomic callback to provide the client with received data
+ * @tx_done: Atomic callback to tell the client of data transmission
+ */
+struct mbox_client {
+ struct device *dev;
+ bool tx_block;
+ unsigned long tx_tout;
+ bool knows_txdone;
+
+ void (*rx_callback)(struct mbox_client *cl, void *mssg);
+ void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
+};
+
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index);
+int mbox_send_message(struct mbox_chan *chan, void *mssg);
+void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */
+bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */
+void mbox_free_channel(struct mbox_chan *chan); /* may sleep */
+
+#endif /* __MAILBOX_CLIENT_H */
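/*
 * A minimal client sketch against the API above; the callback body,
 * timeout and channel index are illustrative.
 */
static void example_rx(struct mbox_client *cl, void *mssg)
{
	dev_dbg(cl->dev, "message received\n");
}

static struct mbox_client example_client = {
	.rx_callback	= example_rx,
	.tx_block	= true,
	.tx_tout	= 500,		/* ms */
	.knows_txdone	= false,
};

/*
 * Typical flow: chan = mbox_request_channel(&example_client, 0);
 * mbox_send_message(chan, msg); ... mbox_free_channel(chan);
 */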
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
new file mode 100644
index 000000000000..9ee195b02444
--- /dev/null
+++ b/include/linux/mailbox_controller.h
@@ -0,0 +1,135 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CONTROLLER_H
+#define __MAILBOX_CONTROLLER_H
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/completion.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_chan_ops - methods to control mailbox channels
+ * @send_data: The API asks the MBOX controller driver, in atomic
+ * context, to try to transmit a message on the bus. Returns 0 if
+ * the data is accepted for transmission, or -EBUSY if it is
+ * rejected because the remote hasn't yet read the last data
+ * sent. Actual transmission of data is reported by the
+ * controller via mbox_chan_txdone (if it has some TX ACK irq).
+ * It must not sleep.
+ * @startup: Called when a client requests the chan. The controller
+ * may ask clients for additional communication parameters,
+ * provided via the client's chan_data. This call may
+ * block. After this call the controller must forward any
+ * data received on the chan by calling mbox_chan_received_data.
+ * The controller may do work that needs to sleep.
+ * @shutdown: Called when a client relinquishes control of a chan.
+ * This call may also block. The controller must not forward
+ * any received data anymore.
+ * The controller may do work that needs to sleep.
+ * @last_tx_done: If the controller sets 'txdone_poll', the API calls
+ * this to poll the status of the last TX. The controller must
+ * give priority to the IRQ method over polling and never
+ * set both txdone_poll and txdone_irq. Only in polling
+ * mode is 'send_data' expected to return -EBUSY.
+ * The controller may do work that needs to sleep/block.
+ * Used only if txdone_poll:=true && txdone_irq:=false
+ * @peek_data: Atomic check for any received data. Returns true if the
+ * controller has some data to push to the client, false otherwise.
+ */
+struct mbox_chan_ops {
+ int (*send_data)(struct mbox_chan *chan, void *data);
+ int (*startup)(struct mbox_chan *chan);
+ void (*shutdown)(struct mbox_chan *chan);
+ bool (*last_tx_done)(struct mbox_chan *chan);
+ bool (*peek_data)(struct mbox_chan *chan);
+};
+
+/**
+ * struct mbox_controller - Controller of a class of communication channels
+ * @dev: Device backing this controller
+ * @ops: Operations that work on each communication chan
+ * @chans: Array of channels
+ * @num_chans: Number of channels in the 'chans' array.
+ * @txdone_irq: Indicates if the controller can report to the API when
+ * the last transmitted data has been read by the remote,
+ * e.g. if it has some TX ACK irq.
+ * @txdone_poll: If the controller can read but not report TX
+ * done, e.g. some register shows the TX status but
+ * no interrupt is raised. Ignored if 'txdone_irq' is set.
+ * @txpoll_period: If 'txdone_poll' is in effect, the API polls for
+ * the last TX's status after this many milliseconds
+ * @of_xlate: Controller driver specific mapping of channel via DT
+ * @poll: API private. Used to poll for TXDONE on all channels.
+ * @period: API private. Polling period.
+ * @node: API private. To hook into list of controllers.
+ */
+struct mbox_controller {
+ struct device *dev;
+ struct mbox_chan_ops *ops;
+ struct mbox_chan *chans;
+ int num_chans;
+ bool txdone_irq;
+ bool txdone_poll;
+ unsigned txpoll_period;
+ struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox,
+ const struct of_phandle_args *sp);
+ /* Internal to API */
+ struct timer_list poll;
+ unsigned period;
+ struct list_head node;
+};
+
+/*
+ * The length of the circular buffer for queuing messages from a client.
+ * 'msg_count' tracks the number of buffered messages while 'msg_free'
+ * is the index where the next message will be buffered.
+ * It shouldn't need to be very big, because every transfer is interrupt
+ * triggered and if we have lots of data to transfer, the interrupt
+ * latencies are going to be the bottleneck, not the buffer length.
+ * Besides, mbox_send_message could be called from atomic context and
+ * the client could also queue another message from the 'tx_done' notifier
+ * of the last completed transfer.
+ * REVISIT: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN"
+ * print, it needs to be taken from a config option or some such.
+ */
+#define MBOX_TX_QUEUE_LEN 20
+
+/**
+ * struct mbox_chan - s/w representation of a communication chan
+ * @mbox: Pointer to the parent/provider of this channel
+ * @txdone_method: Way to detect TXDone chosen by the API
+ * @cl: Pointer to the current owner of this channel
+ * @tx_complete: Transmission completion
+ * @active_req: Currently active request hook
+ * @msg_count: No. of mssg currently queued
+ * @msg_free: Index of next available mssg slot
+ * @msg_data: Hook for data packet
+ * @lock: Serialise access to the channel
+ * @con_priv: Hook for controller driver to attach private data
+ */
+struct mbox_chan {
+ struct mbox_controller *mbox;
+ unsigned txdone_method;
+ struct mbox_client *cl;
+ struct completion tx_complete;
+ void *active_req;
+ unsigned msg_count, msg_free;
+ void *msg_data[MBOX_TX_QUEUE_LEN];
+ spinlock_t lock; /* Serialise access to the channel */
+ void *con_priv;
+};
+
+int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */
+void mbox_controller_unregister(struct mbox_controller *mbox); /* can sleep */
+void mbox_chan_received_data(struct mbox_chan *chan, void *data); /* atomic */
+void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */
+
+#endif /* __MAILBOX_CONTROLLER_H */
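/*
 * A minimal controller skeleton wiring the ops above; the send routine
 * body, channel count and IRQ-driven TX-done choice are illustrative.
 */
static int example_send_data(struct mbox_chan *chan, void *data)
{
	/* push 'data' to the remote here; must not sleep */
	return 0;
}

static struct mbox_chan_ops example_chan_ops = {
	.send_data	= example_send_data,
};

static struct mbox_chan example_chans[4];

static struct mbox_controller example_mbox = {
	.ops		= &example_chan_ops,
	.chans		= example_chans,
	.num_chans	= ARRAY_SIZE(example_chans),
	.txdone_irq	= true,	/* driver will call mbox_chan_txdone() */
};

/* mbox_controller_register(&example_mbox) publishes the channels. */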
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7da14357aa76..fc02c749ae77 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -52,6 +52,9 @@ extern unsigned long sysctl_admin_reserve_kbytes;
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
+#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE)
+
/*
* Linux kernel virtual memory manager primitives.
* The idea being to have a "virtual" mm in the same way
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b3bd7e737e8b..08450a6146e1 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -563,6 +563,15 @@ struct x86_cpu_id {
#define X86_MODEL_ANY 0
#define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */
+/*
+ * Generic table type for matching CPU features.
+ * @feature: the bit number of the feature (0 - 65535)
+ */
+
+struct cpu_feature {
+ __u16 feature;
+};
+
#define IPACK_ANY_FORMAT 0xff
#define IPACK_ANY_ID (~0)
struct ipack_device_id {
diff --git a/include/linux/of.h b/include/linux/of.h
index 5e9d35233a65..078a187d518c 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -264,6 +264,7 @@ extern int of_device_is_available(const struct device_node *device);
extern const void *of_get_property(const struct device_node *node,
const char *name,
int *lenp);
+extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
#define for_each_property_of_node(dn, pp) \
for (pp = dn->properties; pp != NULL; pp = pp->next)
@@ -341,6 +342,8 @@ const char *of_prop_next_string(struct property *prop, const char *cur);
s; \
s = of_prop_next_string(prop, s))
+int of_device_is_stdout_path(struct device_node *dn);
+
#else /* CONFIG_OF */
static inline const char* of_node_full_name(struct device_node *np)
@@ -451,6 +454,12 @@ static inline const void *of_get_property(const struct device_node *node,
return NULL;
}
+static inline struct device_node *of_get_cpu_node(int cpu,
+ unsigned int *thread)
+{
+ return NULL;
+}
+
static inline int of_property_read_u64(const struct device_node *np,
const char *propname, u64 *out_value)
{
@@ -497,6 +506,11 @@ static inline int of_machine_is_compatible(const char *compat)
return 0;
}
+static inline int of_device_is_stdout_path(struct device_node *dn)
+{
+ return 0;
+}
+
#define of_match_ptr(_ptr) NULL
#define of_match_node(_matches, _node) NULL
#define of_property_for_each_u32(np, propname, prop, p, u) \
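
A sketch of the new accessor (assuming the usual of_node_put() refcount convention; pass a non-NULL second argument to also receive the thread index for multi-threaded cores):

    #include <linux/of.h>

    static bool cpu_described_in_dt(int cpu)
    {
            struct device_node *np = of_get_cpu_node(cpu, NULL);

            if (!np)
                    return false;
            of_node_put(np);        /* drop the reference taken by the lookup */
            return true;
    }
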
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index ed136ad698ce..c9722fdf39c5 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -66,7 +66,7 @@ extern char *of_fdt_get_string(struct boot_param_header *blob, u32 offset);
extern void *of_fdt_get_property(struct boot_param_header *blob,
unsigned long node,
const char *name,
- unsigned long *size);
+ int *size);
extern int of_fdt_is_compatible(struct boot_param_header *blob,
unsigned long node,
const char *compat);
@@ -81,12 +81,11 @@ extern int __initdata dt_root_size_cells;
extern struct boot_param_header *initial_boot_params;
/* For scanning the flat device-tree at boot time */
-extern char *find_flat_dt_string(u32 offset);
extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
int depth, void *data),
void *data);
-extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size);
+extern const void *of_get_flat_dt_prop(unsigned long node, const char *name,
+ int *size);
extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
extern int of_flat_dt_match(unsigned long node, const char *const *matches);
extern unsigned long of_get_flat_dt_root(void);
@@ -96,9 +95,12 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
extern void early_init_dt_check_for_initrd(unsigned long node);
extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
int depth, void *data);
+extern void early_init_fdt_scan_reserved_mem(void);
extern void early_init_dt_add_memory_arch(u64 base, u64 size);
+extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size,
+ bool no_map);
extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
-extern u64 dt_mem_next_cell(int s, __be32 **cellp);
+extern u64 dt_mem_next_cell(int s, const __be32 **cellp);
/*
* If BLK_DEV_INITRD, the fdt early init code will call this function,
@@ -106,8 +108,7 @@ extern u64 dt_mem_next_cell(int s, __be32 **cellp);
* physical addresses.
*/
#ifdef CONFIG_BLK_DEV_INITRD
-extern void early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end);
+extern void early_init_dt_setup_initrd_arch(u64 start, u64 end);
#endif
/* Early flat tree scan hooks */
@@ -118,6 +119,8 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname,
extern void unflatten_device_tree(void);
extern void early_init_devtree(void *);
#else /* CONFIG_OF_FLATTREE */
+static inline void early_init_fdt_scan_reserved_mem(void) {}
+static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
static inline void unflatten_device_tree(void) {}
#endif /* CONFIG_OF_FLATTREE */
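
A sketch of a flat-tree scan callback against the constified signatures above (property name chosen only for illustration; returning 0 keeps of_scan_flat_dt() walking the tree):

    #include <linux/of_fdt.h>
    #include <linux/printk.h>

    static int __init my_fdt_scan(unsigned long node, const char *uname,
                                  int depth, void *data)
    {
            int len;
            const void *prop = of_get_flat_dt_prop(node, "compatible", &len);

            if (prop)
                    pr_debug("%s: compatible, %d bytes\n", uname, len);
            return 0;       /* continue scanning */
    }
    /* called early, e.g.: of_scan_flat_dt(my_fdt_scan, NULL); */
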
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
new file mode 100644
index 000000000000..9b1fbb7f29fc
--- /dev/null
+++ b/include/linux/of_reserved_mem.h
@@ -0,0 +1,53 @@
+#ifndef __OF_RESERVED_MEM_H
+#define __OF_RESERVED_MEM_H
+
+struct device;
+struct of_phandle_args;
+struct reserved_mem_ops;
+
+struct reserved_mem {
+ const char *name;
+ unsigned long fdt_node;
+ const struct reserved_mem_ops *ops;
+ phys_addr_t base;
+ phys_addr_t size;
+ void *priv;
+};
+
+struct reserved_mem_ops {
+ void (*device_init)(struct reserved_mem *rmem,
+ struct device *dev);
+ void (*device_release)(struct reserved_mem *rmem,
+ struct device *dev);
+};
+
+typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem,
+ unsigned long node, const char *uname);
+
+#ifdef CONFIG_OF_RESERVED_MEM
+void fdt_init_reserved_mem(void);
+void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
+ phys_addr_t base, phys_addr_t size);
+
+#define RESERVEDMEM_OF_DECLARE(name, compat, init) \
+ static const struct of_device_id __reservedmem_of_table_##name \
+ __used __section(__reservedmem_of_table) \
+ = { .compatible = compat, \
+ .data = (init == (reservedmem_of_init_fn)NULL) ? \
+ init : init }
+
+#else
+static inline void fdt_init_reserved_mem(void) { }
+static inline void fdt_reserved_mem_save_node(unsigned long node,
+ const char *uname, phys_addr_t base, phys_addr_t size) { }
+
+#define RESERVEDMEM_OF_DECLARE(name, compat, init) \
+ static const struct of_device_id __reservedmem_of_table_##name \
+ __attribute__((unused)) \
+ = { .compatible = compat, \
+ .data = (init == (reservedmem_of_init_fn)NULL) ? \
+ init : init }
+
+#endif
+
+#endif /* __OF_RESERVED_MEM_H */
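
A sketch of registering an early handler for a reserved-memory node with the macro above (compatible string and names hypothetical; the init callback matches the reservedmem_of_init_fn signature in this header):

    #include <linux/of_reserved_mem.h>
    #include <linux/printk.h>

    static int __init my_pool_init(struct reserved_mem *rmem,
                                   unsigned long node, const char *uname)
    {
            pr_info("%s: %pa bytes reserved at %pa\n",
                    uname, &rmem->size, &rmem->base);
            return 0;
    }
    RESERVEDMEM_OF_DECLARE(my_pool, "vendor,my-reserved-pool", my_pool_init);
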
diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 4aad3cea69ae..04c760d56bb8 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -158,47 +158,4 @@ static inline struct pinctrl * __must_check devm_pinctrl_get_select_default(
return devm_pinctrl_get_select(dev, PINCTRL_STATE_DEFAULT);
}
-#ifdef CONFIG_PINCONF
-
-extern int pin_config_get(const char *dev_name, const char *name,
- unsigned long *config);
-extern int pin_config_set(const char *dev_name, const char *name,
- unsigned long config);
-extern int pin_config_group_get(const char *dev_name,
- const char *pin_group,
- unsigned long *config);
-extern int pin_config_group_set(const char *dev_name,
- const char *pin_group,
- unsigned long config);
-
-#else
-
-static inline int pin_config_get(const char *dev_name, const char *name,
- unsigned long *config)
-{
- return 0;
-}
-
-static inline int pin_config_set(const char *dev_name, const char *name,
- unsigned long config)
-{
- return 0;
-}
-
-static inline int pin_config_group_get(const char *dev_name,
- const char *pin_group,
- unsigned long *config)
-{
- return 0;
-}
-
-static inline int pin_config_group_set(const char *dev_name,
- const char *pin_group,
- unsigned long config)
-{
- return 0;
-}
-
-#endif
-
#endif /* __LINUX_PINCTRL_CONSUMER_H */
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 6aa238096622..201e68de2d11 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -29,12 +29,21 @@
* if for example some other pin is going to drive the signal connected
* to it for a while. Pins used for input are usually always high
* impedance.
+ * @PIN_CONFIG_BIAS_BUS_HOLD: the pin will be set to weakly latch, so that it
+ * weakly drives the last value on a tristate bus, also known as a "bus
+ * holder", "bus keeper" or "repeater". This allows another device on the
+ * bus to change the value by driving the bus high or low and switching to
+ * tristate. The argument is ignored.
* @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high
* impedance to VDD). If the argument is != 0 pull-up is enabled,
* if it is 0, pull-up is disabled.
* @PIN_CONFIG_BIAS_PULL_DOWN: the pin will be pulled down (usually with high
* impedance to GROUND). If the argument is != 0 pull-down is enabled,
* if it is 0, pull-down is disabled.
+ * @PIN_CONFIG_BIAS_PULL_PIN_DEFAULT: the pin will be pulled up or down based
+ * on embedded knowledge of the controller, like current mux function.
+ * If the argument is != 0 pull up/down is enabled, if it is 0,
+ * the pull is disabled.
* @PIN_CONFIG_DRIVE_PUSH_PULL: the pin will be driven actively high and
* low, this is the most typical case and is typically achieved with two
* active transistors on the output. Setting this config will enable
@@ -48,6 +57,9 @@
* argument is ignored.
* @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current
* passed as argument. The argument is in mA.
+ * @PIN_CONFIG_INPUT_ENABLE: enable the pin's input. Note that this does not
+ * affect the pin's ability to drive output. 1 enables input, 0 disables
+ * input.
* @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin.
* If the argument != 0, schmitt-trigger mode is enabled. If it's 0,
* schmitt-trigger mode is disabled.
@@ -57,7 +69,7 @@
* setting pins to this mode.
* @PIN_CONFIG_INPUT_DEBOUNCE: this will configure the pin to debounce mode,
* which means it will wait for signals to settle when reading inputs. The
- * argument gives the debounce time on a custom format. Setting the
+ * argument gives the debounce time in usecs. Setting the
* argument to zero turns debouncing off.
* @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power
* supplies, the argument to this parameter (on a custom format) tells
@@ -78,12 +90,15 @@
enum pin_config_param {
PIN_CONFIG_BIAS_DISABLE,
PIN_CONFIG_BIAS_HIGH_IMPEDANCE,
+ PIN_CONFIG_BIAS_BUS_HOLD,
PIN_CONFIG_BIAS_PULL_UP,
PIN_CONFIG_BIAS_PULL_DOWN,
+ PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
PIN_CONFIG_DRIVE_PUSH_PULL,
PIN_CONFIG_DRIVE_OPEN_DRAIN,
PIN_CONFIG_DRIVE_OPEN_SOURCE,
PIN_CONFIG_DRIVE_STRENGTH,
+ PIN_CONFIG_INPUT_ENABLE,
PIN_CONFIG_INPUT_SCHMITT_ENABLE,
PIN_CONFIG_INPUT_SCHMITT,
PIN_CONFIG_INPUT_DEBOUNCE,
@@ -122,6 +137,58 @@ static inline unsigned long pinconf_to_config_packed(enum pin_config_param param
return PIN_CONF_PACKED(param, argument);
}
+#ifdef CONFIG_OF
+
+#include <linux/device.h>
+#include <linux/pinctrl/machine.h>
+struct pinctrl_dev;
+struct pinctrl_map;
+
+int pinconf_generic_dt_subnode_to_map_new(struct pinctrl_dev *pctldev,
+ struct device_node *np, struct pinctrl_map **map,
+ unsigned *reserved_maps, unsigned *num_maps,
+ enum pinctrl_map_type type);
+int pinconf_generic_dt_node_to_map_new(struct pinctrl_dev *pctldev,
+ struct device_node *np_config, struct pinctrl_map **map,
+ unsigned *num_maps, enum pinctrl_map_type type);
+
+static inline int pinconf_generic_dt_node_to_map_group(
+ struct pinctrl_dev *pctldev, struct device_node *np_config,
+ struct pinctrl_map **map, unsigned *num_maps)
+{
+ return pinconf_generic_dt_node_to_map_new(pctldev, np_config, map, num_maps,
+ PIN_MAP_TYPE_CONFIGS_GROUP);
+}
+
+static inline int pinconf_generic_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np, struct pinctrl_map **map,
+ unsigned *reserved_maps, unsigned *num_maps)
+{
+ return pinconf_generic_dt_subnode_to_map_new(pctldev, np, map,
+ reserved_maps, num_maps,
+ PIN_MAP_TYPE_CONFIGS_PIN);
+}
+
+static inline int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np_config, struct pinctrl_map **map,
+ unsigned *num_maps)
+{
+ return pinconf_generic_dt_node_to_map_new(pctldev, np_config,
+ map, num_maps,
+ PIN_MAP_TYPE_CONFIGS_PIN);
+}
+
+
+static inline int pinconf_generic_dt_node_to_map_pin(
+ struct pinctrl_dev *pctldev, struct device_node *np_config,
+ struct pinctrl_map **map, unsigned *num_maps)
+{
+ return pinconf_generic_dt_node_to_map_new(pctldev, np_config, map, num_maps,
+ PIN_MAP_TYPE_CONFIGS_PIN);
+}
+
+#endif
+
#endif /* CONFIG_GENERIC_PINCONF */
#endif /* __LINUX_PINCTRL_PINCONF_GENERIC_H */
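
A sketch of how a pinctrl driver can plug these helpers into its pinctrl_ops so DT parsing is handled generically (my_dt_free_map is a hypothetical driver-supplied routine that releases what the helper allocated):

    #include <linux/pinctrl/pinctrl.h>
    #include <linux/pinctrl/pinconf-generic.h>

    static void my_dt_free_map(struct pinctrl_dev *pctldev,
                               struct pinctrl_map *map, unsigned num_maps);

    static const struct pinctrl_ops my_pctrl_ops = {
            /* group count/name callbacks elided for brevity */
            .dt_node_to_map = pinconf_generic_dt_node_to_map_group,
            .dt_free_map    = my_dt_free_map,
    };
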
diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
index 1ad4f31ef6b8..e6a68e476aef 100644
--- a/include/linux/pinctrl/pinconf.h
+++ b/include/linux/pinctrl/pinconf.h
@@ -48,12 +48,20 @@ struct pinconf_ops {
int (*pin_config_set) (struct pinctrl_dev *pctldev,
unsigned pin,
unsigned long config);
+ int (*pin_config_set_bulk) (struct pinctrl_dev *pctldev,
+ unsigned pin,
+ unsigned long *configs,
+ unsigned num_configs);
int (*pin_config_group_get) (struct pinctrl_dev *pctldev,
unsigned selector,
unsigned long *config);
int (*pin_config_group_set) (struct pinctrl_dev *pctldev,
unsigned selector,
unsigned long config);
+ int (*pin_config_group_set_bulk) (struct pinctrl_dev *pctldev,
+ unsigned selector,
+ unsigned long *configs,
+ unsigned num_configs);
int (*pin_config_dbg_parse_modify) (struct pinctrl_dev *pctldev,
const char *arg,
unsigned long *config);
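
The _bulk variants let the core hand a whole array of config words to the driver in one callback. A minimal sketch of a driver-side handler (my_apply_one_config is hypothetical; a real driver would typically batch the hardware write-out instead of looping):

    int my_apply_one_config(struct pinctrl_dev *pctldev, unsigned pin,
                            unsigned long config);      /* hypothetical */

    static int my_pin_config_set_bulk(struct pinctrl_dev *pctldev, unsigned pin,
                                      unsigned long *configs,
                                      unsigned num_configs)
    {
            unsigned i;
            int ret;

            for (i = 0; i < num_configs; i++) {
                    ret = my_apply_one_config(pctldev, pin, configs[i]);
                    if (ret)
                            return ret;
            }
            return 0;
    }
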
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 2c2a9e8d8578..9adb1067af71 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -32,10 +32,12 @@ struct device_node;
* pins, pads or other muxable units in this struct
* @number: unique pin number from the global pin number space
* @name: a name for this pin
+ * @drv_data: driver-defined per-pin data. pinctrl core does not touch this
*/
struct pinctrl_pin_desc {
unsigned number;
const char *name;
+ void *drv_data;
};
/* Convenience macro to define a single named or anonymous pin descriptor */
diff --git a/include/linux/mailbox.h b/include/linux/pl320-ipc.h
index 5161f63ec1c8..5161f63ec1c8 100644
--- a/include/linux/mailbox.h
+++ b/include/linux/pl320-ipc.h
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f87e9a8d364f..1e287bacafc7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -890,6 +890,13 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
bool cpus_share_cache(int this_cpu, int that_cpu);
+#ifdef CONFIG_SCHED_HMP
+struct hmp_domain {
+ struct cpumask cpus;
+ struct cpumask possible_cpus;
+ struct list_head hmp_domains;
+};
+#endif /* CONFIG_SCHED_HMP */
#else /* CONFIG_SMP */
struct sched_domain_attr;
@@ -936,8 +943,22 @@ struct sched_avg {
u64 last_runnable_update;
s64 decay_count;
unsigned long load_avg_contrib;
+ unsigned long load_avg_ratio;
+#ifdef CONFIG_SCHED_HMP
+ u64 hmp_last_up_migration;
+ u64 hmp_last_down_migration;
+#endif
+ u32 usage_avg_sum;
};
+#ifdef CONFIG_SCHED_HMP
+/*
+ * We want to avoid boosting any processes forked from init (PID 1)
+ * and kthreadd (assumed to be PID 2).
+ */
+#define hmp_task_should_forkboost(task) ((task->parent && task->parent->pid > 2))
+#endif
+
#ifdef CONFIG_SCHEDSTATS
struct sched_statistics {
u64 wait_start;
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 87d4bbc773fc..c2b355fd921a 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -279,6 +279,22 @@ static inline int uart_poll_timeout(struct uart_port *port)
/*
* Console helpers.
*/
+struct earlycon_device {
+ struct console *con;
+ struct uart_port port;
+ char options[16]; /* e.g., 115200n8 */
+ unsigned int baud;
+};
+int setup_earlycon(char *buf, const char *match,
+ int (*setup)(struct earlycon_device *, const char *));
+
+#define EARLYCON_DECLARE(name, func) \
+static int __init name ## _setup_earlycon(char *buf) \
+{ \
+ return setup_earlycon(buf, __stringify(name), func); \
+} \
+early_param("earlycon", name ## _setup_earlycon);
+
struct uart_port *uart_get_console(struct uart_port *ports, int nr,
struct console *c);
void uart_parse_options(char *options, int *baud, int *parity, int *bits,
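
A sketch of hooking a UART driver into the new earlycon machinery (the myuart names are hypothetical): booting with earlycon=myuart,<mmio-address> runs the setup hook, which installs an early polled write-out routine on the console.

    static void myuart_early_write(struct console *con, const char *s,
                                   unsigned n);         /* hypothetical writer */

    static int __init myuart_early_setup(struct earlycon_device *device,
                                         const char *options)
    {
            if (!device->port.membase)
                    return -ENODEV;
            device->con->write = myuart_early_write;
            return 0;
    }
    EARLYCON_DECLARE(myuart, myuart_early_setup);
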
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index a386a1cbb6e1..55fce47b0095 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -25,6 +25,7 @@
#ifndef __THERMAL_H__
#define __THERMAL_H__
+#include <linux/of.h>
#include <linux/idr.h>
#include <linux/device.h>
#include <linux/workqueue.h>
@@ -143,6 +144,7 @@ struct thermal_cooling_device {
int id;
char type[THERMAL_NAME_LENGTH];
struct device device;
+ struct device_node *np;
void *devdata;
const struct thermal_cooling_device_ops *ops;
bool updated; /* true if the cooling device does not need update */
@@ -172,7 +174,7 @@ struct thermal_zone_device {
int emul_temperature;
int passive;
unsigned int forced_passive;
- const struct thermal_zone_device_ops *ops;
+ struct thermal_zone_device_ops *ops;
const struct thermal_zone_params *tzp;
struct thermal_governor *governor;
struct list_head thermal_instances;
@@ -214,6 +216,14 @@ struct thermal_bind_params {
/* Structure to define Thermal Zone parameters */
struct thermal_zone_params {
char governor_name[THERMAL_NAME_LENGTH];
+
+ /*
+ * A boolean to indicate whether the thermal-to-hwmon sysfs interface
+ * is required. When no_hwmon == false, a hwmon sysfs interface
+ * will be created. When no_hwmon == true, nothing will be done.
+ */
+ bool no_hwmon;
+
int num_tbps; /* Number of tbp entries */
struct thermal_bind_params *tbp;
};
@@ -224,8 +234,31 @@ struct thermal_genl_event {
};
/* Function declarations */
+#ifdef CONFIG_THERMAL_OF
+struct thermal_zone_device *
+thermal_zone_of_sensor_register(struct device *dev, int id,
+ void *data, int (*get_temp)(void *, long *),
+ int (*get_trend)(void *, long *));
+void thermal_zone_of_sensor_unregister(struct device *dev,
+ struct thermal_zone_device *tz);
+#else
+static inline struct thermal_zone_device *
+thermal_zone_of_sensor_register(struct device *dev, int id,
+ void *data, int (*get_temp)(void *, long *),
+ int (*get_trend)(void *, long *))
+{
+ return NULL;
+}
+
+static inline
+void thermal_zone_of_sensor_unregister(struct device *dev,
+ struct thermal_zone_device *tz)
+{
+}
+
+#endif
struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
- void *, const struct thermal_zone_device_ops *,
+ void *, struct thermal_zone_device_ops *,
const struct thermal_zone_params *, int, int);
void thermal_zone_device_unregister(struct thermal_zone_device *);
@@ -238,6 +271,9 @@ void thermal_zone_device_update(struct thermal_zone_device *);
struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
const struct thermal_cooling_device_ops *);
+struct thermal_cooling_device *
+thermal_of_cooling_device_register(struct device_node *np, char *, void *,
+ const struct thermal_cooling_device_ops *);
void thermal_cooling_device_unregister(struct thermal_cooling_device *);
struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp);
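
A sketch of a sensor driver registering with the new OF thermal interface (my_sensor types and reads are hypothetical; temperatures are reported in millidegrees Celsius by convention). Note the !CONFIG_THERMAL_OF stub above returns NULL, so callers should tolerate both NULL and error pointers:

    struct my_sensor;                                   /* hypothetical state */
    long my_sensor_read_millicelsius(struct my_sensor *s);

    static int my_get_temp(void *data, long *temp)
    {
            *temp = my_sensor_read_millicelsius(data);
            return 0;
    }

    /* in probe():
     *      tz = thermal_zone_of_sensor_register(dev, 0, sensor,
     *                                           my_get_temp, NULL);
     *      if (IS_ERR_OR_NULL(tz))
     *              ...run without a thermal zone...
     */
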
diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h
index ea7168a68081..c1191ab4cb98 100644
--- a/include/linux/vexpress.h
+++ b/include/linux/vexpress.h
@@ -68,7 +68,8 @@
*/
struct vexpress_config_bridge_info {
const char *name;
- void *(*func_get)(struct device *dev, struct device_node *node);
+ void *(*func_get)(struct device *dev, struct device_node *node,
+ const char *id);
void (*func_put)(void *func);
int (*func_exec)(void *func, int offset, bool write, u32 *data);
};
@@ -87,12 +88,17 @@ void vexpress_config_complete(struct vexpress_config_bridge *bridge,
struct vexpress_config_func;
-struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
- struct device_node *node);
+struct vexpress_config_func *__vexpress_config_func_get(
+ struct vexpress_config_bridge *bridge,
+ struct device *dev,
+ struct device_node *node,
+ const char *id);
+#define vexpress_config_func_get(bridge, id) \
+ __vexpress_config_func_get(bridge, NULL, NULL, id)
#define vexpress_config_func_get_by_dev(dev) \
- __vexpress_config_func_get(dev, NULL)
+ __vexpress_config_func_get(NULL, dev, NULL, NULL)
#define vexpress_config_func_get_by_node(node) \
- __vexpress_config_func_get(NULL, node)
+ __vexpress_config_func_get(NULL, NULL, node, NULL)
void vexpress_config_func_put(struct vexpress_config_func *func);
/* Both may sleep! */
@@ -120,7 +126,53 @@ void vexpress_sysreg_of_early_init(void);
struct clk *vexpress_osc_setup(struct device *dev);
void vexpress_osc_of_setup(struct device_node *node);
+struct clk *vexpress_clk_register_spc(const char *name, int cluster_id);
+void vexpress_clk_of_register_spc(void);
+
void vexpress_clk_init(void __iomem *sp810_base);
void vexpress_clk_of_init(void);
+/* SPC */
+
+#define VEXPRESS_SPC_WAKE_INTR_IRQ(cluster, cpu) \
+ (1 << (4 * (cluster) + (cpu)))
+#define VEXPRESS_SPC_WAKE_INTR_FIQ(cluster, cpu) \
+ (1 << (7 * (cluster) + (cpu)))
+#define VEXPRESS_SPC_WAKE_INTR_SWDOG (1 << 10)
+#define VEXPRESS_SPC_WAKE_INTR_GTIMER (1 << 11)
+#define VEXPRESS_SPC_WAKE_INTR_MASK 0xFFF
+
+#ifdef CONFIG_VEXPRESS_SPC
+extern bool vexpress_spc_check_loaded(void);
+extern void vexpress_spc_set_cpu_wakeup_irq(u32 cpu, u32 cluster, bool set);
+extern void vexpress_spc_set_global_wakeup_intr(bool set);
+extern int vexpress_spc_get_freq_table(u32 cluster, u32 **fptr);
+extern int vexpress_spc_get_performance(u32 cluster, u32 *freq);
+extern int vexpress_spc_set_performance(u32 cluster, u32 freq);
+extern void vexpress_spc_write_resume_reg(u32 cluster, u32 cpu, u32 addr);
+extern int vexpress_spc_get_nb_cpus(u32 cluster);
+extern void vexpress_spc_powerdown_enable(u32 cluster, bool enable);
+#else
+static inline bool vexpress_spc_check_loaded(void) { return false; }
+static inline void vexpress_spc_set_cpu_wakeup_irq(u32 cpu, u32 cluster,
+ bool set) { }
+static inline void vexpress_spc_set_global_wakeup_intr(bool set) { }
+static inline int vexpress_spc_get_freq_table(u32 cluster, u32 **fptr)
+{
+ return -ENODEV;
+}
+static inline int vexpress_spc_get_performance(u32 cluster, u32 *freq)
+{
+ return -ENODEV;
+}
+static inline int vexpress_spc_set_performance(u32 cluster, u32 freq)
+{
+ return -ENODEV;
+}
+static inline void vexpress_spc_write_resume_reg(u32 cluster,
+ u32 cpu, u32 addr) { }
+static inline int vexpress_spc_get_nb_cpus(u32 cluster) { return -ENODEV; }
+static inline void vexpress_spc_powerdown_enable(u32 cluster, bool enable) { }
+#endif
+
#endif
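
Since every SPC accessor has a stub returning -ENODEV or false, callers can be written once and build either way. A sketch (the freq-table return convention is assumed from the stub's -ENODEV):

    static int my_cluster_freq_table(u32 cluster, u32 **table)
    {
            if (!vexpress_spc_check_loaded())
                    return -ENODEV; /* firmware/driver not up yet */
            return vexpress_spc_get_freq_table(cluster, table);
    }
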
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 9044769f2296..2d4e3d793f79 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -195,7 +195,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
-void refresh_cpu_vm_stats(int);
+bool refresh_cpu_vm_stats(int);
void refresh_zone_stat_thresholds(void);
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ff28cf578d01..ef4df5b86e85 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -303,6 +303,33 @@ enum {
 WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */
WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */
+ /*
+ * Per-cpu workqueues are generally preferred because they tend to
+ * show better performance thanks to cache locality. Per-cpu
+ * workqueues exclude the scheduler from choosing the CPU to
+ * execute the worker threads, which has an unfortunate side effect
+ * of increasing power consumption.
+ *
+ * The scheduler considers a CPU idle if it doesn't have any task
+ * to execute and tries to keep idle cores idle to conserve power;
+ * however, for example, a per-cpu work item scheduled from an
+ * interrupt handler on an idle CPU will force the scheduler to
+ * execute the work item on that CPU, breaking the idleness, which in
+ * turn may lead to more scheduling choices that are sub-optimal
+ * in terms of power consumption.
+ *
+ * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
+ * but become unbound if workqueue.power_efficient kernel param is
+ * specified. Per-cpu workqueues which are observed to
+ * contribute significantly to power consumption are
+ * marked with this flag, and enabling the power_efficient mode
+ * leads to noticeable power saving at the cost of a small
+ * performance disadvantage.
+ *
+ * http://thread.gmane.org/gmane.linux.kernel/1480396
+ */
+ WQ_POWER_EFFICIENT = 1 << 7,
+
__WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
__WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
@@ -333,11 +360,19 @@ enum {
*
* system_freezable_wq is equivalent to system_wq except that it's
* freezable.
+ *
+ * *_power_efficient_wq are inclined towards saving power and converted
+ * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
+ * they are the same as their non-power-efficient counterparts - e.g.
+ * system_power_efficient_wq is identical to system_wq if
+ * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
*/
extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_unbound_wq;
extern struct workqueue_struct *system_freezable_wq;
+extern struct workqueue_struct *system_power_efficient_wq;
+extern struct workqueue_struct *system_freezable_power_efficient_wq;
static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
{
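
A sketch of queueing on the new system workqueue: work with no per-cpu locality requirement can target system_power_efficient_wq, which becomes unbound when workqueue.power_efficient is enabled and behaves exactly like system_wq otherwise.

    #include <linux/workqueue.h>

    static void my_work_fn(struct work_struct *work)
    {
            /* background housekeeping; CPU placement does not matter */
    }
    static DECLARE_WORK(my_work, my_work_fn);

    static void kick_housekeeping(void)
    {
            queue_work(system_power_efficient_wq, &my_work);
    }
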
diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h
index 04598f1efd77..a2e15cad76a0 100644
--- a/include/sound/soc-dpcm.h
+++ b/include/sound/soc-dpcm.h
@@ -11,6 +11,7 @@
#ifndef __LINUX_SND_SOC_DPCM_H
#define __LINUX_SND_SOC_DPCM_H
+#include <linux/slab.h>
#include <linux/list.h>
#include <sound/pcm.h>
@@ -135,4 +136,25 @@ int soc_dpcm_be_digital_mute(struct snd_soc_pcm_runtime *fe, int mute);
int soc_dpcm_debugfs_add(struct snd_soc_pcm_runtime *rtd);
int soc_dpcm_runtime_update(struct snd_soc_dapm_widget *);
+int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
+ int stream, struct snd_soc_dapm_widget_list **list_);
+int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
+ int stream, struct snd_soc_dapm_widget_list **list, int new);
+int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream);
+int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream);
+void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream);
+void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream);
+int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream);
+int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream);
+int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd);
+int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream);
+int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir,
+ int event);
+
+static inline void dpcm_path_put(struct snd_soc_dapm_widget_list **list)
+{
+ kfree(*list);
+}
+
+
#endif
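
dpcm_path_get() allocates the DAPM widget list, so each successful call is expected to be paired with dpcm_path_put() (a kfree of the list, per the inline above). A sketch, with error/return conventions assumed from the surrounding API:

    #include <sound/soc-dpcm.h>

    static int my_fe_rescan(struct snd_soc_pcm_runtime *fe, int stream)
    {
            struct snd_soc_dapm_widget_list *list;
            int paths = dpcm_path_get(fe, stream, &list);

            if (paths < 0)
                    return paths;
            dpcm_process_paths(fe, stream, &list, 1);   /* 1: connect new BEs */
            dpcm_path_put(&list);
            return 0;
    }
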
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 85c15226103b..5bbdc653a826 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1063,6 +1063,7 @@ struct snd_soc_pcm_runtime {
/* Dynamic PCM BE runtime data */
struct snd_soc_dpcm_runtime dpcm[2];
+ int fe_compr;
long pmdown_time;
unsigned char pop_wait:1;
diff --git a/include/trace/events/arm-ipi.h b/include/trace/events/arm-ipi.h
new file mode 100644
index 000000000000..5d3bd21827be
--- /dev/null
+++ b/include/trace/events/arm-ipi.h
@@ -0,0 +1,100 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM arm-ipi
+
+#if !defined(_TRACE_ARM_IPI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ARM_IPI_H
+
+#include <linux/tracepoint.h>
+
+#define show_arm_ipi_name(val) \
+ __print_symbolic(val, \
+ { 0, "IPI_WAKEUP" }, \
+ { 1, "IPI_TIMER" }, \
+ { 2, "IPI_RESCHEDULE" }, \
+ { 3, "IPI_CALL_FUNC" }, \
+ { 4, "IPI_CALL_FUNC_SINGLE" }, \
+ { 5, "IPI_CPU_STOP" }, \
+ { 6, "IPI_COMPLETION" }, \
+ { 7, "IPI_CPU_BACKTRACE" })
+
+DECLARE_EVENT_CLASS(arm_ipi,
+
+ TP_PROTO(unsigned int ipi_nr),
+
+ TP_ARGS(ipi_nr),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ipi )
+ ),
+
+ TP_fast_assign(
+ __entry->ipi = ipi_nr;
+ ),
+
+ TP_printk("ipi=%u [action=%s]", __entry->ipi,
+ show_arm_ipi_name(__entry->ipi))
+);
+
+/**
+ * arm_ipi_entry - called in the arm-generic ipi handler immediately before
+ * entering ipi-type handler
+ * @ipi_nr: ipi number
+ *
+ * When used in combination with the arm_ipi_exit tracepoint
+ * we can determine the ipi handler runtime.
+ */
+DEFINE_EVENT(arm_ipi, arm_ipi_entry,
+
+ TP_PROTO(unsigned int ipi_nr),
+
+ TP_ARGS(ipi_nr)
+);
+
+/**
+ * arm_ipi_exit - called in the arm-generic ipi handler immediately
+ * after the ipi-type handler returns
+ * @ipi_nr: ipi number
+ *
+ * When used in combination with the arm_ipi_entry tracepoint
+ * we can determine the ipi handler runtime.
+ */
+DEFINE_EVENT(arm_ipi, arm_ipi_exit,
+
+ TP_PROTO(unsigned int ipi_nr),
+
+ TP_ARGS(ipi_nr)
+);
+
+/**
+ * arm_ipi_send - called as the ipi target mask is built, immediately
+ * before the register is written
+ * @ipi_nr: ipi number
+ * @dest: cpu to send to
+ *
+ * When used in combination with the arm_ipi_entry tracepoint
+ * we can determine the ipi raise-to-run latency.
+ */
+TRACE_EVENT(arm_ipi_send,
+
+ TP_PROTO(unsigned int ipi_nr, int dest),
+
+ TP_ARGS(ipi_nr, dest),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, ipi )
+ __field( int , dest )
+ ),
+
+ TP_fast_assign(
+ __entry->ipi = ipi_nr;
+ __entry->dest = dest;
+ ),
+
+ TP_printk("dest=%d ipi=%u [action=%s]", __entry->dest,
+ __entry->ipi, show_arm_ipi_name(__entry->ipi))
+);
+
+#endif /* _TRACE_ARM_IPI_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7005d1109ec9..908925ace776 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
__entry->errno < 0 ? -__entry->errno : __entry->reason)
);
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
TRACE_EVENT(kvm_set_irq,
TP_PROTO(unsigned int gsi, int level, int irq_source_id),
TP_ARGS(gsi, level, irq_source_id),
@@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq,
TP_printk("gsi %u level %d source %d",
__entry->gsi, __entry->level, __entry->irq_source_id)
);
-#endif
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
#if defined(__KVM_HAVE_IOAPIC)
#define kvm_deliver_mode \
@@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq,
#endif /* defined(__KVM_HAVE_IOAPIC) */
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
TRACE_EVENT(kvm_ack_irq,
TP_PROTO(unsigned int irqchip, unsigned int pin),
@@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq,
#endif
);
-#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
TRACE_EVENT(
kvm_async_pf_completed,
- TP_PROTO(unsigned long address, struct page *page, u64 gva),
- TP_ARGS(address, page, gva),
+ TP_PROTO(unsigned long address, u64 gva),
+ TP_ARGS(address, gva),
TP_STRUCT__entry(
__field(unsigned long, address)
- __field(pfn_t, pfn)
__field(u64, gva)
),
TP_fast_assign(
__entry->address = address;
- __entry->pfn = page ? page_to_pfn(page) : 0;
__entry->gva = gva;
),
- TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva,
- __entry->address, __entry->pfn)
+ TP_printk("gva %#llx address %#lx", __entry->gva,
+ __entry->address)
);
#endif
diff --git a/include/trace/events/power_cpu_migrate.h b/include/trace/events/power_cpu_migrate.h
new file mode 100644
index 000000000000..f76dd4de625e
--- /dev/null
+++ b/include/trace/events/power_cpu_migrate.h
@@ -0,0 +1,67 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM power
+
+#if !defined(_TRACE_POWER_CPU_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWER_CPU_MIGRATE_H
+
+#include <linux/tracepoint.h>
+
+#define __cpu_migrate_proto \
+ TP_PROTO(u64 timestamp, \
+ u32 cpu_hwid)
+#define __cpu_migrate_args \
+ TP_ARGS(timestamp, \
+ cpu_hwid)
+
+DECLARE_EVENT_CLASS(cpu_migrate,
+
+ __cpu_migrate_proto,
+ __cpu_migrate_args,
+
+ TP_STRUCT__entry(
+ __field(u64, timestamp )
+ __field(u32, cpu_hwid )
+ ),
+
+ TP_fast_assign(
+ __entry->timestamp = timestamp;
+ __entry->cpu_hwid = cpu_hwid;
+ ),
+
+ TP_printk("timestamp=%llu cpu_hwid=0x%08lX",
+ (unsigned long long)__entry->timestamp,
+ (unsigned long)__entry->cpu_hwid
+ )
+);
+
+#define __define_cpu_migrate_event(name) \
+ DEFINE_EVENT(cpu_migrate, cpu_migrate_##name, \
+ __cpu_migrate_proto, \
+ __cpu_migrate_args \
+ )
+
+__define_cpu_migrate_event(begin);
+__define_cpu_migrate_event(finish);
+__define_cpu_migrate_event(current);
+
+#undef __define_cpu_migrate
+#undef __cpu_migrate_proto
+#undef __cpu_migrate_args
+
+/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
+#ifndef _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING
+#define _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING
+
+/*
+ * Set from_phys_cpu and to_phys_cpu to CPU_MIGRATE_ALL_CPUS to indicate
+ * a whole-cluster migration:
+ */
+#define CPU_MIGRATE_ALL_CPUS 0x80000000U
+#endif
+
+#endif /* _TRACE_POWER_CPU_MIGRATE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE power_cpu_migrate
+#include <trace/define_trace.h>
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index e5586caff67a..2afcb71857fd 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -430,6 +430,280 @@ TRACE_EVENT(sched_pi_setprio,
__entry->oldprio, __entry->newprio)
);
+/*
+ * Tracepoint for showing tracked load contribution.
+ */
+TRACE_EVENT(sched_task_load_contrib,
+
+ TP_PROTO(struct task_struct *tsk, unsigned long load_contrib),
+
+ TP_ARGS(tsk, load_contrib),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(unsigned long, load_contrib)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->load_contrib = load_contrib;
+ ),
+
+ TP_printk("comm=%s pid=%d load_contrib=%lu",
+ __entry->comm, __entry->pid,
+ __entry->load_contrib)
+);
+
+/*
+ * Tracepoint for showing tracked task runnable ratio [0..1023].
+ */
+TRACE_EVENT(sched_task_runnable_ratio,
+
+ TP_PROTO(struct task_struct *tsk, unsigned long ratio),
+
+ TP_ARGS(tsk, ratio),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(unsigned long, ratio)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->ratio = ratio;
+ ),
+
+ TP_printk("comm=%s pid=%d ratio=%lu",
+ __entry->comm, __entry->pid,
+ __entry->ratio)
+);
+
+/*
+ * Tracepoint for showing tracked rq runnable ratio [0..1023].
+ */
+TRACE_EVENT(sched_rq_runnable_ratio,
+
+ TP_PROTO(int cpu, unsigned long ratio),
+
+ TP_ARGS(cpu, ratio),
+
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(unsigned long, ratio)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->ratio = ratio;
+ ),
+
+ TP_printk("cpu=%d ratio=%lu",
+ __entry->cpu,
+ __entry->ratio)
+);
+
+/*
+ * Tracepoint for showing tracked rq runnable load.
+ */
+TRACE_EVENT(sched_rq_runnable_load,
+
+ TP_PROTO(int cpu, u64 load),
+
+ TP_ARGS(cpu, load),
+
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(u64, load)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->load = load;
+ ),
+
+ TP_printk("cpu=%d load=%llu",
+ __entry->cpu,
+ __entry->load)
+);
+
+TRACE_EVENT(sched_rq_nr_running,
+
+ TP_PROTO(int cpu, unsigned int nr_running, int nr_iowait),
+
+ TP_ARGS(cpu, nr_running, nr_iowait),
+
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(unsigned int, nr_running)
+ __field(int, nr_iowait)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->nr_running = nr_running;
+ __entry->nr_iowait = nr_iowait;
+ ),
+
+ TP_printk("cpu=%d nr_running=%u nr_iowait=%d",
+ __entry->cpu,
+ __entry->nr_running, __entry->nr_iowait)
+);
+
+/*
+ * Tracepoint for showing tracked task cpu usage ratio [0..1023].
+ */
+TRACE_EVENT(sched_task_usage_ratio,
+
+ TP_PROTO(struct task_struct *tsk, unsigned long ratio),
+
+ TP_ARGS(tsk, ratio),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(unsigned long, ratio)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->ratio = ratio;
+ ),
+
+ TP_printk("comm=%s pid=%d ratio=%lu",
+ __entry->comm, __entry->pid,
+ __entry->ratio)
+);
+
+/*
+ * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations,
+ * marking the forced transition of runnable or running tasks.
+ */
+TRACE_EVENT(sched_hmp_migrate_force_running,
+
+ TP_PROTO(struct task_struct *tsk, int running),
+
+ TP_ARGS(tsk, running),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(int, running)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->running = running;
+ ),
+
+ TP_printk("running=%d comm=%s",
+ __entry->running, __entry->comm)
+);
+
+/*
+ * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations,
+ * marking the forced transition of runnable or running
+ * tasks when a task is about to go idle.
+ */
+TRACE_EVENT(sched_hmp_migrate_idle_running,
+
+ TP_PROTO(struct task_struct *tsk, int running),
+
+ TP_ARGS(tsk, running),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(int, running)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->running = running;
+ ),
+
+ TP_printk("running=%d comm=%s",
+ __entry->running, __entry->comm)
+);
+
+/*
+ * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations.
+ */
+#define HMP_MIGRATE_WAKEUP 0
+#define HMP_MIGRATE_FORCE 1
+#define HMP_MIGRATE_OFFLOAD 2
+#define HMP_MIGRATE_IDLE_PULL 3
+TRACE_EVENT(sched_hmp_migrate,
+
+ TP_PROTO(struct task_struct *tsk, int dest, int force),
+
+ TP_ARGS(tsk, dest, force),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(int, dest)
+ __field(int, force)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->dest = dest;
+ __entry->force = force;
+ ),
+
+ TP_printk("comm=%s pid=%d dest=%d force=%d",
+ __entry->comm, __entry->pid,
+ __entry->dest, __entry->force)
+);
+
+TRACE_EVENT(sched_hmp_offload_abort,
+
+ TP_PROTO(int cpu, int data, char *label),
+
+ TP_ARGS(cpu, data, label),
+
+ TP_STRUCT__entry(
+ __array(char, label, 64)
+ __field(int, cpu)
+ __field(int, data)
+ ),
+
+ TP_fast_assign(
+ strncpy(__entry->label, label, 64);
+ __entry->cpu = cpu;
+ __entry->data = data;
+ ),
+
+ TP_printk("cpu=%d data=%d label=%63s",
+ __entry->cpu, __entry->data,
+ __entry->label)
+);
+
+TRACE_EVENT(sched_hmp_offload_succeed,
+
+ TP_PROTO(int cpu, int dest_cpu),
+
+ TP_ARGS(cpu, dest_cpu),
+
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(int, dest_cpu)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->dest_cpu = dest_cpu;
+ ),
+
+ TP_printk("cpu=%d dest=%d",
+ __entry->cpu,
+ __entry->dest_cpu)
+);
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/include/trace/events/smp.h b/include/trace/events/smp.h
new file mode 100644
index 000000000000..da0baf27a39a
--- /dev/null
+++ b/include/trace/events/smp.h
@@ -0,0 +1,90 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM smp
+
+#if !defined(_TRACE_SMP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SMP_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(smp_call_class,
+
+ TP_PROTO(void * fnc),
+
+ TP_ARGS(fnc),
+
+ TP_STRUCT__entry(
+ __field( void *, func )
+ ),
+
+ TP_fast_assign(
+ __entry->func = fnc;
+ ),
+
+ TP_printk("func=%pf", __entry->func)
+);
+
+/**
+ * smp_call_func_entry - called in the generic smp-cross-call-handler
+ * immediately before calling the destination
+ * function
+ * @func: function pointer
+ *
+ * When used in combination with the smp_call_func_exit tracepoint
+ * we can determine the cross-call runtime.
+ */
+DEFINE_EVENT(smp_call_class, smp_call_func_entry,
+
+ TP_PROTO(void * fnc),
+
+ TP_ARGS(fnc)
+);
+
+/**
+ * smp_call_func_exit - called in the generic smp-cross-call-handler
+ * immediately after the destination function
+ * returns
+ * @func: function pointer
+ *
+ * When used in combination with the smp_call_func_entry tracepoint
+ * we can determine the cross-call runtime.
+ */
+DEFINE_EVENT(smp_call_class, smp_call_func_exit,
+
+ TP_PROTO(void * fnc),
+
+ TP_ARGS(fnc)
+);
+
+/**
+ * smp_call_func_send - called as destination function is set
+ * in the per-cpu storage
+ * @func: function pointer
+ * @dest: cpu to send to
+ *
+ * When used in combination with the smp_call_func_entry tracepoint
+ * we can determine the call-to-run latency.
+ */
+TRACE_EVENT(smp_call_func_send,
+
+ TP_PROTO(void * func, int dest),
+
+ TP_ARGS(func, dest),
+
+ TP_STRUCT__entry(
+ __field( void * , func )
+ __field( int , dest )
+ ),
+
+ TP_fast_assign(
+ __entry->func = func;
+ __entry->dest = dest;
+ ),
+
+ TP_printk("dest=%d func=%pf", __entry->dest,
+ __entry->func)
+);
+
+#endif /* _TRACE_SMP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index bdc6e87ff3eb..405887bec8b3 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -311,6 +311,7 @@ header-y += ppp-ioctl.h
header-y += ppp_defs.h
header-y += pps.h
header-y += prctl.h
+header-y += psci.h
header-y += ptp_clock.h
header-y += ptrace.h
header-y += qnx4_fs.h
diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h
index 8e2b7bac4378..59c17a2d38ad 100644
--- a/include/uapi/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
@@ -22,6 +22,7 @@
#define EM_PPC 20 /* PowerPC */
#define EM_PPC64 21 /* PowerPC64 */
#define EM_SPU 23 /* Cell BE SPU */
+#define EM_ARM 40 /* ARM 32 bit */
#define EM_SH 42 /* SuperH */
#define EM_SPARCV9 43 /* SPARC v9 64-bit */
#define EM_IA_64 50 /* HP/Intel IA-64 */
@@ -34,6 +35,7 @@
#define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */
#define EM_BLACKFIN 106 /* ADI Blackfin Processor */
#define EM_TI_C6000 140 /* TI C6X DSPs */
+#define EM_AARCH64 183 /* ARM 64 bit */
#define EM_FRV 0x5441 /* Fujitsu FR-V */
#define EM_AVR32 0x18ad /* Atmel AVR32 */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d88c8ee00c8b..00d2c69a3cb6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -171,6 +171,7 @@ struct kvm_pit_config {
#define KVM_EXIT_WATCHDOG 21
#define KVM_EXIT_S390_TSCH 22
#define KVM_EXIT_EPR 23
+#define KVM_EXIT_SYSTEM_EVENT 24
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -301,6 +302,13 @@ struct kvm_run {
struct {
__u32 epr;
} epr;
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+ __u32 type;
+ __u64 flags;
+ } system_event;
/* Fix the size of the union. */
char padding[256];
};
@@ -391,8 +399,9 @@ struct kvm_vapic_addr {
__u64 vapic_addr;
};
-/* for KVM_SET_MPSTATE */
+/* for KVM_SET_MP_STATE */
+/* not all states are valid on all architectures */
#define KVM_MP_STATE_RUNNABLE 0
#define KVM_MP_STATE_UNINITIALIZED 1
#define KVM_MP_STATE_INIT_RECEIVED 2
@@ -541,6 +550,7 @@ struct kvm_ppc_smmu_info {
#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
+#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
/*
* Extension capability list.
@@ -568,9 +578,7 @@ struct kvm_ppc_smmu_info {
#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#ifdef __KVM_HAVE_USER_NMI
#define KVM_CAP_USER_NMI 22
-#endif
#ifdef __KVM_HAVE_GUEST_DEBUG
#define KVM_CAP_SET_GUEST_DEBUG 23
#endif
@@ -652,9 +660,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
#define KVM_CAP_PPC_ALLOC_HTAB 80
-#ifdef __KVM_HAVE_READONLY_MEM
#define KVM_CAP_READONLY_MEM 81
-#endif
#define KVM_CAP_IRQFD_RESAMPLE 82
#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
#define KVM_CAP_PPC_HTAB_FD 84
@@ -666,6 +672,10 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_IRQ_MPIC 90
#define KVM_CAP_PPC_RTAS 91
#define KVM_CAP_IRQ_XICS 92
+#define KVM_CAP_ARM_EL1_32BIT 93
+#define KVM_CAP_EXT_EMUL_CPUID 95
+#define KVM_CAP_ARM_PSCI_0_2 102
+#define KVM_CAP_CHECK_EXTENSION_VM 105
#ifdef KVM_CAP_IRQ_ROUTING
@@ -783,6 +793,7 @@ struct kvm_dirty_tlb {
#define KVM_REG_IA64 0x3000000000000000ULL
#define KVM_REG_ARM 0x4000000000000000ULL
#define KVM_REG_S390 0x5000000000000000ULL
+#define KVM_REG_ARM64 0x6000000000000000ULL
#define KVM_REG_MIPS 0x7000000000000000ULL
#define KVM_REG_SIZE_SHIFT 52
@@ -837,9 +848,25 @@ struct kvm_device_attr {
__u64 addr; /* userspace address of attr data */
};
-#define KVM_DEV_TYPE_FSL_MPIC_20 1
-#define KVM_DEV_TYPE_FSL_MPIC_42 2
-#define KVM_DEV_TYPE_XICS 3
+#define KVM_DEV_VFIO_GROUP 1
+#define KVM_DEV_VFIO_GROUP_ADD 1
+#define KVM_DEV_VFIO_GROUP_DEL 2
+
+enum kvm_device_type {
+ KVM_DEV_TYPE_FSL_MPIC_20 = 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20
+ KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42
+ KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS
+ KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO
+ KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2
+ KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
+ KVM_DEV_TYPE_MAX,
+};
/*
* ioctls for VM fds
@@ -977,7 +1004,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
-/* Available with KVM_CAP_NMI */
+/* Available with KVM_CAP_USER_NMI */
#define KVM_NMI _IO(KVMIO, 0x9a)
/* Available with KVM_CAP_SET_GUEST_DEBUG */
#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
@@ -1009,6 +1036,7 @@ struct kvm_s390_ucas_mapping {
/* VM is being stopped by host */
#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad)
#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init)
+#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init)
#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
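
A sketch of the userspace side of the new exit reason: after KVM_RUN returns, a VMM inspects run->system_event to distinguish a guest shutdown request from a reset request (vm_powerdown/vm_reset are hypothetical VMM hooks):

    #include <linux/kvm.h>

    void vm_powerdown(void);    /* hypothetical VMM hooks */
    void vm_reset(void);

    static void handle_exit(struct kvm_run *run)
    {
            switch (run->exit_reason) {
            case KVM_EXIT_SYSTEM_EVENT:
                    if (run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN)
                            vm_powerdown();
                    else if (run->system_event.type == KVM_SYSTEM_EVENT_RESET)
                            vm_reset();
                    break;
            }
    }
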
diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
new file mode 100644
index 000000000000..310d83e0a91b
--- /dev/null
+++ b/include/uapi/linux/psci.h
@@ -0,0 +1,90 @@
+/*
+ * ARM Power State and Coordination Interface (PSCI) header
+ *
+ * This header holds common PSCI defines and macros shared
+ * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space.
+ *
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Anup Patel <anup.patel@linaro.org>
+ */
+
+#ifndef _UAPI_LINUX_PSCI_H
+#define _UAPI_LINUX_PSCI_H
+
+/*
+ * PSCI v0.1 interface
+ *
+ * The PSCI v0.1 function numbers are implementation defined.
+ *
+ * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED,
+ * INVALID_PARAMS, and DENIED defined below are applicable
+ * to PSCI v0.1.
+ */
+
+/* PSCI v0.2 interface */
+#define PSCI_0_2_FN_BASE 0x84000000
+#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n))
+#define PSCI_0_2_64BIT 0x40000000
+#define PSCI_0_2_FN64_BASE \
+ (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT)
+#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n))
+
+#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0)
+#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1)
+#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2)
+#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3)
+#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4)
+#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5)
+#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6)
+#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7)
+#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8)
+#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9)
+
+#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1)
+#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3)
+#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4)
+#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5)
+#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7)
+
+/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
+#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
+#define PSCI_0_2_POWER_STATE_ID_SHIFT 0
+#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16
+#define PSCI_0_2_POWER_STATE_TYPE_MASK \
+ (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24
+#define PSCI_0_2_POWER_STATE_AFFL_MASK \
+ (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+
+/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */
+#define PSCI_0_2_AFFINITY_LEVEL_ON 0
+#define PSCI_0_2_AFFINITY_LEVEL_OFF 1
+#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2
+
+/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */
+#define PSCI_0_2_TOS_UP_MIGRATE 0
+#define PSCI_0_2_TOS_UP_NO_MIGRATE 1
+#define PSCI_0_2_TOS_MP 2
+
+/* PSCI version decoding (independent of PSCI version) */
+#define PSCI_VERSION_MAJOR_SHIFT 16
+#define PSCI_VERSION_MINOR_MASK \
+ ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
+#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK
+#define PSCI_VERSION_MAJOR(ver) \
+ (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
+#define PSCI_VERSION_MINOR(ver) \
+ ((ver) & PSCI_VERSION_MINOR_MASK)
+
+/* PSCI return values (inclusive of all PSCI versions) */
+#define PSCI_RET_SUCCESS 0
+#define PSCI_RET_NOT_SUPPORTED -1
+#define PSCI_RET_INVALID_PARAMS -2
+#define PSCI_RET_DENIED -3
+#define PSCI_RET_ALREADY_ON -4
+#define PSCI_RET_ON_PENDING -5
+#define PSCI_RET_INTERNAL_FAILURE -6
+#define PSCI_RET_NOT_PRESENT -7
+#define PSCI_RET_DISABLED -8
+
+#endif /* _UAPI_LINUX_PSCI_H */
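
A worked sketch of the version macros: a PSCI v0.2 implementation returns 0x00000002 from PSCI_VERSION, so PSCI_VERSION_MAJOR() extracts bits [31:16] (0) and PSCI_VERSION_MINOR() bits [15:0] (2).

    #include <uapi/linux/psci.h>
    #include <linux/printk.h>

    static void report_psci_version(u32 ver)
    {
            pr_info("PSCI v%u.%u detected\n",
                    PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver));
    }
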
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 6bd4a90d1991..4d230eafd5e8 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -423,7 +423,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
f->lsm_rule = NULL;
/* Support legacy tests for a valid loginuid */
- if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295)) {
+ if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) {
f->type = AUDIT_LOGINUID_SET;
f->val = 0;
}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 8ab8e9390297..3fcb6faa5fa6 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -23,10 +23,35 @@
static struct lock_class_key irq_desc_lock_class;
#if defined(CONFIG_SMP)
+static int __init irq_affinity_setup(char *str)
+{
+ zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+ cpulist_parse(str, irq_default_affinity);
+ /*
+ * Set at least the boot cpu. We don't want to end up with
+ * bug reports caused by random command line masks.
+ */
+ cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
+ return 1;
+}
+__setup("irqaffinity=", irq_affinity_setup);
+
+extern struct cpumask hmp_slow_cpu_mask;
+
static void __init init_irq_default_affinity(void)
{
- alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
- cpumask_setall(irq_default_affinity);
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ if (!irq_default_affinity)
+ zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+#endif
+#ifdef CONFIG_SCHED_HMP
+ if (!cpumask_empty(&hmp_slow_cpu_mask)) {
+ cpumask_copy(irq_default_affinity, &hmp_slow_cpu_mask);
+ return;
+ }
+#endif
+ if (cpumask_empty(irq_default_affinity))
+ cpumask_setall(irq_default_affinity);
}
#else
static void __init init_irq_default_affinity(void)
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5dfdc9ea180b..46455961a88f 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -263,6 +263,26 @@ config PM_GENERIC_DOMAINS
bool
depends on PM
+config WQ_POWER_EFFICIENT_DEFAULT
+ bool "Enable workqueue power-efficient mode by default"
+ depends on PM
+ default n
+ help
+ Per-cpu workqueues are generally preferred because they show
+ better performance thanks to cache locality; unfortunately,
+ per-cpu workqueues tend to be more power hungry than unbound
+ workqueues.
+
+ Enabling the workqueue.power_efficient kernel parameter makes the
+ per-cpu workqueues which were observed to contribute
+ significantly to power consumption unbound, leading to measurably
+ lower power usage at the cost of a small performance overhead.
+
+ This config option determines whether workqueue.power_efficient
+ is enabled by default.
+
+ If in doubt, say N.
+
config PM_GENERIC_DOMAINS_SLEEP
def_bool y
depends on PM_SLEEP && PM_GENERIC_DOMAINS
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c771f2547bef..aa08f6419beb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1407,7 +1407,11 @@ void scheduler_ipi(void)
{
if (llist_empty(&this_rq()->wake_list)
&& !tick_nohz_full_cpu(smp_processor_id())
- && !got_nohz_idle_kick())
+ && !got_nohz_idle_kick()
+#ifdef CONFIG_SCHED_HMP
+ && !this_rq()->wake_for_idle_pull
+#endif
+ )
return;
/*
@@ -1434,6 +1438,11 @@ void scheduler_ipi(void)
this_rq()->idle_balance = 1;
raise_softirq_irqoff(SCHED_SOFTIRQ);
}
+#ifdef CONFIG_SCHED_HMP
+ else if (unlikely(this_rq()->wake_for_idle_pull))
+ raise_softirq_irqoff(SCHED_SOFTIRQ);
+#endif
+
irq_exit();
}
@@ -1623,6 +1632,20 @@ static void __sched_fork(struct task_struct *p)
#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
p->se.avg.runnable_avg_period = 0;
p->se.avg.runnable_avg_sum = 0;
+#ifdef CONFIG_SCHED_HMP
+ /* keep LOAD_AVG_MAX in sync with fair.c if load avg series is changed */
+#define LOAD_AVG_MAX 47742
+ p->se.avg.hmp_last_up_migration = 0;
+ p->se.avg.hmp_last_down_migration = 0;
+ if (hmp_task_should_forkboost(p)) {
+ p->se.avg.load_avg_ratio = 1023;
+ p->se.avg.load_avg_contrib =
+ (1023 * scale_load_down(p->se.load.weight));
+ p->se.avg.runnable_avg_period = LOAD_AVG_MAX;
+ p->se.avg.runnable_avg_sum = LOAD_AVG_MAX;
+ p->se.avg.usage_avg_sum = LOAD_AVG_MAX;
+ }
+#endif
#endif
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -3825,6 +3848,8 @@ static struct task_struct *find_process_by_pid(pid_t pid)
return pid ? find_task_by_vpid(pid) : current;
}
+extern struct cpumask hmp_slow_cpu_mask;
+
/* Actually do priority change: must hold rq lock. */
static void
__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
@@ -3834,8 +3859,17 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
p->normal_prio = normal_prio(p);
/* we are holding p->pi_lock already */
p->prio = rt_mutex_getprio(p);
- if (rt_prio(p->prio))
+ if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class;
+#ifdef CONFIG_SCHED_HMP
+ if (!cpumask_empty(&hmp_slow_cpu_mask))
+ if (cpumask_equal(&p->cpus_allowed, cpu_all_mask)) {
+ p->nr_cpus_allowed =
+ cpumask_weight(&hmp_slow_cpu_mask);
+ do_set_cpus_allowed(p, &hmp_slow_cpu_mask);
+ }
+#endif
+ }
else
p->sched_class = &fair_sched_class;
set_load_weight(p);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 701b6c8a4b12..1e23284fd692 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -94,6 +94,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
#ifdef CONFIG_SMP
P(se->avg.runnable_avg_sum);
P(se->avg.runnable_avg_period);
+ P(se->avg.usage_avg_sum);
P(se->avg.load_avg_contrib);
P(se->avg.decay_count);
#endif
@@ -223,6 +224,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
cfs_rq->tg_runnable_contrib);
SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg",
atomic_read(&cfs_rq->tg->runnable_avg));
+ SEQ_printf(m, " .%-30s: %d\n", "tg->usage_avg",
+ atomic_read(&cfs_rq->tg->usage_avg));
#endif
#ifdef CONFIG_CFS_BANDWIDTH
SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active",
@@ -574,6 +577,12 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
"nr_involuntary_switches", (long long)p->nivcsw);
P(se.load.weight);
+#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
+ P(se.avg.runnable_avg_sum);
+ P(se.avg.runnable_avg_period);
+ P(se.avg.load_avg_contrib);
+ P(se.avg.decay_count);
+#endif
P(policy);
P(prio);
#undef PN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c7ab8eab5427..41d0cbda605d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -31,9 +31,21 @@
#include <linux/task_work.h>
#include <trace/events/sched.h>
+#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+/* Include cpufreq header to add a notifier so that cpu frequency
+ * scaling can track the current CPU frequency
+ */
+#include <linux/cpufreq.h>
+#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
+#ifdef CONFIG_SCHED_HMP
+#include <linux/cpuidle.h>
+#endif
#include "sched.h"
+
/*
* Targeted preemption latency for CPU-bound tasks:
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
@@ -1208,8 +1220,91 @@ static u32 __compute_runnable_contrib(u64 n)
return contrib + runnable_avg_yN_sum[n];
}
-/*
- * We can represent the historical contribution to runnable average as the
+#ifdef CONFIG_SCHED_HMP
+#define HMP_VARIABLE_SCALE_SHIFT 16ULL
+struct hmp_global_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct kobject *kobj,
+ struct attribute *attr, char *buf);
+ ssize_t (*store)(struct kobject *a, struct attribute *b,
+ const char *c, size_t count);
+ int *value;
+ int (*to_sysfs)(int);
+ int (*from_sysfs)(int);
+ ssize_t (*to_sysfs_text)(char *buf, int buf_size);
+};
+
+#define HMP_DATA_SYSFS_MAX 8
+
+struct hmp_data_struct {
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ int freqinvar_load_scale_enabled;
+#endif
+ int multiplier; /* used to scale the time delta */
+ struct attribute_group attr_group;
+ struct attribute *attributes[HMP_DATA_SYSFS_MAX + 1];
+ struct hmp_global_attr attr[HMP_DATA_SYSFS_MAX];
+} hmp_data;
+
+static u64 hmp_variable_scale_convert(u64 delta);
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+/* Frequency-Invariant Load Modification:
+ * Loads are calculated as in PJT's patch; however, we also scale the current
+ * contribution in line with the frequency of the CPU that the task was
+ * executed on.
+ * In this version, we use a simple linear scale derived from the maximum
+ * frequency reported by CPUFreq. As an example:
+ *
+ * Consider that we ran a task for 100% of the previous interval.
+ *
+ * Our CPU was under asynchronous frequency control through one of the
+ * CPUFreq governors.
+ *
+ * The CPUFreq governor reports that it is able to scale the CPU between
+ * 500MHz and 1GHz.
+ *
+ * During the period, the CPU was running at 1GHz.
+ *
+ * In this case, our load contribution for that period is calculated as
+ * 1 * (number_of_active_microseconds)
+ *
+ * This results in our task being able to accumulate maximum load as normal.
+ *
+ *
+ * Consider now that our CPU was executing at 500MHz.
+ *
+ * We now scale the load contribution such that it is calculated as
+ * 0.5 * (number_of_active_microseconds)
+ *
+ * Our task can only record 50% maximum load during this period.
+ *
+ * This represents the task consuming 50% of the CPU's *possible* compute
+ * capacity. However the task did consume 100% of the CPU's *available*
+ * compute capacity which is the value seen by the CPUFreq governor and
+ * user-side CPU Utilization tools.
+ *
+ * Restricting tracked load to be scaled by the CPU's frequency accurately
+ * represents the consumption of possible compute capacity and allows the
+ * HMP migration's simple threshold migration strategy to interact more
+ * predictably with CPUFreq's asynchronous compute capacity changes.
+ */
+#define SCHED_FREQSCALE_SHIFT 10
+struct cpufreq_extents {
+ u32 curr_scale;
+ u32 min;
+ u32 max;
+ u32 flags;
+};
+/* Flag set when the governor in use only allows one frequency.
+ * Disables scaling.
+ */
+#define SCHED_LOAD_FREQINVAR_SINGLEFREQ 0x01
+
+static struct cpufreq_extents freq_scale[CONFIG_NR_CPUS];
+#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
+#endif /* CONFIG_SCHED_HMP */
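
To make the 1GHz / 500MHz example above concrete, here is a minimal user-space
sketch of the equivalent arithmetic (not kernel code; the 10-bit shift mirrors
SCHED_FREQSCALE_SHIFT and the kHz values are the assumed frequencies from the
example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t khz_max = 1000000;                      /* assumed 1 GHz fmax */
	uint32_t scale_full = 1000000 / (khz_max >> 10); /* 1024 == 1.0 */
	uint32_t scale_half =  500000 / (khz_max >> 10); /*  512 == 0.5 */
	uint32_t delta_us = 1024;                        /* fully busy period */

	/* busy time is multiplied by the scale before being accumulated */
	printf("%u\n", (delta_us * scale_full) >> 10); /* 1024: full load */
	printf("%u\n", (delta_us * scale_half) >> 10); /*  512: half load */
	return 0;
}
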
+
+/* We can represent the historical contribution to runnable average as the
* coefficients of a geometric series. To do this we sub-divide our runnable
* history into segments of approximately 1ms (1024us); label the segment that
* occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
@@ -1238,13 +1333,24 @@ static u32 __compute_runnable_contrib(u64 n)
*/
static __always_inline int __update_entity_runnable_avg(u64 now,
struct sched_avg *sa,
- int runnable)
+ int runnable,
+ int running,
+ int cpu)
{
u64 delta, periods;
u32 runnable_contrib;
int delta_w, decayed = 0;
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ u64 scaled_delta;
+ u32 scaled_runnable_contrib;
+ int scaled_delta_w;
+ u32 curr_scale = 1024;
+#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
delta = now - sa->last_runnable_update;
+#ifdef CONFIG_SCHED_HMP
+ delta = hmp_variable_scale_convert(delta);
+#endif
/*
* This should only happen when time goes backwards, which it
* unfortunately does during sched clock init when we swap over to TSC.
@@ -1263,6 +1369,12 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
return 0;
sa->last_runnable_update = now;
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ /* retrieve scale factor for load */
+ if (hmp_data.freqinvar_load_scale_enabled)
+ curr_scale = freq_scale[cpu].curr_scale;
+#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
+
/* delta_w is the amount already accumulated against our next period */
delta_w = sa->runnable_avg_period % 1024;
if (delta + delta_w >= 1024) {
@@ -1275,8 +1387,20 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
* period and accrue it.
*/
delta_w = 1024 - delta_w;
+ /* scale runnable time if necessary */
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ scaled_delta_w = (delta_w * curr_scale)
+ >> SCHED_FREQSCALE_SHIFT;
+ if (runnable)
+ sa->runnable_avg_sum += scaled_delta_w;
+ if (running)
+ sa->usage_avg_sum += scaled_delta_w;
+#else
if (runnable)
sa->runnable_avg_sum += delta_w;
+ if (running)
+ sa->usage_avg_sum += delta_w;
+#endif /* #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
sa->runnable_avg_period += delta_w;
delta -= delta_w;
@@ -1284,22 +1408,49 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
/* Figure out how many additional periods this update spans */
periods = delta / 1024;
delta %= 1024;
-
+ /* decay the load we have accumulated so far */
sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
periods + 1);
sa->runnable_avg_period = decay_load(sa->runnable_avg_period,
periods + 1);
-
+ sa->usage_avg_sum = decay_load(sa->usage_avg_sum, periods + 1);
+ /* add the contribution from this period */
/* Efficiently calculate \sum (1..n_period) 1024*y^i */
runnable_contrib = __compute_runnable_contrib(periods);
+ /* Apply load scaling if necessary.
+ * Note that multiplying the whole series is the same as
+ * multiplying each of its terms.
+ */
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ scaled_runnable_contrib = (runnable_contrib * curr_scale)
+ >> SCHED_FREQSCALE_SHIFT;
+ if (runnable)
+ sa->runnable_avg_sum += scaled_runnable_contrib;
+ if (running)
+ sa->usage_avg_sum += scaled_runnable_contrib;
+#else
if (runnable)
sa->runnable_avg_sum += runnable_contrib;
+ if (running)
+ sa->usage_avg_sum += runnable_contrib;
+#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
sa->runnable_avg_period += runnable_contrib;
}
/* Remainder of delta accrued against u_0` */
+ /* scale if necessary */
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ scaled_delta = ((delta * curr_scale) >> SCHED_FREQSCALE_SHIFT);
+ if (runnable)
+ sa->runnable_avg_sum += scaled_delta;
+ if (running)
+ sa->usage_avg_sum += scaled_delta;
+#else
if (runnable)
sa->runnable_avg_sum += delta;
+ if (running)
+ sa->usage_avg_sum += delta;
+#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
sa->runnable_avg_period += delta;
return decayed;
@@ -1312,12 +1463,9 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se)
u64 decays = atomic64_read(&cfs_rq->decay_counter);
decays -= se->avg.decay_count;
- if (!decays)
- return 0;
-
- se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
+ if (decays)
+ se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
se->avg.decay_count = 0;
-
return decays;
}
@@ -1345,16 +1493,28 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
struct cfs_rq *cfs_rq)
{
struct task_group *tg = cfs_rq->tg;
- long contrib;
+ long contrib, usage_contrib;
/* The fraction of a cpu used by this cfs_rq */
contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
sa->runnable_avg_period + 1);
contrib -= cfs_rq->tg_runnable_contrib;
- if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
+ usage_contrib = div_u64(sa->usage_avg_sum << NICE_0_SHIFT,
+ sa->runnable_avg_period + 1);
+ usage_contrib -= cfs_rq->tg_usage_contrib;
+
+ /*
+ * contrib/usage at this point represent deltas, only update if they
+ * are substantive.
+ */
+ if ((abs(contrib) > cfs_rq->tg_runnable_contrib / 64) ||
+ (abs(usage_contrib) > cfs_rq->tg_usage_contrib / 64)) {
atomic_add(contrib, &tg->runnable_avg);
cfs_rq->tg_runnable_contrib += contrib;
+
+ atomic_add(usage_contrib, &tg->usage_avg);
+ cfs_rq->tg_usage_contrib += usage_contrib;
}
}
@@ -1415,12 +1575,18 @@ static inline void __update_task_entity_contrib(struct sched_entity *se)
contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);
contrib /= (se->avg.runnable_avg_period + 1);
se->avg.load_avg_contrib = scale_load(contrib);
+ trace_sched_task_load_contrib(task_of(se), se->avg.load_avg_contrib);
+ contrib = se->avg.runnable_avg_sum * scale_load_down(NICE_0_LOAD);
+ contrib /= (se->avg.runnable_avg_period + 1);
+ se->avg.load_avg_ratio = scale_load(contrib);
+ trace_sched_task_runnable_ratio(task_of(se), se->avg.load_avg_ratio);
}
/* Compute the current contribution to load_avg by se, return any delta */
-static long __update_entity_load_avg_contrib(struct sched_entity *se)
+static long __update_entity_load_avg_contrib(struct sched_entity *se, long *ratio)
{
long old_contrib = se->avg.load_avg_contrib;
+ long old_ratio = se->avg.load_avg_ratio;
if (entity_is_task(se)) {
__update_task_entity_contrib(se);
@@ -1429,6 +1595,8 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
__update_group_entity_contrib(se);
}
+ if (ratio)
+ *ratio = se->avg.load_avg_ratio - old_ratio;
return se->avg.load_avg_contrib - old_contrib;
}
@@ -1448,9 +1616,13 @@ static inline void update_entity_load_avg(struct sched_entity *se,
int update_cfs_rq)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- long contrib_delta;
+ long contrib_delta, ratio_delta;
u64 now;
+ int cpu = -1; /* not used in normal case */
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ cpu = cfs_rq->rq->cpu;
+#endif
/*
* For a group entity we need to use their owned cfs_rq_clock_task() in
* case they are the parent of a throttled hierarchy.
@@ -1460,18 +1632,21 @@ static inline void update_entity_load_avg(struct sched_entity *se,
else
now = cfs_rq_clock_task(group_cfs_rq(se));
- if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq))
+ if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq,
+ cfs_rq->curr == se, cpu))
return;
- contrib_delta = __update_entity_load_avg_contrib(se);
+ contrib_delta = __update_entity_load_avg_contrib(se, &ratio_delta);
if (!update_cfs_rq)
return;
- if (se->on_rq)
+ if (se->on_rq) {
cfs_rq->runnable_load_avg += contrib_delta;
- else
+ rq_of(cfs_rq)->avg.load_avg_ratio += ratio_delta;
+ } else {
subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
+ }
}
/*
@@ -1504,8 +1679,17 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
{
- __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
+ int cpu = -1; /* not used in normal case */
+
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ cpu = rq->cpu;
+#endif
+ __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable,
+ runnable, cpu);
__update_tg_runnable_avg(&rq->avg, &rq->cfs);
+ trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio);
+ trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg);
+ trace_sched_rq_nr_running(cpu_of(rq), rq->nr_running, rq->nr_iowait.counter);
}
/* Add the load generated by se into cfs_rq's child load-average */
@@ -1547,6 +1731,8 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
}
cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
+ rq_of(cfs_rq)->avg.load_avg_ratio += se->avg.load_avg_ratio;
+
/* we force update consideration on load-balancer moves */
update_cfs_rq_blocked_load(cfs_rq, !wakeup);
}
@@ -1565,6 +1751,8 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
update_cfs_rq_blocked_load(cfs_rq, !sleep);
cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
+ rq_of(cfs_rq)->avg.load_avg_ratio -= se->avg.load_avg_ratio;
+
if (sleep) {
cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
@@ -1893,6 +2081,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
update_stats_wait_end(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
+ update_entity_load_avg(se, 1);
}
update_stats_curr_start(cfs_rq, se);
@@ -3341,6 +3530,835 @@ done:
return target;
}
+#ifdef CONFIG_SCHED_HMP
+/*
+ * Heterogeneous multiprocessor (HMP) optimizations
+ *
+ * The cpu types are distinguished using a list of hmp_domains
+ * which each represent one cpu type using a cpumask.
+ * The list is assumed ordered by compute capacity with the
+ * fastest domain first.
+ */
+DEFINE_PER_CPU(struct hmp_domain *, hmp_cpu_domain);
+static const int hmp_max_tasks = 5;
+
+extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list);
+
+#ifdef CONFIG_CPU_IDLE
+/*
+ * hmp_idle_pull:
+ *
+ * In this version we have stopped using forced up migrations when we
+ * detect that a task running on a little CPU should be moved to a bigger
+ * CPU. In most cases, the bigger CPU is in a deep sleep state and a forced
+ * migration means we stop the task immediately but need to wait for the
+ * target CPU to wake up before we can restart the task which is being
+ * moved. Instead, we now wake a big CPU with an IPI and ask it to pull
+ * a task when ready. This allows the task to continue executing on its
+ * current CPU, reducing the amount of time that the task is stalled for.
+ *
+ * keepalive timers:
+ *
+ * The keepalive timer is used as a way to keep a CPU engaged in an
+ * idle pull operation out of idle while waiting for the source
+ * CPU to stop and move the task. Ideally this would not be necessary
+ * and we could impose a temporary zero-latency requirement on the
+ * current CPU, but in the current QoS framework this will result in
+ * all CPUs in the system being unable to enter idle states which is
+ * not desirable. The timer does not perform any work when it expires.
+ */
+struct hmp_keepalive {
+ bool init;
+ ktime_t delay; /* if zero, no need for timer */
+ struct hrtimer timer;
+};
+DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive);
+
+/* setup per-cpu keepalive timers */
+static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer)
+{
+ return HRTIMER_NORESTART;
+}
+
+/*
+ * Work out if any of the idle states have an exit latency too high for us.
+ * ns_delay is passed in containing the max we are willing to tolerate.
+ * If there are none, set ns_delay to zero.
+ * If there are any, set ns_delay to
+ * ('target_residency of state with shortest too-big latency' - 1) * 1000.
+ */
+static void hmp_keepalive_delay(int cpu, unsigned int *ns_delay)
+{
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+ struct cpuidle_driver *drv;
+
+ drv = cpuidle_get_cpu_driver(dev);
+ if (drv) {
+ unsigned int us_delay = UINT_MAX;
+ unsigned int us_max_delay = *ns_delay / 1000;
+ int idx;
+ /* if cpuidle states are guaranteed to be sorted we
+ * could stop at the first match.
+ */
+ for (idx = 0; idx < drv->state_count; idx++) {
+ if (drv->states[idx].exit_latency > us_max_delay &&
+ drv->states[idx].target_residency < us_delay) {
+ us_delay = drv->states[idx].target_residency;
+ }
+ }
+ if (us_delay == UINT_MAX)
+ *ns_delay = 0; /* no timer required */
+ else
+ *ns_delay = 1000 * (us_delay - 1);
+ }
+}
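
A worked example with a hypothetical cpuidle table: states with
{exit_latency, target_residency} of {50, 200}, {500, 1500} and {2000, 5000}
microseconds and a 100us tolerance leave 1500us as the shortest too-deep
residency, so *ns_delay becomes 1000 * (1500 - 1). A user-space sketch of the
same selection (the state table is an assumption, not real driver data):

#include <stdio.h>
#include <limits.h>

struct state { unsigned int exit_latency, target_residency; };

int main(void)
{
	struct state states[] = { {50, 200}, {500, 1500}, {2000, 5000} };
	unsigned int ns_delay = 100000, us_max = ns_delay / 1000;
	unsigned int us_delay = UINT_MAX;
	int i;

	/* pick the shortest target residency among too-slow-to-exit states */
	for (i = 0; i < 3; i++)
		if (states[i].exit_latency > us_max &&
		    states[i].target_residency < us_delay)
			us_delay = states[i].target_residency;

	ns_delay = (us_delay == UINT_MAX) ? 0 : 1000 * (us_delay - 1);
	printf("%u\n", ns_delay); /* 1499000 */
	return 0;
}
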
+
+static void hmp_cpu_keepalive_trigger(void)
+{
+ int cpu = smp_processor_id();
+ struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
+ if (!keepalive->init) {
+ unsigned int ns_delay = 100000; /* tolerate 100usec delay */
+
+ hrtimer_init(&keepalive->timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+ keepalive->timer.function = hmp_cpu_keepalive_notify;
+
+ hmp_keepalive_delay(cpu, &ns_delay);
+ keepalive->delay = ns_to_ktime(ns_delay);
+ keepalive->init = true;
+ }
+ if (ktime_to_ns(keepalive->delay))
+ hrtimer_start(&keepalive->timer,
+ keepalive->delay, HRTIMER_MODE_REL_PINNED);
+}
+
+static void hmp_cpu_keepalive_cancel(int cpu)
+{
+ struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
+ if (keepalive->init)
+ hrtimer_cancel(&keepalive->timer);
+}
+#else /* !CONFIG_CPU_IDLE */
+static void hmp_cpu_keepalive_trigger(void)
+{
+}
+
+static void hmp_cpu_keepalive_cancel(int cpu)
+{
+}
+#endif
+
+/* Setup hmp_domains */
+static int __init hmp_cpu_mask_setup(void)
+{
+ char buf[64];
+ struct hmp_domain *domain;
+ struct list_head *pos;
+ int dc, cpu;
+
+ pr_debug("Initializing HMP scheduler:\n");
+
+ /* Initialize hmp_domains using platform code */
+ arch_get_hmp_domains(&hmp_domains);
+ if (list_empty(&hmp_domains)) {
+ pr_debug("HMP domain list is empty!\n");
+ return 0;
+ }
+
+ /* Print hmp_domains */
+ dc = 0;
+ list_for_each(pos, &hmp_domains) {
+ domain = list_entry(pos, struct hmp_domain, hmp_domains);
+ cpulist_scnprintf(buf, 64, &domain->possible_cpus);
+ pr_debug(" HMP domain %d: %s\n", dc, buf);
+
+ for_each_cpu_mask(cpu, domain->possible_cpus) {
+ per_cpu(hmp_cpu_domain, cpu) = domain;
+ }
+ dc++;
+ }
+
+ return 1;
+}
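
For illustration, a platform's arch_get_hmp_domains() might look like the
following sketch (a hypothetical two-cluster layout; the CPU numbers and the
kzalloc-based allocation are assumptions, not taken from any particular board
support code, and error handling is omitted for brevity):

/* Hypothetical example: CPUs 0-1 form the big cluster, CPUs 2-3 the
 * little one. Domains are list_add()ed at the head, so the little
 * domain is registered first and the big (fastest) domain ends up
 * first in the list, as hmp_cpu_mask_setup() expects.
 */
void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
{
	struct hmp_domain *domain;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	cpumask_set_cpu(2, &domain->possible_cpus);
	cpumask_set_cpu(3, &domain->possible_cpus);
	list_add(&domain->hmp_domains, hmp_domains_list);

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	cpumask_set_cpu(0, &domain->possible_cpus);
	cpumask_set_cpu(1, &domain->possible_cpus);
	list_add(&domain->hmp_domains, hmp_domains_list);
}
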
+
+static struct hmp_domain *hmp_get_hmp_domain_for_cpu(int cpu)
+{
+ struct hmp_domain *domain;
+ struct list_head *pos;
+
+ list_for_each(pos, &hmp_domains) {
+ domain = list_entry(pos, struct hmp_domain, hmp_domains);
+ if (cpumask_test_cpu(cpu, &domain->possible_cpus))
+ return domain;
+ }
+ return NULL;
+}
+
+static void hmp_online_cpu(int cpu)
+{
+ struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu);
+
+ if (domain)
+ cpumask_set_cpu(cpu, &domain->cpus);
+}
+
+static void hmp_offline_cpu(int cpu)
+{
+ struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu);
+
+ if (domain)
+ cpumask_clear_cpu(cpu, &domain->cpus);
+
+ hmp_cpu_keepalive_cancel(cpu);
+}
+/*
+ * Needed to determine heaviest tasks etc.
+ */
+static inline unsigned int hmp_cpu_is_fastest(int cpu);
+static inline unsigned int hmp_cpu_is_slowest(int cpu);
+static inline struct hmp_domain *hmp_slower_domain(int cpu);
+static inline struct hmp_domain *hmp_faster_domain(int cpu);
+
+/* must hold runqueue lock for queue se is currently on */
+static struct sched_entity *hmp_get_heaviest_task(
+ struct sched_entity *se, int target_cpu)
+{
+ int num_tasks = hmp_max_tasks;
+ struct sched_entity *max_se = se;
+ unsigned long int max_ratio = se->avg.load_avg_ratio;
+ const struct cpumask *hmp_target_mask = NULL;
+ struct hmp_domain *hmp;
+
+ if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq)))
+ return max_se;
+
+ hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
+ hmp_target_mask = &hmp->cpus;
+ if (target_cpu >= 0) {
+ /* idle_balance gets run on a CPU while
+ * it is in the middle of being hotplugged
+ * out. Bail early in that case.
+ */
+ if (!cpumask_test_cpu(target_cpu, hmp_target_mask))
+ return NULL;
+ hmp_target_mask = cpumask_of(target_cpu);
+ }
+ /* The currently running task is not on the runqueue */
+ se = __pick_first_entity(cfs_rq_of(se));
+
+ while (num_tasks && se) {
+ if (entity_is_task(se) &&
+ se->avg.load_avg_ratio > max_ratio &&
+ cpumask_intersects(hmp_target_mask,
+ tsk_cpus_allowed(task_of(se)))) {
+ max_se = se;
+ max_ratio = se->avg.load_avg_ratio;
+ }
+ se = __pick_next_entity(se);
+ num_tasks--;
+ }
+ return max_se;
+}
+
+static struct sched_entity *hmp_get_lightest_task(
+ struct sched_entity *se, int migrate_down)
+{
+ int num_tasks = hmp_max_tasks;
+ struct sched_entity *min_se = se;
+ unsigned long int min_ratio = se->avg.load_avg_ratio;
+ const struct cpumask *hmp_target_mask = NULL;
+
+ if (migrate_down) {
+ struct hmp_domain *hmp;
+ if (hmp_cpu_is_slowest(cpu_of(se->cfs_rq->rq)))
+ return min_se;
+ hmp = hmp_slower_domain(cpu_of(se->cfs_rq->rq));
+ hmp_target_mask = &hmp->cpus;
+ }
+ /* The currently running task is not on the runqueue */
+ se = __pick_first_entity(cfs_rq_of(se));
+
+ while (num_tasks && se) {
+ if (entity_is_task(se) &&
+ (se->avg.load_avg_ratio < min_ratio &&
+ hmp_target_mask &&
+ cpumask_intersects(hmp_target_mask,
+ tsk_cpus_allowed(task_of(se))))) {
+ min_se = se;
+ min_ratio = se->avg.load_avg_ratio;
+ }
+ se = __pick_next_entity(se);
+ num_tasks--;
+ }
+ return min_se;
+}
+
+/*
+ * Migration thresholds should be in the range [0..1023]
+ * hmp_up_threshold: min. load required for migrating tasks to a faster cpu
+ * hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu
+ *
+ * hmp_up_prio: only up-migrate tasks with high priority (prio < hmp_up_prio)
+ * hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
+ * hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather than
+ * the typical spreading mechanism. This behavior is controllable using
+ * two variables.
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
+ */
+unsigned int hmp_up_threshold = 700;
+unsigned int hmp_down_threshold = 512;
+#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
+unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
+#endif
+unsigned int hmp_next_up_threshold = 4096;
+unsigned int hmp_next_down_threshold = 4096;
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Set the default packing threshold to try to keep little
+ * CPUs at no more than 80% of their maximum frequency if only
+ * packing a small number of small tasks. Bigger tasks will
+ * raise frequency as normal.
+ * In order to pack a task onto a CPU, the sum of the
+ * unweighted runnable_avg load of existing tasks plus the
+ * load of the new task must be less than hmp_full_threshold.
+ *
+ * This works in conjunction with frequency-invariant load
+ * and DVFS governors. Since most DVFS governors aim for 80%
+ * utilisation, we arrive at (0.8*0.8*(max_load=1024))=655
+ * and use a value slightly lower to give a little headroom
+ * in the decision.
+ * Note that the most efficient frequency is different for
+ * each system so /sys/kernel/hmp/packing_limit should be
+ * configured at runtime for any given platform to achieve
+ * optimal energy usage. Some systems may not benefit from
+ * packing, so this feature can also be disabled at runtime
+ * with /sys/kernel/hmp/packing_enable
+ */
+unsigned int hmp_packing_enabled = 1;
+unsigned int hmp_full_threshold = 650;
+#endif
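
The integer arithmetic behind the 650 default can be checked with a trivial
sketch (plain C, outside the kernel):

#include <stdio.h>

int main(void)
{
	/* 80% governor utilisation target of 80% of max frequency:
	 * 0.8 * 0.8 * 1024 = 655; the default of 650 leaves headroom.
	 */
	int ceiling = 1024 * 8 / 10 * 8 / 10;

	printf("%d\n", ceiling); /* 655 */
	return 0;
}
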
+
+static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
+static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
+static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
+ int *min_cpu, struct cpumask *affinity);
+
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+ return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
+
+/* Check if cpu is in fastest hmp_domain */
+static inline unsigned int hmp_cpu_is_fastest(int cpu)
+{
+ struct list_head *pos;
+
+ pos = &hmp_cpu_domain(cpu)->hmp_domains;
+ return pos == hmp_domains.next;
+}
+
+/* Check if cpu is in slowest hmp_domain */
+static inline unsigned int hmp_cpu_is_slowest(int cpu)
+{
+ struct list_head *pos;
+
+ pos = &hmp_cpu_domain(cpu)->hmp_domains;
+ return list_is_last(pos, &hmp_domains);
+}
+
+/* Next (slower) hmp_domain relative to cpu */
+static inline struct hmp_domain *hmp_slower_domain(int cpu)
+{
+ struct list_head *pos;
+
+ pos = &hmp_cpu_domain(cpu)->hmp_domains;
+ return list_entry(pos->next, struct hmp_domain, hmp_domains);
+}
+
+/* Previous (faster) hmp_domain relative to cpu */
+static inline struct hmp_domain *hmp_faster_domain(int cpu)
+{
+ struct list_head *pos;
+
+ pos = &hmp_cpu_domain(cpu)->hmp_domains;
+ return list_entry(pos->prev, struct hmp_domain, hmp_domains);
+}
+
+/*
+ * Selects a cpu in previous (faster) hmp_domain
+ */
+static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
+ int cpu)
+{
+ int lowest_cpu = NR_CPUS;
+ __always_unused int lowest_ratio;
+ struct hmp_domain *hmp;
+
+ if (hmp_cpu_is_fastest(cpu))
+ hmp = hmp_cpu_domain(cpu);
+ else
+ hmp = hmp_faster_domain(cpu);
+
+ lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+ tsk_cpus_allowed(tsk));
+
+ return lowest_cpu;
+}
+
+/*
+ * Selects a cpu in next (slower) hmp_domain
+ * Note that cpumask_any_and() returns the first cpu in the cpumask
+ */
+static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
+ int cpu)
+{
+ int lowest_cpu = NR_CPUS;
+ struct hmp_domain *hmp;
+ __always_unused int lowest_ratio;
+
+ if (hmp_cpu_is_slowest(cpu))
+ hmp = hmp_cpu_domain(cpu);
+ else
+ hmp = hmp_slower_domain(cpu);
+
+ lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+ tsk_cpus_allowed(tsk));
+
+ return lowest_cpu;
+}
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPUs in
+ * logical order and selects the first which will
+ * be loaded less than hmp_full_threshold according to
+ * the sum of the tracked load of the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+ int cpu)
+{
+ int tmp_cpu;
+ unsigned long estimated_load;
+ struct hmp_domain *hmp;
+ struct sched_avg *avg;
+ struct cpumask allowed_hmp_cpus;
+
+ if (!hmp_packing_enabled ||
+ tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+ return hmp_select_slower_cpu(tsk, cpu);
+
+ if (hmp_cpu_is_slowest(cpu))
+ hmp = hmp_cpu_domain(cpu);
+ else
+ hmp = hmp_slower_domain(cpu);
+
+ /* respect affinity */
+ cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+ tsk_cpus_allowed(tsk));
+
+ for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+ avg = &cpu_rq(tmp_cpu)->avg;
+ /* estimate new rq load if we add this task */
+ estimated_load = avg->load_avg_ratio +
+ tsk->se.avg.load_avg_ratio;
+ if (estimated_load <= hmp_full_threshold) {
+ cpu = tmp_cpu;
+ break;
+ }
+ }
+ /* if no match was found, the task uses the initial value */
+ return cpu;
+}
+#endif
+static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
+{
+ /* hack - always use clock from first online CPU */
+ u64 now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task;
+ se->avg.hmp_last_up_migration = now;
+ se->avg.hmp_last_down_migration = 0;
+ cpu_rq(cpu)->avg.hmp_last_up_migration = now;
+ cpu_rq(cpu)->avg.hmp_last_down_migration = 0;
+}
+
+static inline void hmp_next_down_delay(struct sched_entity *se, int cpu)
+{
+ /* hack - always use clock from first online CPU */
+ u64 now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task;
+ se->avg.hmp_last_down_migration = now;
+ se->avg.hmp_last_up_migration = 0;
+ cpu_rq(cpu)->avg.hmp_last_down_migration = now;
+ cpu_rq(cpu)->avg.hmp_last_up_migration = 0;
+}
+
+/*
+ * Heterogeneous multiprocessor (HMP) optimizations
+ *
+ * These functions allow changing the growing speed of the load_avg_ratio;
+ * by default it goes from 0 to 0.5 in LOAD_AVG_PERIOD = 32ms.
+ * This can now be changed with /sys/kernel/hmp/load_avg_period_ms.
+ *
+ * These functions also allow changing the up and down thresholds of HMP
+ * using /sys/kernel/hmp/{up,down}_threshold.
+ * Both must be between 0 and 1023. The threshold that is compared
+ * to the load_avg_ratio is up_threshold/1024 and down_threshold/1024.
+ *
+ * For instance, if load_avg_period = 64 and up_threshold = 512, a
+ * previously-idle task with a load of 0 will reach the threshold after
+ * 64ms of busy-looping.
+ *
+ * Changing load_avg_period_ms has the same effect as changing the
+ * default scaling factor Y=1002/1024 in the load_avg_ratio computation to
+ * (1002/1024.0)^(LOAD_AVG_PERIOD/load_avg_period_ms), but the latter
+ * could trigger overflows.
+ * For instance, with Y = 1023/1024 in __update_task_entity_contrib()
+ * "contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);"
+ * could overflow for a weight > 2^12 even though the load_avg_contrib
+ * should still fit in a 32-bit result. That cannot happen when instead
+ * multiplying the delta time by 1/22 and setting load_avg_period_ms = 706.
+ */
+
+/*
+ * Scaling the delta time ends up increasing or decreasing the
+ * growing speed of the per-entity load_avg_ratio.
+ * The scale factor hmp_data.multiplier is a fixed-point
+ * number: (32-HMP_VARIABLE_SCALE_SHIFT).HMP_VARIABLE_SCALE_SHIFT
+ */
+static inline u64 hmp_variable_scale_convert(u64 delta)
+{
+#ifdef CONFIG_HMP_VARIABLE_SCALE
+ u64 high = delta >> 32ULL;
+ u64 low = delta & 0xffffffffULL;
+ low *= hmp_data.multiplier;
+ high *= hmp_data.multiplier;
+ return (low >> HMP_VARIABLE_SCALE_SHIFT)
+ + (high << (32ULL - HMP_VARIABLE_SCALE_SHIFT));
+#else
+ return delta;
+#endif
+}
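
The split multiply above avoids overflowing 64 bits when delta * multiplier
would exceed them. A user-space sketch of the same conversion
(HMP_VARIABLE_SCALE_SHIFT is 16, so a multiplier of 1 << 16 means 1.0):

#include <stdint.h>
#include <stdio.h>

#define SHIFT 16

/* Same split as hmp_variable_scale_convert(): the two 32-bit halves of
 * delta are multiplied separately so the product cannot overflow 64 bits.
 */
static uint64_t scale_convert(uint64_t delta, uint64_t multiplier)
{
	uint64_t high = delta >> 32;
	uint64_t low = delta & 0xffffffffULL;

	low *= multiplier;
	high *= multiplier;
	return (low >> SHIFT) + (high << (32 - SHIFT));
}

int main(void)
{
	/* multiplier 2.0 doubles the delta; 0.5 halves it */
	printf("%llu\n", (unsigned long long)scale_convert(1000, 2 << SHIFT));       /* 2000 */
	printf("%llu\n", (unsigned long long)scale_convert(1000, 1 << (SHIFT - 1))); /* 500 */
	return 0;
}
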
+
+static ssize_t hmp_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct hmp_global_attr *hmp_attr =
+ container_of(attr, struct hmp_global_attr, attr);
+ int temp;
+
+ if (hmp_attr->to_sysfs_text != NULL)
+ return hmp_attr->to_sysfs_text(buf, PAGE_SIZE);
+
+ temp = *(hmp_attr->value);
+ if (hmp_attr->to_sysfs != NULL)
+ temp = hmp_attr->to_sysfs(temp);
+
+ return (ssize_t)sprintf(buf, "%d\n", temp);
+}
+
+static ssize_t hmp_store(struct kobject *a, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ int temp;
+ ssize_t ret = count;
+ struct hmp_global_attr *hmp_attr =
+ container_of(attr, struct hmp_global_attr, attr);
+ char *str = vmalloc(count + 1);
+ if (str == NULL)
+ return -ENOMEM;
+ memcpy(str, buf, count);
+ str[count] = 0;
+ if (sscanf(str, "%d", &temp) < 1)
+ ret = -EINVAL;
+ else {
+ if (hmp_attr->from_sysfs != NULL)
+ temp = hmp_attr->from_sysfs(temp);
+ if (temp < 0)
+ ret = -EINVAL;
+ else
+ *(hmp_attr->value) = temp;
+ }
+ vfree(str);
+ return ret;
+}
+
+static ssize_t hmp_print_domains(char *outbuf, int outbufsize)
+{
+ char buf[64];
+ const char nospace[] = "%s", space[] = " %s";
+ const char *fmt = nospace;
+ struct hmp_domain *domain;
+ struct list_head *pos;
+ int outpos = 0;
+ list_for_each(pos, &hmp_domains) {
+ domain = list_entry(pos, struct hmp_domain, hmp_domains);
+ if (cpumask_scnprintf(buf, 64, &domain->possible_cpus)) {
+ outpos += sprintf(outbuf+outpos, fmt, buf);
+ fmt = space;
+ }
+ }
+ strcat(outbuf, "\n");
+ return outpos+1;
+}
+
+#ifdef CONFIG_HMP_VARIABLE_SCALE
+static int hmp_period_tofrom_sysfs(int value)
+{
+ return (LOAD_AVG_PERIOD << HMP_VARIABLE_SCALE_SHIFT) / value;
+}
+#endif
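
hmp_period_tofrom_sysfs() is deliberately its own inverse, which is why it is
registered below as both the to_sysfs and from_sysfs hook. A quick check
(user-space sketch, with LOAD_AVG_PERIOD assumed to be 32):

#include <stdio.h>

#define LOAD_AVG_PERIOD 32
#define HMP_VARIABLE_SCALE_SHIFT 16

static int tofrom(int value)
{
	return (LOAD_AVG_PERIOD << HMP_VARIABLE_SCALE_SHIFT) / value;
}

int main(void)
{
	printf("%d\n", tofrom(32));         /* 65536: 1.0 in fixed point */
	printf("%d\n", tofrom(tofrom(64))); /* 64: the map is an involution */
	return 0;
}
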
+/* max value for threshold is 1024 */
+static int hmp_threshold_from_sysfs(int value)
+{
+ if (value > 1024)
+ return -1;
+ return value;
+}
+#if defined(CONFIG_SCHED_HMP_LITTLE_PACKING) || \
+ defined(CONFIG_HMP_FREQUENCY_INVARIANT_SCALE)
+/* toggle control is only 0,1 off/on */
+static int hmp_toggle_from_sysfs(int value)
+{
+ if (value < 0 || value > 1)
+ return -1;
+ return value;
+}
+#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+ if (value < 0)
+ return -1;
+ return value;
+}
+#endif
+static void hmp_attr_add(
+ const char *name,
+ int *value,
+ int (*to_sysfs)(int),
+ int (*from_sysfs)(int),
+ ssize_t (*to_sysfs_text)(char *, int),
+ umode_t mode)
+{
+ int i = 0;
+ while (hmp_data.attributes[i] != NULL) {
+ i++;
+ if (i >= HMP_DATA_SYSFS_MAX)
+ return;
+ }
+ if (mode)
+ hmp_data.attr[i].attr.mode = mode;
+ else
+ hmp_data.attr[i].attr.mode = 0644;
+ hmp_data.attr[i].show = hmp_show;
+ hmp_data.attr[i].store = hmp_store;
+ hmp_data.attr[i].attr.name = name;
+ hmp_data.attr[i].value = value;
+ hmp_data.attr[i].to_sysfs = to_sysfs;
+ hmp_data.attr[i].from_sysfs = from_sysfs;
+ hmp_data.attr[i].to_sysfs_text = to_sysfs_text;
+ hmp_data.attributes[i] = &hmp_data.attr[i].attr;
+ hmp_data.attributes[i + 1] = NULL;
+}
+
+static int hmp_attr_init(void)
+{
+ int ret;
+ memset(&hmp_data, 0, sizeof(hmp_data));
+ hmp_attr_add("hmp_domains",
+ NULL,
+ NULL,
+ NULL,
+ hmp_print_domains,
+ 0444);
+ hmp_attr_add("up_threshold",
+ &hmp_up_threshold,
+ NULL,
+ hmp_threshold_from_sysfs,
+ NULL,
+ 0);
+ hmp_attr_add("down_threshold",
+ &hmp_down_threshold,
+ NULL,
+ hmp_threshold_from_sysfs,
+ NULL,
+ 0);
+#ifdef CONFIG_HMP_VARIABLE_SCALE
+ /* by default load_avg_period_ms == LOAD_AVG_PERIOD
+ * meaning no change
+ */
+ hmp_data.multiplier = hmp_period_tofrom_sysfs(LOAD_AVG_PERIOD);
+ hmp_attr_add("load_avg_period_ms",
+ &hmp_data.multiplier,
+ hmp_period_tofrom_sysfs,
+ hmp_period_tofrom_sysfs,
+ NULL,
+ 0);
+#endif
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+ /* default frequency-invariant scaling ON */
+ hmp_data.freqinvar_load_scale_enabled = 1;
+ hmp_attr_add("frequency_invariant_load_scale",
+ &hmp_data.freqinvar_load_scale_enabled,
+ NULL,
+ hmp_toggle_from_sysfs,
+ NULL,
+ 0);
+#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ hmp_attr_add("packing_enable",
+ &hmp_packing_enabled,
+ NULL,
+ hmp_toggle_from_sysfs,
+ NULL,
+ 0);
+ hmp_attr_add("packing_limit",
+ &hmp_full_threshold,
+ NULL,
+ hmp_packing_from_sysfs,
+ NULL,
+ 0);
+#endif
+ hmp_data.attr_group.name = "hmp";
+ hmp_data.attr_group.attrs = hmp_data.attributes;
+ ret = sysfs_create_group(kernel_kobj,
+ &hmp_data.attr_group);
+ return ret;
+}
+late_initcall(hmp_attr_init);
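
Usage note: with all the options above enabled, hmp_attr_init() exposes
/sys/kernel/hmp/ containing hmp_domains (read-only), up_threshold,
down_threshold, load_avg_period_ms, frequency_invariant_load_scale,
packing_enable and packing_limit.
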
+/*
+ * return the load of the lowest-loaded CPU in a given HMP domain
+ * min_cpu optionally points to an int to receive the CPU.
+ * affinity optionally points to a cpumask containing the
+ * CPUs to be considered. note:
+ * + min_cpu = NR_CPUS only if no CPUs are in the set of
+ * affinity && hmp_domain cpus
+ * + min_cpu will always otherwise equal one of the CPUs in
+ * the hmp domain
+ * + when more than one CPU has the same load, the one which
+ * is least-recently-disturbed by an HMP migration will be
+ * selected
+ * + if all CPUs are equally loaded or idle and the times are
+ * all the same, the first in the set will be used
+ * + if affinity is not set, cpu_online_mask is used
+ */
+static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
+ int *min_cpu, struct cpumask *affinity)
+{
+ int cpu;
+ int min_cpu_runnable_temp = NR_CPUS;
+ u64 min_target_last_migration = ULLONG_MAX;
+ u64 curr_last_migration;
+ unsigned long min_runnable_load = INT_MAX;
+ unsigned long contrib;
+ struct sched_avg *avg;
+ struct cpumask temp_cpumask;
+ /*
+ * only look at CPUs allowed if specified,
+ * otherwise look at all online CPUs in the
+ * right HMP domain
+ */
+ cpumask_and(&temp_cpumask, &hmpd->cpus, affinity ? affinity : cpu_online_mask);
+
+ for_each_cpu_mask(cpu, temp_cpumask) {
+ avg = &cpu_rq(cpu)->avg;
+ /* used for both up and down migration */
+ curr_last_migration = avg->hmp_last_up_migration ?
+ avg->hmp_last_up_migration : avg->hmp_last_down_migration;
+
+ contrib = avg->load_avg_ratio;
+ /*
+ * Consider a runqueue completely busy if there is any load
+ * on it. Definitely not the best for overall fairness, but
+ * does well in typical Android use cases.
+ */
+ if (contrib)
+ contrib = 1023;
+
+ if ((contrib < min_runnable_load) ||
+ (contrib == min_runnable_load &&
+ curr_last_migration < min_target_last_migration)) {
+ /*
+ * if the load is the same, target the CPU with
+ * the longest time since a migration.
+ * This is to spread migration load between
+ * members of a domain more evenly when the
+ * domain is fully loaded
+ */
+ min_runnable_load = contrib;
+ min_cpu_runnable_temp = cpu;
+ min_target_last_migration = curr_last_migration;
+ }
+ }
+
+ if (min_cpu)
+ *min_cpu = min_cpu_runnable_temp;
+
+ return min_runnable_load;
+}
+
+/*
+ * Calculate the task starvation.
+ * This is the ratio of actual running time vs. runnable time.
+ * If the two are equal the task is getting the cpu time it needs or
+ * it is alone on the cpu and the cpu is fully utilized.
+ */
+static inline unsigned int hmp_task_starvation(struct sched_entity *se)
+{
+ u32 starvation;
+
+ starvation = se->avg.usage_avg_sum * scale_load_down(NICE_0_LOAD);
+ starvation /= (se->avg.runnable_avg_sum + 1);
+
+ return scale_load(starvation);
+}
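
A worked example with assumed sums (and scale_load_down(NICE_0_LOAD) taken as
1024): a task that ran for 600 of the 800 units it was runnable scores just
under the 768 cutoff used by hmp_offload_down() below:

#include <stdio.h>

int main(void)
{
	unsigned int usage_avg_sum = 600, runnable_avg_sum = 800;
	unsigned int starvation = usage_avg_sum * 1024 / (runnable_avg_sum + 1);

	printf("%u\n", starvation); /* 767: ran ~75% of its runnable time */
	return 0;
}
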
+
+static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se)
+{
+ int min_usage;
+ int dest_cpu = NR_CPUS;
+
+ if (hmp_cpu_is_slowest(cpu))
+ return NR_CPUS;
+
+ /* Is there an idle CPU in the current domain */
+ min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL, NULL);
+ if (min_usage == 0) {
+ trace_sched_hmp_offload_abort(cpu, min_usage, "load");
+ return NR_CPUS;
+ }
+
+ /* Is the task alone on the cpu? */
+ if (cpu_rq(cpu)->cfs.h_nr_running < 2) {
+ trace_sched_hmp_offload_abort(cpu,
+ cpu_rq(cpu)->cfs.h_nr_running, "nr_running");
+ return NR_CPUS;
+ }
+
+ /* Is the task actually starving? A task is considered
+ * starving when it runs for less than 75% of the time
+ * it is runnable, i.e. running/runnable <= 768/1024.
+ */
+ if (hmp_task_starvation(se) > 768) {
+ trace_sched_hmp_offload_abort(cpu, hmp_task_starvation(se),
+ "starvation");
+ return NR_CPUS;
+ }
+
+ /* Does the slower domain have any idle CPUs? */
+ min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu,
+ tsk_cpus_allowed(task_of(se)));
+
+ if (min_usage == 0) {
+ trace_sched_hmp_offload_succeed(cpu, dest_cpu);
+ return dest_cpu;
+ }
+ trace_sched_hmp_offload_abort(cpu, min_usage, "slowdomain");
+ return NR_CPUS;
+}
+#endif /* CONFIG_SCHED_HMP */
+
/*
* sched_balance_self: balance the current task (running on cpu) in domains
* that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -3365,6 +4383,19 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
if (p->nr_cpus_allowed == 1)
return prev_cpu;
+#ifdef CONFIG_SCHED_HMP
+ /* always put non-kernel forking tasks on a big domain */
+ if (unlikely(sd_flag & SD_BALANCE_FORK) && hmp_task_should_forkboost(p)) {
+ new_cpu = hmp_select_faster_cpu(p, prev_cpu);
+ if (new_cpu != NR_CPUS) {
+ hmp_next_up_delay(&p->se, new_cpu);
+ return new_cpu;
+ }
+ /* failed to perform HMP fork balance, use normal balance */
+ new_cpu = cpu;
+ }
+#endif
+
if (sd_flag & SD_BALANCE_WAKE) {
if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
want_affine = 1;
@@ -3439,6 +4470,35 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
unlock:
rcu_read_unlock();
+#ifdef CONFIG_SCHED_HMP
+ prev_cpu = task_cpu(p);
+
+ if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) {
+ hmp_next_up_delay(&p->se, new_cpu);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+ return new_cpu;
+ }
+ if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
+ new_cpu = hmp_select_slower_cpu(p, prev_cpu);
+#endif
+ /*
+ * we might have no suitable CPU
+ * in which case new_cpu == NR_CPUS
+ */
+ if (new_cpu < NR_CPUS && new_cpu != prev_cpu) {
+ hmp_next_down_delay(&p->se, new_cpu);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+ return new_cpu;
+ }
+ }
+ /* Make sure that the task stays in its previous hmp domain */
+ if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
+ return prev_cpu;
+#endif
+
return new_cpu;
}
@@ -3448,6 +4508,16 @@ unlock:
* load-balance).
*/
#ifdef CONFIG_FAIR_GROUP_SCHED
+
+#ifdef CONFIG_NO_HZ_COMMON
+static int nohz_test_cpu(int cpu);
+#else
+static inline int nohz_test_cpu(int cpu)
+{
+ return 0;
+}
+#endif
+
/*
* Called immediately before a task is migrated to a new cpu; task_cpu(p) and
* cfs_rq_of(p) references at time of call are still valid and identify the
@@ -3467,6 +4537,25 @@ migrate_task_rq_fair(struct task_struct *p, int next_cpu)
* be negative here since on-rq tasks have decay-count == 0.
*/
if (se->avg.decay_count) {
+ /*
+ * If we migrate a sleeping task away from a CPU
+ * which has the tick stopped, then both the clock_task
+ * and decay_counter will be out of date for that CPU
+ * and we will not decay load correctly.
+ */
+ if (!se->on_rq && nohz_test_cpu(task_cpu(p))) {
+ struct rq *rq = cpu_rq(task_cpu(p));
+ unsigned long flags;
+ /*
+ * Current CPU cannot be holding rq->lock in this
+ * circumstance, but another might be. We must hold
+ * rq->lock before we go poking around in its clocks
+ */
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ update_rq_clock(rq);
+ update_cfs_rq_blocked_load(cfs_rq, 0);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ }
se->avg.decay_count = -__synchronize_entity_decay(se);
atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load);
}
@@ -3972,7 +5061,6 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
* 1) task is cache cold, or
* 2) too many balance attempts have failed.
*/
-
tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
if (!tsk_cache_hot ||
env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
@@ -5258,6 +6346,16 @@ out:
return ld_moved;
}
+#ifdef CONFIG_SCHED_HMP
+static unsigned int hmp_idle_pull(int this_cpu);
+static int move_specific_task(struct lb_env *env, struct task_struct *pm);
+#else
+static int move_specific_task(struct lb_env *env, struct task_struct *pm)
+{
+ return 0;
+}
+#endif
+
/*
* idle_balance is called by schedule() if this_cpu is about to become
* idle. Attempts to pull tasks from other CPUs.
@@ -5302,7 +6400,10 @@ void idle_balance(int this_cpu, struct rq *this_rq)
}
}
rcu_read_unlock();
-
+#ifdef CONFIG_SCHED_HMP
+ if (!pulled_task)
+ pulled_task = hmp_idle_pull(this_cpu);
+#endif
raw_spin_lock(&this_rq->lock);
if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
@@ -5314,22 +6415,19 @@ void idle_balance(int this_cpu, struct rq *this_rq)
}
}
-/*
- * active_load_balance_cpu_stop is run by cpu stopper. It pushes
- * running tasks off the busiest CPU onto idle CPUs. It requires at
- * least 1 task to be running on each physical CPU where possible, and
- * avoids physical / logical imbalances.
- */
-static int active_load_balance_cpu_stop(void *data)
+static int __do_active_load_balance_cpu_stop(void *data, bool check_sd_lb_flag)
{
struct rq *busiest_rq = data;
int busiest_cpu = cpu_of(busiest_rq);
int target_cpu = busiest_rq->push_cpu;
struct rq *target_rq = cpu_rq(target_cpu);
struct sched_domain *sd;
+ struct task_struct *p = NULL;
raw_spin_lock_irq(&busiest_rq->lock);
-
+#ifdef CONFIG_SCHED_HMP
+ p = busiest_rq->migrate_task;
+#endif
/* make sure the requested cpu hasn't gone down in the meantime */
if (unlikely(busiest_cpu != smp_processor_id() ||
!busiest_rq->active_balance))
@@ -5339,6 +6437,11 @@ static int active_load_balance_cpu_stop(void *data)
if (busiest_rq->nr_running <= 1)
goto out_unlock;
+ if (!check_sd_lb_flag) {
+ /* Task has migrated meanwhile, abort forced migration */
+ if (task_rq(p) != busiest_rq)
+ goto out_unlock;
+ }
/*
* This condition is "impossible", if it occurs
* we need to fix it. Originally reported by
@@ -5352,12 +6455,14 @@ static int active_load_balance_cpu_stop(void *data)
/* Search for an sd spanning us and the target CPU. */
rcu_read_lock();
for_each_domain(target_cpu, sd) {
- if ((sd->flags & SD_LOAD_BALANCE) &&
- cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
+ if (((check_sd_lb_flag && sd->flags & SD_LOAD_BALANCE) ||
+ !check_sd_lb_flag) &&
+ cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
break;
}
if (likely(sd)) {
+ bool success = false;
struct lb_env env = {
.sd = sd,
.dst_cpu = target_cpu,
@@ -5369,7 +6474,14 @@ static int active_load_balance_cpu_stop(void *data)
schedstat_inc(sd, alb_count);
- if (move_one_task(&env))
+ if (check_sd_lb_flag) {
+ if (move_one_task(&env))
+ success = true;
+ } else {
+ if (move_specific_task(&env, p))
+ success = true;
+ }
+ if (success)
schedstat_inc(sd, alb_pushed);
else
schedstat_inc(sd, alb_failed);
@@ -5377,11 +6489,24 @@ static int active_load_balance_cpu_stop(void *data)
rcu_read_unlock();
double_unlock_balance(busiest_rq, target_rq);
out_unlock:
+ if (!check_sd_lb_flag)
+ put_task_struct(p);
busiest_rq->active_balance = 0;
raw_spin_unlock_irq(&busiest_rq->lock);
return 0;
}
+/*
+ * active_load_balance_cpu_stop is run by cpu stopper. It pushes
+ * running tasks off the busiest CPU onto idle CPUs. It requires at
+ * least 1 task to be running on each physical CPU where possible, and
+ * avoids physical / logical imbalances.
+ */
+static int active_load_balance_cpu_stop(void *data)
+{
+ return __do_active_load_balance_cpu_stop(data, true);
+}
+
#ifdef CONFIG_NO_HZ_COMMON
/*
* idle load balancing details
@@ -5395,12 +6520,80 @@ static struct {
unsigned long next_balance; /* in jiffy units */
} nohz ____cacheline_aligned;
+/*
+ * nohz_test_cpu is used when load tracking is enabled. The
+ * FAIR_GROUP_SCHED dependency below may be removed when the
+ * load tracking guards are removed.
+ */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int nohz_test_cpu(int cpu)
+{
+ return cpumask_test_cpu(cpu, nohz.idle_cpus_mask);
+}
+#endif
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu, int ilb_needed)
+{
+ struct hmp_domain *hmp;
+ /* allow previous decision on non-slowest domain */
+ if (!hmp_cpu_is_slowest(cpu))
+ return ilb_needed;
+
+ /* if disabled, use normal ILB behaviour */
+ if (!hmp_packing_enabled)
+ return ilb_needed;
+
+ hmp = hmp_cpu_domain(cpu);
+ for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+ /* only idle balance if a CPU is loaded over threshold */
+ if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+DEFINE_PER_CPU(cpumask_var_t, ilb_tmpmask);
+
static inline int find_new_ilb(int call_cpu)
{
int ilb = cpumask_first(nohz.idle_cpus_mask);
+#ifdef CONFIG_SCHED_HMP
+ int ilb_needed = 0;
+ int cpu;
+ struct cpumask *tmp = per_cpu(ilb_tmpmask, smp_processor_id());
+
+ /* restrict nohz balancing to occur in the same hmp domain */
+ ilb = cpumask_first_and(nohz.idle_cpus_mask,
+ &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+
+ /* check to see if it's necessary within this domain */
+ cpumask_andnot(tmp,
+ &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus,
+ nohz.idle_cpus_mask);
+ for_each_cpu(cpu, tmp) {
+ if (cpu_rq(cpu)->nr_running > 1) {
+ ilb_needed = 1;
+ break;
+ }
+ }
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if (ilb < nr_cpu_ids)
+ ilb_needed = hmp_packing_ilb_needed(ilb, ilb_needed);
+#endif
+ if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+ return ilb;
+#else
if (ilb < nr_cpu_ids && idle_cpu(ilb))
return ilb;
+#endif
return nr_cpu_ids;
}
@@ -5677,6 +6870,18 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
if (time_before(now, nohz.next_balance))
return 0;
+#ifdef CONFIG_SCHED_HMP
+ /*
+ * Bail out if there are no nohz CPUs in our
+ * HMP domain, since we will move tasks between
+ * domains through wakeup and force balancing
+ * as necessary based upon task load.
+ */
+ if (cpumask_first_and(nohz.idle_cpus_mask,
+ &((struct hmp_domain *)hmp_cpu_domain(cpu))->cpus) >= nr_cpu_ids)
+ return 0;
+#endif
+
if (rq->nr_running >= 2)
goto need_kick;
@@ -5709,6 +6914,442 @@ need_kick:
static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
#endif
+#ifdef CONFIG_SCHED_HMP
+static unsigned int hmp_task_eligible_for_up_migration(struct sched_entity *se)
+{
+ /* below hmp_up_threshold, never eligible */
+ if (se->avg.load_avg_ratio < hmp_up_threshold)
+ return 0;
+ return 1;
+}
+
+/* Check if task should migrate to a faster cpu */
+static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se)
+{
+ struct task_struct *p = task_of(se);
+ int temp_target_cpu;
+ u64 now;
+
+ if (hmp_cpu_is_fastest(cpu))
+ return 0;
+
+#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
+ /* Filter by task priority */
+ if (p->prio >= hmp_up_prio)
+ return 0;
+#endif
+ if (!hmp_task_eligible_for_up_migration(se))
+ return 0;
+
+ /* Let the task load settle before doing another up migration */
+ /* hack - always use clock from first online CPU */
+ now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task;
+ if (((now - se->avg.hmp_last_up_migration) >> 10)
+ < hmp_next_up_threshold)
+ return 0;
+
+ /* hmp_domain_min_load only returns 0 for an
+ * idle CPU or 1023 for any partly-busy one.
+ * Be explicit about requirement for an idle CPU.
+ */
+ if (hmp_domain_min_load(hmp_faster_domain(cpu), &temp_target_cpu,
+ tsk_cpus_allowed(p)) == 0 && temp_target_cpu != NR_CPUS) {
+ if (target_cpu)
+ *target_cpu = temp_target_cpu;
+ return 1;
+ }
+ return 0;
+}
+
+/* Check if task should migrate to a slower cpu */
+static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
+{
+ struct task_struct *p = task_of(se);
+ u64 now;
+
+ if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if (hmp_packing_enabled)
+ return 1;
+ else
+#endif
+ return 0;
+ }
+
+#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
+ /* Filter by task priority */
+ if ((p->prio >= hmp_up_prio) &&
+ cpumask_intersects(&hmp_slower_domain(cpu)->cpus,
+ tsk_cpus_allowed(p))) {
+ return 1;
+ }
+#endif
+
+ /* Let the task load settle before doing another down migration */
+ /* hack - always use clock from first online CPU */
+ now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task;
+ if (((now - se->avg.hmp_last_down_migration) >> 10)
+ < hmp_next_down_threshold)
+ return 0;
+
+ if (cpumask_intersects(&hmp_slower_domain(cpu)->cpus,
+ tsk_cpus_allowed(p))
+ && se->avg.load_avg_ratio < hmp_down_threshold) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * hmp_can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
+ * Ideally this function should be merged with can_migrate_task() to avoid
+ * redundant code.
+ */
+static int hmp_can_migrate_task(struct task_struct *p, struct lb_env *env)
+{
+ int tsk_cache_hot = 0;
+
+ /*
+ * We do not migrate tasks that are:
+ * 1) running (obviously), or
+ * 2) cannot be migrated to this CPU due to cpus_allowed
+ */
+ if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
+ schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+ return 0;
+ }
+ env->flags &= ~LBF_ALL_PINNED;
+
+ if (task_running(env->src_rq, p)) {
+ schedstat_inc(p, se.statistics.nr_failed_migrations_running);
+ return 0;
+ }
+
+ /*
+ * Aggressive migration if:
+ * 1) task is cache cold, or
+ * 2) too many balance attempts have failed.
+ */
+
+ tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
+ if (!tsk_cache_hot ||
+ env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
+#ifdef CONFIG_SCHEDSTATS
+ if (tsk_cache_hot) {
+ schedstat_inc(env->sd, lb_hot_gained[env->idle]);
+ schedstat_inc(p, se.statistics.nr_forced_migrations);
+ }
+#endif
+ return 1;
+ }
+
+ return 1;
+}
+
+/*
+ * move_specific_task tries to move a specific task.
+ * Returns 1 if successful and 0 otherwise.
+ * Called with both runqueues locked.
+ */
+static int move_specific_task(struct lb_env *env, struct task_struct *pm)
+{
+ struct task_struct *p, *n;
+
+ list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
+ if (throttled_lb_pair(task_group(p), env->src_rq->cpu,
+ env->dst_cpu))
+ continue;
+
+ if (!hmp_can_migrate_task(p, env))
+ continue;
+ /* Check if we found the right task */
+ if (p != pm)
+ continue;
+
+ move_task(p, env);
+ /*
+ * Right now, this is only the third place move_task()
+ * is called, so we can safely collect move_task()
+ * stats here rather than inside move_task().
+ */
+ schedstat_inc(env->sd, lb_gained[env->idle]);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * hmp_active_task_migration_cpu_stop is run by cpu stopper and used to
+ * migrate a specific task from one runqueue to another.
+ * hmp_force_up_migration uses this to push a currently running task
+ * off a runqueue. hmp_idle_pull uses this to pull a currently
+ * running task to an idle runqueue.
+ * Reuses __do_active_load_balance_cpu_stop to actually do the work.
+ */
+static int hmp_active_task_migration_cpu_stop(void *data)
+{
+ return __do_active_load_balance_cpu_stop(data, false);
+}
+
+/*
+ * Move task in a runnable state to another CPU.
+ *
+ * Tailored on 'active_load_balance_cpu_stop' with slight
+ * modification to locking and pre-transfer checks. Note
+ * rq->lock must be held before calling.
+ */
+static void hmp_migrate_runnable_task(struct rq *rq)
+{
+ struct sched_domain *sd;
+ int src_cpu = cpu_of(rq);
+ struct rq *src_rq = rq;
+ int dst_cpu = rq->push_cpu;
+ struct rq *dst_rq = cpu_rq(dst_cpu);
+ struct task_struct *p = rq->migrate_task;
+ /*
+ * One last check to make sure nobody else is playing
+ * with the source rq.
+ */
+ if (src_rq->active_balance)
+ goto out;
+
+ if (src_rq->nr_running <= 1)
+ goto out;
+
+ if (task_rq(p) != src_rq)
+ goto out;
+ /*
+ * Not sure if this applies here but one can never
+ * be too cautious
+ */
+ BUG_ON(src_rq == dst_rq);
+
+ double_lock_balance(src_rq, dst_rq);
+
+ rcu_read_lock();
+ for_each_domain(dst_cpu, sd) {
+ if (cpumask_test_cpu(src_cpu, sched_domain_span(sd)))
+ break;
+ }
+
+ if (likely(sd)) {
+ struct lb_env env = {
+ .sd = sd,
+ .dst_cpu = dst_cpu,
+ .dst_rq = dst_rq,
+ .src_cpu = src_cpu,
+ .src_rq = src_rq,
+ .idle = CPU_IDLE,
+ };
+
+ schedstat_inc(sd, alb_count);
+
+ if (move_specific_task(&env, p))
+ schedstat_inc(sd, alb_pushed);
+ else
+ schedstat_inc(sd, alb_failed);
+ }
+
+ rcu_read_unlock();
+ double_unlock_balance(src_rq, dst_rq);
+out:
+ put_task_struct(p);
+}
+
+static DEFINE_SPINLOCK(hmp_force_migration);
+
+/*
+ * hmp_force_up_migration checks runqueues for tasks that need to
+ * be actively migrated to a faster cpu.
+ */
+static void hmp_force_up_migration(int this_cpu)
+{
+ int cpu, target_cpu;
+ struct sched_entity *curr, *orig;
+ struct rq *target;
+ unsigned long flags;
+ unsigned int force, got_target;
+ struct task_struct *p;
+
+ if (!spin_trylock(&hmp_force_migration))
+ return;
+ for_each_online_cpu(cpu) {
+ force = 0;
+ got_target = 0;
+ target = cpu_rq(cpu);
+ raw_spin_lock_irqsave(&target->lock, flags);
+ curr = target->cfs.curr;
+ if (!curr || target->active_balance) {
+ raw_spin_unlock_irqrestore(&target->lock, flags);
+ continue;
+ }
+ if (!entity_is_task(curr)) {
+ struct cfs_rq *cfs_rq;
+
+ cfs_rq = group_cfs_rq(curr);
+ while (cfs_rq) {
+ curr = cfs_rq->curr;
+ cfs_rq = group_cfs_rq(curr);
+ }
+ }
+ orig = curr;
+ curr = hmp_get_heaviest_task(curr, -1);
+ if (!curr) {
+ raw_spin_unlock_irqrestore(&target->lock, flags);
+ continue;
+ }
+ p = task_of(curr);
+ if (hmp_up_migration(cpu, &target_cpu, curr)) {
+ cpu_rq(target_cpu)->wake_for_idle_pull = 1;
+ raw_spin_unlock_irqrestore(&target->lock, flags);
+ spin_unlock(&hmp_force_migration);
+ smp_send_reschedule(target_cpu);
+ return;
+ }
+ if (!got_target) {
+ /*
+ * For now we just check the currently running task.
+ * Selecting the lightest task for offloading will
+ * require extensive bookkeeping.
+ */
+ curr = hmp_get_lightest_task(orig, 1);
+ p = task_of(curr);
+ target->push_cpu = hmp_offload_down(cpu, curr);
+ if (target->push_cpu < NR_CPUS) {
+ get_task_struct(p);
+ target->migrate_task = p;
+ got_target = 1;
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_OFFLOAD);
+ hmp_next_down_delay(&p->se, target->push_cpu);
+ }
+ }
+ /*
+ * We have a target with no active_balance. If the task
+ * is not currently running move it, otherwise let the
+ * CPU stopper take care of it.
+ */
+ if (got_target) {
+ if (!task_running(target, p)) {
+ trace_sched_hmp_migrate_force_running(p, 0);
+ hmp_migrate_runnable_task(target);
+ } else {
+ target->active_balance = 1;
+ force = 1;
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&target->lock, flags);
+
+ if (force)
+ stop_one_cpu_nowait(cpu_of(target),
+ hmp_active_task_migration_cpu_stop,
+ target, &target->active_balance_work);
+ }
+ spin_unlock(&hmp_force_migration);
+}
+/*
+ * hmp_idle_pull looks at little domain runqueues to see
+ * if a task should be pulled.
+ *
+ * Reuses hmp_force_migration spinlock.
+ */
+static unsigned int hmp_idle_pull(int this_cpu)
+{
+ int cpu;
+ struct sched_entity *curr, *orig;
+ struct hmp_domain *hmp_domain = NULL;
+ struct rq *target = NULL, *rq;
+ unsigned long flags, ratio = 0;
+ unsigned int force = 0;
+ struct task_struct *p = NULL;
+
+ if (!hmp_cpu_is_slowest(this_cpu))
+ hmp_domain = hmp_slower_domain(this_cpu);
+ if (!hmp_domain)
+ return 0;
+
+ if (!spin_trylock(&hmp_force_migration))
+ return 0;
+
+ /* first select a task */
+ for_each_cpu(cpu, &hmp_domain->cpus) {
+ rq = cpu_rq(cpu);
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ curr = rq->cfs.curr;
+ if (!curr) {
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ continue;
+ }
+ if (!entity_is_task(curr)) {
+ struct cfs_rq *cfs_rq;
+
+ cfs_rq = group_cfs_rq(curr);
+ while (cfs_rq) {
+ curr = cfs_rq->curr;
+ if (!entity_is_task(curr))
+ cfs_rq = group_cfs_rq(curr);
+ else
+ cfs_rq = NULL;
+ }
+ }
+ orig = curr;
+ curr = hmp_get_heaviest_task(curr, this_cpu);
+ /* check if the heaviest eligible task on this
+ * CPU is heavier than the previously selected task
+ */
+ if (curr && hmp_task_eligible_for_up_migration(curr) &&
+ curr->avg.load_avg_ratio > ratio &&
+ cpumask_test_cpu(this_cpu,
+ tsk_cpus_allowed(task_of(curr)))) {
+ p = task_of(curr);
+ target = rq;
+ ratio = curr->avg.load_avg_ratio;
+ }
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ }
+
+ if (!p)
+ goto done;
+
+ /* now we have a candidate */
+ raw_spin_lock_irqsave(&target->lock, flags);
+ if (!target->active_balance && task_rq(p) == target) {
+ get_task_struct(p);
+ target->push_cpu = this_cpu;
+ target->migrate_task = p;
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL);
+ hmp_next_up_delay(&p->se, target->push_cpu);
+ /*
+ * if the task isn't running, move it right away.
+ * Otherwise set up the active_balance mechanism and let
+ * the CPU stopper do its job.
+ */
+ if (!task_running(target, p)) {
+ trace_sched_hmp_migrate_idle_running(p, 0);
+ hmp_migrate_runnable_task(target);
+ } else {
+ target->active_balance = 1;
+ force = 1;
+ }
+ }
+ raw_spin_unlock_irqrestore(&target->lock, flags);
+
+ if (force) {
+ /* start timer to keep us awake */
+ hmp_cpu_keepalive_trigger();
+ stop_one_cpu_nowait(cpu_of(target),
+ hmp_active_task_migration_cpu_stop,
+ target, &target->active_balance_work);
+ }
+done:
+ spin_unlock(&hmp_force_migration);
+ return force;
+}
+#else
+static void hmp_force_up_migration(int this_cpu) { }
+#endif /* CONFIG_SCHED_HMP */
+
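
The idle-pull path above boils down to a max-scan over the little cluster: take each little CPU's running task, keep the heaviest one that is allowed to run on this big CPU, then migrate it under the target runqueue's lock. A minimal sketch of just the selection step, assuming a hypothetical load_of() helper in place of the se->avg.load_avg_ratio bookkeeping (the real code also takes rq->lock and descends group cfs_rq hierarchies):

struct task_struct *pick_idle_pull_candidate(const struct cpumask *little,
					     int big_cpu)
{
	struct task_struct *best = NULL;
	unsigned long best_ratio = 0;
	int cpu;

	for_each_cpu(cpu, little) {
		struct task_struct *t = cpu_curr(cpu);	/* running task */

		/* skip tasks that may not run on the big CPU */
		if (!t || !cpumask_test_cpu(big_cpu, tsk_cpus_allowed(t)))
			continue;
		if (load_of(t) > best_ratio) {		/* heaviest wins */
			best = t;
			best_ratio = load_of(t);
		}
	}
	return best;
}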
/*
* run_rebalance_domains is triggered when needed from the scheduler tick.
* Also triggered for nohz idle balancing (with nohz_balancing_kick set).
@@ -5720,6 +7361,20 @@ static void run_rebalance_domains(struct softirq_action *h)
enum cpu_idle_type idle = this_rq->idle_balance ?
CPU_IDLE : CPU_NOT_IDLE;
+#ifdef CONFIG_SCHED_HMP
+ /* shortcut for hmp idle pull wakeups */
+ if (unlikely(this_rq->wake_for_idle_pull)) {
+ this_rq->wake_for_idle_pull = 0;
+ if (hmp_idle_pull(this_cpu)) {
+ /* break out unless running nohz idle as well */
+ if (idle != CPU_IDLE)
+ return;
+ }
+ }
+#endif
+
+ hmp_force_up_migration(this_cpu);
+
rebalance_domains(this_cpu, idle);
/*
@@ -5752,11 +7407,17 @@ void trigger_load_balance(struct rq *rq, int cpu)
static void rq_online_fair(struct rq *rq)
{
+#ifdef CONFIG_SCHED_HMP
+ hmp_online_cpu(rq->cpu);
+#endif
update_sysctl();
}
static void rq_offline_fair(struct rq *rq)
{
+#ifdef CONFIG_SCHED_HMP
+ hmp_offline_cpu(rq->cpu);
+#endif
update_sysctl();
/* Ensure any throttled groups are reachable by pick_next_task */
@@ -6224,6 +7885,139 @@ __init void init_sched_fair_class(void)
zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
cpu_notifier(sched_ilb_notifier, 0);
#endif
+
+#ifdef CONFIG_SCHED_HMP
+ hmp_cpu_mask_setup();
+#endif
#endif /* SMP */
}
+
+#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
+static u32 cpufreq_calc_scale(u32 min, u32 max, u32 curr)
+{
+ u32 result = curr / max;
+ return result;
+}
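
At first glance curr / max looks like it must truncate to zero, but extents->max is stored pre-shifted right by SCHED_FREQSCALE_SHIFT (10 bits) in the policy callback below, so the quotient is effectively (curr << 10) / policy->max: a scale between 0 and 1024 where 1024 means full speed. A standalone sketch with illustrative numbers:

#include <stdio.h>

int main(void)
{
	unsigned int shift = 10;		/* SCHED_FREQSCALE_SHIFT */
	unsigned int policy_max = 1024000;	/* kHz, illustrative */
	unsigned int max = policy_max >> shift;	/* pre-shifted: 1000 */
	unsigned int curr = 512000;		/* current frequency, kHz */

	/* 512000 / 1000 == 512, i.e. 0.5 in 10-bit fixed point */
	printf("curr_scale = %u\n", curr / max);
	return 0;
}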
+
+/*
+ * Called when the CPU frequency changes, once for each
+ * affected CPU.
+ */
+static int cpufreq_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ int cpu = freq->cpu;
+ struct cpufreq_extents *extents;
+
+ if (freq->flags & CPUFREQ_CONST_LOOPS)
+ return NOTIFY_OK;
+
+ if (val != CPUFREQ_POSTCHANGE)
+ return NOTIFY_OK;
+
+ /* if dynamic load scale is disabled, set the load scale to 1.0 */
+ if (!hmp_data.freqinvar_load_scale_enabled) {
+ freq_scale[cpu].curr_scale = 1024;
+ return NOTIFY_OK;
+ }
+
+ extents = &freq_scale[cpu];
+ if (extents->flags & SCHED_LOAD_FREQINVAR_SINGLEFREQ) {
+ /*
+ * If our governor was recognised as a single-frequency
+ * governor, use a scale of 1.0 (1024).
+ */
+ extents->curr_scale = 1024;
+ } else {
+ extents->curr_scale = cpufreq_calc_scale(extents->min,
+ extents->max, freq->new);
+ }
+
+ return NOTIFY_OK;
+}
+
+/*
+ * Called when the cpufreq governor is changed. Only called for
+ * the CPUs whose policy was actually changed from userspace.
+ */
+static int cpufreq_policy_callback(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct cpufreq_policy *policy = data;
+ struct cpufreq_extents *extents;
+ int cpu, singleFreq = 0;
+ static const char performance_governor[] = "performance";
+ static const char powersave_governor[] = "powersave";
+
+ if (event == CPUFREQ_START)
+ return 0;
+
+ if (event != CPUFREQ_INCOMPATIBLE)
+ return 0;
+
+ /*
+ * CPUFreq governors do not accurately report the range of
+ * CPU frequencies they will choose from, so we recognise the
+ * performance and powersave governors as single-frequency only.
+ */
+ if (!strncmp(policy->governor->name, performance_governor,
+ strlen(performance_governor)) ||
+ !strncmp(policy->governor->name, powersave_governor,
+ strlen(powersave_governor)))
+ singleFreq = 1;
+
+ /* Make sure that all CPUs impacted by this policy are
+ * updated since we will only get a notification when the
+ * user explicitly changes the policy on a CPU.
+ */
+ for_each_cpu(cpu, policy->cpus) {
+ extents = &freq_scale[cpu];
+ extents->max = policy->max >> SCHED_FREQSCALE_SHIFT;
+ extents->min = policy->min >> SCHED_FREQSCALE_SHIFT;
+ if (!hmp_data.freqinvar_load_scale_enabled) {
+ extents->curr_scale = 1024;
+ } else if (singleFreq) {
+ extents->flags |= SCHED_LOAD_FREQINVAR_SINGLEFREQ;
+ extents->curr_scale = 1024;
+ } else {
+ extents->flags &= ~SCHED_LOAD_FREQINVAR_SINGLEFREQ;
+ extents->curr_scale = cpufreq_calc_scale(extents->min,
+ extents->max, policy->cur);
+ }
+ }
+
+ return 0;
+}
+
+static struct notifier_block cpufreq_notifier = {
+ .notifier_call = cpufreq_callback,
+};
+static struct notifier_block cpufreq_policy_notifier = {
+ .notifier_call = cpufreq_policy_callback,
+};
+
+static int __init register_sched_cpufreq_notifier(void)
+{
+ int cpu, ret;
+
+ /* init safe defaults since there are no policies at registration */
+ for (cpu = 0; cpu < CONFIG_NR_CPUS; cpu++) {
+ freq_scale[cpu].max = 1024;
+ freq_scale[cpu].min = 1024;
+ freq_scale[cpu].curr_scale = 1024;
+ }
+
+ pr_info("sched: registering cpufreq notifiers for scale-invariant loads\n");
+ ret = cpufreq_register_notifier(&cpufreq_policy_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+
+ if (ret != -EINVAL)
+ ret = cpufreq_register_notifier(&cpufreq_notifier,
+ CPUFREQ_TRANSITION_NOTIFIER);
+
+ return ret;
+}
+
+core_initcall(register_sched_cpufreq_notifier);
+#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
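
What consumes curr_scale is outside this hunk, but the intended use is straightforward: the runnable-average accounting multiplies each raw time delta by the current scale and shifts back down, so time run at half frequency contributes half the load. A sketch of that consumer, assuming the freq_scale[] array and SCHED_FREQSCALE_SHIFT from the hunks above (this helper itself is hypothetical, not part of the patch):

static inline u32 scale_delta_by_freq(u32 delta, int cpu)
{
	/* 1024 == 1.0; running slower shrinks the accrued load */
	return (delta * freq_scale[cpu].curr_scale) >> SCHED_FREQSCALE_SHIFT;
}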
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dfa31d533e3f..0d19ede6849e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -142,7 +142,7 @@ struct task_group {
atomic_t load_weight;
atomic64_t load_avg;
- atomic_t runnable_avg;
+ atomic_t runnable_avg, usage_avg;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@@ -279,7 +279,7 @@ struct cfs_rq {
#endif /* CONFIG_FAIR_GROUP_SCHED */
/* These always depend on CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_FAIR_GROUP_SCHED
- u32 tg_runnable_contrib;
+ u32 tg_runnable_contrib, tg_usage_contrib;
u64 tg_load_contrib;
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -464,6 +464,10 @@ struct rq {
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
+#ifdef CONFIG_SCHED_HMP
+ struct task_struct *migrate_task;
+ int wake_for_idle_pull;
+#endif
/* cpu of this runqueue: */
int cpu;
int online;
@@ -642,6 +646,12 @@ static inline unsigned int group_first_cpu(struct sched_group *group)
extern int group_balance_cpu(struct sched_group *sg);
+#ifdef CONFIG_SCHED_HMP
+static LIST_HEAD(hmp_domains);
+DECLARE_PER_CPU(struct hmp_domain *, hmp_cpu_domain);
+#define hmp_cpu_domain(cpu) (per_cpu(hmp_cpu_domain, (cpu)))
+#endif /* CONFIG_SCHED_HMP */
+
#endif /* CONFIG_SMP */
#include "stats.h"
diff --git a/kernel/smp.c b/kernel/smp.c
index 88797cb0d23a..74ba5e2c037c 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -12,6 +12,8 @@
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/cpu.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/smp.h>
#include "smpboot.h"
@@ -159,8 +161,10 @@ void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
* locking and barrier primitives. Generic code isn't really
* equipped to do the right thing...
*/
- if (ipi)
+ if (ipi) {
+ trace_smp_call_func_send(csd->func, cpu);
arch_send_call_function_single_ipi(cpu);
+ }
if (wait)
csd_lock_wait(csd);
@@ -197,8 +201,9 @@ void generic_smp_call_function_single_interrupt(void)
* so save them away before making the call:
*/
csd_flags = csd->flags;
-
+ trace_smp_call_func_entry(csd->func);
csd->func(csd->info);
+ trace_smp_call_func_exit(csd->func);
/*
* Unlocked CSDs are valid through generic_exec_single():
@@ -228,6 +233,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
int this_cpu;
int err = 0;
+ trace_smp_call_func_send(func, cpu);
/*
* prevent preemption and reschedule on another processor,
* as well as CPU removal
@@ -245,7 +251,9 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
if (cpu == this_cpu) {
local_irq_save(flags);
+ trace_smp_call_func_entry(func);
func(info);
+ trace_smp_call_func_exit(func);
local_irq_restore(flags);
} else {
if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c2f9d6ca7e5e..a2c7e4377960 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask;
static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
+/* see the comment above the definition of WQ_POWER_EFFICIENT */
+#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
+static bool wq_power_efficient = true;
+#else
+static bool wq_power_efficient;
+#endif
+
+module_param_named(power_efficient, wq_power_efficient, bool, 0444);
+
static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -308,6 +317,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_power_efficient_wq);
+struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
@@ -4149,6 +4162,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
+ /* see the comment above the definition of WQ_POWER_EFFICIENT */
+ if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
+ flags |= WQ_UNBOUND;
+
/* allocate wq and format name */
if (flags & WQ_UNBOUND)
tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
@@ -5058,8 +5075,15 @@ static int __init init_workqueues(void)
WQ_UNBOUND_MAX_ACTIVE);
system_freezable_wq = alloc_workqueue("events_freezable",
WQ_FREEZABLE, 0);
+ system_power_efficient_wq = alloc_workqueue("events_power_efficient",
+ WQ_POWER_EFFICIENT, 0);
+ system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+ WQ_FREEZABLE | WQ_POWER_EFFICIENT,
+ 0);
BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
- !system_unbound_wq || !system_freezable_wq);
+ !system_unbound_wq || !system_freezable_wq ||
+ !system_power_efficient_wq ||
+ !system_freezable_power_efficient_wq);
return 0;
}
early_initcall(init_workqueues);
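
A driver that opts in marks its workqueue WQ_POWER_EFFICIENT; when the workqueue.power_efficient parameter (or CONFIG_WQ_POWER_EFFICIENT_DEFAULT) is set, such queues silently become unbound, so the scheduler may place their work on an already-awake CPU instead of the local one. A minimal sketch of a consumer, using the system-wide queue added above (the module boilerplate is illustrative):

#include <linux/module.h>
#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
	pr_info("power-efficient work ran\n");
}
static DECLARE_WORK(my_work, my_work_fn);

static int __init my_init(void)
{
	/* lands on system_unbound-style pools when power_efficient is set */
	queue_work(system_power_efficient_wq, &my_work);
	return 0;
}
module_init(my_init);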
diff --git a/lib/Makefile b/lib/Makefile
index 9efe480b975e..3def8e1ce905 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -140,7 +140,8 @@ obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
-libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o
+libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
+ fdt_empty_tree.o
$(foreach file, $(libfdt_files), \
$(eval CFLAGS_$(file) = -I$(src)/../scripts/dtc/libfdt))
lib-$(CONFIG_LIBFDT) += $(libfdt_files)
diff --git a/lib/fdt_empty_tree.c b/lib/fdt_empty_tree.c
new file mode 100644
index 000000000000..5d30c58150ad
--- /dev/null
+++ b/lib/fdt_empty_tree.c
@@ -0,0 +1,2 @@
+#include <linux/libfdt_env.h>
+#include "../scripts/dtc/libfdt/fdt_empty_tree.c"
diff --git a/linaro/configs/android.conf b/linaro/configs/android.conf
new file mode 100644
index 000000000000..e4fd1ad19028
--- /dev/null
+++ b/linaro/configs/android.conf
@@ -0,0 +1,42 @@
+CONFIG_IPV6=y
+# CONFIG_IPV6_SIT is not set
+CONFIG_PANIC_TIMEOUT=0
+CONFIG_HAS_WAKELOCK=y
+CONFIG_WAKELOCK=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_DM_CRYPT=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_ANDROID_PARANOID_NETWORK=y
+CONFIG_NET_ACTIVITY_STATS=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+CONFIG_USB_G_ANDROID=y
+CONFIG_SWITCH=y
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_LOGGER=y
+CONFIG_ANDROID_TIMED_OUTPUT=y
+CONFIG_ANDROID_TIMED_GPIO=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_ANDROID_INTF_ALARM_DEV=y
+CONFIG_CRYPTO_TWOFISH=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_FUSE_FS=y
+CONFIG_CPU_FREQ_GOV_INTERACTIVE=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE=y
+CONFIG_ION=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_SW_SYNC_USER=y
+CONFIG_ION_TEST=y
+CONFIG_ION_DUMMY=y
+CONFIG_ADF=y
+CONFIG_ADF_FBDEV=y
+CONFIG_ADF_MEMBLOCK=y
+CONFIG_DMA_SHARED_BUFFER=y
+CONFIG_TUN=y
diff --git a/linaro/configs/arndale.conf b/linaro/configs/arndale.conf
new file mode 100644
index 000000000000..109052f2f6ec
--- /dev/null
+++ b/linaro/configs/arndale.conf
@@ -0,0 +1,66 @@
+CONFIG_KALLSYMS_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_ARCH_EXYNOS=y
+CONFIG_S3C_LOWLEVEL_UART_PORT=2
+CONFIG_ARCH_EXYNOS5=y
+# CONFIG_EXYNOS_ATAGS is not set
+CONFIG_MACH_EXYNOS4_DT=y
+CONFIG_VMSPLIT_2G=y
+CONFIG_NR_CPUS=2
+CONFIG_HIGHMEM=y
+# CONFIG_COMPACTION is not set
+CONFIG_ARM_APPENDED_DTB=y
+CONFIG_ARM_ATAG_DTB_COMPAT=y
+CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init= mem=256M"
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_VFP=y
+CONFIG_NEON=y
+CONFIG_PM_RUNTIME=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_SATA_EXYNOS=y
+CONFIG_AX88796=y
+CONFIG_AX88796_93CX6=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_KEYBOARD_GPIO=y
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_SAMSUNG=y
+CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_S3C2410=y
+CONFIG_THERMAL=y
+CONFIG_CPU_THERMAL=y
+CONFIG_EXYNOS_THERMAL=y
+CONFIG_MFD_SEC_CORE=y
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_REGULATOR_S5M8767=y
+CONFIG_DRM=y
+CONFIG_DRM_LOAD_EDID_FIRMWARE=y
+CONFIG_DRM_EXYNOS=y
+CONFIG_DRM_EXYNOS_DMABUF=y
+CONFIG_DRM_EXYNOS_HDMI=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_MMC=y
+CONFIG_MMC_UNSAFE_RESUME=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+CONFIG_MMC_DW_EXYNOS=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_S3C=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_INFO=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_DEBUG_USER=y
+CONFIG_TUN=y
diff --git a/linaro/configs/big-LITTLE-IKS.conf b/linaro/configs/big-LITTLE-IKS.conf
new file mode 100644
index 000000000000..b067fde86eaa
--- /dev/null
+++ b/linaro/configs/big-LITTLE-IKS.conf
@@ -0,0 +1,5 @@
+CONFIG_BIG_LITTLE=y
+CONFIG_BL_SWITCHER=y
+CONFIG_ARM_DT_BL_CPUFREQ=y
+CONFIG_ARM_VEXPRESS_BL_CPUFREQ=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
diff --git a/linaro/configs/big-LITTLE-MP.conf b/linaro/configs/big-LITTLE-MP.conf
new file mode 100644
index 000000000000..ced3cf974f13
--- /dev/null
+++ b/linaro/configs/big-LITTLE-MP.conf
@@ -0,0 +1,12 @@
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_NO_HZ=y
+CONFIG_SCHED_MC=y
+CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE=y
+CONFIG_SCHED_HMP=y
+CONFIG_HMP_FAST_CPU_MASK=""
+CONFIG_HMP_SLOW_CPU_MASK=""
+CONFIG_HMP_VARIABLE_SCALE=y
+CONFIG_HMP_FREQUENCY_INVARIANT_SCALE=y
+CONFIG_SCHED_HMP_LITTLE_PACKING=y
diff --git a/linaro/configs/bigendian.conf b/linaro/configs/bigendian.conf
new file mode 100644
index 000000000000..6a1020299e85
--- /dev/null
+++ b/linaro/configs/bigendian.conf
@@ -0,0 +1,4 @@
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_CPU_ENDIAN_BE8=y
+# CONFIG_VIRTUALIZATION is not set
+# CONFIG_MMC_DW_IDMAC is not set
diff --git a/linaro/configs/debug.conf b/linaro/configs/debug.conf
new file mode 100644
index 000000000000..36980566b2d8
--- /dev/null
+++ b/linaro/configs/debug.conf
@@ -0,0 +1 @@
+CONFIG_PROVE_LOCKING=y
diff --git a/linaro/configs/distribution.conf b/linaro/configs/distribution.conf
new file mode 100644
index 000000000000..729b9b8979e4
--- /dev/null
+++ b/linaro/configs/distribution.conf
@@ -0,0 +1,49 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_CGROUPS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
+CONFIG_SECCOMP=y
+CONFIG_CC_STACKPROTECTOR=y
+CONFIG_SYN_COOKIES=y
+CONFIG_IPV6=y
+CONFIG_NETLABEL=y
+CONFIG_BRIDGE_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NETFILTER_XT_CONNMARK=m
+CONFIG_NETFILTER_XT_MARK=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE=m
+CONFIG_TUN=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
+# CONFIG_DEVKMEM is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_STRICT_DEVMEM=y
+CONFIG_SECURITY=y
+CONFIG_LSM_MMAP_MIN_ADDR=0
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SMACK=y
+CONFIG_SECURITY_APPARMOR=y
+CONFIG_DEFAULT_SECURITY_APPARMOR=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
diff --git a/linaro/configs/highbank.conf b/linaro/configs/highbank.conf
new file mode 100644
index 000000000000..bf0f3c14b0d0
--- /dev/null
+++ b/linaro/configs/highbank.conf
@@ -0,0 +1,40 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_ARCH_HIGHBANK=y
+CONFIG_ARM_ERRATA_754322=y
+CONFIG_SMP=y
+CONFIG_SCHED_MC=y
+CONFIG_AEABI=y
+CONFIG_CMDLINE="console=ttyAMA0"
+CONFIG_CPU_IDLE=y
+CONFIG_VFP=y
+CONFIG_NEON=y
+CONFIG_NET=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_SATA_HIGHBANK=y
+CONFIG_NETDEVICES=y
+CONFIG_NET_CALXEDA_XGMAC=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_SI=y
+CONFIG_I2C=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_SPI=y
+CONFIG_SPI_PL022=y
+CONFIG_GPIO_PL061=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_EDAC=y
+CONFIG_EDAC_MM_EDAC=y
+CONFIG_EDAC_HIGHBANK_MC=y
+CONFIG_EDAC_HIGHBANK_L2=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_DMADEVICES=y
+CONFIG_PL330_DMA=y
diff --git a/linaro/configs/kvm-guest.conf b/linaro/configs/kvm-guest.conf
new file mode 100644
index 000000000000..00e84a3ba1ec
--- /dev/null
+++ b/linaro/configs/kvm-guest.conf
@@ -0,0 +1,11 @@
+CONFIG_BALLOON_COMPACTION=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_NET=y
+CONFIG_HVC_DRIVER=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_VIRTUALIZATION=y
+# CONFIG_THUMB2_KERNEL is not set
diff --git a/linaro/configs/kvm-host.conf b/linaro/configs/kvm-host.conf
new file mode 100644
index 000000000000..21a40e031372
--- /dev/null
+++ b/linaro/configs/kvm-host.conf
@@ -0,0 +1,11 @@
+CONFIG_VIRTUALIZATION=y
+CONFIG_ARM_LPAE=y
+CONFIG_ARM_VIRT_EXT=y
+CONFIG_HAVE_KVM_IRQCHIP=y
+CONFIG_KVM_ARM_HOST=y
+CONFIG_KVM_ARM_MAX_VCPUS=4
+CONFIG_KVM_ARM_TIMER=y
+CONFIG_KVM_ARM_VGIC=y
+CONFIG_KVM_MMIO=y
+CONFIG_KVM=y
+CONFIG_BLK_DEV_NBD=m
diff --git a/linaro/configs/linaro-base.conf b/linaro/configs/linaro-base.conf
new file mode 100644
index 000000000000..bbd0f160398f
--- /dev/null
+++ b/linaro/configs/linaro-base.conf
@@ -0,0 +1,118 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EMBEDDED=y
+CONFIG_HOTPLUG=y
+CONFIG_PERF_EVENTS=y
+CONFIG_SLAB=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_SMP=y
+CONFIG_SCHED_MC=y
+CONFIG_SCHED_SMT=y
+CONFIG_THUMB2_KERNEL=y
+CONFIG_AEABI=y
+# CONFIG_OABI_COMPAT is not set
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_IDLE=y
+CONFIG_BINFMT_MISC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_LRO is not set
+CONFIG_NETFILTER=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_OOPS=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_NAND=y
+CONFIG_NETDEVICES=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_BTRFS_FS=y
+CONFIG_QUOTA=y
+CONFIG_QFMT_V2=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_ECRYPT_FS=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+CONFIG_JFFS2_LZO=y
+CONFIG_JFFS2_RUBIN=y
+CONFIG_CRAMFS=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+# CONFIG_NFS_V2 is not set
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_KEYS=y
+CONFIG_CRYPTO_MICHAEL_MIC=y
+CONFIG_CRC_CCITT=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRC_ITU_T=y
+CONFIG_CRC7=y
+CONFIG_HW_PERF_EVENTS=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_JUMP_LABEL=y
+CONFIG_STRICT_DEVMEM=y
+CONFIG_KGDB=y
+CONFIG_KGDB_TESTS=y
+CONFIG_OF_IDLE_STATES=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_STACK_TRACER=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_MAILBOX=y
+CONFIG_AUDIT=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_LSM_MMAP_MIN_ADDR=4096
+CONFIG_SECURITY_SELINUX=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_DEBUG_INFO=y
+CONFIG_FANOTIFY=y
diff --git a/linaro/configs/omap4.conf b/linaro/configs/omap4.conf
new file mode 100644
index 000000000000..50fb9d9cb5b5
--- /dev/null
+++ b/linaro/configs/omap4.conf
@@ -0,0 +1,194 @@
+CONFIG_EXPERT=y
+CONFIG_KPROBES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_GPIO_PCA953X=y
+CONFIG_OMAP_RESET_CLOCKS=y
+CONFIG_OMAP_MUX_DEBUG=y
+CONFIG_ARCH_OMAP2PLUS=y
+CONFIG_SOC_OMAP5=y
+# CONFIG_ARCH_OMAP2 is not set
+CONFIG_ARCH_VEXPRESS_CA9X4=y
+CONFIG_ARM_THUMBEE=y
+CONFIG_ARM_ERRATA_411920=y
+CONFIG_NR_CPUS=2
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="root=/dev/mmcblk0p2 rootwait console=ttyO2,115200"
+CONFIG_KEXEC=y
+CONFIG_PM_DEBUG=y
+CONFIG_CAN=m
+CONFIG_CAN_C_CAN=m
+CONFIG_CAN_C_CAN_PLATFORM=m
+CONFIG_BT=m
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_LL=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBPA10X=m
+CONFIG_CFG80211=m
+CONFIG_MAC80211=m
+CONFIG_MAC80211_RC_PID=y
+CONFIG_MAC80211_RC_DEFAULT_PID=y
+CONFIG_CMA=y
+CONFIG_MTD_NAND_OMAP2=y
+CONFIG_MTD_ONENAND=y
+CONFIG_MTD_ONENAND_VERIFY_WRITE=y
+CONFIG_MTD_ONENAND_OMAP2=y
+CONFIG_MTD_UBI=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_SENSORS_TSL2550=m
+CONFIG_SENSORS_LIS3_I2C=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_KS8851=y
+CONFIG_KS8851_MLL=y
+CONFIG_SMC91X=y
+CONFIG_SMSC911X=y
+CONFIG_TI_CPSW=y
+CONFIG_SMSC_PHY=y
+CONFIG_USB_USBNET=y
+CONFIG_USB_NET_SMSC95XX=y
+CONFIG_USB_ALI_M5632=y
+CONFIG_USB_AN2720=y
+CONFIG_USB_EPSON2888=y
+CONFIG_USB_KC2190=y
+CONFIG_LIBERTAS=m
+CONFIG_LIBERTAS_USB=m
+CONFIG_LIBERTAS_SDIO=m
+CONFIG_LIBERTAS_DEBUG=y
+CONFIG_INPUT_JOYDEV=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_KEYBOARD_GPIO=y
+CONFIG_KEYBOARD_MATRIX=m
+CONFIG_KEYBOARD_TWL4030=y
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_ADS7846=y
+CONFIG_INPUT_TWL4030_PWRBUTTON=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_OMAP=y
+CONFIG_SERIAL_OMAP_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_SPI=y
+CONFIG_SPI_OMAP24XX=y
+CONFIG_PINCTRL_SINGLE=y
+CONFIG_DEBUG_GPIO=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_TWL4030=y
+CONFIG_W1=y
+CONFIG_SENSORS_LM75=m
+CONFIG_WATCHDOG=y
+CONFIG_OMAP_WATCHDOG=y
+CONFIG_TWL4030_WATCHDOG=y
+CONFIG_MFD_TPS65217=y
+CONFIG_MFD_TPS65910=y
+CONFIG_TWL6040_CORE=y
+CONFIG_REGULATOR_TPS65023=y
+CONFIG_REGULATOR_TPS6507X=y
+CONFIG_REGULATOR_TPS65217=y
+CONFIG_REGULATOR_TPS65910=y
+CONFIG_REGULATOR_TWL4030=y
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_OMAP2_DSS=m
+CONFIG_OMAP2_DSS_RFBI=y
+CONFIG_OMAP2_DSS_SDI=y
+CONFIG_OMAP2_DSS_DSI=y
+CONFIG_FB_OMAP2=m
+CONFIG_PANEL_GENERIC_DPI=m
+CONFIG_PANEL_TFP410=m
+CONFIG_PANEL_SHARP_LS037V7DW01=m
+CONFIG_PANEL_NEC_NL8048HL11_01B=m
+CONFIG_PANEL_TAAL=m
+CONFIG_PANEL_TPO_TD043MTEA1=m
+CONFIG_PANEL_ACX565AKM=m
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LCD_CLASS_DEVICE=y
+CONFIG_LCD_PLATFORM=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_FONTS=y
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_DEBUG=y
+CONFIG_SND_USB_AUDIO=m
+CONFIG_SND_SOC=m
+CONFIG_SND_OMAP_SOC=m
+CONFIG_SND_OMAP_SOC_OMAP_TWL4030=m
+CONFIG_SND_OMAP_SOC_OMAP_ABE_TWL6040=m
+CONFIG_SND_OMAP_SOC_OMAP3_PANDORA=m
+CONFIG_USB=y
+CONFIG_USB_DEBUG=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_WDM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_TEST=y
+CONFIG_USB_PHY=y
+CONFIG_NOP_USB_XCEIV=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_GADGET_DEBUG=y
+CONFIG_USB_GADGET_DEBUG_FILES=y
+CONFIG_USB_GADGET_DEBUG_FS=y
+CONFIG_USB_ZERO=m
+CONFIG_MMC=y
+CONFIG_MMC_UNSAFE_RESUME=y
+CONFIG_SDIO_UART=y
+CONFIG_MMC_ARMMMCI=y
+CONFIG_MMC_OMAP=y
+CONFIG_MMC_OMAP_HS=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_ONESHOT=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_BACKLIGHT=y
+CONFIG_LEDS_TRIGGER_CPU=y
+CONFIG_LEDS_TRIGGER_GPIO=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_TWL92330=y
+CONFIG_RTC_DRV_TWL4030=y
+CONFIG_RTC_DRV_OMAP=y
+CONFIG_DMADEVICES=y
+CONFIG_DMA_OMAP=y
+# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_UBIFS_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_LIBCRC32C=y
+# CONFIG_CPU_FREQ is not set
diff --git a/linaro/configs/preempt-rt.conf b/linaro/configs/preempt-rt.conf
new file mode 100644
index 000000000000..98e036d75442
--- /dev/null
+++ b/linaro/configs/preempt-rt.conf
@@ -0,0 +1,4 @@
+CONFIG_PREEMPT=y
+CONFIG_PREEMPT_RT_FULL=y
+CONFIG_SLUB=y
+# CONFIG_CPU_FREQ is not set
diff --git a/linaro/configs/ubuntu-minimal.conf b/linaro/configs/ubuntu-minimal.conf
new file mode 120000
index 000000000000..794e82f3bc17
--- /dev/null
+++ b/linaro/configs/ubuntu-minimal.conf
@@ -0,0 +1 @@
+distribution.conf
\ No newline at end of file
diff --git a/linaro/configs/vexpress-tuning.conf b/linaro/configs/vexpress-tuning.conf
new file mode 100644
index 000000000000..adea6cc66ded
--- /dev/null
+++ b/linaro/configs/vexpress-tuning.conf
@@ -0,0 +1 @@
+# CONFIG_PROVE_LOCKING is not set
diff --git a/linaro/configs/vexpress.conf b/linaro/configs/vexpress.conf
new file mode 100644
index 000000000000..73cae2b8f8f7
--- /dev/null
+++ b/linaro/configs/vexpress.conf
@@ -0,0 +1,63 @@
+CONFIG_ARCH_VEXPRESS=y
+CONFIG_ARCH_VEXPRESS_CA9X4=y
+CONFIG_BIG_LITTLE=y
+CONFIG_ARCH_VEXPRESS_TC2=y
+CONFIG_ARCH_VEXPRESS_DCSCB=y
+CONFIG_ARM_VEXPRESS_BL_CPUFREQ=y
+CONFIG_PM_OPP=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_INTERACTIVE=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_ARM_PSCI=y
+CONFIG_HAVE_ARM_ARCH_TIMER=y
+CONFIG_NR_CPUS=8
+CONFIG_HIGHMEM=y
+CONFIG_HIGHPTE=y
+CONFIG_CMDLINE="console=ttyAMA0,38400n8 root=/dev/mmcblk0p2 rootwait mmci.fmax=4000000"
+CONFIG_VFP=y
+CONFIG_NEON=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SMSC911X=y
+CONFIG_SMC91X=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_SERIO_AMBAKMI=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_FB=y
+CONFIG_FB_ARMCLCD=y
+CONFIG_FB_ARMHDLCD=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_ARMAACI=y
+CONFIG_USB=y
+CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_ARMMMCI=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_VEXPRESS_CONFIG=y
+CONFIG_SENSORS_VEXPRESS=y
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_VEXPRESS=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_CPU=y
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
diff --git a/linaro/configs/vexpress64.conf b/linaro/configs/vexpress64.conf
new file mode 100644
index 000000000000..c9b66a198586
--- /dev/null
+++ b/linaro/configs/vexpress64.conf
@@ -0,0 +1,55 @@
+CONFIG_ARCH_VEXPRESS=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_CMDLINE="console=ttyAMA0"
+CONFIG_COMPAT=y
+CONFIG_SMC91X=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_SERIO_AMBAKMI=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+# CONFIG_SERIO_I8042 is not set
+CONFIG_FB=y
+CONFIG_FB_ARMCLCD=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_MMC=y
+CONFIG_MMC_ARMMMCI=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_CMA=y
+CONFIG_DMA_CMA=y
+CONFIG_COMMON_CLK_SCPI=y
+CONFIG_SMSC911X=y
+CONFIG_I2C=y
+CONFIG_ARM_MHU_MBOX=y
+CONFIG_ARM_SCPI_PROTOCOL=y
+CONFIG_USB_HIDDEV=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB=y
+CONFIG_USB_ULPI=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_SYNOPSYS=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_PHY=y
+CONFIG_PM_OPP=y
+CONFIG_GENERIC_CPUFREQ_CPU0=y
+CONFIG_ARM_BIG_LITTLE_CPUFREQ=y
+CONFIG_ARM_DT_BL_CPUFREQ=y
+CONFIG_ARM64_CPUIDLE=y
+CONFIG_ARM64_CRYPTO=y
diff --git a/linaro/configs/xen.conf b/linaro/configs/xen.conf
new file mode 100644
index 000000000000..d24fabbea076
--- /dev/null
+++ b/linaro/configs/xen.conf
@@ -0,0 +1,7 @@
+CONFIG_XEN=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_BACKEND=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_BACKEND=y
+CONFIG_XENFS=y
+CONFIG_XEN_COMPAT_XENFS=y
diff --git a/mm/Kconfig b/mm/Kconfig
index e742d06285b7..b2d1aed56439 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -477,3 +477,30 @@ config FRONTSWAP
and swap data is stored as normal on the matching swap device.
If unsure, say Y to enable frontswap.
+
+config GENERIC_EARLY_IOREMAP
+ bool
+
+config CMA
+ bool "Contiguous Memory Allocator"
+ depends on HAVE_MEMBLOCK
+ select MIGRATION
+ select MEMORY_ISOLATION
+ help
+ This enables the Contiguous Memory Allocator which allows other
+ subsystems to allocate big physically-contiguous blocks of memory.
+ CMA reserves a region of memory and allows only movable pages to
+ be allocated from it. This way, the kernel can use the memory for
+ pagecache, and when a subsystem requests a contiguous area, the
+ allocated pages are migrated away to serve the contiguous request.
+
+ If unsure, say "n".
+
+config CMA_DEBUG
+ bool "CMA debug messages (DEVELOPMENT)"
+ depends on DEBUG_KERNEL && CMA
+ help
+ Turns on debug messages in CMA. This produces KERN_DEBUG
+ messages for every CMA call as well as various messages while
+ processing calls such as dma_alloc_from_contiguous().
+ This option does not affect warning and error messages.
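
A typical consumer never calls CMA directly: with CONFIG_CMA enabled (and an architecture wiring it into the DMA API), an ordinary coherent allocation that is too large for the buddy allocator is satisfied by migrating movable pages out of the reserved region. A sketch, with the device and size purely illustrative:

#include <linux/dma-mapping.h>
#include <linux/platform_device.h>

static void *buf;
static dma_addr_t buf_dma;

static int my_probe(struct platform_device *pdev)
{
	/* 16 MiB: well beyond what the buddy allocator hands out in one piece */
	buf = dma_alloc_coherent(&pdev->dev, 16 << 20, &buf_dma, GFP_KERNEL);
	return buf ? 0 : -ENOMEM;
}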
diff --git a/mm/Makefile b/mm/Makefile
index 72c5acb9345f..89244cb96221 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -58,3 +58,4 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
+obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
new file mode 100644
index 000000000000..e10ccd299d66
--- /dev/null
+++ b/mm/early_ioremap.c
@@ -0,0 +1,245 @@
+/*
+ * Provide common bits of early_ioremap() support for architectures needing
+ * temporary mappings during boot before ioremap() is available.
+ *
+ * This is mostly a direct copy of the x86 early_ioremap implementation.
+ *
+ * (C) Copyright 1995 1996, 2014 Linus Torvalds
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <asm/fixmap.h>
+
+#ifdef CONFIG_MMU
+static int early_ioremap_debug __initdata;
+
+static int __init early_ioremap_debug_setup(char *str)
+{
+ early_ioremap_debug = 1;
+
+ return 0;
+}
+early_param("early_ioremap_debug", early_ioremap_debug_setup);
+
+static int after_paging_init __initdata;
+
+void __init __weak early_ioremap_shutdown(void)
+{
+}
+
+void __init early_ioremap_reset(void)
+{
+ early_ioremap_shutdown();
+ after_paging_init = 1;
+}
+
+/*
+ * Generally, ioremap() is available after paging_init() has been called.
+ * Architectures wanting to allow early_ioremap after paging_init() can
+ * define __late_set_fixmap and __late_clear_fixmap to do the right thing.
+ */
+#ifndef __late_set_fixmap
+static inline void __init __late_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t prot)
+{
+ BUG();
+}
+#endif
+
+#ifndef __late_clear_fixmap
+static inline void __init __late_clear_fixmap(enum fixed_addresses idx)
+{
+ BUG();
+}
+#endif
+
+static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
+static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
+static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
+
+void __init early_ioremap_setup(void)
+{
+ int i;
+
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
+ if (WARN_ON(prev_map[i]))
+ break;
+
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
+ slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
+}
+
+static int __init check_early_ioremap_leak(void)
+{
+ int count = 0;
+ int i;
+
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
+ if (prev_map[i])
+ count++;
+
+ if (WARN(count, KERN_WARNING
+ "Debug warning: early ioremap leak of %d areas detected.\n"
+ "please boot with early_ioremap_debug and report the dmesg.\n",
+ count))
+ return 1;
+ return 0;
+}
+late_initcall(check_early_ioremap_leak);
+
+static void __init __iomem *
+__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
+{
+ unsigned long offset;
+ resource_size_t last_addr;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+ int i, slot;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
+
+ slot = -1;
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
+ if (!prev_map[i]) {
+ slot = i;
+ break;
+ }
+ }
+
+ if (WARN(slot < 0, "%s(%08llx, %08lx): no free slot found\n",
+ __func__, (u64)phys_addr, size))
+ return NULL;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (WARN_ON(!size || last_addr < phys_addr))
+ return NULL;
+
+ prev_size[slot] = size;
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr + 1) - phys_addr;
+
+ /*
+ * Mappings have to fit in the FIX_BTMAP area.
+ */
+ nrpages = size >> PAGE_SHIFT;
+ if (WARN_ON(nrpages > NR_FIX_BTMAPS))
+ return NULL;
+
+ /*
+ * Ok, go for it..
+ */
+ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
+ while (nrpages > 0) {
+ if (after_paging_init)
+ __late_set_fixmap(idx, phys_addr, prot);
+ else
+ __early_set_fixmap(idx, phys_addr, prot);
+ phys_addr += PAGE_SIZE;
+ --idx;
+ --nrpages;
+ }
+ WARN(early_ioremap_debug, "%s(%08llx, %08lx) [%d] => %08lx + %08lx\n",
+ __func__, (u64)phys_addr, size, slot, offset, slot_virt[slot]);
+
+ prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
+ return prev_map[slot];
+}
+
+void __init early_iounmap(void __iomem *addr, unsigned long size)
+{
+ unsigned long virt_addr;
+ unsigned long offset;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+ int i, slot;
+
+ slot = -1;
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
+ if (prev_map[i] == addr) {
+ slot = i;
+ break;
+ }
+ }
+
+ if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n",
+ addr, size))
+ return;
+
+ if (WARN(prev_size[slot] != size,
+ "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
+ addr, size, slot, prev_size[slot]))
+ return;
+
+ WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n",
+ addr, size, slot);
+
+ virt_addr = (unsigned long)addr;
+ if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)))
+ return;
+
+ offset = virt_addr & ~PAGE_MASK;
+ nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
+
+ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
+ while (nrpages > 0) {
+ if (after_paging_init)
+ __late_clear_fixmap(idx);
+ else
+ __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR);
+ --idx;
+ --nrpages;
+ }
+ prev_map[slot] = NULL;
+}
+
+/* Remap an IO device */
+void __init __iomem *
+early_ioremap(resource_size_t phys_addr, unsigned long size)
+{
+ return __early_ioremap(phys_addr, size, FIXMAP_PAGE_IO);
+}
+
+/* Remap memory */
+void __init *
+early_memremap(resource_size_t phys_addr, unsigned long size)
+{
+ return (__force void *)__early_ioremap(phys_addr, size,
+ FIXMAP_PAGE_NORMAL);
+}
+#else /* CONFIG_MMU */
+
+void __init __iomem *
+early_ioremap(resource_size_t phys_addr, unsigned long size)
+{
+ return (__force void __iomem *)phys_addr;
+}
+
+/* Remap memory */
+void __init *
+early_memremap(resource_size_t phys_addr, unsigned long size)
+{
+ return (void *)phys_addr;
+}
+
+void __init early_iounmap(void __iomem *addr, unsigned long size)
+{
+}
+
+#endif /* CONFIG_MMU */
+
+
+void __init early_memunmap(void *addr, unsigned long size)
+{
+ early_iounmap((__force void __iomem *)addr, size);
+}
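
Usage is symmetric and strictly boot-time: map, read, unmap, with the unmap size required to match the map size (enforced by the prev_size[] check above). A sketch, with the physical address supplied by the caller:

static int __init parse_boot_table(phys_addr_t table_phys)
{
	void __iomem *p = early_ioremap(table_phys, 4096);

	if (!p)
		return -ENOMEM;
	/* ... parse the table through p ... */
	early_iounmap(p, 4096);		/* size must match the map call */
	return 0;
}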
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7de4f67c81fe..ea32a04296f0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -435,25 +435,6 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
return (get_vma_private_data(vma) & flag) != 0;
}
-/* Decrement the reserved pages in the hugepage pool by one */
-static void decrement_hugepage_resv_vma(struct hstate *h,
- struct vm_area_struct *vma)
-{
- if (vma->vm_flags & VM_NORESERVE)
- return;
-
- if (vma->vm_flags & VM_MAYSHARE) {
- /* Shared mappings always use reserves */
- h->resv_huge_pages--;
- } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
- /*
- * Only the process that called mmap() has reserves for
- * private mappings.
- */
- h->resv_huge_pages--;
- }
-}
-
/* Reset counters to 0 and clear all HPAGE_RESV_* flags */
void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
@@ -463,12 +444,35 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
}
/* Returns true if the VMA has associated reserve pages */
-static int vma_has_reserves(struct vm_area_struct *vma)
+static int vma_has_reserves(struct vm_area_struct *vma, long chg)
{
+ if (vma->vm_flags & VM_NORESERVE) {
+ /*
+ * This address is already reserved by another process
+ * (chg == 0), so we should decrement the reserved count.
+ * Without decrementing, the reserve count would remain after
+ * the inode is released, because this allocated page goes into
+ * the page cache and is regarded as coming from the reserved
+ * pool when it is released. We currently have no better way to
+ * handle this situation, so add a work-around here.
+ */
+ if (vma->vm_flags & VM_MAYSHARE && chg == 0)
+ return 1;
+ else
+ return 0;
+ }
+
+ /* Shared mappings always use reserves */
if (vma->vm_flags & VM_MAYSHARE)
return 1;
+
+ /*
+ * Only the process that called mmap() has reserves for
+ * private mappings.
+ */
if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
return 1;
+
return 0;
}
@@ -536,7 +540,8 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
static struct page *dequeue_huge_page_vma(struct hstate *h,
struct vm_area_struct *vma,
- unsigned long address, int avoid_reserve)
+ unsigned long address, int avoid_reserve,
+ long chg)
{
struct page *page = NULL;
struct mempolicy *mpol;
@@ -555,7 +560,7 @@ retry_cpuset:
* have no page reserves. This check ensures that reservations are
* not "stolen". The child may still get SIGKILLed
*/
- if (!vma_has_reserves(vma) &&
+ if (!vma_has_reserves(vma, chg) &&
h->free_huge_pages - h->resv_huge_pages == 0)
goto err;
@@ -568,8 +573,13 @@ retry_cpuset:
if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
page = dequeue_huge_page_node(h, zone_to_nid(zone));
if (page) {
- if (!avoid_reserve)
- decrement_hugepage_resv_vma(h, vma);
+ if (avoid_reserve)
+ break;
+ if (!vma_has_reserves(vma, chg))
+ break;
+
+ SetPagePrivate(page);
+ h->resv_huge_pages--;
break;
}
}
@@ -627,15 +637,20 @@ static void free_huge_page(struct page *page)
int nid = page_to_nid(page);
struct hugepage_subpool *spool =
(struct hugepage_subpool *)page_private(page);
+ bool restore_reserve;
set_page_private(page, 0);
page->mapping = NULL;
BUG_ON(page_count(page));
BUG_ON(page_mapcount(page));
+ restore_reserve = PagePrivate(page);
spin_lock(&hugetlb_lock);
hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page);
+ if (restore_reserve)
+ h->resv_huge_pages++;
+
if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
/* remove the page from active list */
list_del(&page->lru);
@@ -796,33 +811,6 @@ static int hstate_next_node_to_alloc(struct hstate *h,
return nid;
}
-static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
-{
- struct page *page;
- int start_nid;
- int next_nid;
- int ret = 0;
-
- start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
- next_nid = start_nid;
-
- do {
- page = alloc_fresh_huge_page_node(h, next_nid);
- if (page) {
- ret = 1;
- break;
- }
- next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
- } while (next_nid != start_nid);
-
- if (ret)
- count_vm_event(HTLB_BUDDY_PGALLOC);
- else
- count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-
- return ret;
-}
-
/*
* helper for free_pool_huge_page() - return the previously saved
* node ["this node"] from which to free a huge page. Advance the
@@ -841,6 +829,40 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
return nid;
}
+#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask) \
+ for (nr_nodes = nodes_weight(*mask); \
+ nr_nodes > 0 && \
+ ((node = hstate_next_node_to_alloc(hs, mask)) || 1); \
+ nr_nodes--)
+
+#define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \
+ for (nr_nodes = nodes_weight(*mask); \
+ nr_nodes > 0 && \
+ ((node = hstate_next_node_to_free(hs, mask)) || 1); \
+ nr_nodes--)
+
+static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+{
+ struct page *page;
+ int nr_nodes, node;
+ int ret = 0;
+
+ for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+ page = alloc_fresh_huge_page_node(h, node);
+ if (page) {
+ ret = 1;
+ break;
+ }
+ }
+
+ if (ret)
+ count_vm_event(HTLB_BUDDY_PGALLOC);
+ else
+ count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+
+ return ret;
+}
+
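
The two macros replace the old open-coded do/while round-robin: one pass visits each allowed node at most once, starting wherever the hstate's next-node pointer currently sits, so repeated allocations interleave across nodes. An illustration (not part of the patch):

	/*
	 * With nodes 0-2 allowed and the round-robin pointer at node 1,
	 * one pass visits nodes 1, 2, 0 and then terminates.
	 */
	int nr_nodes, node;

	for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY])
		pr_info("trying node %d\n", node);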
/*
* Free huge page from pool from next node to free.
* Attempt to keep persistent huge pages more or less
@@ -850,36 +872,31 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
bool acct_surplus)
{
- int start_nid;
- int next_nid;
+ int nr_nodes, node;
int ret = 0;
- start_nid = hstate_next_node_to_free(h, nodes_allowed);
- next_nid = start_nid;
-
- do {
+ for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
/*
* If we're returning unused surplus pages, only examine
* nodes with surplus pages.
*/
- if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) &&
- !list_empty(&h->hugepage_freelists[next_nid])) {
+ if ((!acct_surplus || h->surplus_huge_pages_node[node]) &&
+ !list_empty(&h->hugepage_freelists[node])) {
struct page *page =
- list_entry(h->hugepage_freelists[next_nid].next,
+ list_entry(h->hugepage_freelists[node].next,
struct page, lru);
list_del(&page->lru);
h->free_huge_pages--;
- h->free_huge_pages_node[next_nid]--;
+ h->free_huge_pages_node[node]--;
if (acct_surplus) {
h->surplus_huge_pages--;
- h->surplus_huge_pages_node[next_nid]--;
+ h->surplus_huge_pages_node[node]--;
}
update_and_free_page(h, page);
ret = 1;
break;
}
- next_nid = hstate_next_node_to_free(h, nodes_allowed);
- } while (next_nid != start_nid);
+ }
return ret;
}
@@ -968,10 +985,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
*/
struct page *alloc_huge_page_node(struct hstate *h, int nid)
{
- struct page *page;
+ struct page *page = NULL;
spin_lock(&hugetlb_lock);
- page = dequeue_huge_page_node(h, nid);
+ if (h->free_huge_pages - h->resv_huge_pages > 0)
+ page = dequeue_huge_page_node(h, nid);
spin_unlock(&hugetlb_lock);
if (!page)
@@ -1059,11 +1077,8 @@ free:
spin_unlock(&hugetlb_lock);
/* Free unnecessary surplus pages to the buddy allocator */
- if (!list_empty(&surplus_list)) {
- list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
- put_page(page);
- }
- }
+ list_for_each_entry_safe(page, tmp, &surplus_list, lru)
+ put_page(page);
spin_lock(&hugetlb_lock);
return ret;
@@ -1131,9 +1146,9 @@ static long vma_needs_reservation(struct hstate *h,
} else {
long err;
pgoff_t idx = vma_hugecache_offset(h, vma, addr);
- struct resv_map *reservations = vma_resv_map(vma);
+ struct resv_map *resv = vma_resv_map(vma);
- err = region_chg(&reservations->regions, idx, idx + 1);
+ err = region_chg(&resv->regions, idx, idx + 1);
if (err < 0)
return err;
return 0;
@@ -1151,10 +1166,10 @@ static void vma_commit_reservation(struct hstate *h,
} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
pgoff_t idx = vma_hugecache_offset(h, vma, addr);
- struct resv_map *reservations = vma_resv_map(vma);
+ struct resv_map *resv = vma_resv_map(vma);
/* Mark this page used in the map. */
- region_add(&reservations->regions, idx, idx + 1);
+ region_add(&resv->regions, idx, idx + 1);
}
}
@@ -1180,38 +1195,35 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
chg = vma_needs_reservation(h, vma, addr);
if (chg < 0)
return ERR_PTR(-ENOMEM);
- if (chg)
- if (hugepage_subpool_get_pages(spool, chg))
+ if (chg || avoid_reserve)
+ if (hugepage_subpool_get_pages(spool, 1))
return ERR_PTR(-ENOSPC);
ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
if (ret) {
- hugepage_subpool_put_pages(spool, chg);
+ if (chg || avoid_reserve)
+ hugepage_subpool_put_pages(spool, 1);
return ERR_PTR(-ENOSPC);
}
spin_lock(&hugetlb_lock);
- page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
- if (page) {
- /* update page cgroup details */
- hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h),
- h_cg, page);
- spin_unlock(&hugetlb_lock);
- } else {
+ page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
+ if (!page) {
spin_unlock(&hugetlb_lock);
page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
if (!page) {
hugetlb_cgroup_uncharge_cgroup(idx,
pages_per_huge_page(h),
h_cg);
- hugepage_subpool_put_pages(spool, chg);
+ if (chg || avoid_reserve)
+ hugepage_subpool_put_pages(spool, 1);
return ERR_PTR(-ENOSPC);
}
spin_lock(&hugetlb_lock);
- hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h),
- h_cg, page);
list_move(&page->lru, &h->hugepage_activelist);
- spin_unlock(&hugetlb_lock);
+ /* Fall through */
}
+ hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
+ spin_unlock(&hugetlb_lock);
set_page_private(page, (unsigned long)spool);
@@ -1222,14 +1234,12 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
int __weak alloc_bootmem_huge_page(struct hstate *h)
{
struct huge_bootmem_page *m;
- int nr_nodes = nodes_weight(node_states[N_MEMORY]);
+ int nr_nodes, node;
- while (nr_nodes) {
+ for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
void *addr;
- addr = __alloc_bootmem_node_nopanic(
- NODE_DATA(hstate_next_node_to_alloc(h,
- &node_states[N_MEMORY])),
+ addr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
huge_page_size(h), huge_page_size(h), 0);
if (addr) {
@@ -1241,7 +1251,6 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
m = addr;
goto found;
}
- nr_nodes--;
}
return 0;
@@ -1380,48 +1389,28 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count,
static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
int delta)
{
- int start_nid, next_nid;
- int ret = 0;
+ int nr_nodes, node;
VM_BUG_ON(delta != -1 && delta != 1);
- if (delta < 0)
- start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
- else
- start_nid = hstate_next_node_to_free(h, nodes_allowed);
- next_nid = start_nid;
-
- do {
- int nid = next_nid;
- if (delta < 0) {
- /*
- * To shrink on this node, there must be a surplus page
- */
- if (!h->surplus_huge_pages_node[nid]) {
- next_nid = hstate_next_node_to_alloc(h,
- nodes_allowed);
- continue;
- }
+ if (delta < 0) {
+ for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+ if (h->surplus_huge_pages_node[node])
+ goto found;
}
- if (delta > 0) {
- /*
- * Surplus cannot exceed the total number of pages
- */
- if (h->surplus_huge_pages_node[nid] >=
- h->nr_huge_pages_node[nid]) {
- next_nid = hstate_next_node_to_free(h,
- nodes_allowed);
- continue;
- }
+ } else {
+ for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
+ if (h->surplus_huge_pages_node[node] <
+ h->nr_huge_pages_node[node])
+ goto found;
}
+ }
+ return 0;
- h->surplus_huge_pages += delta;
- h->surplus_huge_pages_node[nid] += delta;
- ret = 1;
- break;
- } while (next_nid != start_nid);
-
- return ret;
+found:
+ h->surplus_huge_pages += delta;
+ h->surplus_huge_pages_node[node] += delta;
+ return 1;
}
#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
@@ -2233,7 +2222,7 @@ out:
static void hugetlb_vm_op_open(struct vm_area_struct *vma)
{
- struct resv_map *reservations = vma_resv_map(vma);
+ struct resv_map *resv = vma_resv_map(vma);
/*
* This new VMA should share its siblings reservation map if present.
@@ -2243,34 +2232,34 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
* after this open call completes. It is therefore safe to take a
* new reference here without additional locking.
*/
- if (reservations)
- kref_get(&reservations->refs);
+ if (resv)
+ kref_get(&resv->refs);
}
static void resv_map_put(struct vm_area_struct *vma)
{
- struct resv_map *reservations = vma_resv_map(vma);
+ struct resv_map *resv = vma_resv_map(vma);
- if (!reservations)
+ if (!resv)
return;
- kref_put(&reservations->refs, resv_map_release);
+ kref_put(&resv->refs, resv_map_release);
}
static void hugetlb_vm_op_close(struct vm_area_struct *vma)
{
struct hstate *h = hstate_vma(vma);
- struct resv_map *reservations = vma_resv_map(vma);
+ struct resv_map *resv = vma_resv_map(vma);
struct hugepage_subpool *spool = subpool_vma(vma);
unsigned long reserve;
unsigned long start;
unsigned long end;
- if (reservations) {
+ if (resv) {
start = vma_hugecache_offset(h, vma, vma->vm_start);
end = vma_hugecache_offset(h, vma, vma->vm_end);
reserve = (end - start) -
- region_count(&reservations->regions, start, end);
+ region_count(&resv->regions, start, end);
resv_map_put(vma);
@@ -2363,16 +2352,26 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
int cow;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
+ int ret = 0;
cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+ mmun_start = vma->vm_start;
+ mmun_end = vma->vm_end;
+ if (cow)
+ mmu_notifier_invalidate_range_start(src, mmun_start, mmun_end);
+
for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
src_pte = huge_pte_offset(src, addr);
if (!src_pte)
continue;
dst_pte = huge_pte_alloc(dst, addr, sz);
- if (!dst_pte)
- goto nomem;
+ if (!dst_pte) {
+ ret = -ENOMEM;
+ break;
+ }
/* If the pagetables are shared don't copy or take references */
if (dst_pte == src_pte)
@@ -2409,10 +2408,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
spin_unlock(&src->page_table_lock);
spin_unlock(&dst->page_table_lock);
}
- return 0;
-nomem:
- return -ENOMEM;
+ if (cow)
+ mmu_notifier_invalidate_range_end(src, mmun_start, mmun_end);
+
+ return ret;
}
void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
@@ -2599,7 +2599,6 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct hstate *h = hstate_vma(vma);
struct page *old_page, *new_page;
- int avoidcopy;
int outside_reserve = 0;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -2609,10 +2608,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
retry_avoidcopy:
/* If no-one else is actually using this page, avoid the copy
* and just make the page writable */
- avoidcopy = (page_mapcount(old_page) == 1);
- if (avoidcopy) {
- if (PageAnon(old_page))
- page_move_anon_rmap(old_page, vma, address);
+ if (page_mapcount(old_page) == 1 && PageAnon(old_page)) {
+ page_move_anon_rmap(old_page, vma, address);
set_huge_ptep_writable(vma, address, ptep);
return 0;
}
@@ -2626,8 +2623,7 @@ retry_avoidcopy:
* at the time of fork() could consume its reserves on COW instead
* of the full address range.
*/
- if (!(vma->vm_flags & VM_MAYSHARE) &&
- is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
+ if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
old_page != pagecache_page)
outside_reserve = 1;
@@ -2699,6 +2695,8 @@ retry_avoidcopy:
spin_lock(&mm->page_table_lock);
ptep = huge_pte_offset(mm, address & huge_page_mask(h));
if (likely(pte_same(huge_ptep_get(ptep), pte))) {
+ ClearPagePrivate(new_page);
+
/* Break COW */
huge_ptep_clear_flush(vma, address, ptep);
set_huge_pte_at(mm, address, ptep,
@@ -2710,10 +2708,11 @@ retry_avoidcopy:
}
spin_unlock(&mm->page_table_lock);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- /* Caller expects lock to be held */
- spin_lock(&mm->page_table_lock);
page_cache_release(new_page);
page_cache_release(old_page);
+
+ /* Caller expects lock to be held */
+ spin_lock(&mm->page_table_lock);
return 0;
}
@@ -2809,6 +2808,7 @@ retry:
goto retry;
goto out;
}
+ ClearPagePrivate(page);
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
@@ -2855,8 +2855,10 @@ retry:
if (!huge_pte_none(huge_ptep_get(ptep)))
goto backout;
- if (anon_rmap)
+ if (anon_rmap) {
+ ClearPagePrivate(page);
hugepage_add_new_anon_rmap(page, vma, address);
+ }
else
page_dup_rmap(page);
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
@@ -2990,15 +2992,6 @@ out_mutex:
return ret;
}
-/* Can be overriden by architectures */
-__attribute__((weak)) struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int write)
-{
- BUG();
- return NULL;
-}
-
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, unsigned long *nr_pages,
@@ -3228,6 +3221,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
hugetlb_acct_memory(h, -(chg - freed));
}
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+ struct vm_area_struct *vma,
+ unsigned long addr, pgoff_t idx)
+{
+ unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+ svma->vm_start;
+ unsigned long sbase = saddr & PUD_MASK;
+ unsigned long s_end = sbase + PUD_SIZE;
+
+ /* Mappings that differ only in VM_LOCKED may still share */
+ unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+ unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
+ /*
+ * match the virtual addresses, permission and the alignment of the
+ * page table page.
+ */
+ if (pmd_index(addr) != pmd_index(saddr) ||
+ vm_flags != svm_flags ||
+ sbase < svma->vm_start || svma->vm_end < s_end)
+ return 0;
+
+ return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+ unsigned long base = addr & PUD_MASK;
+ unsigned long end = base + PUD_SIZE;
+
+ /*
+ * check on proper vm_flags and page table alignment
+ */
+ if (vma->vm_flags & VM_MAYSHARE &&
+ vma->vm_start <= base && end <= vma->vm_end)
+ return 1;
+ return 0;
+}
+
+/*
+ * Search for a shareable pmd page for hugetlb. In any case, this calls
+ * pmd_alloc() and returns the corresponding pte. While this is not necessary
+ * for the !shared pmd case because we can allocate the pmd later as well, it
+ * makes the code much cleaner. pmd allocation is essential for the shared
+ * case because the pud has to be populated inside the same i_mmap_mutex
+ * section - otherwise racing tasks could either miss the sharing (see
+ * huge_pte_offset) or select a bad pmd for sharing.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+ struct vm_area_struct *vma = find_vma(mm, addr);
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+ vma->vm_pgoff;
+ struct vm_area_struct *svma;
+ unsigned long saddr;
+ pte_t *spte = NULL;
+ pte_t *pte;
+
+ if (!vma_shareable(vma, addr))
+ return (pte_t *)pmd_alloc(mm, pud, addr);
+
+ mutex_lock(&mapping->i_mmap_mutex);
+ vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+ if (svma == vma)
+ continue;
+
+ saddr = page_table_shareable(svma, vma, addr, idx);
+ if (saddr) {
+ spte = huge_pte_offset(svma->vm_mm, saddr);
+ if (spte) {
+ get_page(virt_to_page(spte));
+ break;
+ }
+ }
+ }
+
+ if (!spte)
+ goto out;
+
+ spin_lock(&mm->page_table_lock);
+ if (pud_none(*pud))
+ pud_populate(mm, pud,
+ (pmd_t *)((unsigned long)spte & PAGE_MASK));
+ else
+ put_page(virt_to_page(spte));
+ spin_unlock(&mm->page_table_lock);
+out:
+ pte = (pte_t *)pmd_alloc(mm, pud, addr);
+ mutex_unlock(&mapping->i_mmap_mutex);
+ return pte;
+}
+
+/*
+ * Unmap a huge page backed by a shared pte.
+ *
+ * The hugetlb pte page is refcounted at the time of mapping. If the pte is
+ * shared, indicated by page_count > 1, unmap is achieved by clearing the pud
+ * and decrementing the refcount. If count == 1, the pte page is not shared.
+ *
+ * Called with vma->vm_mm->page_table_lock held.
+ *
+ * Returns: 1 if a shared pte page was successfully unmapped
+ * 0 if the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+ pgd_t *pgd = pgd_offset(mm, *addr);
+ pud_t *pud = pud_offset(pgd, *addr);
+
+ BUG_ON(page_count(virt_to_page(ptep)) == 0);
+ if (page_count(virt_to_page(ptep)) == 1)
+ return 0;
+
+ pud_clear(pud);
+ put_page(virt_to_page(ptep));
+ *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+ return 1;
+}
+#define want_pmd_share() (1)
+#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+ return NULL;
+}
+#define want_pmd_share() (0)
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
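To make the sharing window concrete: a PMD page maps PUD_SIZE of address space (HPAGE_SIZE * PTRS_PER_PTE), so page_table_shareable() only matches when both VMAs fully cover the PUD-aligned window, and huge_pmd_unshare() rewinds the caller's scan past that window once the pud is cleared. A minimal standalone model of the arithmetic, assuming illustrative x86_64-style constants (4 KiB pages, 2 MiB huge pages, 1 GiB PUD regions) rather than any particular kernel configuration:

	/* Minimal model of the huge-PMD sharing window; constants are
	 * illustrative, not taken from a real kernel configuration. */
	#include <stdio.h>

	#define PUD_SIZE     (1UL << 30)                 /* 1 GiB */
	#define PUD_MASK     (~(PUD_SIZE - 1))
	#define HPAGE_SIZE   (1UL << 21)                 /* 2 MiB */
	#define PTRS_PER_PTE 512UL
	#define ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		unsigned long addr = 0x40200000UL;        /* hypothetical fault address */

		/* vma_shareable(): the whole PUD-sized window around addr must
		 * lie inside the mapping for the PMD page to be shareable. */
		unsigned long base = addr & PUD_MASK;
		printf("window: [%#lx, %#lx)\n", base, base + PUD_SIZE);

		/* huge_pmd_unshare(): clearing the pud unmaps the whole window,
		 * so rewind *addr such that the caller's "addr += HPAGE_SIZE"
		 * continues at the start of the next window. */
		unsigned long rewound = ALIGN(addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
		printf("next iteration starts at %#lx\n", rewound + HPAGE_SIZE);
		return 0;
	}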
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pte_t *pte = NULL;
+
+ pgd = pgd_offset(mm, addr);
+ pud = pud_alloc(mm, pgd, addr);
+ if (pud) {
+ if (sz == PUD_SIZE) {
+ pte = (pte_t *)pud;
+ } else {
+ BUG_ON(sz != PMD_SIZE);
+ if (want_pmd_share() && pud_none(*pud))
+ pte = huge_pmd_share(mm, addr, pud);
+ else
+ pte = (pte_t *)pmd_alloc(mm, pud, addr);
+ }
+ }
+ BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+ return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd = NULL;
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, addr);
+ if (pud_present(*pud)) {
+ if (pud_huge(*pud))
+ return (pte_t *)pud;
+ pmd = pmd_offset(pud, addr);
+ }
+ }
+ return (pte_t *) pmd;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+ pmd_t *pmd, int write)
+{
+ struct page *page;
+
+ page = pte_page(*(pte_t *)pmd);
+ if (page)
+ page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+ return page;
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+ pud_t *pud, int write)
+{
+ struct page *page;
+
+ page = pte_page(*(pte_t *)pud);
+ if (page)
+ page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+ return page;
+}
+
+#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
+/* Can be overridden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+ pud_t *pud, int write)
+{
+ BUG();
+ return NULL;
+}
+
+#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
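The pointer arithmetic in follow_huge_pmd()/follow_huge_pud() just selects the subpage within the huge page from the low address bits. A small sketch under the same assumed geometry as above:

	/* Subpage selection from follow_huge_pmd(), modeled standalone with
	 * assumed 4 KiB / 2 MiB geometry. */
	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PMD_SIZE   (1UL << 21)
	#define PMD_MASK   (~(PMD_SIZE - 1))

	int main(void)
	{
		unsigned long address = 0x40234567UL;     /* hypothetical address */

		/* offset within the 2 MiB huge page, in 4 KiB base pages */
		unsigned long subpage = (address & ~PMD_MASK) >> PAGE_SHIFT;
		printf("subpage index = %lu\n", subpage); /* 0x34 = 52 */
		return 0;
	}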
#ifdef CONFIG_MEMORY_FAILURE
/* Should be called in hugetlb_lock */
diff --git a/mm/memory.c b/mm/memory.c
index ebe0f285c0e7..8a6ab7be24b6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps);
unsigned long zero_pfn __read_mostly;
unsigned long highest_memmap_pfn __read_mostly;
+EXPORT_SYMBOL(zero_pfn);
+
/*
* CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
*/
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 10bbb5427a6d..6d9bace4e589 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
@@ -432,11 +433,12 @@ EXPORT_SYMBOL(dec_zone_page_state);
* with the global counters. These could cause remote node cache line
* bouncing and will have to be only done when necessary.
*/
-void refresh_cpu_vm_stats(int cpu)
+bool refresh_cpu_vm_stats(int cpu)
{
struct zone *zone;
int i;
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+ bool vm_activity = false;
for_each_populated_zone(zone) {
struct per_cpu_pageset *p;
@@ -483,14 +485,21 @@ void refresh_cpu_vm_stats(int cpu)
if (p->expire)
continue;
- if (p->pcp.count)
+ if (p->pcp.count) {
+ vm_activity = true;
drain_zone_pages(zone, &p->pcp);
+ }
#endif
}
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (global_diff[i])
+ if (global_diff[i]) {
atomic_long_add(global_diff[i], &vm_stat[i]);
+ vm_activity = true;
+ }
+
+ return vm_activity;
}
/*
@@ -1175,22 +1184,72 @@ static const struct file_operations proc_vmstat_file_operations = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
+static struct cpumask vmstat_off_cpus;
+struct delayed_work vmstat_monitor_work;
-static void vmstat_update(struct work_struct *w)
+static inline bool need_vmstat(int cpu)
{
- refresh_cpu_vm_stats(smp_processor_id());
- schedule_delayed_work(&__get_cpu_var(vmstat_work),
- round_jiffies_relative(sysctl_stat_interval));
+ struct zone *zone;
+ int i;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pageset *p;
+
+ p = per_cpu_ptr(zone->pageset, cpu);
+
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+ if (p->vm_stat_diff[i])
+ return true;
+
+ if (zone_to_nid(zone) != numa_node_id() && p->pcp.count)
+ return true;
+ }
+
+ return false;
}
-static void __cpuinit start_cpu_timer(int cpu)
+static void vmstat_update(struct work_struct *w);
+
+static void start_cpu_timer(int cpu)
{
struct delayed_work *work = &per_cpu(vmstat_work, cpu);
- INIT_DEFERRABLE_WORK(work, vmstat_update);
+ cpumask_clear_cpu(cpu, &vmstat_off_cpus);
schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}
+static void __cpuinit setup_cpu_timer(int cpu)
+{
+ struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+
+ INIT_DEFERRABLE_WORK(work, vmstat_update);
+ start_cpu_timer(cpu);
+}
+
+static void vmstat_update_monitor(struct work_struct *w)
+{
+ int cpu;
+
+ for_each_cpu_and(cpu, &vmstat_off_cpus, cpu_online_mask)
+ if (need_vmstat(cpu))
+ start_cpu_timer(cpu);
+
+ queue_delayed_work(system_unbound_wq, &vmstat_monitor_work,
+ round_jiffies_relative(sysctl_stat_interval));
+}
+
+static void vmstat_update(struct work_struct *w)
+{
+ int cpu = smp_processor_id();
+
+ if (likely(refresh_cpu_vm_stats(cpu)))
+ schedule_delayed_work(&__get_cpu_var(vmstat_work),
+ round_jiffies_relative(sysctl_stat_interval));
+ else
+ cpumask_set_cpu(cpu, &vmstat_off_cpus);
+}
+
/*
* Use the cpu notifier to insure that the thresholds are recalculated
* when necessary.
@@ -1205,17 +1264,19 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
refresh_zone_stat_thresholds();
- start_cpu_timer(cpu);
+ setup_cpu_timer(cpu);
node_set_state(cpu_to_node(cpu), N_CPU);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
- cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
- per_cpu(vmstat_work, cpu).work.func = NULL;
+ if (!cpumask_test_cpu(cpu, &vmstat_off_cpus)) {
+ cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+ per_cpu(vmstat_work, cpu).work.func = NULL;
+ }
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
+ setup_cpu_timer(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -1238,8 +1299,14 @@ static int __init setup_vmstat(void)
register_cpu_notifier(&vmstat_notifier);
+ INIT_DEFERRABLE_WORK(&vmstat_monitor_work,
+ vmstat_update_monitor);
+ queue_delayed_work(system_unbound_wq,
+ &vmstat_monitor_work,
+ round_jiffies_relative(HZ));
+
for_each_online_cpu(cpu)
- start_cpu_timer(cpu);
+ setup_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
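The net effect of the vmstat changes: a CPU whose refresh finds nothing to do parks itself in vmstat_off_cpus instead of rescheduling its work, and the unbound monitor pass rearms it once its diffs become non-zero again. A toy userspace model of that bookkeeping, with all names hypothetical:

	/* Toy model of the vmstat parking scheme: per-CPU work runs while
	 * refresh() reports activity, otherwise the CPU is parked in off_mask
	 * and a monitor pass rearms it when its diffs are non-zero again. */
	#include <stdbool.h>
	#include <stdio.h>

	#define NCPUS 4

	static int diff[NCPUS];        /* stands in for per-CPU vm_stat_diff */
	static bool off_mask[NCPUS];   /* stands in for vmstat_off_cpus */

	static bool refresh(int cpu)   /* like refresh_cpu_vm_stats() */
	{
		bool active = diff[cpu] != 0;
		diff[cpu] = 0;         /* fold diffs into the global counters */
		return active;
	}

	static void tick(int cpu)      /* like vmstat_update() */
	{
		if (refresh(cpu))
			printf("cpu%d: active, reschedule\n", cpu);
		else {
			off_mask[cpu] = true;
			printf("cpu%d: idle, parked\n", cpu);
		}
	}

	static void monitor(void)      /* like vmstat_update_monitor() */
	{
		for (int cpu = 0; cpu < NCPUS; cpu++)
			if (off_mask[cpu] && diff[cpu] != 0) {
				off_mask[cpu] = false;
				printf("cpu%d: rearmed\n", cpu);
			}
	}

	int main(void)
	{
		diff[1] = 5;
		for (int cpu = 0; cpu < NCPUS; cpu++)
			tick(cpu);
		diff[2] = 3;           /* new activity on a parked CPU */
		monitor();
		return 0;
	}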
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index f97869f1f09b..c4b37f6b5478 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -152,6 +152,7 @@ ld_flags = $(LDFLAGS) $(ldflags-y)
dtc_cpp_flags = -Wp,-MD,$(depfile).pre.tmp -nostdinc \
-I$(srctree)/arch/$(SRCARCH)/boot/dts \
-I$(srctree)/arch/$(SRCARCH)/boot/dts/include \
+ -I$(srctree)/include \
-undef -D__DTS__
# Finds the multi-part object the current object will be linked into
diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh
index a2af2e88daf3..c9469d34ecc6 100644
--- a/scripts/gcc-goto.sh
+++ b/scripts/gcc-goto.sh
@@ -5,7 +5,7 @@
cat << "END" | $@ -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y"
int main(void)
{
-#ifdef __arm__
+#if defined(__arm__) || defined(__aarch64__)
/*
* Not related to asm goto, but used by jump label
* and broken on some ARM GCC versions (see GCC Bug 48637).
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index e66d4d258e1a..5e74595f2e85 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -174,6 +174,9 @@ int main(void)
DEVID_FIELD(x86_cpu_id, model);
DEVID_FIELD(x86_cpu_id, vendor);
+ DEVID(cpu_feature);
+ DEVID_FIELD(cpu_feature, feature);
+
DEVID(mei_cl_device_id);
DEVID_FIELD(mei_cl_device_id, name);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 4319a3824727..09632f04576b 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1133,6 +1133,16 @@ static int do_x86cpu_entry(const char *filename, void *symval,
}
ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry);
+/* Looks like: cpu:type:*:feature:*FEAT* */
+static int do_cpu_entry(const char *filename, void *symval, char *alias)
+{
+ DEF_FIELD(symval, cpu_feature, feature);
+
+ sprintf(alias, "cpu:type:*:feature:*%04X*", feature);
+ return 1;
+}
+ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry);
+
/* Looks like: mei:S */
static int do_mei_entry(const char *filename, void *symval,
char *alias)
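For reference, the alias string that do_cpu_entry() generates can be reproduced in isolation; a hypothetical feature number of 0x001A yields cpu:type:*:feature:*001A*:

	/* Reproduces the cpu_feature modalias format used by do_cpu_entry(). */
	#include <stdio.h>

	int main(void)
	{
		char alias[64];
		unsigned int feature = 0x001A;  /* hypothetical feature number */

		sprintf(alias, "cpu:type:*:feature:*%04X*", feature);
		printf("%s\n", alias);          /* cpu:type:*:feature:*001A* */
		return 0;
	}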
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 9c22317778eb..e11aa4a156d2 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -40,6 +40,11 @@
#define R_METAG_NONE 3
#endif
+#ifndef EM_AARCH64
+#define EM_AARCH64 183
+#define R_AARCH64_ABS64 257
+#endif
+
static int fd_map; /* File descriptor for file being modified. */
static int mmap_failed; /* Boolean flag. */
static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */
@@ -347,6 +352,8 @@ do_file(char const *const fname)
case EM_ARM: reltype = R_ARM_ABS32;
altmcount = "__gnu_mcount_nc";
break;
+ case EM_AARCH64:
+ reltype = R_AARCH64_ABS64; gpfx = '_'; break;
case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break;
case EM_METAG: reltype = R_METAG_ADDR32;
altmcount = "_mcount_wrapper";
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 858966ab019c..151739b4e481 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -279,6 +279,11 @@ if ($arch eq "x86_64") {
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_ARM_(CALL|PC24|THM_CALL)" .
"\\s+(__gnu_mcount_nc|mcount)\$";
+} elsif ($arch eq "arm64") {
+ $alignment = 3;
+ $section_type = '%progbits';
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_AARCH64_CALL26\\s+_mcount\$";
+ $type = ".quad";
} elsif ($arch eq "ia64") {
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
$type = "data8";
diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index 1f10e89d15b4..f9ce1160419b 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -31,6 +31,10 @@
#include <tools/be_byteshift.h>
#include <tools/le_byteshift.h>
+#ifndef EM_AARCH64
+#define EM_AARCH64 183
+#endif
+
static int fd_map; /* File descriptor for file being modified. */
static int mmap_failed; /* Boolean flag. */
static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */
@@ -249,6 +253,7 @@ do_file(char const *const fname)
custom_sort = sort_relative_table;
break;
case EM_ARM:
+ case EM_AARCH64:
case EM_MIPS:
break;
} /* end switch */
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index 06a8000aa07b..5e9690c85d8f 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -24,6 +24,7 @@
#include <sound/compress_driver.h>
#include <sound/soc.h>
#include <sound/initval.h>
+#include <sound/soc-dpcm.h>
static int soc_compr_open(struct snd_compr_stream *cstream)
{
@@ -75,6 +76,98 @@ out:
return ret;
}
+static int soc_compr_open_fe(struct snd_compr_stream *cstream)
+{
+ struct snd_soc_pcm_runtime *fe = cstream->private_data;
+ struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream;
+ struct snd_soc_platform *platform = fe->platform;
+ struct snd_soc_dai *cpu_dai = fe->cpu_dai;
+ struct snd_soc_dai *codec_dai = fe->codec_dai;
+ struct snd_soc_dpcm *dpcm;
+ struct snd_soc_dapm_widget_list *list;
+ int stream;
+ int ret = 0;
+
+ if (cstream->direction == SND_COMPRESS_PLAYBACK)
+ stream = SNDRV_PCM_STREAM_PLAYBACK;
+ else
+ stream = SNDRV_PCM_STREAM_CAPTURE;
+
+ mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
+
+ if (platform->driver->compr_ops && platform->driver->compr_ops->open) {
+ ret = platform->driver->compr_ops->open(cstream);
+ if (ret < 0) {
+ pr_err("compress asoc: can't open platform %s\n", platform->name);
+ goto out;
+ }
+ }
+
+ if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->startup) {
+ ret = fe->dai_link->compr_ops->startup(cstream);
+ if (ret < 0) {
+ pr_err("compress asoc: %s startup failed\n", fe->dai_link->name);
+ goto machine_err;
+ }
+ }
+
+ fe->dpcm[stream].runtime = fe_substream->runtime;
+
+ if (dpcm_path_get(fe, stream, &list) <= 0) {
+ dev_dbg(fe->dev, "ASoC: %s no valid %s route\n",
+ fe->dai_link->name, stream ? "capture" : "playback");
+ }
+
+ /* calculate valid and active FE <-> BE dpcms */
+ dpcm_process_paths(fe, stream, &list, 1);
+
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
+
+ ret = dpcm_be_dai_startup(fe, stream);
+ if (ret < 0) {
+ /* clean up all links */
+ list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be)
+ dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE;
+
+ dpcm_be_disconnect(fe, stream);
+ fe->dpcm[stream].runtime = NULL;
+ goto fe_err;
+ }
+
+ dpcm_clear_pending_state(fe, stream);
+ dpcm_path_put(&list);
+
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_OPEN;
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
+
+ if (cstream->direction == SND_COMPRESS_PLAYBACK) {
+ cpu_dai->playback_active++;
+ codec_dai->playback_active++;
+ } else {
+ cpu_dai->capture_active++;
+ codec_dai->capture_active++;
+ }
+
+ cpu_dai->active++;
+ codec_dai->active++;
+ fe->codec->active++;
+
+ mutex_unlock(&fe->card->mutex);
+
+ return 0;
+
+fe_err:
+ if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
+ fe->dai_link->compr_ops->shutdown(cstream);
+machine_err:
+ if (platform->driver->compr_ops && platform->driver->compr_ops->free)
+ platform->driver->compr_ops->free(cstream);
+out:
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
+ mutex_unlock(&fe->card->mutex);
+ return ret;
+}
+
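soc_compr_open_fe() above follows the usual kernel unwind idiom: each error label undoes exactly the steps that succeeded before the failure, in reverse order, so a later failure falls through the earlier labels. A generic sketch of the pattern with invented step names (not the ASoC API):

	/* Generic goto-unwind sketch mirroring the fe_err/machine_err/out
	 * chain above; step names are invented for illustration. */
	#include <stdio.h>

	static int open_platform(void)   { return 0; }
	static int startup_machine(void) { return 0; }
	static int startup_backends(void){ return -1; } /* simulate failure */

	static int open_fe(void)
	{
		int ret;

		ret = open_platform();
		if (ret < 0)
			goto out;
		ret = startup_machine();
		if (ret < 0)
			goto platform_err;
		ret = startup_backends();
		if (ret < 0)
			goto machine_err;
		return 0;

	machine_err:
		printf("undo machine startup\n");
	platform_err:
		printf("undo platform open\n");
	out:
		return ret;
	}

	int main(void)
	{
		printf("open_fe() = %d\n", open_fe());
		return 0;
	}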
/*
* Power down the audio subsystem pmdown_time msecs after close is called.
* This is to ensure there are no pops or clicks in between any music tracks
@@ -149,8 +242,9 @@ static int soc_compr_free(struct snd_compr_stream *cstream)
SND_SOC_DAPM_STREAM_STOP);
} else {
rtd->pop_wait = 1;
- schedule_delayed_work(&rtd->delayed_work,
- msecs_to_jiffies(rtd->pmdown_time));
+ queue_delayed_work(system_power_efficient_wq,
+ &rtd->delayed_work,
+ msecs_to_jiffies(rtd->pmdown_time));
}
} else {
/* capture streams can be powered down now */
@@ -163,6 +257,65 @@ static int soc_compr_free(struct snd_compr_stream *cstream)
return 0;
}
+static int soc_compr_free_fe(struct snd_compr_stream *cstream)
+{
+ struct snd_soc_pcm_runtime *fe = cstream->private_data;
+ struct snd_soc_platform *platform = fe->platform;
+ struct snd_soc_dai *cpu_dai = fe->cpu_dai;
+ struct snd_soc_dai *codec_dai = fe->codec_dai;
+ struct snd_soc_dpcm *dpcm;
+ int stream, ret;
+
+ mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
+
+ if (cstream->direction == SND_COMPRESS_PLAYBACK) {
+ stream = SNDRV_PCM_STREAM_PLAYBACK;
+ cpu_dai->playback_active--;
+ codec_dai->playback_active--;
+ } else {
+ stream = SNDRV_PCM_STREAM_CAPTURE;
+ cpu_dai->capture_active--;
+ codec_dai->capture_active--;
+ }
+
+ cpu_dai->active--;
+ codec_dai->active--;
+ fe->codec->active--;
+
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
+
+ ret = dpcm_be_dai_hw_free(fe, stream);
+ if (ret < 0)
+ dev_err(fe->dev, "compressed hw_free failed %d\n", ret);
+
+ ret = dpcm_be_dai_shutdown(fe, stream);
+
+ /* mark FE's links ready to prune */
+ list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be)
+ dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE;
+
+ dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP);
+
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE;
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
+
+ dpcm_be_disconnect(fe, stream);
+
+ fe->dpcm[stream].runtime = NULL;
+
+ if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
+ fe->dai_link->compr_ops->shutdown(cstream);
+
+ if (platform->driver->compr_ops && platform->driver->compr_ops->free)
+ platform->driver->compr_ops->free(cstream);
+
+ mutex_unlock(&fe->card->mutex);
+ return 0;
+}
+
static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd)
{
@@ -193,6 +346,59 @@ out:
return ret;
}
+static int soc_compr_trigger_fe(struct snd_compr_stream *cstream, int cmd)
+{
+ struct snd_soc_pcm_runtime *fe = cstream->private_data;
+ struct snd_soc_platform *platform = fe->platform;
+ int ret = 0, stream;
+
+ if (cmd == SND_COMPR_TRIGGER_PARTIAL_DRAIN ||
+ cmd == SND_COMPR_TRIGGER_DRAIN) {
+
+ if (platform->driver->compr_ops &&
+ platform->driver->compr_ops->trigger)
+ return platform->driver->compr_ops->trigger(cstream, cmd);
+ }
+
+ if (cstream->direction == SND_COMPRESS_PLAYBACK)
+ stream = SNDRV_PCM_STREAM_PLAYBACK;
+ else
+ stream = SNDRV_PCM_STREAM_CAPTURE;
+
+ mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
+
+ if (platform->driver->compr_ops && platform->driver->compr_ops->trigger) {
+ ret = platform->driver->compr_ops->trigger(cstream, cmd);
+ if (ret < 0)
+ goto out;
+ }
+
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
+
+ ret = dpcm_be_dai_trigger(fe, stream, cmd);
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ case SNDRV_PCM_TRIGGER_RESUME:
+ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_START;
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP;
+ break;
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED;
+ break;
+ }
+
+out:
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
+ mutex_unlock(&fe->card->mutex);
+ return ret;
+}
+
static int soc_compr_set_params(struct snd_compr_stream *cstream,
struct snd_compr_params *params)
{
@@ -240,6 +446,64 @@ err:
return ret;
}
+static int soc_compr_set_params_fe(struct snd_compr_stream *cstream,
+ struct snd_compr_params *params)
+{
+ struct snd_soc_pcm_runtime *fe = cstream->private_data;
+ struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream;
+ struct snd_soc_platform *platform = fe->platform;
+ int ret = 0, stream;
+
+ if (cstream->direction == SND_COMPRESS_PLAYBACK)
+ stream = SNDRV_PCM_STREAM_PLAYBACK;
+ else
+ stream = SNDRV_PCM_STREAM_CAPTURE;
+
+ mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
+
+ if (platform->driver->compr_ops && platform->driver->compr_ops->set_params) {
+ ret = platform->driver->compr_ops->set_params(cstream, params);
+ if (ret < 0)
+ goto out;
+ }
+
+ if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->set_params) {
+ ret = fe->dai_link->compr_ops->set_params(cstream);
+ if (ret < 0)
+ goto out;
+ }
+
+ /*
+ * Create an empty hw_params for the BE as the machine driver must
+ * fix this up to match the DSP decoder and ASRC configuration.
+ * That is, the machine driver fixup for a compressed BE is mandatory.
+ */
+ memset(&fe->dpcm[fe_substream->stream].hw_params, 0,
+ sizeof(struct snd_pcm_hw_params));
+
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
+
+ ret = dpcm_be_dai_hw_params(fe, stream);
+ if (ret < 0)
+ goto out;
+
+ ret = dpcm_be_dai_prepare(fe, stream);
+ if (ret < 0)
+ goto out;
+
+ dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_START);
+
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PREPARE;
+
+out:
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
+ mutex_unlock(&fe->card->mutex);
+ return ret;
+}
+
static int soc_compr_get_params(struct snd_compr_stream *cstream,
struct snd_codec *params)
{
@@ -334,7 +598,7 @@ static int soc_compr_copy(struct snd_compr_stream *cstream,
return ret;
}
-static int sst_compr_set_metadata(struct snd_compr_stream *cstream,
+static int soc_compr_set_metadata(struct snd_compr_stream *cstream,
struct snd_compr_metadata *metadata)
{
struct snd_soc_pcm_runtime *rtd = cstream->private_data;
@@ -347,7 +611,7 @@ static int sst_compr_set_metadata(struct snd_compr_stream *cstream,
return ret;
}
-static int sst_compr_get_metadata(struct snd_compr_stream *cstream,
+static int soc_compr_get_metadata(struct snd_compr_stream *cstream,
struct snd_compr_metadata *metadata)
{
struct snd_soc_pcm_runtime *rtd = cstream->private_data;
@@ -359,13 +623,14 @@ static int sst_compr_get_metadata(struct snd_compr_stream *cstream,
return ret;
}
+
/* ASoC Compress operations */
static struct snd_compr_ops soc_compr_ops = {
.open = soc_compr_open,
.free = soc_compr_free,
.set_params = soc_compr_set_params,
- .set_metadata = sst_compr_set_metadata,
- .get_metadata = sst_compr_get_metadata,
+ .set_metadata = soc_compr_set_metadata,
+ .get_metadata = soc_compr_get_metadata,
.get_params = soc_compr_get_params,
.trigger = soc_compr_trigger,
.pointer = soc_compr_pointer,
@@ -374,6 +639,21 @@ static struct snd_compr_ops soc_compr_ops = {
.get_codec_caps = soc_compr_get_codec_caps
};
+/* ASoC Dynamic Compress operations */
+static struct snd_compr_ops soc_compr_dyn_ops = {
+ .open = soc_compr_open_fe,
+ .free = soc_compr_free_fe,
+ .set_params = soc_compr_set_params_fe,
+ .get_params = soc_compr_get_params,
+ .set_metadata = soc_compr_set_metadata,
+ .get_metadata = soc_compr_get_metadata,
+ .trigger = soc_compr_trigger_fe,
+ .pointer = soc_compr_pointer,
+ .ack = soc_compr_ack,
+ .get_caps = soc_compr_get_caps,
+ .get_codec_caps = soc_compr_get_codec_caps
+};
+
/* create a new compress */
int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
{
@@ -382,6 +662,7 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
struct snd_soc_dai *codec_dai = rtd->codec_dai;
struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
struct snd_compr *compr;
+ struct snd_pcm *be_pcm;
char new_name[64];
int ret = 0, direction = 0;
@@ -409,7 +690,26 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
ret = -ENOMEM;
goto compr_err;
}
- memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops));
+
+ if (rtd->dai_link->dynamic) {
+ snprintf(new_name, sizeof(new_name), "(%s)",
+ rtd->dai_link->stream_name);
+
+ ret = snd_pcm_new_internal(rtd->card->snd_card, new_name, num,
+ 1, 0, &be_pcm);
+ if (ret < 0) {
+ dev_err(rtd->card->dev, "ASoC: can't create compressed stream for %s\n",
+ rtd->dai_link->name);
+ goto compr_err;
+ }
+
+ rtd->pcm = be_pcm;
+ rtd->fe_compr = 1;
+ be_pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream->private_data = rtd;
+ be_pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream->private_data = rtd;
+ memcpy(compr->ops, &soc_compr_dyn_ops, sizeof(soc_compr_dyn_ops));
+ } else
+ memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops));
/* Add copy callback for not memory mapped DSPs */
if (platform->driver->compr_ops && platform->driver->compr_ops->copy)
diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c
index 0bb5cccd7766..7aa26b5178aa 100644
--- a/sound/soc/soc-jack.c
+++ b/sound/soc/soc-jack.c
@@ -263,7 +263,7 @@ static irqreturn_t gpio_handler(int irq, void *data)
if (device_may_wakeup(dev))
pm_wakeup_event(dev, gpio->debounce_time + 50);
- schedule_delayed_work(&gpio->work,
+ queue_delayed_work(system_power_efficient_wq, &gpio->work,
msecs_to_jiffies(gpio->debounce_time));
return IRQ_HANDLED;
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 02d26915b61d..7b4bf0cc1f19 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -34,7 +34,7 @@
#define DPCM_MAX_BE_USERS 8
/* DPCM stream event, send event to FE and all active BEs. */
-static int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir,
+int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir,
int event)
{
struct snd_soc_dpcm *dpcm;
@@ -408,8 +408,9 @@ static int soc_pcm_close(struct snd_pcm_substream *substream)
} else {
/* start delayed pop wq here for playback streams */
rtd->pop_wait = 1;
- schedule_delayed_work(&rtd->delayed_work,
- msecs_to_jiffies(rtd->pmdown_time));
+ queue_delayed_work(system_power_efficient_wq,
+ &rtd->delayed_work,
+ msecs_to_jiffies(rtd->pmdown_time));
}
} else {
/* capture streams can be powered down now */
@@ -757,7 +758,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe,
}
/* disconnect a BE and FE */
-static void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
+void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm, *d;
@@ -853,7 +854,7 @@ static int widget_in_list(struct snd_soc_dapm_widget_list *list,
return 0;
}
-static int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
+int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
int stream, struct snd_soc_dapm_widget_list **list_)
{
struct snd_soc_dai *cpu_dai = fe->cpu_dai;
@@ -875,11 +876,6 @@ static int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
return paths;
}
-static inline void dpcm_path_put(struct snd_soc_dapm_widget_list **list)
-{
- kfree(*list);
-}
-
static int dpcm_prune_paths(struct snd_soc_pcm_runtime *fe, int stream,
struct snd_soc_dapm_widget_list **list_)
{
@@ -949,7 +945,7 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream,
continue;
/* don't connect if FE is not running */
- if (!fe->dpcm[stream].runtime)
+ if (!fe->dpcm[stream].runtime && !fe->fe_compr)
continue;
/* newly connected FE and BE */
@@ -974,7 +970,7 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream,
* Find the corresponding BE DAIs that source or sink audio to this
* FE substream.
*/
-static int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
+int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
int stream, struct snd_soc_dapm_widget_list **list, int new)
{
if (new)
@@ -983,7 +979,7 @@ static int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
return dpcm_prune_paths(fe, stream, list);
}
-static void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream)
+void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
@@ -1021,7 +1017,7 @@ static void dpcm_be_dai_startup_unwind(struct snd_soc_pcm_runtime *fe,
}
}
-static int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream)
+int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
int err, count = 0;
@@ -1163,7 +1159,7 @@ be_err:
return ret;
}
-static int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream)
+int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
@@ -1224,7 +1220,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream)
return 0;
}
-static int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream)
+int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
@@ -1289,7 +1285,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream)
return 0;
}
-static int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream)
+int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
int ret;
@@ -1419,7 +1415,7 @@ static int dpcm_do_trigger(struct snd_soc_dpcm *dpcm,
return ret;
}
-static int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
+int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
int cmd)
{
struct snd_soc_dpcm *dpcm;
@@ -1587,7 +1583,7 @@ out:
return ret;
}
-static int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream)
+int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
int ret = 0;
diff --git a/tools/gator/daemon/Android.mk b/tools/gator/daemon/Android.mk
new file mode 100644
index 000000000000..970ac6946150
--- /dev/null
+++ b/tools/gator/daemon/Android.mk
@@ -0,0 +1,76 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+XML_H := $(shell cd $(LOCAL_PATH) && make events_xml.h defaults_xml.h)
+
+LOCAL_SRC_FILES := \
+ AnnotateListener.cpp \
+ Buffer.cpp \
+ CCNDriver.cpp \
+ CPUFreqDriver.cpp \
+ CapturedXML.cpp \
+ Child.cpp \
+ Command.cpp \
+ ConfigurationXML.cpp \
+ DiskIODriver.cpp \
+ Driver.cpp \
+ DriverSource.cpp \
+ DynBuf.cpp \
+ EventsXML.cpp \
+ ExternalSource.cpp \
+ FSDriver.cpp \
+ Fifo.cpp \
+ FtraceDriver.cpp \
+ FtraceSource.cpp \
+ HwmonDriver.cpp \
+ KMod.cpp \
+ LocalCapture.cpp \
+ Logging.cpp \
+ main.cpp \
+ MaliVideoDriver.cpp \
+ MemInfoDriver.cpp\
+ Monitor.cpp \
+ NetDriver.cpp \
+ OlySocket.cpp \
+ OlyUtility.cpp \
+ PerfBuffer.cpp \
+ PerfDriver.cpp \
+ PerfGroup.cpp \
+ PerfSource.cpp \
+ Proc.cpp \
+ Sender.cpp \
+ SessionData.cpp \
+ SessionXML.cpp \
+ Setup.cpp \
+ Source.cpp \
+ StreamlineSetup.cpp \
+ UEvent.cpp \
+ UserSpaceSource.cpp \
+ libsensors/access.c \
+ libsensors/conf-lex.c \
+ libsensors/conf-parse.c \
+ libsensors/data.c \
+ libsensors/error.c \
+ libsensors/general.c \
+ libsensors/init.c \
+ libsensors/sysfs.c \
+ mxml/mxml-attr.c \
+ mxml/mxml-entity.c \
+ mxml/mxml-file.c \
+ mxml/mxml-get.c \
+ mxml/mxml-index.c \
+ mxml/mxml-node.c \
+ mxml/mxml-private.c \
+ mxml/mxml-search.c \
+ mxml/mxml-set.c \
+ mxml/mxml-string.c
+
+LOCAL_CFLAGS += -Wall -O3 -fno-exceptions -pthread -DETCDIR=\"/etc\" -Ilibsensors -fPIE
+LOCAL_LDFLAGS += -fPIE -pie
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)
+
+LOCAL_MODULE := gatord
+LOCAL_MODULE_TAGS := optional
+
+include $(BUILD_EXECUTABLE)
diff --git a/tools/gator/daemon/AnnotateListener.cpp b/tools/gator/daemon/AnnotateListener.cpp
new file mode 100644
index 000000000000..50110b4dc84c
--- /dev/null
+++ b/tools/gator/daemon/AnnotateListener.cpp
@@ -0,0 +1,69 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "AnnotateListener.h"
+
+#include <unistd.h>
+
+#include "OlySocket.h"
+
+struct AnnotateClient {
+ AnnotateClient *next;
+ int fd;
+};
+
+AnnotateListener::AnnotateListener() : mClients(NULL), mSock(NULL) {
+}
+
+AnnotateListener::~AnnotateListener() {
+ close();
+ delete mSock;
+}
+
+void AnnotateListener::setup() {
+ mSock = new OlyServerSocket(8082);
+}
+
+int AnnotateListener::getFd() {
+ return mSock->getFd();
+}
+
+void AnnotateListener::handle() {
+ AnnotateClient *const client = new AnnotateClient();
+ client->fd = mSock->acceptConnection();
+ client->next = mClients;
+ mClients = client;
+}
+
+void AnnotateListener::close() {
+ mSock->closeServerSocket();
+ while (mClients != NULL) {
+ ::close(mClients->fd);
+ AnnotateClient *next = mClients->next;
+ delete mClients;
+ mClients = next;
+ }
+}
+
+void AnnotateListener::signal() {
+ const char ch = 0;
+ AnnotateClient **ptr = &mClients;
+ AnnotateClient *client = mClients;
+ while (client != NULL) {
+ if (write(client->fd, &ch, sizeof(ch)) != 1) {
+ ::close(client->fd);
+ AnnotateClient *next = client->next;
+ delete client;
+ *ptr = next;
+ client = next;
+ continue;
+ }
+ ptr = &client->next;
+ client = client->next;
+ }
+}
diff --git a/tools/gator/daemon/AnnotateListener.h b/tools/gator/daemon/AnnotateListener.h
new file mode 100644
index 000000000000..cdefef12db22
--- /dev/null
+++ b/tools/gator/daemon/AnnotateListener.h
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+class AnnotateClient;
+class OlyServerSocket;
+
+class AnnotateListener {
+public:
+ AnnotateListener();
+ ~AnnotateListener();
+
+ void setup();
+ int getFd();
+
+ void handle();
+ void close();
+ void signal();
+
+private:
+ AnnotateClient *mClients;
+ OlyServerSocket *mSock;
+
+ // Intentionally unimplemented
+ AnnotateListener(const AnnotateListener &);
+ AnnotateListener &operator=(const AnnotateListener &);
+};
diff --git a/tools/gator/daemon/Application.mk b/tools/gator/daemon/Application.mk
new file mode 100644
index 000000000000..3ada471cac19
--- /dev/null
+++ b/tools/gator/daemon/Application.mk
@@ -0,0 +1,3 @@
+APP_PLATFORM := android-8
+# Replace armeabi-v7a with arm64-v8a to build an arm64 gatord or with armeabi to build an ARM11 gatord
+APP_ABI := armeabi-v7a
diff --git a/tools/gator/daemon/Buffer.cpp b/tools/gator/daemon/Buffer.cpp
new file mode 100644
index 000000000000..8fa628015069
--- /dev/null
+++ b/tools/gator/daemon/Buffer.cpp
@@ -0,0 +1,428 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Buffer.h"
+
+#include "Logging.h"
+#include "Sender.h"
+#include "SessionData.h"
+
+#define mask (mSize - 1)
+
+enum {
+ CODE_PEA = 1,
+ CODE_KEYS = 2,
+ CODE_FORMAT = 3,
+ CODE_MAPS = 4,
+ CODE_COMM = 5,
+ CODE_KEYS_OLD = 6,
+ CODE_ONLINE_CPU = 7,
+ CODE_OFFLINE_CPU = 8,
+ CODE_KALLSYMS = 9,
+};
+
+// Summary Frame Messages
+enum {
+ MESSAGE_SUMMARY = 1,
+ MESSAGE_CORE_NAME = 3,
+};
+
+// From gator_marshaling.c
+#define NEWLINE_CANARY \
+ /* Unix */ \
+ "1\n" \
+ /* Windows */ \
+ "2\r\n" \
+ /* Mac OS */ \
+ "3\r" \
+ /* RISC OS */ \
+ "4\n\r" \
+ /* Add another character so the length isn't 0x0a bytes */ \
+ "5"
+
+Buffer::Buffer(const int32_t core, const int32_t buftype, const int size, sem_t *const readerSem) : mBuf(new char[size]), mReaderSem(readerSem), mCommitTime(gSessionData->mLiveRate), mSize(size), mReadPos(0), mWritePos(0), mCommitPos(0), mAvailable(true), mIsDone(false), mCore(core), mBufType(buftype) {
+ if ((mSize & mask) != 0) {
+ logg->logError(__FILE__, __LINE__, "Buffer size is not a power of 2");
+ handleException();
+ }
+ sem_init(&mWriterSem, 0, 0);
+ frame();
+}
+
+Buffer::~Buffer() {
+ delete [] mBuf;
+ sem_destroy(&mWriterSem);
+}
+
+void Buffer::write(Sender *const sender) {
+ if (!commitReady()) {
+ return;
+ }
+
+ // commit and read are updated by the writer, only read them once
+ int commitPos = mCommitPos;
+ int readPos = mReadPos;
+
+ // determine the size of two halves
+ int length1 = commitPos - readPos;
+ char *buffer1 = mBuf + readPos;
+ int length2 = 0;
+ char *buffer2 = mBuf;
+ if (length1 < 0) {
+ length1 = mSize - readPos;
+ length2 = commitPos;
+ }
+
+ logg->logMessage("Sending data length1: %i length2: %i", length1, length2);
+
+ // start, middle or end
+ if (length1 > 0) {
+ sender->writeData(buffer1, length1, RESPONSE_APC_DATA);
+ }
+
+ // possible wrap around
+ if (length2 > 0) {
+ sender->writeData(buffer2, length2, RESPONSE_APC_DATA);
+ }
+
+ mReadPos = commitPos;
+
+ // send a notification that space is available
+ sem_post(&mWriterSem);
+}
+
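Buffer::write() sends the committed region as at most two spans because the ring can wrap past the end of the allocation. The split arithmetic in isolation, with illustrative positions:

	/* The two-span split used by Buffer::write(): when the committed region
	 * wraps, it is sent as [read..size) followed by [0..commit). */
	#include <stdio.h>

	int main(void)
	{
		int size = 16;             /* power-of-two ring size (illustrative) */
		int readPos = 12, commitPos = 5;

		int length1 = commitPos - readPos;
		int length2 = 0;
		if (length1 < 0) {         /* wrapped */
			length1 = size - readPos;  /* tail half: offsets 12..15 */
			length2 = commitPos;       /* head half: offsets 0..4  */
		}
		printf("span1=%d bytes, span2=%d bytes\n", length1, length2);
		return 0;
	}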
+bool Buffer::commitReady() const {
+ return mCommitPos != mReadPos;
+}
+
+int Buffer::bytesAvailable() const {
+ int filled = mWritePos - mReadPos;
+ if (filled < 0) {
+ filled += mSize;
+ }
+
+ int remaining = mSize - filled;
+
+ if (mAvailable) {
+ // Give some extra room; also allows space to insert the overflow error packet
+ remaining -= 200;
+ } else {
+ // Hysteresis, prevents multiple overflow messages
+ remaining -= 2000;
+ }
+
+ return remaining;
+}
+
+bool Buffer::checkSpace(const int bytes) {
+ const int remaining = bytesAvailable();
+
+ if (remaining < bytes) {
+ mAvailable = false;
+ } else {
+ mAvailable = true;
+ }
+
+ return mAvailable;
+}
+
+int Buffer::contiguousSpaceAvailable() const {
+ int remaining = bytesAvailable();
+ int contiguous = mSize - mWritePos;
+ if (remaining < contiguous) {
+ return remaining;
+ } else {
+ return contiguous;
+ }
+}
+
+void Buffer::commit(const uint64_t time) {
+ // Post-populate the length; it covers only the payload, excluding the response type byte and the length field itself
+ const int typeLength = gSessionData->mLocalCapture ? 0 : 1;
+ int length = mWritePos - mCommitPos;
+ if (length < 0) {
+ length += mSize;
+ }
+ length = length - typeLength - sizeof(int32_t);
+ for (size_t byte = 0; byte < sizeof(int32_t); byte++) {
+ mBuf[(mCommitPos + typeLength + byte) & mask] = (length >> byte * 8) & 0xFF;
+ }
+
+ logg->logMessage("Committing data mReadPos: %i mWritePos: %i mCommitPos: %i", mReadPos, mWritePos, mCommitPos);
+ mCommitPos = mWritePos;
+
+ if (gSessionData->mLiveRate > 0) {
+ while (time > mCommitTime) {
+ mCommitTime += gSessionData->mLiveRate;
+ }
+ }
+
+ if (!mIsDone) {
+ frame();
+ }
+
+ // send a notification that data is ready
+ sem_post(mReaderSem);
+}
+
+void Buffer::check(const uint64_t time) {
+ int filled = mWritePos - mCommitPos;
+ if (filled < 0) {
+ filled += mSize;
+ }
+ if (filled >= ((mSize * 3) / 4) || (gSessionData->mLiveRate > 0 && time >= mCommitTime)) {
+ commit(time);
+ }
+}
+
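commit() back-patches the frame length once the payload size is known: the stored value excludes the response type byte (present when streaming) and the four length bytes themselves, and is written little-endian one byte at a time so the store can wrap around the ring. A worked example under those assumptions:

	/* Worked example of the length back-patch in Buffer::commit(), assuming
	 * a streamed capture, so typeLength = 1 (the RESPONSE_APC_DATA byte). */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int commitPos = 0, writePos = 25;  /* frame occupies offsets 0..24 */
		int typeLength = 1;

		int length = writePos - commitPos - typeLength - (int)sizeof(int32_t);
		printf("payload length = %d\n", length);   /* 25 - 1 - 4 = 20 */

		unsigned char lenfield[4];
		for (size_t byte = 0; byte < sizeof(int32_t); byte++)
			lenfield[byte] = (length >> (byte * 8)) & 0xFF;
		printf("bytes: %02x %02x %02x %02x\n",
		       lenfield[0], lenfield[1], lenfield[2], lenfield[3]);
		return 0;
	}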
+void Buffer::packInt(char *const buf, const int size, int &writePos, int32_t x) {
+ int packedBytes = 0;
+ int more = true;
+ while (more) {
+ // low order 7 bits of x
+ char b = x & 0x7f;
+ x >>= 7;
+
+ if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0)) {
+ more = false;
+ } else {
+ b |= 0x80;
+ }
+
+ buf[(writePos + packedBytes) & /*mask*/(size - 1)] = b;
+ packedBytes++;
+ }
+
+ writePos = (writePos + packedBytes) & /*mask*/(size - 1);
+}
+
+void Buffer::packInt(int32_t x) {
+ packInt(mBuf, mSize, mWritePos, x);
+}
+
+void Buffer::packInt64(char *const buf, const int size, int &writePos, int64_t x) {
+ int packedBytes = 0;
+ int more = true;
+ while (more) {
+ // low order 7 bits of x
+ char b = x & 0x7f;
+ x >>= 7;
+
+ if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0)) {
+ more = false;
+ } else {
+ b |= 0x80;
+ }
+
+ buf[(writePos + packedBytes) & /*mask*/(size - 1)] = b;
+ packedBytes++;
+ }
+
+ writePos = (writePos + packedBytes) & /*mask*/(size - 1);
+}
+
+void Buffer::packInt64(int64_t x) {
+ packInt64(mBuf, mSize, mWritePos, x);
+}
+
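packInt()/packInt64() emit a signed LEB128-style varint: seven payload bits per byte, bit 7 as the continuation flag, stopping once bit 6 settles the sign. For example, 300 packs to 0xAC 0x02 and -1 to the single byte 0x7F. A standalone check of that encoding:

	/* Standalone check of the signed LEB128-style encoding in packInt():
	 * 7 bits per byte, bit 7 = continuation, stop when the sign settles. */
	#include <stdint.h>
	#include <stdio.h>

	static int pack(int32_t x, unsigned char *out)
	{
		int n = 0, more = 1;
		while (more) {
			unsigned char b = x & 0x7f;
			x >>= 7;   /* arithmetic shift assumed, as in the original */
			if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0))
				more = 0;
			else
				b |= 0x80;
			out[n++] = b;
		}
		return n;
	}

	int main(void)
	{
		unsigned char buf[5];
		int n = pack(300, buf);            /* expect: AC 02 */
		for (int i = 0; i < n; i++)
			printf("%02X ", buf[i]);
		printf("\n");
		n = pack(-1, buf);                 /* expect: 7F */
		for (int i = 0; i < n; i++)
			printf("%02X ", buf[i]);
		printf("\n");
		return 0;
	}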
+void Buffer::writeBytes(const void *const data, size_t count) {
+ size_t i;
+ for (i = 0; i < count; ++i) {
+ mBuf[(mWritePos + i) & mask] = static_cast<const char *>(data)[i];
+ }
+
+ mWritePos = (mWritePos + i) & mask;
+}
+
+void Buffer::writeString(const char *const str) {
+ const int len = strlen(str);
+ packInt(len);
+ writeBytes(str, len);
+}
+
+void Buffer::frame() {
+ if (!gSessionData->mLocalCapture) {
+ packInt(RESPONSE_APC_DATA);
+ }
+ // Reserve space for the length
+ mWritePos += sizeof(int32_t);
+ packInt(mBufType);
+ if ((mBufType == FRAME_BLOCK_COUNTER) || (mBufType == FRAME_PERF_ATTRS) || (mBufType == FRAME_PERF)) {
+ packInt(mCore);
+ }
+}
+
+void Buffer::summary(const uint64_t currTime, const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname) {
+ packInt(MESSAGE_SUMMARY);
+ writeString(NEWLINE_CANARY);
+ packInt64(timestamp);
+ packInt64(uptime);
+ packInt64(monotonicDelta);
+ writeString("uname");
+ writeString(uname);
+ writeString("");
+ check(currTime);
+}
+
+void Buffer::coreName(const uint64_t currTime, const int core, const int cpuid, const char *const name) {
+ if (checkSpace(3 * MAXSIZE_PACK32 + 0x100)) {
+ packInt(MESSAGE_CORE_NAME);
+ packInt(core);
+ packInt(cpuid);
+ writeString(name);
+ }
+ check(currTime);
+}
+
+bool Buffer::eventHeader(const uint64_t curr_time) {
+ bool retval = false;
+ if (checkSpace(MAXSIZE_PACK32 + MAXSIZE_PACK64)) {
+ // key of zero indicates a timestamp
+ packInt(0);
+ packInt64(curr_time);
+ retval = true;
+ }
+
+ return retval;
+}
+
+bool Buffer::eventTid(const int tid) {
+ bool retval = false;
+ if (checkSpace(2 * MAXSIZE_PACK32)) {
+ // key of 1 indicates a tid
+ packInt(1);
+ packInt(tid);
+ retval = true;
+ }
+
+ return retval;
+}
+
+void Buffer::event(const int key, const int32_t value) {
+ if (checkSpace(2 * MAXSIZE_PACK32)) {
+ packInt(key);
+ packInt(value);
+ }
+}
+
+void Buffer::event64(const int key, const int64_t value) {
+ if (checkSpace(MAXSIZE_PACK64 + MAXSIZE_PACK32)) {
+ packInt(key);
+ packInt64(value);
+ }
+}
+
+void Buffer::pea(const uint64_t currTime, const struct perf_event_attr *const pea, int key) {
+ while (!checkSpace(2 * MAXSIZE_PACK32 + pea->size)) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_PEA);
+ writeBytes(pea, pea->size);
+ packInt(key);
+ check(currTime);
+}
+
+void Buffer::keys(const uint64_t currTime, const int count, const __u64 *const ids, const int *const keys) {
+ while (!checkSpace(2 * MAXSIZE_PACK32 + count * (MAXSIZE_PACK32 + MAXSIZE_PACK64))) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_KEYS);
+ packInt(count);
+ for (int i = 0; i < count; ++i) {
+ packInt64(ids[i]);
+ packInt(keys[i]);
+ }
+ check(currTime);
+}
+
+void Buffer::keysOld(const uint64_t currTime, const int keyCount, const int *const keys, const int bytes, const char *const buf) {
+ while (!checkSpace((2 + keyCount) * MAXSIZE_PACK32 + bytes)) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_KEYS_OLD);
+ packInt(keyCount);
+ for (int i = 0; i < keyCount; ++i) {
+ packInt(keys[i]);
+ }
+ writeBytes(buf, bytes);
+ check(currTime);
+}
+
+void Buffer::format(const uint64_t currTime, const int length, const char *const format) {
+ while (!checkSpace(MAXSIZE_PACK32 + length + 1)) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_FORMAT);
+ writeBytes(format, length + 1);
+ check(currTime);
+}
+
+void Buffer::maps(const uint64_t currTime, const int pid, const int tid, const char *const maps) {
+ const int mapsLen = strlen(maps) + 1;
+ while (!checkSpace(3 * MAXSIZE_PACK32 + mapsLen)) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_MAPS);
+ packInt(pid);
+ packInt(tid);
+ writeBytes(maps, mapsLen);
+ check(currTime);
+}
+
+void Buffer::comm(const uint64_t currTime, const int pid, const int tid, const char *const image, const char *const comm) {
+ const int imageLen = strlen(image) + 1;
+ const int commLen = strlen(comm) + 1;
+ while (!checkSpace(3 * MAXSIZE_PACK32 + imageLen + commLen)) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_COMM);
+ packInt(pid);
+ packInt(tid);
+ writeBytes(image, imageLen);
+ writeBytes(comm, commLen);
+ check(currTime);
+}
+
+void Buffer::onlineCPU(const uint64_t currTime, const uint64_t time, const int cpu) {
+ while (!checkSpace(MAXSIZE_PACK32 + MAXSIZE_PACK64)) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_ONLINE_CPU);
+ packInt64(time);
+ packInt(cpu);
+ check(currTime);
+}
+
+void Buffer::offlineCPU(const uint64_t currTime, const uint64_t time, const int cpu) {
+ while (!checkSpace(MAXSIZE_PACK32 + MAXSIZE_PACK64)) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_OFFLINE_CPU);
+ packInt64(time);
+ packInt(cpu);
+ check(currTime);
+}
+
+void Buffer::kallsyms(const uint64_t currTime, const char *const kallsyms) {
+ const int kallsymsLen = strlen(kallsyms) + 1;
+ while (!checkSpace(3 * MAXSIZE_PACK32 + kallsymsLen)) {
+ sem_wait(&mWriterSem);
+ }
+ packInt(CODE_KALLSYMS);
+ writeBytes(kallsyms, kallsymsLen);
+ check(currTime);
+}
+
+void Buffer::setDone() {
+ mIsDone = true;
+ commit(0);
+}
+
+bool Buffer::isDone() const {
+ return mIsDone && mReadPos == mCommitPos && mCommitPos == mWritePos;
+}
diff --git a/tools/gator/daemon/Buffer.h b/tools/gator/daemon/Buffer.h
new file mode 100644
index 000000000000..6cffd8e39a36
--- /dev/null
+++ b/tools/gator/daemon/Buffer.h
@@ -0,0 +1,106 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef BUFFER_H
+#define BUFFER_H
+
+#include <stdint.h>
+#include <semaphore.h>
+
+#include "k/perf_event.h"
+
+class Sender;
+
+enum {
+ FRAME_SUMMARY = 1,
+ FRAME_BLOCK_COUNTER = 5,
+ FRAME_EXTERNAL = 10,
+ FRAME_PERF_ATTRS = 11,
+ FRAME_PERF = 12,
+};
+
+class Buffer {
+public:
+ static const size_t MAXSIZE_PACK32 = 5;
+ static const size_t MAXSIZE_PACK64 = 10;
+
+ Buffer(int32_t core, int32_t buftype, const int size, sem_t *const readerSem);
+ ~Buffer();
+
+ void write(Sender *sender);
+
+ int bytesAvailable() const;
+ int contiguousSpaceAvailable() const;
+ void commit(const uint64_t time);
+ void check(const uint64_t time);
+
+ // Summary messages
+ void summary(const uint64_t currTime, const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname);
+ void coreName(const uint64_t currTime, const int core, const int cpuid, const char *const name);
+
+ // Block Counter messages
+ bool eventHeader(uint64_t curr_time);
+ bool eventTid(int tid);
+ void event(int key, int32_t value);
+ void event64(int key, int64_t value);
+
+ // Perf Attrs messages
+ void pea(const uint64_t currTime, const struct perf_event_attr *const pea, int key);
+ void keys(const uint64_t currTime, const int count, const __u64 *const ids, const int *const keys);
+ void keysOld(const uint64_t currTime, const int keyCount, const int *const keys, const int bytes, const char *const buf);
+ void format(const uint64_t currTime, const int length, const char *const format);
+ void maps(const uint64_t currTime, const int pid, const int tid, const char *const maps);
+ void comm(const uint64_t currTime, const int pid, const int tid, const char *const image, const char *const comm);
+ void onlineCPU(const uint64_t currTime, const uint64_t time, const int cpu);
+ void offlineCPU(const uint64_t currTime, const uint64_t time, const int cpu);
+ void kallsyms(const uint64_t currTime, const char *const kallsyms);
+
+ void setDone();
+ bool isDone() const;
+
+ // Prefer a new member to using these functions if possible
+ char *getWritePos() { return mBuf + mWritePos; }
+ void advanceWrite(int bytes) { mWritePos = (mWritePos + bytes) & /*mask*/(mSize - 1); }
+ static void packInt(char *const buf, const int size, int &writePos, int32_t x);
+ void packInt(int32_t x);
+ static void packInt64(char *const buf, const int size, int &writePos, int64_t x);
+ void packInt64(int64_t x);
+ void writeBytes(const void *const data, size_t count);
+ void writeString(const char *const str);
+
+ static void writeLEInt(unsigned char *buf, int v) {
+ buf[0] = (v >> 0) & 0xFF;
+ buf[1] = (v >> 8) & 0xFF;
+ buf[2] = (v >> 16) & 0xFF;
+ buf[3] = (v >> 24) & 0xFF;
+ }
+
+private:
+ void frame();
+ bool commitReady() const;
+ bool checkSpace(int bytes);
+
+ char *const mBuf;
+ sem_t *const mReaderSem;
+ uint64_t mCommitTime;
+ sem_t mWriterSem;
+ const int mSize;
+ int mReadPos;
+ int mWritePos;
+ int mCommitPos;
+ bool mAvailable;
+ bool mIsDone;
+ const int32_t mCore;
+ const int32_t mBufType;
+
+ // Intentionally unimplemented
+ Buffer(const Buffer &);
+ Buffer &operator=(const Buffer &);
+};
+
+#endif // BUFFER_H
diff --git a/tools/gator/daemon/CCNDriver.cpp b/tools/gator/daemon/CCNDriver.cpp
new file mode 100644
index 000000000000..dd1a2b133842
--- /dev/null
+++ b/tools/gator/daemon/CCNDriver.cpp
@@ -0,0 +1,295 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "CCNDriver.h"
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "k/perf_event.h"
+
+#include "Config.h"
+#include "DriverSource.h"
+#include "Logging.h"
+
+static const char TAG_CATEGORY[] = "category";
+static const char TAG_COUNTER_SET[] = "counter_set";
+static const char TAG_EVENT[] = "event";
+static const char TAG_OPTION[] = "option";
+static const char TAG_OPTION_SET[] = "option_set";
+
+static const char ATTR_AVERAGE_SELECTION[] = "average_selection";
+static const char ATTR_COUNTER[] = "counter";
+static const char ATTR_COUNTER_SET[] = "counter_set";
+static const char ATTR_COUNT[] = "count";
+static const char ATTR_DESCRIPTION[] = "description";
+static const char ATTR_DISPLAY[] = "display";
+static const char ATTR_EVENT[] = "event";
+static const char ATTR_EVENT_DELTA[] = "event_delta";
+static const char ATTR_NAME[] = "name";
+static const char ATTR_OPTION_SET[] = "option_set";
+static const char ATTR_TITLE[] = "title";
+static const char ATTR_UNITS[] = "units";
+
+static const char XP_REGION[] = "XP_Region";
+static const char HNF_REGION[] = "HN-F_Region";
+static const char RNI_REGION[] = "RN-I_Region";
+static const char SBAS_REGION[] = "SBAS_Region";
+static const char CCN_5XX[] = "CCN-5xx";
+#define ARM_CCN_5XX "ARM_CCN_5XX_"
+
+static const char *const VC_TYPES[] = { "REQ", "RSP", "SNP", "DAT" };
+static const char *const XP_EVENT_NAMES[] = { NULL, "H-bit", "S-bit", "P-Cnt", "TknV" };
+static const char *const XP_EVENT_DESCRIPTIONS[] = { NULL, "Set H-bit, signaled when this XP sets the H-bit.", "Set S-bit, signaled when this XP sets the S-bit.", "Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC.", "No TknV, signaled when this XP transmits a valid packet." };
+static const char *const HNF_EVENT_NAMES[] = { NULL, "Cache Miss", "L3 SF Cache Access", "Cache Fill", "POCQ Retry", "POCQ Reqs Recvd", "SF Hit", "SF Evictions", "Snoops Sent", "Snoops Broadcast", "L3 Eviction", "L3 Fill Invalid Way", "MC Retries", "MC Reqs", "QOS HH Retry" };
+static const char *const HNF_EVENT_DESCRIPTIONS[] = { NULL, "Counts the total cache misses. This is the first time lookup result, and is high priority.", "Counts the number of cache accesses. This is the first time access, and is high priority.", "Counts the total allocations in the HN L3 cache, and all cache line allocations to the L3 cache.", "Counts the number of requests that have been retried.", "Counts the number of requests received by HN.", "Counts the number of snoop filter hits.", "Counts the number of snoop filter evictions. Cache invalidations are initiated.", "Counts the number of snoops sent. Does not differentiate between broadcast or directed snoops.", "Counts the number of snoop broadcasts sent.", "Counts the number of L3 evictions.", "Counts the number of L3 fills to an invalid way.", "Counts the number of transactions retried by the memory controller.", "Counts the number of requests to the memory controller.", "Counts the number of times a highest-priority QoS class was retried at the HN-F." };
+static const char *const RNI_EVENT_NAMES[] = { NULL, "S0 RDataBeats", "S1 RDataBeats", "S2 RDataBeats", "RXDAT Flits received", "TXDAT Flits sent", "Total TXREQ Flits sent", "Retried TXREQ Flits sent", "RRT full", "WRT full", "Replayed TXREQ Flits" };
+static const char *const RNI_EVENT_DESCRIPTIONS[] = { NULL, "S0 RDataBeats.", "S1 RDataBeats.", "S2 RDataBeats.", "RXDAT Flits received.", "TXDAT Flits sent.", "Total TXREQ Flits sent.", "Retried TXREQ Flits sent.", "RRT full.", "WRT full.", "Replayed TXREQ Flits." };
+static const char *const SBAS_EVENT_NAMES[] = { NULL, "S0 RDataBeats", NULL, NULL, "RXDAT Flits received", "TXDAT Flits sent", "Total TXREQ Flits sent", "Retried TXREQ Flits sent", "RRT full", "WRT full", "Replayed TXREQ Flits" };
+static const char *const SBAS_EVENT_DESCRIPTIONS[] = { NULL, "S0 RDataBeats.", NULL, NULL, "RXDAT Flits received.", "TXDAT Flits sent.", "Total TXREQ Flits sent.", "Retried TXREQ Flits sent.", "RRT full.", "WRT full.", "Replayed TXREQ Flits." };
+
+// This class is used only to poll for CCN-5xx configuration and emit events XML for it; all other operations are handled by PerfDriver.
+
+static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t pid, const int cpu, const int group_fd, const unsigned long flags) {
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static unsigned int getConfig(unsigned int node, unsigned int type, unsigned int event, unsigned int port, unsigned int vc) {
+ return
+ ((node & 0xff) << 0) |
+ ((type & 0xff) << 8) |
+ ((event & 0xff) << 16) |
+ ((port & 0x03) << 24) |
+ ((vc & 0x07) << 26) |
+ 0;
+}
+
+static bool perfPoll(struct perf_event_attr *const pea) {
+ int fd = sys_perf_event_open(pea, -1, 0, -1, 0);
+ if (fd < 0) {
+ return false;
+ }
+ close(fd);
+ return true;
+}
+
+CCNDriver::CCNDriver() : mNodeTypes(NULL), mXpCount(0) {
+}
+
+CCNDriver::~CCNDriver() {
+ delete mNodeTypes;
+}
+
+bool CCNDriver::claimCounter(const Counter &) const {
+ // Handled by PerfDriver
+ return false;
+}
+
+void CCNDriver::resetCounters() {
+ // Handled by PerfDriver
+}
+
+void CCNDriver::setupCounter(Counter &) {
+ // Handled by PerfDriver
+}
+
+void CCNDriver::readEvents(mxml_node_t *const) {
+ struct stat st;
+ if (stat("/sys/bus/event_source/devices/ccn", &st) != 0) {
+ // Not found
+ return;
+ }
+
+ int type;
+ if (DriverSource::readIntDriver("/sys/bus/event_source/devices/ccn/type", &type) != 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to read CCN-5xx type");
+ handleException();
+ }
+
+ // Detect number of xps
+ struct perf_event_attr pea;
+ memset(&pea, 0, sizeof(pea));
+ pea.type = type;
+ pea.size = sizeof(pea);
+
+ mXpCount = 1;
+ while (true) {
+ pea.config = getConfig(0, 0x08, 1, 0, 1) | mXpCount;
+ if (!perfPoll(&pea)) {
+ break;
+ }
+ mXpCount *= 2;
+ }
+ {
+ int lower = mXpCount/2 + 1;
+ while (lower < mXpCount) {
+ int mid = (lower + mXpCount)/2;
+ pea.config = getConfig(0, 0x08, 1, 0, 1) | mid;
+ if (perfPoll(&pea)) {
+ lower = mid + 1;
+ } else {
+ mXpCount = mid;
+ }
+ }
+ }
+
+ mNodeTypes = new NodeType[2*mXpCount];
+
+ // Detect node types
+ for (int i = 0; i < 2*mXpCount; ++i) {
+ pea.config = getConfig(0, 0x04, 1, 0, 0) | i;
+ if (perfPoll(&pea)) {
+ mNodeTypes[i] = NT_HNF;
+ continue;
+ }
+
+ pea.config = getConfig(0, 0x16, 1, 0, 0) | i;
+ if (perfPoll(&pea)) {
+ mNodeTypes[i] = NT_RNI;
+ continue;
+ }
+
+ pea.config = getConfig(0, 0x10, 1, 0, 0) | i;
+ if (perfPoll(&pea)) {
+ mNodeTypes[i] = NT_SBAS;
+ continue;
+ }
+
+ mNodeTypes[i] = NT_UNKNOWN;
+ }
+}
+
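readEvents() discovers the crosspoint count by probing perf_event_open(): it doubles a candidate id until the probe fails, then bisects the final power-of-two interval. The same search in isolation, against a hypothetical yes/no probe:

	/* The doubling-then-bisect search used by CCNDriver::readEvents(),
	 * modeled against a hypothetical probe (pretend hardware has 11 XPs). */
	#include <stdio.h>

	static int actual = 11;

	static int probe(int id)        /* stands in for perfPoll() */
	{
		return id < actual;     /* highest valid XP id is actual - 1 */
	}

	int main(void)
	{
		int count = 1;
		while (probe(count))    /* grow until the probe fails */
			count *= 2;     /* now a power of two >= actual */

		int lower = count / 2 + 1;
		while (lower < count) { /* bisect: lower-1 valid, count invalid */
			int mid = (lower + count) / 2;
			if (probe(mid))
				lower = mid + 1;
			else
				count = mid;
		}
		printf("detected count = %d\n", count);  /* prints 11 */
		return 0;
	}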
+int CCNDriver::writeCounters(mxml_node_t *const) const {
+ // Handled by PerfDriver
+ return 0;
+}
+
+void CCNDriver::writeEvents(mxml_node_t *const root) const {
+ mxml_node_t *const counter_set = mxmlNewElement(root, TAG_COUNTER_SET);
+ mxmlElementSetAttr(counter_set, ATTR_NAME, ARM_CCN_5XX "cnt");
+ mxmlElementSetAttr(counter_set, ATTR_COUNT, "8");
+
+ mxml_node_t *const category = mxmlNewElement(root, TAG_CATEGORY);
+ mxmlElementSetAttr(category, ATTR_NAME, CCN_5XX);
+ mxmlElementSetAttr(category, TAG_COUNTER_SET, ARM_CCN_5XX "cnt");
+
+ mxml_node_t *const clock_event = mxmlNewElement(category, TAG_EVENT);
+ mxmlElementSetAttr(clock_event, ATTR_COUNTER, ARM_CCN_5XX "ccnt");
+ mxmlElementSetAttr(clock_event, ATTR_EVENT, "0xff00");
+ mxmlElementSetAttr(clock_event, ATTR_TITLE, "CCN-5xx Clock");
+ mxmlElementSetAttr(clock_event, ATTR_NAME, "Cycles");
+ mxmlElementSetAttr(clock_event, ATTR_DISPLAY, "hertz");
+ mxmlElementSetAttr(clock_event, ATTR_UNITS, "Hz");
+ mxmlElementSetAttr(clock_event, ATTR_AVERAGE_SELECTION, "yes");
+ mxmlElementSetAttr(clock_event, ATTR_DESCRIPTION, "The number of core clock cycles");
+
+ mxml_node_t *const xp_option_set = mxmlNewElement(category, TAG_OPTION_SET);
+ mxmlElementSetAttr(xp_option_set, ATTR_NAME, XP_REGION);
+
+ for (int i = 0; i < mXpCount; ++i) {
+ mxml_node_t *const option = mxmlNewElement(xp_option_set, TAG_OPTION);
+ mxmlElementSetAttrf(option, ATTR_EVENT_DELTA, "0x%x", getConfig(i, 0, 0, 0, 0));
+ mxmlElementSetAttrf(option, ATTR_NAME, "XP %i", i);
+ mxmlElementSetAttrf(option, ATTR_DESCRIPTION, "Crosspoint %i", i);
+ }
+
+ for (int vc = 0; vc < ARRAY_LENGTH(VC_TYPES); ++vc) {
+ if (VC_TYPES[vc] == NULL) {
+ continue;
+ }
+ for (int bus = 0; bus < 2; ++bus) {
+ for (int eventId = 0; eventId < ARRAY_LENGTH(XP_EVENT_NAMES); ++eventId) {
+ if (XP_EVENT_NAMES[eventId] == NULL) {
+ continue;
+ }
+ mxml_node_t *const event = mxmlNewElement(category, TAG_EVENT);
+ mxmlElementSetAttrf(event, ATTR_EVENT, "0x%x", getConfig(0, 0x08, eventId, bus, vc));
+ mxmlElementSetAttr(event, ATTR_OPTION_SET, XP_REGION);
+ mxmlElementSetAttr(event, ATTR_TITLE, CCN_5XX);
+ mxmlElementSetAttrf(event, ATTR_NAME, "Bus %i: %s: %s", bus, VC_TYPES[vc], XP_EVENT_NAMES[eventId]);
+ mxmlElementSetAttrf(event, ATTR_DESCRIPTION, "Bus %i: %s: %s", bus, VC_TYPES[vc], XP_EVENT_DESCRIPTIONS[eventId]);
+ }
+ }
+ }
+
+ mxml_node_t *const hnf_option_set = mxmlNewElement(category, TAG_OPTION_SET);
+ mxmlElementSetAttr(hnf_option_set, ATTR_NAME, HNF_REGION);
+
+ for (int eventId = 0; eventId < ARRAY_LENGTH(HNF_EVENT_NAMES); ++eventId) {
+ if (HNF_EVENT_NAMES[eventId] == NULL) {
+ continue;
+ }
+ mxml_node_t *const event = mxmlNewElement(category, TAG_EVENT);
+ mxmlElementSetAttrf(event, ATTR_EVENT, "0x%x", getConfig(0, 0x04, eventId, 0, 0));
+ mxmlElementSetAttr(event, ATTR_OPTION_SET, HNF_REGION);
+ mxmlElementSetAttr(event, ATTR_TITLE, CCN_5XX);
+ mxmlElementSetAttr(event, ATTR_NAME, HNF_EVENT_NAMES[eventId]);
+ mxmlElementSetAttr(event, ATTR_DESCRIPTION, HNF_EVENT_DESCRIPTIONS[eventId]);
+ }
+
+ mxml_node_t *const rni_option_set = mxmlNewElement(category, TAG_OPTION_SET);
+ mxmlElementSetAttr(rni_option_set, ATTR_NAME, RNI_REGION);
+
+ for (int eventId = 0; eventId < ARRAY_LENGTH(RNI_EVENT_NAMES); ++eventId) {
+ if (RNI_EVENT_NAMES[eventId] == NULL) {
+ continue;
+ }
+ mxml_node_t *const event = mxmlNewElement(category, TAG_EVENT);
+ mxmlElementSetAttrf(event, ATTR_EVENT, "0x%x", getConfig(0, 0x16, eventId, 0, 0));
+ mxmlElementSetAttr(event, ATTR_OPTION_SET, RNI_REGION);
+ mxmlElementSetAttr(event, ATTR_TITLE, CCN_5XX);
+ mxmlElementSetAttr(event, ATTR_NAME, RNI_EVENT_NAMES[eventId]);
+ mxmlElementSetAttr(event, ATTR_DESCRIPTION, RNI_EVENT_DESCRIPTIONS[eventId]);
+ }
+
+ mxml_node_t *const sbas_option_set = mxmlNewElement(category, TAG_OPTION_SET);
+ mxmlElementSetAttr(sbas_option_set, ATTR_NAME, SBAS_REGION);
+
+ for (int eventId = 0; eventId < ARRAY_LENGTH(SBAS_EVENT_NAMES); ++eventId) {
+ if (SBAS_EVENT_NAMES[eventId] == NULL) {
+ continue;
+ }
+ mxml_node_t *const event = mxmlNewElement(category, TAG_EVENT);
+ mxmlElementSetAttrf(event, ATTR_EVENT, "0x%x", getConfig(0, 0x10, eventId, 0, 0));
+ mxmlElementSetAttr(event, ATTR_OPTION_SET, SBAS_REGION);
+ mxmlElementSetAttr(event, ATTR_TITLE, CCN_5XX);
+ mxmlElementSetAttr(event, ATTR_NAME, SBAS_EVENT_NAMES[eventId]);
+ mxmlElementSetAttr(event, ATTR_DESCRIPTION, SBAS_EVENT_DESCRIPTIONS[eventId]);
+ }
+
+ for (int i = 0; i < 2*mXpCount; ++i) {
+ switch (mNodeTypes[i]) {
+ case NT_HNF: {
+ mxml_node_t *const option = mxmlNewElement(hnf_option_set, TAG_OPTION);
+ mxmlElementSetAttrf(option, ATTR_EVENT_DELTA, "0x%x", getConfig(i, 0, 0, 0, 0));
+ mxmlElementSetAttrf(option, ATTR_NAME, "HN-F %i", i);
+ mxmlElementSetAttrf(option, ATTR_DESCRIPTION, "Fully-coherent Home Node %i", i);
+ break;
+ }
+ case NT_RNI: {
+ mxml_node_t *const option = mxmlNewElement(rni_option_set, TAG_OPTION);
+ mxmlElementSetAttrf(option, ATTR_EVENT_DELTA, "0x%x", getConfig(i, 0, 0, 0, 0));
+ mxmlElementSetAttrf(option, ATTR_NAME, "RN-I %i", i);
+ mxmlElementSetAttrf(option, ATTR_DESCRIPTION, "I/O-coherent Requesting Node %i", i);
+ break;
+ }
+ case NT_SBAS: {
+ mxml_node_t *const option = mxmlNewElement(sbas_option_set, TAG_OPTION);
+ mxmlElementSetAttrf(option, ATTR_EVENT_DELTA, "0x%x", getConfig(i, 0, 0, 0, 0));
+ mxmlElementSetAttrf(option, ATTR_NAME, "SBAS %i", i);
+ mxmlElementSetAttrf(option, ATTR_DESCRIPTION, "ACE master to CHI protocol bridge %i", i);
+ break;
+ }
+ default:
+ continue;
+ }
+ }
+}
diff --git a/tools/gator/daemon/CCNDriver.h b/tools/gator/daemon/CCNDriver.h
new file mode 100644
index 000000000000..fb4c717e969a
--- /dev/null
+++ b/tools/gator/daemon/CCNDriver.h
@@ -0,0 +1,43 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CCNDRIVER_H
+#define CCNDRIVER_H
+
+#include "Driver.h"
+
+class CCNDriver : public Driver {
+public:
+ CCNDriver();
+ ~CCNDriver();
+
+ bool claimCounter(const Counter &counter) const;
+ void resetCounters();
+ void setupCounter(Counter &counter);
+
+ void readEvents(mxml_node_t *const);
+ int writeCounters(mxml_node_t *const root) const;
+ void writeEvents(mxml_node_t *const) const;
+
+private:
+ enum NodeType {
+ NT_UNKNOWN,
+ NT_HNF,
+ NT_RNI,
+ NT_SBAS,
+ };
+
+ NodeType *mNodeTypes;
+ int mXpCount;
+
+ // Intentionally unimplemented
+ CCNDriver(const CCNDriver &);
+ CCNDriver &operator=(const CCNDriver &);
+};
+
+#endif // CCNDRIVER_H
diff --git a/tools/gator/daemon/CPUFreqDriver.cpp b/tools/gator/daemon/CPUFreqDriver.cpp
new file mode 100644
index 000000000000..41f9d6f2b3f4
--- /dev/null
+++ b/tools/gator/daemon/CPUFreqDriver.cpp
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "CPUFreqDriver.h"
+
+#include "Buffer.h"
+#include "DriverSource.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+CPUFreqDriver::CPUFreqDriver() : mPrev() {
+}
+
+CPUFreqDriver::~CPUFreqDriver() {
+}
+
+void CPUFreqDriver::readEvents(mxml_node_t *const) {
+ // Only for use with perf
+ if (!gSessionData->perf.isSetup()) {
+ return;
+ }
+
+ setCounters(new DriverCounter(getCounters(), strdup("Linux_power_cpu_freq")));
+}
+
+void CPUFreqDriver::read(Buffer *const buffer) {
+ char buf[64];
+ const DriverCounter *const counter = getCounters();
+ if ((counter == NULL) || !counter->isEnabled()) {
+ return;
+ }
+
+ const int key = getCounters()->getKey();
+ bool resetCores = false;
+ for (int i = 0; i < gSessionData->mCores; ++i) {
+ snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%i/cpufreq/cpuinfo_cur_freq", i);
+ int64_t freq;
+ if (DriverSource::readInt64Driver(buf, &freq) != 0) {
+ freq = 0;
+ }
+ if (mPrev[i] != freq) {
+ mPrev[i] = freq;
+ // Change cores
+ buffer->event64(2, i);
+ resetCores = true;
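+   // cpuinfo_cur_freq is reported in kHz; scale to Hz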
+ buffer->event64(key, 1000*freq);
+ }
+ }
+ if (resetCores) {
+ // Revert cores, UserSpaceSource is all on core 0
+ buffer->event64(2, 0);
+ }
+}
diff --git a/tools/gator/daemon/CPUFreqDriver.h b/tools/gator/daemon/CPUFreqDriver.h
new file mode 100644
index 000000000000..ad8c9aaa9e7d
--- /dev/null
+++ b/tools/gator/daemon/CPUFreqDriver.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CPUFREQDRIVER_H
+#define CPUFREQDRIVER_H
+
+#include "Config.h"
+#include "Driver.h"
+
+class CPUFreqDriver : public PolledDriver {
+private:
+ typedef PolledDriver super;
+
+public:
+ CPUFreqDriver();
+ ~CPUFreqDriver();
+
+ void readEvents(mxml_node_t *const root);
+ void read(Buffer *const buffer);
+
+private:
+ int64_t mPrev[NR_CPUS];
+
+ // Intentionally unimplemented
+ CPUFreqDriver(const CPUFreqDriver &);
+ CPUFreqDriver &operator=(const CPUFreqDriver &);
+};
+
+#endif // CPUFREQDRIVER_H
diff --git a/tools/gator/daemon/CapturedXML.cpp b/tools/gator/daemon/CapturedXML.cpp
new file mode 100644
index 000000000000..0b5802c893bb
--- /dev/null
+++ b/tools/gator/daemon/CapturedXML.cpp
@@ -0,0 +1,145 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "CapturedXML.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+
+#include "SessionData.h"
+#include "Logging.h"
+#include "OlyUtility.h"
+
+CapturedXML::CapturedXML() {
+}
+
+CapturedXML::~CapturedXML() {
+}
+
+mxml_node_t* CapturedXML::getTree(bool includeTime) {
+ mxml_node_t *xml;
+ mxml_node_t *captured;
+ mxml_node_t *target;
+ int x;
+
+ xml = mxmlNewXML("1.0");
+
+ captured = mxmlNewElement(xml, "captured");
+ mxmlElementSetAttr(captured, "version", "1");
+ if (gSessionData->perf.isSetup()) {
+ mxmlElementSetAttr(captured, "type", "Perf");
+ mxmlElementSetAttr(captured, "perf_beta", "yes");
+ }
+ mxmlElementSetAttrf(captured, "protocol", "%d", PROTOCOL_VERSION);
+ if (includeTime) { // Send the following only after the capture is complete
+ if (time(NULL) > 1267000000) { // If the time is reasonable (after Feb 23, 2010)
+ mxmlElementSetAttrf(captured, "created", "%lu", time(NULL)); // Valid until the year 2038
+ }
+ }
+
+ target = mxmlNewElement(captured, "target");
+ mxmlElementSetAttr(target, "name", gSessionData->mCoreName);
+ mxmlElementSetAttrf(target, "sample_rate", "%d", gSessionData->mSampleRate);
+ mxmlElementSetAttrf(target, "cores", "%d", gSessionData->mCores);
+ mxmlElementSetAttrf(target, "cpuid", "0x%x", gSessionData->mMaxCpuId);
+
+ if (!gSessionData->mOneShot && (gSessionData->mSampleRate > 0)) {
+ mxmlElementSetAttr(target, "supports_live", "yes");
+ }
+
+ if (gSessionData->mLocalCapture) {
+ mxmlElementSetAttr(target, "local_capture", "yes");
+ }
+
+ mxml_node_t *counters = NULL;
+ for (x = 0; x < MAX_PERFORMANCE_COUNTERS; x++) {
+ const Counter & counter = gSessionData->mCounters[x];
+ if (counter.isEnabled()) {
+ if (counters == NULL) {
+ counters = mxmlNewElement(captured, "counters");
+ }
+ mxml_node_t *const node = mxmlNewElement(counters, "counter");
+ mxmlElementSetAttrf(node, "key", "0x%x", counter.getKey());
+ mxmlElementSetAttr(node, "type", counter.getType());
+ if (counter.getEvent() != -1) {
+ mxmlElementSetAttrf(node, "event", "0x%x", counter.getEvent());
+ }
+ if (counter.getCount() > 0) {
+ mxmlElementSetAttrf(node, "count", "%d", counter.getCount());
+ }
+ if (counter.getCores() > 0) {
+ mxmlElementSetAttrf(node, "cores", "%d", counter.getCores());
+ }
+ }
+ }
+
+ return xml;
+}
+
+char* CapturedXML::getXML(bool includeTime) {
+ char* xml_string;
+ mxml_node_t *xml = getTree(includeTime);
+ xml_string = mxmlSaveAllocString(xml, mxmlWhitespaceCB);
+ mxmlDelete(xml);
+ return xml_string;
+}
+
+void CapturedXML::write(char* path) {
+ char file[PATH_MAX];
+
+ // Set full path
+ snprintf(file, PATH_MAX, "%s/captured.xml", path);
+
+ char* xml = getXML(true);
+ if (util->writeToDisk(file, xml) < 0) {
+ logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify the path.", file);
+ handleException();
+ }
+
+ free(xml);
+}
+
+// whitespace callback utility function used with mini-xml
+const char * mxmlWhitespaceCB(mxml_node_t *node, int loc) {
+ const char *name;
+
+ name = mxmlGetElement(node);
+
+ // Text and other non-element nodes have no name; add no whitespace for them
+ if (name == NULL) {
+  return NULL;
+ }
+
+ if (loc == MXML_WS_BEFORE_OPEN) {
+ // Single indentation
+ if (!strcmp(name, "target") || !strcmp(name, "counters"))
+ return "\n ";
+
+ // Double indentation
+ if (!strcmp(name, "counter"))
+ return "\n ";
+
+ // Avoid a carriage return on the first line of the xml file
+ if (!strncmp(name, "?xml", 4))
+ return NULL;
+
+ // Default - no indentation
+ return "\n";
+ }
+
+ if (loc == MXML_WS_BEFORE_CLOSE) {
+ // No indentation
+ if (!strcmp(name, "captured"))
+ return "\n";
+
+ // Single indentation
+ if (!strcmp(name, "counters"))
+ return "\n ";
+
+ // Default - no carriage return
+ return NULL;
+ }
+
+ return NULL;
+}
diff --git a/tools/gator/daemon/CapturedXML.h b/tools/gator/daemon/CapturedXML.h
new file mode 100644
index 000000000000..b704f6e53bb5
--- /dev/null
+++ b/tools/gator/daemon/CapturedXML.h
@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CAPTURED_XML_H__
+#define __CAPTURED_XML_H__
+
+#include "mxml/mxml.h"
+
+class CapturedXML {
+public:
+ CapturedXML();
+ ~CapturedXML();
+ char* getXML(bool includeTime); // the string should be freed by the caller
+ void write(char* path);
+private:
+ mxml_node_t* getTree(bool includeTime);
+};
+
+const char * mxmlWhitespaceCB(mxml_node_t *node, int where);
+
+#endif //__CAPTURED_XML_H__
diff --git a/tools/gator/daemon/Child.cpp b/tools/gator/daemon/Child.cpp
new file mode 100644
index 000000000000..6b5bbb3bf6af
--- /dev/null
+++ b/tools/gator/daemon/Child.cpp
@@ -0,0 +1,392 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Child.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <sys/prctl.h>
+
+#include "CapturedXML.h"
+#include "Command.h"
+#include "ConfigurationXML.h"
+#include "Driver.h"
+#include "DriverSource.h"
+#include "ExternalSource.h"
+#include "FtraceSource.h"
+#include "LocalCapture.h"
+#include "Logging.h"
+#include "OlySocket.h"
+#include "OlyUtility.h"
+#include "PerfSource.h"
+#include "Sender.h"
+#include "SessionData.h"
+#include "StreamlineSetup.h"
+#include "UserSpaceSource.h"
+
+static sem_t haltPipeline, senderThreadStarted, startProfile, senderSem; // Shared by Child and spawned threads
+static Source *primarySource = NULL;
+static Source *externalSource = NULL;
+static Source *userSpaceSource = NULL;
+static Source *ftraceSource = NULL;
+static Sender* sender = NULL; // Shared by Child.cpp and spawned threads
+Child* child = NULL; // shared by Child.cpp and main.cpp
+
+extern void cleanUp();
+void handleException() {
+ if (child && child->numExceptions++ > 0) {
+  // one of the functions below may itself raise an exception, so allow only one to avoid recursing
+ logg->logMessage("Received multiple exceptions, terminating the child");
+ exit(1);
+ }
+ fprintf(stderr, "%s", logg->getLastError());
+
+ if (child && child->socket) {
+ if (sender) {
+ // send the error, regardless of the command sent by Streamline
+ sender->writeData(logg->getLastError(), strlen(logg->getLastError()), RESPONSE_ERROR);
+
+ // cannot close the socket before Streamline issues the command, so wait for the command before exiting
+ if (gSessionData->mWaitingOnCommand) {
+ char discard;
+ child->socket->receiveNBytes(&discard, 1);
+ }
+
+ // Ensure all data is flushed
+ child->socket->shutdownConnection();
+
+ // this indirectly calls close socket which will ensure the data has been sent
+ delete sender;
+ }
+ }
+
+ if (gSessionData->mLocalCapture)
+ cleanUp();
+
+ exit(1);
+}
+
+// CTRL C Signal Handler for child process
+static void child_handler(int signum) {
+ static bool beenHere = false;
+ if (beenHere) {
+ logg->logMessage("Gator is being forced to shut down.");
+ exit(1);
+ }
+ beenHere = true;
+ logg->logMessage("Gator is shutting down.");
+ if (signum == SIGALRM || !primarySource) {
+ exit(1);
+ } else {
+ child->endSession();
+ alarm(5); // Safety net in case endSession does not complete within 5 seconds
+ }
+}
+
+static void *durationThread(void *) {
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-duration", 0, 0, 0);
+ sem_wait(&startProfile);
+ if (gSessionData->mSessionIsActive) {
+ // Time out after duration seconds
+ // Add a second for host-side filtering
+ sleep(gSessionData->mDuration + 1);
+ if (gSessionData->mSessionIsActive) {
+ logg->logMessage("Duration expired.");
+ child->endSession();
+ }
+ }
+ logg->logMessage("Exit duration thread");
+ return 0;
+}
+
+static void *stopThread(void *) {
+ OlySocket* socket = child->socket;
+
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-stopper", 0, 0, 0);
+ while (gSessionData->mSessionIsActive) {
+ // This thread will stall until the APC_STOP or PING command is received over the socket or the socket is disconnected
+ unsigned char header[5];
+ const int result = socket->receiveNBytes((char*)&header, sizeof(header));
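+   // Decode the 5-byte header: a 1-byte command type then a 32-bit little-endian length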
+ const char type = header[0];
+ const int length = (header[1] << 0) | (header[2] << 8) | (header[3] << 16) | (header[4] << 24);
+ if (result == -1) {
+ child->endSession();
+ } else if (result > 0) {
+ if ((type != COMMAND_APC_STOP) && (type != COMMAND_PING)) {
+ logg->logMessage("INVESTIGATE: Received unknown command type %d", type);
+ } else {
+ // verify a length of zero
+ if (length == 0) {
+ if (type == COMMAND_APC_STOP) {
+ logg->logMessage("Stop command received.");
+ child->endSession();
+ } else {
+ // Ping is used to make sure gator is alive and requires an ACK as the response
+ logg->logMessage("Ping command received.");
+ sender->writeData(NULL, 0, RESPONSE_ACK);
+ }
+ } else {
+ logg->logMessage("INVESTIGATE: Received stop command but with length = %d", length);
+ }
+ }
+ }
+ }
+
+ logg->logMessage("Exit stop thread");
+ return 0;
+}
+
+static void *senderThread(void *) {
+ char end_sequence[] = {RESPONSE_APC_DATA, 0, 0, 0, 0};
+
+ sem_post(&senderThreadStarted);
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-sender", 0, 0, 0);
+ sem_wait(&haltPipeline);
+
+ while (!primarySource->isDone() ||
+ !externalSource->isDone() ||
+ (userSpaceSource != NULL && !userSpaceSource->isDone()) ||
+ (ftraceSource != NULL && !ftraceSource->isDone())) {
+ sem_wait(&senderSem);
+
+ primarySource->write(sender);
+ externalSource->write(sender);
+ if (userSpaceSource != NULL) {
+ userSpaceSource->write(sender);
+ }
+ if (ftraceSource != NULL) {
+ ftraceSource->write(sender);
+ }
+ }
+
+ // write end-of-capture sequence
+ if (!gSessionData->mLocalCapture) {
+ sender->writeData(end_sequence, sizeof(end_sequence), RESPONSE_APC_DATA);
+ }
+
+ logg->logMessage("Exit sender thread");
+ return 0;
+}
+
+Child::Child() {
+ initialization();
+ gSessionData->mLocalCapture = true;
+}
+
+Child::Child(OlySocket* sock, int conn) {
+ initialization();
+ socket = sock;
+ mNumConnections = conn;
+}
+
+Child::~Child() {
+}
+
+void Child::initialization() {
+ // Set up different handlers for signals
+ gSessionData->mSessionIsActive = true;
+ signal(SIGINT, child_handler);
+ signal(SIGTERM, child_handler);
+ signal(SIGABRT, child_handler);
+ signal(SIGALRM, child_handler);
+ socket = NULL;
+ numExceptions = 0;
+ mNumConnections = 0;
+
+ // Initialize semaphores
+ sem_init(&senderThreadStarted, 0, 0);
+ sem_init(&startProfile, 0, 0);
+ sem_init(&senderSem, 0, 0);
+}
+
+void Child::endSession() {
+ gSessionData->mSessionIsActive = false;
+ primarySource->interrupt();
+ externalSource->interrupt();
+ if (userSpaceSource != NULL) {
+ userSpaceSource->interrupt();
+ }
+ if (ftraceSource != NULL) {
+ ftraceSource->interrupt();
+ }
+ sem_post(&haltPipeline);
+}
+
+void Child::run() {
+ LocalCapture* localCapture = NULL;
+ pthread_t durationThreadID, stopThreadID, senderThreadID;
+
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-child", 0, 0, 0);
+
+ // Disable line wrapping when generating xml files; carriage returns and indentation to be added manually
+ mxmlSetWrapMargin(0);
+
+ // Instantiate the Sender - must be done first, after which error messages can be sent
+ sender = new Sender(socket);
+
+ if (mNumConnections > 1) {
+ logg->logError(__FILE__, __LINE__, "Session already in progress");
+ handleException();
+ }
+
+ // Populate gSessionData with the configuration
+ { ConfigurationXML configuration; }
+
+ // Set up the driver; must be done after gSessionData->mPerfCounterType[] is populated
+ if (!gSessionData->perf.isSetup()) {
+ primarySource = new DriverSource(&senderSem, &startProfile);
+ } else {
+ primarySource = new PerfSource(&senderSem, &startProfile);
+ }
+
+ // Initialize all drivers
+ for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) {
+ driver->resetCounters();
+ }
+
+ // Set up counters using the associated driver's setup function
+ for (int i = 0; i < MAX_PERFORMANCE_COUNTERS; i++) {
+ Counter & counter = gSessionData->mCounters[i];
+ if (counter.isEnabled()) {
+ counter.getDriver()->setupCounter(counter);
+ }
+ }
+
+ // Start up and parse session xml
+ if (socket) {
+ // Respond to Streamline requests
+ StreamlineSetup ss(socket);
+ } else {
+ char* xmlString;
+ xmlString = util->readFromDisk(gSessionData->mSessionXMLPath);
+ if (xmlString == 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to read session xml file: %s", gSessionData->mSessionXMLPath);
+ handleException();
+ }
+ gSessionData->parseSessionXML(xmlString);
+ localCapture = new LocalCapture();
+ localCapture->createAPCDirectory(gSessionData->mTargetPath);
+ localCapture->copyImages(gSessionData->mImages);
+ localCapture->write(xmlString);
+ sender->createDataFile(gSessionData->mAPCDir);
+ free(xmlString);
+ }
+
+ if (gSessionData->kmod.isMaliCapture() && (gSessionData->mSampleRate == 0)) {
+ logg->logError(__FILE__, __LINE__, "Mali counters are not supported with Sample Rate: None.");
+ handleException();
+ }
+
+ // Must be after session XML is parsed
+ if (!primarySource->prepare()) {
+ if (gSessionData->perf.isSetup()) {
+ logg->logError(__FILE__, __LINE__, "Unable to prepare gator driver for capture");
+ } else {
+ logg->logError(__FILE__, __LINE__, "Unable to communicate with the perf API, please ensure that CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER are enabled. Please refer to README_Streamline.txt for more information.");
+ }
+ handleException();
+ }
+
+ // Sender thread shall be halted until it is signaled for one shot mode
+ sem_init(&haltPipeline, 0, gSessionData->mOneShot ? 0 : 2);
+
+ // Must be initialized before senderThread is started as senderThread checks externalSource
+ externalSource = new ExternalSource(&senderSem);
+ if (!externalSource->prepare()) {
+ logg->logError(__FILE__, __LINE__, "Unable to prepare external source for capture");
+ handleException();
+ }
+ externalSource->start();
+
+ // Create the duration, stop, and sender threads
+ bool thread_creation_success = true;
+ if (gSessionData->mDuration > 0 && pthread_create(&durationThreadID, NULL, durationThread, NULL)) {
+ thread_creation_success = false;
+ } else if (socket && pthread_create(&stopThreadID, NULL, stopThread, NULL)) {
+ thread_creation_success = false;
+ } else if (pthread_create(&senderThreadID, NULL, senderThread, NULL)) {
+ thread_creation_success = false;
+ }
+
+ bool startUSSource = false;
+ for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
+ if (gSessionData->usDrivers[i]->countersEnabled()) {
+ startUSSource = true;
+ }
+ }
+ if (startUSSource) {
+ userSpaceSource = new UserSpaceSource(&senderSem);
+ if (!userSpaceSource->prepare()) {
+ logg->logError(__FILE__, __LINE__, "Unable to prepare userspace source for capture");
+ handleException();
+ }
+ userSpaceSource->start();
+ }
+
+ if (gSessionData->ftraceDriver.countersEnabled()) {
+ ftraceSource = new FtraceSource(&senderSem);
+ if (!ftraceSource->prepare()) {
+ logg->logError(__FILE__, __LINE__, "Unable to prepare userspace source for capture");
+ handleException();
+ }
+ ftraceSource->start();
+ }
+
+ if (gSessionData->mAllowCommands && (gSessionData->mCaptureCommand != NULL)) {
+ pthread_t thread;
+ if (pthread_create(&thread, NULL, commandThread, NULL)) {
+ thread_creation_success = false;
+ }
+ }
+
+ if (!thread_creation_success) {
+ logg->logError(__FILE__, __LINE__, "Failed to create gator threads");
+ handleException();
+ }
+
+ // Wait until thread has started
+ sem_wait(&senderThreadStarted);
+
+ // Start profiling
+ primarySource->run();
+
+ if (ftraceSource != NULL) {
+ ftraceSource->join();
+ }
+ if (userSpaceSource != NULL) {
+ userSpaceSource->join();
+ }
+ externalSource->join();
+
+ // Wait for the other threads to exit
+ pthread_join(senderThreadID, NULL);
+
+ // Shutting down the connection should break the stop thread which is stalling on the socket recv() function
+ if (socket) {
+ logg->logMessage("Waiting on stop thread");
+ socket->shutdownConnection();
+ pthread_join(stopThreadID, NULL);
+ }
+
+ // Write the captured xml file
+ if (gSessionData->mLocalCapture) {
+ CapturedXML capturedXML;
+ capturedXML.write(gSessionData->mAPCDir);
+ }
+
+ logg->logMessage("Profiling ended.");
+
+ delete ftraceSource;
+ delete userSpaceSource;
+ delete externalSource;
+ delete primarySource;
+ delete sender;
+ delete localCapture;
+}
diff --git a/tools/gator/daemon/Child.h b/tools/gator/daemon/Child.h
new file mode 100644
index 000000000000..cc78202ceb5c
--- /dev/null
+++ b/tools/gator/daemon/Child.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CHILD_H__
+#define __CHILD_H__
+
+class OlySocket;
+
+class Child {
+public:
+ Child();
+ Child(OlySocket* sock, int numConnections);
+ ~Child();
+ void run();
+ OlySocket *socket;
+ void endSession();
+ int numExceptions;
+private:
+ int mNumConnections;
+
+ void initialization();
+
+ // Intentionally unimplemented
+ Child(const Child &);
+ Child &operator=(const Child &);
+};
+
+#endif //__CHILD_H__
diff --git a/tools/gator/daemon/Command.cpp b/tools/gator/daemon/Command.cpp
new file mode 100644
index 000000000000..28d73cf5a905
--- /dev/null
+++ b/tools/gator/daemon/Command.cpp
@@ -0,0 +1,172 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Command.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "Logging.h"
+#include "SessionData.h"
+
+static int getUid(const char *const name, char *const shPath, const char *const tmpDir) {
+ // Lookups may fail when using a different libc or a statically compiled executable
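+ // Fall back to the shell: create a temporary file, chown it to the target
+ // user (removing it if chown fails), then read the owner back with stat()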
+ char gatorTemp[32];
+ snprintf(gatorTemp, sizeof(gatorTemp), "%s/gator_temp", tmpDir);
+
+ const int fd = open(gatorTemp, O_CREAT | O_CLOEXEC, 0600);
+ if (fd < 0) {
+ return -1;
+ }
+ close(fd);
+
+ char cmd[128];
+ snprintf(cmd, sizeof(cmd), "chown %s %s || rm %s", name, gatorTemp, gatorTemp);
+
+ const int pid = fork();
+ if (pid < 0) {
+ logg->logError(__FILE__, __LINE__, "fork failed");
+ handleException();
+ }
+ if (pid == 0) {
+ char cargv1[] = "-c";
+ char *cargv[] = {
+ shPath,
+ cargv1,
+ cmd,
+ NULL,
+ };
+
+ execv(cargv[0], cargv);
+ exit(-1);
+ }
+ while ((waitpid(pid, NULL, 0) < 0) && (errno == EINTR));
+
+ struct stat st;
+ int result = -1;
+ if (stat(gatorTemp, &st) == 0) {
+ result = st.st_uid;
+ }
+ unlink(gatorTemp);
+ return result;
+}
+
+static int getUid(const char *const name) {
+ // Look up the username
+ struct passwd *const user = getpwnam(name);
+ if (user != NULL) {
+ return user->pw_uid;
+ }
+
+ // Are we on Linux?
+ char cargv0l[] = "/bin/sh";
+ if ((access(cargv0l, X_OK) == 0) && (access("/tmp", W_OK) == 0)) {
+ return getUid(name, cargv0l, "/tmp");
+ }
+
+ // Are we on Android?
+ char cargv0a[] = "/system/bin/sh";
+ if ((access(cargv0a, X_OK) == 0) && (access("/data", W_OK) == 0)) {
+ return getUid(name, cargv0a, "/data");
+ }
+
+ return -1;
+}
+
+void *commandThread(void *) {
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-command", 0, 0, 0);
+
+ const char *const name = gSessionData->mCaptureUser == NULL ? "nobody" : gSessionData->mCaptureUser;
+ const int uid = getUid(name);
+ if (uid < 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to lookup the user %s, please double check that the user exists", name);
+ handleException();
+ }
+
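+ // Give the capture a few seconds to start before launching the command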
+ sleep(3);
+
+ char buf[128];
+ int pipefd[2];
+ if (pipe_cloexec(pipefd) != 0) {
+ logg->logError(__FILE__, __LINE__, "pipe failed");
+ handleException();
+ }
+
+ const int pid = fork();
+ if (pid < 0) {
+ logg->logError(__FILE__, __LINE__, "fork failed");
+ handleException();
+ }
+ if (pid == 0) {
+ char cargv0l[] = "/bin/sh";
+ char cargv0a[] = "/system/bin/sh";
+ char cargv1[] = "-c";
+ char *cargv[] = {
+ cargv0l,
+ cargv1,
+ gSessionData->mCaptureCommand,
+ NULL,
+ };
+
+ buf[0] = '\0';
+ close(pipefd[0]);
+
+ // Gator runs at a high priority, reset the priority to the default
+ if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) {
+ snprintf(buf, sizeof(buf), "setpriority failed");
+ goto fail_exit;
+ }
+
+ if (setuid(uid) != 0) {
+ snprintf(buf, sizeof(buf), "setuid failed");
+ goto fail_exit;
+ }
+
+ {
+ const char *const path = gSessionData->mCaptureWorkingDir == NULL ? "/" : gSessionData->mCaptureWorkingDir;
+ if (chdir(path) != 0) {
+ snprintf(buf, sizeof(buf), "Unable to cd to %s, please verify the directory exists and is accessable to %s", path, name);
+ goto fail_exit;
+ }
+ }
+
+ execv(cargv[0], cargv);
+ cargv[0] = cargv0a;
+ execv(cargv[0], cargv);
+ snprintf(buf, sizeof(buf), "execv failed");
+
+ fail_exit:
+ if (buf[0] != '\0') {
+ const ssize_t bytes = write(pipefd[1], buf, sizeof(buf));
+ // Can't do anything if this fails
+ (void)bytes;
+ }
+
+ exit(-1);
+ }
+
+ close(pipefd[1]);
+ const ssize_t bytes = read(pipefd[0], buf, sizeof(buf));
+ if (bytes > 0) {
+  logg->logError(__FILE__, __LINE__, "%s", buf);
+ handleException();
+ }
+ close(pipefd[0]);
+
+ return NULL;
+}
diff --git a/tools/gator/daemon/Command.h b/tools/gator/daemon/Command.h
new file mode 100644
index 000000000000..17244b7aaebc
--- /dev/null
+++ b/tools/gator/daemon/Command.h
@@ -0,0 +1,14 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef COMMAND_H
+#define COMMAND_H
+
+void *commandThread(void *);
+
+#endif // COMMAND_H
diff --git a/tools/gator/daemon/Config.h b/tools/gator/daemon/Config.h
new file mode 100644
index 000000000000..bee383a1c797
--- /dev/null
+++ b/tools/gator/daemon/Config.h
@@ -0,0 +1,28 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CONFIG_H
+#define CONFIG_H
+
+#define ARRAY_LENGTH(A) static_cast<int>(sizeof(A)/sizeof((A)[0]))
+#define ACCESS_ONCE(x) (*(volatile typeof(x)*)&(x))
+
+#define MAX_PERFORMANCE_COUNTERS 50
+#define NR_CPUS 32
+
+template<typename T>
+static inline T min(const T a, const T b) {
+ return (a < b ? a : b);
+}
+
+template<typename T>
+static inline T max(const T a, const T b) {
+ return (a > b ? a : b);
+}
+
+#endif // CONFIG_H
diff --git a/tools/gator/daemon/ConfigurationXML.cpp b/tools/gator/daemon/ConfigurationXML.cpp
new file mode 100644
index 000000000000..6590dd389196
--- /dev/null
+++ b/tools/gator/daemon/ConfigurationXML.cpp
@@ -0,0 +1,217 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "ConfigurationXML.h"
+
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <dirent.h>
+
+#include "Driver.h"
+#include "Logging.h"
+#include "OlyUtility.h"
+#include "SessionData.h"
+
+static const char* ATTR_COUNTER = "counter";
+static const char* ATTR_REVISION = "revision";
+static const char* ATTR_EVENT = "event";
+static const char* ATTR_COUNT = "count";
+static const char* ATTR_CORES = "cores";
+
+ConfigurationXML::ConfigurationXML() {
+ const char * configuration_xml;
+ unsigned int configuration_xml_len;
+ getDefaultConfigurationXml(configuration_xml, configuration_xml_len);
+
+ char path[PATH_MAX];
+
+ getPath(path);
+ mConfigurationXML = util->readFromDisk(path);
+
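+ // Two passes: parse the on-disk configuration.xml first; if it is rejected
+ // (for example a stale revision), delete it and retry with the default
+ // configuration embedded in the binary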
+ for (int retryCount = 0; retryCount < 2; ++retryCount) {
+ if (mConfigurationXML == NULL) {
+ logg->logMessage("Unable to locate configuration.xml, using default in binary");
+ // null-terminate configuration_xml
+ mConfigurationXML = (char*)malloc(configuration_xml_len + 1);
+ memcpy(mConfigurationXML, (const void*)configuration_xml, configuration_xml_len);
+ mConfigurationXML[configuration_xml_len] = 0;
+ }
+
+ int ret = parse(mConfigurationXML);
+ if (ret == 1) {
+ remove();
+
+ // Free the current configuration and reload
+ free((void*)mConfigurationXML);
+ mConfigurationXML = NULL;
+ continue;
+ }
+
+ break;
+ }
+
+ validate();
+}
+
+ConfigurationXML::~ConfigurationXML() {
+ if (mConfigurationXML) {
+ free((void*)mConfigurationXML);
+ }
+}
+
+int ConfigurationXML::parse(const char* configurationXML) {
+ mxml_node_t *tree, *node;
+ int ret;
+
+ // clear counter overflow
+ gSessionData->mCounterOverflow = 0;
+ gSessionData->mIsEBS = false;
+ mIndex = 0;
+
+ // disable all counters prior to parsing the configuration xml
+ for (int i = 0; i < MAX_PERFORMANCE_COUNTERS; i++) {
+ gSessionData->mCounters[i].setEnabled(false);
+ }
+
+ tree = mxmlLoadString(NULL, configurationXML, MXML_NO_CALLBACK);
+
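+ // Skip to the first element node, the root <configurations> element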
+ node = mxmlGetFirstChild(tree);
+ while (node && mxmlGetType(node) != MXML_ELEMENT)
+ node = mxmlWalkNext(node, tree, MXML_NO_DESCEND);
+
+ ret = configurationsTag(node);
+
+ node = mxmlGetFirstChild(node);
+ while (node) {
+ if (mxmlGetType(node) != MXML_ELEMENT) {
+ node = mxmlWalkNext(node, tree, MXML_NO_DESCEND);
+ continue;
+ }
+ configurationTag(node);
+ node = mxmlWalkNext(node, tree, MXML_NO_DESCEND);
+ }
+
+ mxmlDelete(tree);
+
+ return ret;
+}
+
+void ConfigurationXML::validate(void) {
+ for (int i = 0; i < MAX_PERFORMANCE_COUNTERS; i++) {
+ const Counter & counter = gSessionData->mCounters[i];
+ if (counter.isEnabled()) {
+ if (strcmp(counter.getType(), "") == 0) {
+ logg->logError(__FILE__, __LINE__, "Invalid required attribute in configuration.xml:\n counter=\"%s\"\n event=%d\n", counter.getType(), counter.getEvent());
+ handleException();
+ }
+
+ // iterate through the remaining enabled performance counters
+ for (int j = i + 1; j < MAX_PERFORMANCE_COUNTERS; j++) {
+ const Counter & counter2 = gSessionData->mCounters[j];
+ if (counter2.isEnabled()) {
+ // check if the types are the same
+ if (strcmp(counter.getType(), counter2.getType()) == 0) {
+ logg->logError(__FILE__, __LINE__, "Duplicate performance counter type in configuration.xml: %s", counter.getType());
+ handleException();
+ }
+ }
+ }
+ }
+ }
+}
+
+#define CONFIGURATION_REVISION 3
+int ConfigurationXML::configurationsTag(mxml_node_t *node) {
+ const char* revision_string;
+
+ revision_string = mxmlElementGetAttr(node, ATTR_REVISION);
+ if (!revision_string) {
+  return 1; // revision issue
+ }
+
+ int revision = strtol(revision_string, NULL, 10);
+ if (revision < CONFIGURATION_REVISION) {
+ return 1; // revision issue
+ }
+
+ // A revision >= CONFIGURATION_REVISION is okay
+ // Greater than can occur when Streamline is newer than gator
+
+ return 0;
+}
+
+void ConfigurationXML::configurationTag(mxml_node_t *node) {
+ // If the counter table is already full, just record the overflow so it can be reported
+ if (mIndex >= MAX_PERFORMANCE_COUNTERS) {
+ mIndex++;
+ gSessionData->mCounterOverflow = mIndex;
+ return;
+ }
+
+ // read attributes
+ Counter & counter = gSessionData->mCounters[mIndex];
+ counter.clear();
+ if (mxmlElementGetAttr(node, ATTR_COUNTER)) counter.setType(mxmlElementGetAttr(node, ATTR_COUNTER));
+ if (mxmlElementGetAttr(node, ATTR_EVENT)) counter.setEvent(strtol(mxmlElementGetAttr(node, ATTR_EVENT), NULL, 16));
+ if (mxmlElementGetAttr(node, ATTR_COUNT)) counter.setCount(strtol(mxmlElementGetAttr(node, ATTR_COUNT), NULL, 10));
+ if (mxmlElementGetAttr(node, ATTR_CORES)) counter.setCores(strtol(mxmlElementGetAttr(node, ATTR_CORES), NULL, 10));
+ if (counter.getCount() > 0) {
+ gSessionData->mIsEBS = true;
+ }
+ counter.setEnabled(true);
+
+ // Associate a driver with each counter
+ for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) {
+ if (driver->claimCounter(counter)) {
+ if (counter.getDriver() != NULL) {
+ logg->logError(__FILE__, __LINE__, "More than one driver has claimed %s:%i", counter.getType(), counter.getEvent());
+ handleException();
+ }
+ counter.setDriver(driver);
+ }
+ }
+
+ // If no driver is associated with the counter, disable it
+ if (counter.getDriver() == NULL) {
+ logg->logMessage("No driver has claimed %s:%i", counter.getType(), counter.getEvent());
+ counter.setEnabled(false);
+ }
+
+ if (counter.isEnabled()) {
+ // update counter index
+ mIndex++;
+ }
+}
+
+void ConfigurationXML::getDefaultConfigurationXml(const char * & xml, unsigned int & len) {
+#include "defaults_xml.h" // defines and initializes char defaults_xml[] and int defaults_xml_len
+ xml = (const char *)defaults_xml;
+ len = defaults_xml_len;
+}
+
+void ConfigurationXML::getPath(char* path) {
+ if (gSessionData->mConfigurationXMLPath) {
+  strncpy(path, gSessionData->mConfigurationXMLPath, PATH_MAX);
+  path[PATH_MAX - 1] = '\0';
+ } else {
+ if (util->getApplicationFullPath(path, PATH_MAX) != 0) {
+ logg->logMessage("Unable to determine the full path of gatord, the cwd will be used");
+ }
+ strncat(path, "configuration.xml", PATH_MAX - strlen(path) - 1);
+ }
+}
+
+void ConfigurationXML::remove() {
+ char path[PATH_MAX];
+ getPath(path);
+
+ if (::remove(path) != 0) {
+ logg->logError(__FILE__, __LINE__, "Invalid configuration.xml file detected and unable to delete it. To resolve, delete configuration.xml on disk");
+ handleException();
+ }
+ logg->logMessage("Invalid configuration.xml file detected and removed");
+}
diff --git a/tools/gator/daemon/ConfigurationXML.h b/tools/gator/daemon/ConfigurationXML.h
new file mode 100644
index 000000000000..efa415e508b6
--- /dev/null
+++ b/tools/gator/daemon/ConfigurationXML.h
@@ -0,0 +1,38 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CONFIGURATION_XML_H
+#define CONFIGURATION_XML_H
+
+#include "mxml/mxml.h"
+
+class ConfigurationXML {
+public:
+ static void getDefaultConfigurationXml(const char * & xml, unsigned int & len);
+ static void getPath(char* path);
+ static void remove();
+
+ ConfigurationXML();
+ ~ConfigurationXML();
+ const char* getConfigurationXML() {return mConfigurationXML;}
+ void validate(void);
+
+private:
+ char* mConfigurationXML;
+ int mIndex;
+
+ int parse(const char* xmlFile);
+ int configurationsTag(mxml_node_t *node);
+ void configurationTag(mxml_node_t *node);
+
+ // Intentionally unimplemented
+ ConfigurationXML(const ConfigurationXML &);
+ ConfigurationXML &operator=(const ConfigurationXML &);
+};
+
+#endif // CONFIGURATION_XML_H
diff --git a/tools/gator/daemon/Counter.h b/tools/gator/daemon/Counter.h
new file mode 100644
index 000000000000..5202aa046362
--- /dev/null
+++ b/tools/gator/daemon/Counter.h
@@ -0,0 +1,65 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef COUNTER_H
+#define COUNTER_H
+
+#include <string.h>
+
+class Driver;
+
+class Counter {
+public:
+ static const size_t MAX_STRING_LEN = 80;
+ static const size_t MAX_DESCRIPTION_LEN = 400;
+
+ Counter () {
+ clear();
+ }
+
+ void clear () {
+ mType[0] = '\0';
+ mEnabled = false;
+ mEvent = -1;
+ mCount = 0;
+ mCores = -1;
+ mKey = 0;
+ mDriver = NULL;
+ }
+
+ void setType(const char *const type) { strncpy(mType, type, sizeof(mType)); mType[sizeof(mType) - 1] = '\0'; }
+ void setEnabled(const bool enabled) { mEnabled = enabled; }
+ void setEvent(const int event) { mEvent = event; }
+ void setCount(const int count) { mCount = count; }
+ void setCores(const int cores) { mCores = cores; }
+ void setKey(const int key) { mKey = key; }
+ void setDriver(Driver *const driver) { mDriver = driver; }
+
+ const char *getType() const { return mType;}
+ bool isEnabled() const { return mEnabled; }
+ int getEvent() const { return mEvent; }
+ int getCount() const { return mCount; }
+ int getCores() const { return mCores; }
+ int getKey() const { return mKey; }
+ Driver *getDriver() const { return mDriver; }
+
+private:
+ // Intentionally unimplemented
+ Counter(const Counter &);
+ Counter & operator=(const Counter &);
+
+ char mType[MAX_STRING_LEN];
+ bool mEnabled;
+ int mEvent;
+ int mCount;
+ int mCores;
+ int mKey;
+ Driver *mDriver;
+};
+
+#endif // COUNTER_H
diff --git a/tools/gator/daemon/DiskIODriver.cpp b/tools/gator/daemon/DiskIODriver.cpp
new file mode 100644
index 000000000000..5deb0f375f3a
--- /dev/null
+++ b/tools/gator/daemon/DiskIODriver.cpp
@@ -0,0 +1,125 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+// Define to get format macros from inttypes.h
+#define __STDC_FORMAT_MACROS
+
+#include "DiskIODriver.h"
+
+#include <inttypes.h>
+
+#include "Logging.h"
+#include "SessionData.h"
+
+class DiskIOCounter : public DriverCounter {
+public:
+ DiskIOCounter(DriverCounter *next, char *const name, int64_t *const value);
+ ~DiskIOCounter();
+
+ int64_t read();
+
+private:
+ int64_t *const mValue;
+ int64_t mPrev;
+
+ // Intentionally unimplemented
+ DiskIOCounter(const DiskIOCounter &);
+ DiskIOCounter &operator=(const DiskIOCounter &);
+};
+
+DiskIOCounter::DiskIOCounter(DriverCounter *next, char *const name, int64_t *const value) : DriverCounter(next, name), mValue(value), mPrev(0) {
+}
+
+DiskIOCounter::~DiskIOCounter() {
+}
+
+int64_t DiskIOCounter::read() {
+ int64_t result = *mValue - mPrev;
+ mPrev = *mValue;
+ // Kernel assumes a sector is 512 bytes
+ return result << 9;
+}
+
+DiskIODriver::DiskIODriver() : mBuf(), mReadBytes(0), mWriteBytes(0) {
+}
+
+DiskIODriver::~DiskIODriver() {
+}
+
+void DiskIODriver::readEvents(mxml_node_t *const) {
+ // Only for use with perf
+ if (!gSessionData->perf.isSetup()) {
+ return;
+ }
+
+ setCounters(new DiskIOCounter(getCounters(), strdup("Linux_block_rq_rd"), &mReadBytes));
+ setCounters(new DiskIOCounter(getCounters(), strdup("Linux_block_rq_wr"), &mWriteBytes));
+}
+
+void DiskIODriver::doRead() {
+ if (!countersEnabled()) {
+ return;
+ }
+
+ if (!mBuf.read("/proc/diskstats")) {
+ logg->logError(__FILE__, __LINE__, "Unable to read /proc/diskstats");
+ handleException();
+ }
+
+ mReadBytes = 0;
+ mWriteBytes = 0;
+
+ char *lastName = NULL;
+ int lastNameLen = -1;
+ char *start = mBuf.getBuf();
+ while (*start != '\0') {
+ char *end = strchr(start, '\n');
+ if (end != NULL) {
+ *end = '\0';
+ }
+
+ int nameStart = -1;
+ int nameEnd = -1;
+ int64_t readBytes = -1;
+ int64_t writeBytes = -1;
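+  // %n conversions do not count toward sscanf's return value, so a
+  // successful parse of the two byte counters returns exactly 2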
+ const int count = sscanf(start, "%*d %*d %n%*s%n %*u %*u %" SCNu64 " %*u %*u %*u %" SCNu64, &nameStart, &nameEnd, &readBytes, &writeBytes);
+ if (count != 2) {
+ logg->logError(__FILE__, __LINE__, "Unable to parse /proc/diskstats");
+ handleException();
+ }
+
+ // Skip partitions which are identified if the name is a substring of the last non-partition
+ if ((lastName == NULL) || (strncmp(lastName, start + nameStart, lastNameLen) != 0)) {
+ lastName = start + nameStart;
+ lastNameLen = nameEnd - nameStart;
+ mReadBytes += readBytes;
+ mWriteBytes += writeBytes;
+ }
+
+ if (end == NULL) {
+ break;
+ }
+ start = end + 1;
+ }
+}
+
+void DiskIODriver::start() {
+ doRead();
+ // Initialize previous values
+ for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
+ if (!counter->isEnabled()) {
+ continue;
+ }
+ counter->read();
+ }
+}
+
+void DiskIODriver::read(Buffer *const buffer) {
+ doRead();
+ super::read(buffer);
+}
diff --git a/tools/gator/daemon/DiskIODriver.h b/tools/gator/daemon/DiskIODriver.h
new file mode 100644
index 000000000000..d0db18c77d04
--- /dev/null
+++ b/tools/gator/daemon/DiskIODriver.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef DISKIODRIVER_H
+#define DISKIODRIVER_H
+
+#include "Driver.h"
+#include "DynBuf.h"
+
+class DiskIODriver : public PolledDriver {
+private:
+ typedef PolledDriver super;
+
+public:
+ DiskIODriver();
+ ~DiskIODriver();
+
+ void readEvents(mxml_node_t *const root);
+ void start();
+ void read(Buffer *const buffer);
+
+private:
+ void doRead();
+
+ DynBuf mBuf;
+ int64_t mReadBytes;
+ int64_t mWriteBytes;
+
+ // Intentionally unimplemented
+ DiskIODriver(const DiskIODriver &);
+ DiskIODriver &operator=(const DiskIODriver &);
+};
+
+#endif // DISKIODRIVER_H
diff --git a/tools/gator/daemon/Driver.cpp b/tools/gator/daemon/Driver.cpp
new file mode 100644
index 000000000000..275da31c7a0d
--- /dev/null
+++ b/tools/gator/daemon/Driver.cpp
@@ -0,0 +1,96 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Driver.h"
+
+#include "Buffer.h"
+#include "SessionData.h"
+
+DriverCounter::DriverCounter(DriverCounter *const next, const char *const name) : mNext(next), mName(name), mKey(getEventKey()), mEnabled(false) {
+}
+
+DriverCounter::~DriverCounter() {
+ // mName was allocated with strdup(), so release it with free(), not delete
+ free(const_cast<char *>(mName));
+}
+
+Driver *Driver::head = NULL;
+
+Driver::Driver() : next(head) {
+ head = this;
+}
+
+SimpleDriver::~SimpleDriver() {
+ DriverCounter *counters = mCounters;
+ while (counters != NULL) {
+ DriverCounter *counter = counters;
+ counters = counter->getNext();
+ delete counter;
+ }
+}
+
+DriverCounter *SimpleDriver::findCounter(const Counter &counter) const {
+ for (DriverCounter *driverCounter = mCounters; driverCounter != NULL; driverCounter = driverCounter->getNext()) {
+ if (strcmp(driverCounter->getName(), counter.getType()) == 0) {
+ return driverCounter;
+ }
+ }
+
+ return NULL;
+}
+
+bool SimpleDriver::claimCounter(const Counter &counter) const {
+ return findCounter(counter) != NULL;
+}
+
+bool SimpleDriver::countersEnabled() const {
+ for (DriverCounter *counter = mCounters; counter != NULL; counter = counter->getNext()) {
+ if (counter->isEnabled()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void SimpleDriver::resetCounters() {
+ for (DriverCounter *counter = mCounters; counter != NULL; counter = counter->getNext()) {
+ counter->setEnabled(false);
+ }
+}
+
+void SimpleDriver::setupCounter(Counter &counter) {
+ DriverCounter *const driverCounter = findCounter(counter);
+ if (driverCounter == NULL) {
+ counter.setEnabled(false);
+ return;
+ }
+ driverCounter->setEnabled(true);
+ counter.setKey(driverCounter->getKey());
+}
+
+int SimpleDriver::writeCounters(mxml_node_t *root) const {
+ int count = 0;
+ for (DriverCounter *counter = mCounters; counter != NULL; counter = counter->getNext()) {
+ mxml_node_t *node = mxmlNewElement(root, "counter");
+ mxmlElementSetAttr(node, "name", counter->getName());
+ ++count;
+ }
+
+ return count;
+}
+
+PolledDriver::~PolledDriver() {
+}
+
+void PolledDriver::read(Buffer *const buffer) {
+ for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
+ if (!counter->isEnabled()) {
+ continue;
+ }
+ buffer->event64(counter->getKey(), counter->read());
+ }
+}
diff --git a/tools/gator/daemon/Driver.h b/tools/gator/daemon/Driver.h
new file mode 100644
index 000000000000..72870e3dbca1
--- /dev/null
+++ b/tools/gator/daemon/Driver.h
@@ -0,0 +1,118 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef DRIVER_H
+#define DRIVER_H
+
+#include <stdint.h>
+
+#include "mxml/mxml.h"
+
+class Buffer;
+class Counter;
+
+class DriverCounter {
+public:
+ DriverCounter(DriverCounter *const next, const char *const name);
+ virtual ~DriverCounter();
+
+ DriverCounter *getNext() const { return mNext; }
+ const char *getName() const { return mName; }
+ int getKey() const { return mKey; }
+ bool isEnabled() const { return mEnabled; }
+ void setEnabled(const bool enabled) { mEnabled = enabled; }
+ virtual int64_t read() { return -1; }
+
+private:
+ DriverCounter *const mNext;
+ const char *const mName;
+ const int mKey;
+ bool mEnabled;
+
+ // Intentionally unimplemented
+ DriverCounter(const DriverCounter &);
+ DriverCounter &operator=(const DriverCounter &);
+};
+
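+// Base class for anything that can provide counters. Constructing a Driver
+// links it into a global list that is walked via getHead()/getNext().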
+class Driver {
+public:
+ static Driver *getHead() { return head; }
+
+ virtual ~Driver() {}
+
+ // Returns true if this driver can manage the counter
+ virtual bool claimCounter(const Counter &counter) const = 0;
+ // Clears and disables all counters
+ virtual void resetCounters() = 0;
+ // Enables and prepares the counter for capture
+ virtual void setupCounter(Counter &counter) = 0;
+
+ // Performs any actions needed for setup or based on eventsXML
+ virtual void readEvents(mxml_node_t *const) {}
+ // Emits available counters
+ virtual int writeCounters(mxml_node_t *const root) const = 0;
+ // Emits possible dynamically generated events/counters
+ virtual void writeEvents(mxml_node_t *const) const {}
+
+ Driver *getNext() const { return next; }
+
+protected:
+ Driver();
+
+private:
+ static Driver *head;
+ Driver *next;
+
+ // Intentionally unimplemented
+ Driver(const Driver &);
+ Driver &operator=(const Driver &);
+};
+
+class SimpleDriver : public Driver {
+public:
+ virtual ~SimpleDriver();
+
+ bool claimCounter(const Counter &counter) const;
+ bool countersEnabled() const;
+ void resetCounters();
+ void setupCounter(Counter &counter);
+ int writeCounters(mxml_node_t *root) const;
+
+protected:
+ SimpleDriver() : mCounters(NULL) {}
+
+ DriverCounter *getCounters() const { return mCounters; }
+ void setCounters(DriverCounter *const counter) { mCounters = counter; }
+
+ DriverCounter *findCounter(const Counter &counter) const;
+
+private:
+ DriverCounter *mCounters;
+
+ // Intentionally unimplemented
+ SimpleDriver(const SimpleDriver &);
+ SimpleDriver &operator=(const SimpleDriver &);
+};
+
+class PolledDriver : public SimpleDriver {
+public:
+ virtual ~PolledDriver();
+
+ virtual void start() {}
+ virtual void read(Buffer *const buffer);
+
+protected:
+ PolledDriver() {}
+
+private:
+ // Intentionally unimplemented
+ PolledDriver(const PolledDriver &);
+ PolledDriver &operator=(const PolledDriver &);
+};
+
+#endif // DRIVER_H
diff --git a/tools/gator/daemon/DriverSource.cpp b/tools/gator/daemon/DriverSource.cpp
new file mode 100644
index 000000000000..7f299b646952
--- /dev/null
+++ b/tools/gator/daemon/DriverSource.cpp
@@ -0,0 +1,322 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+// Define to get format macros from inttypes.h
+#define __STDC_FORMAT_MACROS
+
+#include "DriverSource.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "Child.h"
+#include "DynBuf.h"
+#include "Fifo.h"
+#include "Logging.h"
+#include "Proc.h"
+#include "Sender.h"
+#include "SessionData.h"
+
+extern Child *child;
+
+DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mBuffer(NULL), mFifo(NULL), mSenderSem(senderSem), mStartProfile(startProfile), mBufferSize(0), mBufferFD(0), mLength(1) {
+ int driver_version = 0;
+
+ mBuffer = new Buffer(0, FRAME_PERF_ATTRS, 4*1024*1024, senderSem);
+ if (readIntDriver("/dev/gator/version", &driver_version) == -1) {
+ logg->logError(__FILE__, __LINE__, "Error reading gator driver version");
+ handleException();
+ }
+
+ // Verify the driver version matches the daemon version
+ if (driver_version != PROTOCOL_VERSION) {
+ if ((driver_version > PROTOCOL_DEV) || (PROTOCOL_VERSION > PROTOCOL_DEV)) {
+ // One of the mismatched versions is development version
+ logg->logError(__FILE__, __LINE__,
+ "DEVELOPMENT BUILD MISMATCH: gator driver version \"%d\" is not in sync with gator daemon version \"%d\".\n"
+ ">> The following must be synchronized from engineering repository:\n"
+ ">> * gator driver\n"
+ ">> * gator daemon\n"
+ ">> * Streamline", driver_version, PROTOCOL_VERSION);
+ handleException();
+ } else {
+ // Release version mismatch
+ logg->logError(__FILE__, __LINE__,
+ "gator driver version \"%d\" is different than gator daemon version \"%d\".\n"
+ ">> Please upgrade the driver and daemon to the latest versions.", driver_version, PROTOCOL_VERSION);
+ handleException();
+ }
+ }
+
+ int enable = -1;
+ if (readIntDriver("/dev/gator/enable", &enable) != 0 || enable != 0) {
+ logg->logError(__FILE__, __LINE__, "Driver already enabled, possibly a session is already in progress.");
+ handleException();
+ }
+
+ readIntDriver("/dev/gator/cpu_cores", &gSessionData->mCores);
+ if (gSessionData->mCores == 0) {
+ gSessionData->mCores = 1;
+ }
+
+ if (readIntDriver("/dev/gator/buffer_size", &mBufferSize) || mBufferSize <= 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to read the driver buffer size");
+ handleException();
+ }
+}
+
+DriverSource::~DriverSource() {
+ delete mFifo;
+
+ // Write zero for safety, as a zero should have already been written
+ writeDriver("/dev/gator/enable", "0");
+
+ // Calls event_buffer_release in the driver
+ if (mBufferFD) {
+ close(mBufferFD);
+ }
+}
+
+bool DriverSource::prepare() {
+ // Create user-space buffers, add 5 to the size to account for the 1-byte type and 4-byte length
+ logg->logMessage("Created %d MB collector buffer with a %d-byte ragged end", gSessionData->mTotalBufferSize, mBufferSize);
+ mFifo = new Fifo(mBufferSize + 5, gSessionData->mTotalBufferSize*1024*1024, mSenderSem);
+
+ return true;
+}
+
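+// Scan /proc on a helper thread to record the processes and threads that
+// already exist, so the main capture loop can start immediately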
+void DriverSource::bootstrapThread() {
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-proc", 0, 0, 0);
+
+ DynBuf printb;
+ DynBuf b1;
+ DynBuf b2;
+ const uint64_t currTime = getTime();
+
+ if (!readProcComms(currTime, mBuffer, &printb, &b1, &b2)) {
+ logg->logError(__FILE__, __LINE__, "readProcComms failed");
+ handleException();
+ }
+
+ mBuffer->commit(currTime);
+ mBuffer->setDone();
+}
+
+void *DriverSource::bootstrapThreadStatic(void *arg) {
+ static_cast<DriverSource *>(arg)->bootstrapThread();
+ return NULL;
+}
+
+void DriverSource::run() {
+ // Get the initial pointer to the collect buffer
+ char *collectBuffer = mFifo->start();
+ int bytesCollected = 0;
+
+ logg->logMessage("********** Profiling started **********");
+
+ // Set the maximum backtrace depth
+ if (writeReadDriver("/dev/gator/backtrace_depth", &gSessionData->mBacktraceDepth)) {
+ logg->logError(__FILE__, __LINE__, "Unable to set the driver backtrace depth");
+ handleException();
+ }
+
+ // open the buffer which calls userspace_buffer_open() in the driver
+ mBufferFD = open("/dev/gator/buffer", O_RDONLY | O_CLOEXEC);
+ if (mBufferFD < 0) {
+ logg->logError(__FILE__, __LINE__, "The gator driver did not set up properly. Please view the linux console or dmesg log for more information on the failure.");
+ handleException();
+ }
+
+ // set the tick rate of the profiling timer
+ if (writeReadDriver("/dev/gator/tick", &gSessionData->mSampleRate) != 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to set the driver tick");
+ handleException();
+ }
+
+ // notify the kernel of the response type
+ int response_type = gSessionData->mLocalCapture ? 0 : RESPONSE_APC_DATA;
+ if (writeDriver("/dev/gator/response_type", response_type)) {
+ logg->logError(__FILE__, __LINE__, "Unable to write the response type");
+ handleException();
+ }
+
+ // Set the live rate
+ if (writeReadDriver("/dev/gator/live_rate", &gSessionData->mLiveRate)) {
+ logg->logError(__FILE__, __LINE__, "Unable to set the driver live rate");
+ handleException();
+ }
+
+ logg->logMessage("Start the driver");
+
+ // This command makes the driver start profiling by calling gator_op_start() in the driver
+ if (writeDriver("/dev/gator/enable", "1") != 0) {
+ logg->logError(__FILE__, __LINE__, "The gator driver did not start properly. Please view the linux console or dmesg log for more information on the failure.");
+ handleException();
+ }
+
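+	// Rewind the buffer fd so collection starts at the beginning of the driver buffer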
+ lseek(mBufferFD, 0, SEEK_SET);
+
+ sem_post(mStartProfile);
+
+ pthread_t bootstrapThreadID;
+ if (pthread_create(&bootstrapThreadID, NULL, bootstrapThreadStatic, this) != 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to start the gator_bootstrap thread");
+ handleException();
+ }
+
+ // Collect Data
+ do {
+ // This command will stall until data is received from the driver
+ // Calls event_buffer_read in the driver
+ errno = 0;
+ bytesCollected = read(mBufferFD, collectBuffer, mBufferSize);
+
+ // If read() returned due to an interrupt signal, re-read to obtain the last bit of collected data
+ if (bytesCollected == -1 && errno == EINTR) {
+ bytesCollected = read(mBufferFD, collectBuffer, mBufferSize);
+ }
+
+		// Log how many bytes were read from the driver
+ logg->logMessage("Driver read of %d bytes", bytesCollected);
+
+ // In one shot mode, stop collection once all the buffers are filled
+ if (gSessionData->mOneShot && gSessionData->mSessionIsActive) {
+ if (bytesCollected == -1 || mFifo->willFill(bytesCollected)) {
+ logg->logMessage("One shot");
+ child->endSession();
+ }
+ }
+ collectBuffer = mFifo->write(bytesCollected);
+ } while (bytesCollected > 0);
+
+ logg->logMessage("Exit collect data loop");
+
+ pthread_join(bootstrapThreadID, NULL);
+}
+
+void DriverSource::interrupt() {
+	// This write should cause the read() in run() to return and stop the driver from profiling
+ if (writeDriver("/dev/gator/enable", "0") != 0) {
+ logg->logMessage("Stopping kernel failed");
+ }
+}
+
+bool DriverSource::isDone() {
+ return mLength <= 0 && (mBuffer == NULL || mBuffer->isDone());
+}
+
+void DriverSource::write(Sender *sender) {
+ char *data = mFifo->read(&mLength);
+ if (data != NULL) {
+ sender->writeData(data, mLength, RESPONSE_APC_DATA);
+ mFifo->release();
+ // Assume the summary packet is in the first block received from the driver
+ gSessionData->mSentSummary = true;
+ }
+ if (mBuffer != NULL && !mBuffer->isDone()) {
+ mBuffer->write(sender);
+ if (mBuffer->isDone()) {
+ Buffer *buf = mBuffer;
+ mBuffer = NULL;
+ delete buf;
+ }
+ }
+}
+
+int DriverSource::readIntDriver(const char *fullpath, int *value) {
+ char data[40]; // Sufficiently large to hold any integer
+ const int fd = open(fullpath, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ return -1;
+ }
+
+ const ssize_t bytes = read(fd, data, sizeof(data) - 1);
+ close(fd);
+ if (bytes < 0) {
+ return -1;
+ }
+ data[bytes] = '\0';
+
+ char *endptr;
+ errno = 0;
+ *value = strtol(data, &endptr, 10);
+	if (errno != 0 || (*endptr != '\n' && *endptr != '\0')) {
+ logg->logMessage("Invalid value in file %s", fullpath);
+ return -1;
+ }
+
+ return 0;
+}
+
+int DriverSource::readInt64Driver(const char *fullpath, int64_t *value) {
+ char data[40]; // Sufficiently large to hold any integer
+ const int fd = open(fullpath, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ return -1;
+ }
+
+ const ssize_t bytes = read(fd, data, sizeof(data) - 1);
+ close(fd);
+ if (bytes < 0) {
+ return -1;
+ }
+ data[bytes] = '\0';
+
+ char *endptr;
+ errno = 0;
+ *value = strtoll(data, &endptr, 10);
+ if (errno != 0 || (*endptr != '\n' && *endptr != '\0')) {
+ logg->logMessage("Invalid value in file %s", fullpath);
+ return -1;
+ }
+
+ return 0;
+}
+
+int DriverSource::writeDriver(const char *fullpath, const char *data) {
+ int fd = open(fullpath, O_WRONLY | O_CLOEXEC);
+ if (fd < 0) {
+ return -1;
+ }
+ if (::write(fd, data, strlen(data)) < 0) {
+ close(fd);
+ logg->logMessage("Opened but could not write to %s", fullpath);
+ return -1;
+ }
+ close(fd);
+ return 0;
+}
+
+int DriverSource::writeDriver(const char *path, int value) {
+ char data[40]; // Sufficiently large to hold any integer
+ snprintf(data, sizeof(data), "%d", value);
+ return writeDriver(path, data);
+}
+
+int DriverSource::writeDriver(const char *path, int64_t value) {
+ char data[40]; // Sufficiently large to hold any integer
+ snprintf(data, sizeof(data), "%" PRIi64, value);
+ return writeDriver(path, data);
+}
+
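+// Write the value to the driver then read it back, so the caller observes the value the driver actually accepted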
+int DriverSource::writeReadDriver(const char *path, int *value) {
+ if (writeDriver(path, *value) || readIntDriver(path, value)) {
+ return -1;
+ }
+ return 0;
+}
+
+int DriverSource::writeReadDriver(const char *path, int64_t *value) {
+ if (writeDriver(path, *value) || readInt64Driver(path, value)) {
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/gator/daemon/DriverSource.h b/tools/gator/daemon/DriverSource.h
new file mode 100644
index 000000000000..ec27b0815bbf
--- /dev/null
+++ b/tools/gator/daemon/DriverSource.h
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef DRIVERSOURCE_H
+#define DRIVERSOURCE_H
+
+#include <semaphore.h>
+#include <stdint.h>
+
+#include "Source.h"
+
+class Buffer;
+class Fifo;
+
+class DriverSource : public Source {
+public:
+ DriverSource(sem_t *senderSem, sem_t *startProfile);
+ ~DriverSource();
+
+ bool prepare();
+ void run();
+ void interrupt();
+
+ bool isDone();
+ void write(Sender *sender);
+
+ static int readIntDriver(const char *fullpath, int *value);
+ static int readInt64Driver(const char *fullpath, int64_t *value);
+ static int writeDriver(const char *fullpath, const char *data);
+ static int writeDriver(const char *path, int value);
+ static int writeDriver(const char *path, int64_t value);
+ static int writeReadDriver(const char *path, int *value);
+ static int writeReadDriver(const char *path, int64_t *value);
+
+private:
+ static void *bootstrapThreadStatic(void *arg);
+ void bootstrapThread();
+
+ Buffer *mBuffer;
+ Fifo *mFifo;
+ sem_t *const mSenderSem;
+ sem_t *const mStartProfile;
+ int mBufferSize;
+ int mBufferFD;
+ int mLength;
+
+ // Intentionally unimplemented
+ DriverSource(const DriverSource &);
+ DriverSource &operator=(const DriverSource &);
+};
+
+#endif // DRIVERSOURCE_H
diff --git a/tools/gator/daemon/DynBuf.cpp b/tools/gator/daemon/DynBuf.cpp
new file mode 100644
index 000000000000..df20713ad63c
--- /dev/null
+++ b/tools/gator/daemon/DynBuf.cpp
@@ -0,0 +1,142 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "DynBuf.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "Logging.h"
+
+// Pick an aggressive size as buffer is primarily used for disk IO
+#define MIN_BUFFER_FREE (1 << 12)
+
+int DynBuf::resize(const size_t minCapacity) {
+ size_t scaledCapacity = 2 * capacity;
+ if (scaledCapacity < minCapacity) {
+ scaledCapacity = minCapacity;
+ }
+ if (scaledCapacity < 2 * MIN_BUFFER_FREE) {
+ scaledCapacity = 2 * MIN_BUFFER_FREE;
+ }
+
+	// Assign through a temporary so the existing allocation is not leaked if realloc fails
+	char *const newBuf = static_cast<char *>(realloc(buf, scaledCapacity));
+	if (newBuf == NULL) {
+		return -errno;
+	}
+	buf = newBuf;
+	capacity = scaledCapacity;
+
+ return 0;
+}
+
+bool DynBuf::read(const char *const path) {
+ int result = false;
+
+ const int fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ logg->logMessage("%s(%s:%i): open failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ length = 0;
+
+ for (;;) {
+ const size_t minCapacity = length + MIN_BUFFER_FREE + 1;
+ if (capacity < minCapacity) {
+ if (resize(minCapacity) != 0) {
+ logg->logMessage("%s(%s:%i): DynBuf::resize failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+ }
+
+ const ssize_t bytes = ::read(fd, buf + length, capacity - length - 1);
+ if (bytes < 0) {
+ logg->logMessage("%s(%s:%i): read failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ } else if (bytes == 0) {
+ break;
+ }
+ length += bytes;
+ }
+
+ buf[length] = '\0';
+ result = true;
+
+ fail:
+ close(fd);
+
+ return result;
+}
+
+int DynBuf::readlink(const char *const path) {
+ ssize_t bytes = MIN_BUFFER_FREE;
+
+ for (;;) {
+ if (static_cast<size_t>(bytes) >= capacity) {
+ const int err = resize(2 * bytes);
+ if (err != 0) {
+ return err;
+ }
+ }
+ bytes = ::readlink(path, buf, capacity);
+ if (bytes < 0) {
+ return -errno;
+ } else if (static_cast<size_t>(bytes) < capacity) {
+ break;
+ }
+ }
+
+ length = bytes;
+ buf[bytes] = '\0';
+
+ return 0;
+}
+
+bool DynBuf::printf(const char *format, ...) {
+ va_list ap;
+
+ if (capacity <= 0) {
+ if (resize(2 * MIN_BUFFER_FREE) != 0) {
+ logg->logMessage("%s(%s:%i): DynBuf::resize failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ }
+
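+	// First pass: vsnprintf returns the length the full string needs (excluding the NUL), even when the output was truncated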
+ va_start(ap, format);
+ int bytes = vsnprintf(buf, capacity, format, ap);
+ va_end(ap);
+ if (bytes < 0) {
+ logg->logMessage("%s(%s:%i): fsnprintf failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+	if (static_cast<size_t>(bytes) >= capacity) { // >= because vsnprintf truncates when it needs bytes + 1, including the NUL
+ if (resize(bytes + 1) != 0) {
+ logg->logMessage("%s(%s:%i): DynBuf::resize failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ va_start(ap, format);
+ bytes = vsnprintf(buf, capacity, format, ap);
+ va_end(ap);
+ if (bytes < 0) {
+ logg->logMessage("%s(%s:%i): fsnprintf failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ }
+
+ length = bytes;
+
+ return true;
+}
diff --git a/tools/gator/daemon/DynBuf.h b/tools/gator/daemon/DynBuf.h
new file mode 100644
index 000000000000..2f4554ab2e49
--- /dev/null
+++ b/tools/gator/daemon/DynBuf.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef DYNBUF_H
+#define DYNBUF_H
+
+#include <stdlib.h>
+
+class DynBuf {
+public:
+ DynBuf() : capacity(0), length(0), buf(NULL) {}
+ ~DynBuf() {
+ reset();
+ }
+
+ inline void reset() {
+ capacity = 0;
+ length = 0;
+ if (buf != NULL) {
+ free(buf);
+ buf = NULL;
+ }
+ }
+
+ bool read(const char *const path);
+ // On error instead of printing the error and returning false, this returns -errno
+ int readlink(const char *const path);
+ __attribute__ ((format(printf, 2, 3)))
+ bool printf(const char *format, ...);
+
+ size_t getLength() const { return length; }
+ const char *getBuf() const { return buf; }
+ char *getBuf() { return buf; }
+
+private:
+ int resize(const size_t minCapacity);
+
+ size_t capacity;
+ size_t length;
+ char *buf;
+
+ // Intentionally undefined
+ DynBuf(const DynBuf &);
+ DynBuf &operator=(const DynBuf &);
+};
+
+#endif // DYNBUF_H
diff --git a/tools/gator/daemon/EventsXML.cpp b/tools/gator/daemon/EventsXML.cpp
new file mode 100644
index 000000000000..d905bbabe988
--- /dev/null
+++ b/tools/gator/daemon/EventsXML.cpp
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "EventsXML.h"
+
+#include "CapturedXML.h"
+#include "Logging.h"
+#include "OlyUtility.h"
+#include "SessionData.h"
+
+mxml_node_t *EventsXML::getTree() {
+#include "events_xml.h" // defines and initializes char events_xml[] and int events_xml_len
+ char path[PATH_MAX];
+ mxml_node_t *xml;
+ FILE *fl;
+
+ // Avoid unused variable warning
+ (void)events_xml_len;
+
+ // Load the provided or default events xml
+ if (gSessionData->mEventsXMLPath) {
+		strncpy(path, gSessionData->mEventsXMLPath, PATH_MAX);
+		path[PATH_MAX - 1] = '\0'; // strncpy does not guarantee a null-terminated string
+ } else {
+ util->getApplicationFullPath(path, PATH_MAX);
+ strncat(path, "events.xml", PATH_MAX - strlen(path) - 1);
+ }
+ fl = fopen(path, "r");
+ if (fl) {
+ xml = mxmlLoadFile(NULL, fl, MXML_NO_CALLBACK);
+ fclose(fl);
+ } else {
+ logg->logMessage("Unable to locate events.xml, using default");
+ xml = mxmlLoadString(NULL, (const char *)events_xml, MXML_NO_CALLBACK);
+ }
+
+ return xml;
+}
+
+char *EventsXML::getXML() {
+ mxml_node_t *xml = getTree();
+
+ // Add dynamic events from the drivers
+ mxml_node_t *events = mxmlFindElement(xml, xml, "events", NULL, NULL, MXML_DESCEND);
+ if (!events) {
+ logg->logError(__FILE__, __LINE__, "Unable to find <events> node in the events.xml");
+ handleException();
+ }
+ for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) {
+ driver->writeEvents(events);
+ }
+
+ char *string = mxmlSaveAllocString(xml, mxmlWhitespaceCB);
+ mxmlDelete(xml);
+
+ return string;
+}
+
+void EventsXML::write(const char *path) {
+ char file[PATH_MAX];
+
+ // Set full path
+ snprintf(file, PATH_MAX, "%s/events.xml", path);
+
+ char *buf = getXML();
+ if (util->writeToDisk(file, buf) < 0) {
+ logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify the path.", file);
+ handleException();
+ }
+
+ free(buf);
+}
diff --git a/tools/gator/daemon/EventsXML.h b/tools/gator/daemon/EventsXML.h
new file mode 100644
index 000000000000..ff7a02fd3c78
--- /dev/null
+++ b/tools/gator/daemon/EventsXML.h
@@ -0,0 +1,21 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EVENTS_XML
+#define EVENTS_XML
+
+#include "mxml/mxml.h"
+
+class EventsXML {
+public:
+ mxml_node_t *getTree();
+ char *getXML();
+ void write(const char* path);
+};
+
+#endif // EVENTS_XML
diff --git a/tools/gator/daemon/ExternalSource.cpp b/tools/gator/daemon/ExternalSource.cpp
new file mode 100644
index 000000000000..8f5e6b684c53
--- /dev/null
+++ b/tools/gator/daemon/ExternalSource.cpp
@@ -0,0 +1,262 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "ExternalSource.h"
+
+#include <fcntl.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "Logging.h"
+#include "OlySocket.h"
+#include "SessionData.h"
+
+static const char MALI_VIDEO[] = "\0mali-video";
+static const char MALI_VIDEO_STARTUP[] = "\0mali-video-startup";
+static const char MALI_VIDEO_V1[] = "MALI_VIDEO 1\n";
+static const char MALI_GRAPHICS[] = "\0mali_thirdparty_server";
+static const char MALI_GRAPHICS_STARTUP[] = "\0mali_thirdparty_client";
+static const char MALI_GRAPHICS_V1[] = "MALI_GRAPHICS 1\n";
+
+static bool setNonblock(const int fd) {
+ int flags;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags < 0) {
+ logg->logMessage("fcntl getfl failed");
+ return false;
+ }
+
+ if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) != 0) {
+ logg->logMessage("fcntl setfl failed");
+ return false;
+ }
+
+ return true;
+}
+
+ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 128*1024, senderSem), mMonitor(), mMveStartupUds(MALI_VIDEO_STARTUP, sizeof(MALI_VIDEO_STARTUP)), mMaliStartupUds(MALI_GRAPHICS_STARTUP, sizeof(MALI_GRAPHICS_STARTUP)), mAnnotate(8083), mInterruptFd(-1), mMaliUds(-1), mMveUds(-1) {
+ sem_init(&mBufferSem, 0, 0);
+}
+
+ExternalSource::~ExternalSource() {
+}
+
+void ExternalSource::waitFor(const int bytes) {
+ while (mBuffer.bytesAvailable() <= bytes) {
+ sem_wait(&mBufferSem);
+ }
+}
+
+void ExternalSource::configureConnection(const int fd, const char *const handshake, size_t size) {
+ if (!setNonblock(fd)) {
+ logg->logError(__FILE__, __LINE__, "Unable to set nonblock on fh");
+ handleException();
+ }
+
+ if (!mMonitor.add(fd)) {
+ logg->logError(__FILE__, __LINE__, "Unable to add fh to monitor");
+ handleException();
+ }
+
+ // Write the handshake to the circular buffer
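+	// size includes the string literal's trailing NUL, which is not sent (hence size - 1 below)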
+ waitFor(Buffer::MAXSIZE_PACK32 + size - 1);
+ mBuffer.packInt(fd);
+ mBuffer.writeBytes(handshake, size - 1);
+ mBuffer.commit(1);
+}
+
+bool ExternalSource::connectMali() {
+ mMaliUds = OlySocket::connect(MALI_GRAPHICS, sizeof(MALI_GRAPHICS));
+ if (mMaliUds < 0) {
+ return false;
+ }
+
+ configureConnection(mMaliUds, MALI_GRAPHICS_V1, sizeof(MALI_GRAPHICS_V1));
+
+ return true;
+}
+
+bool ExternalSource::connectMve() {
+ if (!gSessionData->maliVideo.countersEnabled()) {
+ return true;
+ }
+
+ mMveUds = OlySocket::connect(MALI_VIDEO, sizeof(MALI_VIDEO));
+ if (mMveUds < 0) {
+ return false;
+ }
+
+ if (!gSessionData->maliVideo.start(mMveUds)) {
+ return false;
+ }
+
+ configureConnection(mMveUds, MALI_VIDEO_V1, sizeof(MALI_VIDEO_V1));
+
+ return true;
+}
+
+bool ExternalSource::prepare() {
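+	// The trailing false lets every real condition line end in ||, so lines can be added or reordered freely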
+ if (!mMonitor.init() ||
+ !setNonblock(mMveStartupUds.getFd()) || !mMonitor.add(mMveStartupUds.getFd()) ||
+ !setNonblock(mMaliStartupUds.getFd()) || !mMonitor.add(mMaliStartupUds.getFd()) ||
+ !setNonblock(mAnnotate.getFd()) || !mMonitor.add(mAnnotate.getFd()) ||
+ false) {
+ return false;
+ }
+
+ connectMali();
+ connectMve();
+
+ return true;
+}
+
+void ExternalSource::run() {
+ int pipefd[2];
+
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-external", 0, 0, 0);
+
+ if (pipe_cloexec(pipefd) != 0) {
+ logg->logError(__FILE__, __LINE__, "pipe failed");
+ handleException();
+ }
+ mInterruptFd = pipefd[1];
+
+ if (!mMonitor.add(pipefd[0])) {
+ logg->logError(__FILE__, __LINE__, "Monitor::add failed");
+ handleException();
+ }
+
+ // Notify annotate clients to retry connecting to gatord
+ gSessionData->annotateListener.signal();
+
+ while (gSessionData->mSessionIsActive) {
+ struct epoll_event events[16];
+ // Clear any pending sem posts
+ while (sem_trywait(&mBufferSem) == 0);
+ int ready = mMonitor.wait(events, ARRAY_LENGTH(events), -1);
+ if (ready < 0) {
+ logg->logError(__FILE__, __LINE__, "Monitor::wait failed");
+ handleException();
+ }
+
+ const uint64_t currTime = getTime();
+
+ for (int i = 0; i < ready; ++i) {
+ const int fd = events[i].data.fd;
+ if (fd == mMveStartupUds.getFd()) {
+ // Mali Video Engine says it's alive
+ int client = mMveStartupUds.acceptConnection();
+ // Don't read from this connection, establish a new connection to Mali-V500
+ close(client);
+ if (!connectMve()) {
+ logg->logError(__FILE__, __LINE__, "Unable to configure incoming Mali video connection");
+ handleException();
+ }
+ } else if (fd == mMaliStartupUds.getFd()) {
+ // Mali Graphics says it's alive
+ int client = mMaliStartupUds.acceptConnection();
+ // Don't read from this connection, establish a new connection to Mali Graphics
+ close(client);
+ if (!connectMali()) {
+ logg->logError(__FILE__, __LINE__, "Unable to configure incoming Mali graphics connection");
+ handleException();
+ }
+ } else if (fd == mAnnotate.getFd()) {
+ int client = mAnnotate.acceptConnection();
+ if (!setNonblock(client) || !mMonitor.add(client)) {
+ logg->logError(__FILE__, __LINE__, "Unable to set socket options on incoming annotation connection");
+ handleException();
+ }
+ } else if (fd == pipefd[0]) {
+ // Means interrupt has been called and mSessionIsActive should be reread
+ } else {
+ /* This can result in some starvation if there are multiple
+ * threads which are annotating heavily, but it is not
+ * recommended that threads annotate that much as it can also
+ * starve out the gator data.
+ */
+ while (gSessionData->mSessionIsActive) {
+ // Wait until there is enough room for the fd, two headers and two ints
+ waitFor(7*Buffer::MAXSIZE_PACK32 + 2*sizeof(uint32_t));
+ mBuffer.packInt(fd);
+ const int contiguous = mBuffer.contiguousSpaceAvailable();
+ const int bytes = read(fd, mBuffer.getWritePos(), contiguous);
+ if (bytes < 0) {
+ if (errno == EAGAIN) {
+ // Nothing left to read
+ mBuffer.commit(currTime);
+ break;
+ }
+ // Something else failed, close the socket
+ mBuffer.commit(currTime);
+ mBuffer.packInt(-1);
+ mBuffer.packInt(fd);
+ mBuffer.commit(currTime);
+ close(fd);
+ break;
+ } else if (bytes == 0) {
+ // The other side is closed
+ mBuffer.commit(currTime);
+ mBuffer.packInt(-1);
+ mBuffer.packInt(fd);
+ mBuffer.commit(currTime);
+ close(fd);
+ break;
+ }
+
+ mBuffer.advanceWrite(bytes);
+ mBuffer.commit(currTime);
+
+ // Short reads also mean nothing is left to read
+ if (bytes < contiguous) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ mBuffer.setDone();
+
+ if (mMveUds >= 0) {
+ gSessionData->maliVideo.stop(mMveUds);
+ }
+
+ mInterruptFd = -1;
+ close(pipefd[0]);
+ close(pipefd[1]);
+}
+
+void ExternalSource::interrupt() {
+ if (mInterruptFd >= 0) {
+ int8_t c = 0;
+ // Write to the pipe to wake the monitor which will cause mSessionIsActive to be reread
+ if (::write(mInterruptFd, &c, sizeof(c)) != sizeof(c)) {
+ logg->logError(__FILE__, __LINE__, "write failed");
+ handleException();
+ }
+ }
+}
+
+bool ExternalSource::isDone() {
+ return mBuffer.isDone();
+}
+
+void ExternalSource::write(Sender *sender) {
+ // Don't send external data until the summary packet is sent so that monotonic delta is available
+ if (!gSessionData->mSentSummary) {
+ return;
+ }
+ if (!mBuffer.isDone()) {
+ mBuffer.write(sender);
+ sem_post(&mBufferSem);
+ }
+}
diff --git a/tools/gator/daemon/ExternalSource.h b/tools/gator/daemon/ExternalSource.h
new file mode 100644
index 000000000000..919e75e8a41a
--- /dev/null
+++ b/tools/gator/daemon/ExternalSource.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EXTERNALSOURCE_H
+#define EXTERNALSOURCE_H
+
+#include <semaphore.h>
+
+#include "Buffer.h"
+#include "Monitor.h"
+#include "OlySocket.h"
+#include "Source.h"
+
+// Counters from external sources like graphics drivers and annotations
+class ExternalSource : public Source {
+public:
+ ExternalSource(sem_t *senderSem);
+ ~ExternalSource();
+
+ bool prepare();
+ void run();
+ void interrupt();
+
+ bool isDone();
+ void write(Sender *sender);
+
+private:
+ void waitFor(const int bytes);
+ void configureConnection(const int fd, const char *const handshake, size_t size);
+ bool connectMali();
+ bool connectMve();
+
+ sem_t mBufferSem;
+ Buffer mBuffer;
+ Monitor mMonitor;
+ OlyServerSocket mMveStartupUds;
+ OlyServerSocket mMaliStartupUds;
+ OlyServerSocket mAnnotate;
+ int mInterruptFd;
+ int mMaliUds;
+ int mMveUds;
+
+ // Intentionally unimplemented
+ ExternalSource(const ExternalSource &);
+ ExternalSource &operator=(const ExternalSource &);
+};
+
+#endif // EXTERNALSOURCE_H
diff --git a/tools/gator/daemon/FSDriver.cpp b/tools/gator/daemon/FSDriver.cpp
new file mode 100644
index 000000000000..dd8eb804dc99
--- /dev/null
+++ b/tools/gator/daemon/FSDriver.cpp
@@ -0,0 +1,160 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "FSDriver.h"
+
+#include <fcntl.h>
+#include <regex.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "DriverSource.h"
+#include "Logging.h"
+
+class FSCounter : public DriverCounter {
+public:
+ FSCounter(DriverCounter *next, char *name, char *path, const char *regex);
+ ~FSCounter();
+
+ const char *getPath() const { return mPath; }
+
+ int64_t read();
+
+private:
+ char *const mPath;
+ regex_t mReg;
+ bool mUseRegex;
+
+ // Intentionally unimplemented
+ FSCounter(const FSCounter &);
+ FSCounter &operator=(const FSCounter &);
+};
+
+FSCounter::FSCounter(DriverCounter *next, char *name, char *path, const char *regex) : DriverCounter(next, name), mPath(path), mUseRegex(regex != NULL) {
+ if (mUseRegex) {
+ int result = regcomp(&mReg, regex, REG_EXTENDED);
+ if (result != 0) {
+ char buf[128];
+ regerror(result, &mReg, buf, sizeof(buf));
+ logg->logError(__FILE__, __LINE__, "Invalid regex '%s': %s", regex, buf);
+ handleException();
+ }
+ }
+}
+
+FSCounter::~FSCounter() {
+ free(mPath);
+ if (mUseRegex) {
+ regfree(&mReg);
+ }
+}
+
+int64_t FSCounter::read() {
+ int64_t value;
+ if (mUseRegex) {
+ char buf[4096];
+ size_t pos = 0;
+ const int fd = open(mPath, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ goto fail;
+ }
+ while (pos < sizeof(buf) - 1) {
+ const ssize_t bytes = ::read(fd, buf + pos, sizeof(buf) - pos - 1);
+			if (bytes < 0) {
+				close(fd); // don't leak the fd on the error path
+				goto fail;
+			} else if (bytes == 0) {
+ break;
+ }
+ pos += bytes;
+ }
+ close(fd);
+ buf[pos] = '\0';
+
+ regmatch_t match[2];
+ int result = regexec(&mReg, buf, 2, match, 0);
+ if (result != 0) {
+ regerror(result, &mReg, buf, sizeof(buf));
+ logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", mPath, buf);
+ handleException();
+ }
+
+ if (match[1].rm_so < 0) {
+ logg->logError(__FILE__, __LINE__, "Parsing %s failed", mPath);
+ handleException();
+ }
+
+ errno = 0;
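+		// base 0 lets strtoll accept decimal, hex (0x...) and octal values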
+ value = strtoll(buf + match[1].rm_so, NULL, 0);
+ if (errno != 0) {
+ logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", mPath, strerror(errno));
+ handleException();
+ }
+ } else {
+ if (DriverSource::readInt64Driver(mPath, &value) != 0) {
+ goto fail;
+ }
+ }
+ return value;
+
+ fail:
+ logg->logError(__FILE__, __LINE__, "Unable to read %s", mPath);
+ handleException();
+}
+
+FSDriver::FSDriver() {
+}
+
+FSDriver::~FSDriver() {
+}
+
+void FSDriver::readEvents(mxml_node_t *const xml) {
+ mxml_node_t *node = xml;
+ while (true) {
+ node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
+ if (node == NULL) {
+ break;
+ }
+ const char *counter = mxmlElementGetAttr(node, "counter");
+ if (counter == NULL) {
+ continue;
+ }
+
+ if (counter[0] == '/') {
+ logg->logError(__FILE__, __LINE__, "Old style filesystem counter (%s) detected, please create a new unique counter value and move the filename into the path attribute, see events-Filesystem.xml for examples", counter);
+ handleException();
+ }
+
+ if (strncmp(counter, "filesystem_", 11) != 0) {
+ continue;
+ }
+
+ const char *path = mxmlElementGetAttr(node, "path");
+ if (path == NULL) {
+ logg->logError(__FILE__, __LINE__, "The filesystem counter %s is missing the required path attribute", counter);
+ handleException();
+ }
+ const char *regex = mxmlElementGetAttr(node, "regex");
+ setCounters(new FSCounter(getCounters(), strdup(counter), strdup(path), regex));
+ }
+}
+
+int FSDriver::writeCounters(mxml_node_t *root) const {
+ int count = 0;
+ for (FSCounter *counter = static_cast<FSCounter *>(getCounters()); counter != NULL; counter = static_cast<FSCounter *>(counter->getNext())) {
+ if (access(counter->getPath(), R_OK) == 0) {
+ mxml_node_t *node = mxmlNewElement(root, "counter");
+ mxmlElementSetAttr(node, "name", counter->getName());
+ ++count;
+ }
+ }
+
+ return count;
+}
diff --git a/tools/gator/daemon/FSDriver.h b/tools/gator/daemon/FSDriver.h
new file mode 100644
index 000000000000..a7dc8b4df9dd
--- /dev/null
+++ b/tools/gator/daemon/FSDriver.h
@@ -0,0 +1,29 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef FSDRIVER_H
+#define FSDRIVER_H
+
+#include "Driver.h"
+
+class FSDriver : public PolledDriver {
+public:
+ FSDriver();
+ ~FSDriver();
+
+ void readEvents(mxml_node_t *const xml);
+
+ int writeCounters(mxml_node_t *root) const;
+
+private:
+ // Intentionally unimplemented
+ FSDriver(const FSDriver &);
+ FSDriver &operator=(const FSDriver &);
+};
+
+#endif // FSDRIVER_H
diff --git a/tools/gator/daemon/Fifo.cpp b/tools/gator/daemon/Fifo.cpp
new file mode 100644
index 000000000000..41275fd287b8
--- /dev/null
+++ b/tools/gator/daemon/Fifo.cpp
@@ -0,0 +1,128 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Fifo.h"
+
+#include <stdlib.h>
+
+#include "Logging.h"
+
+// bufferSize is the wrap threshold: the write pointer wraps back to the start once it is reached
+// singleBufferSize is the maximum number of bytes that may be filled during a single write
+// (bufferSize + singleBufferSize) bytes are allocated so a maximal write at the threshold still fits
+Fifo::Fifo(int singleBufferSize, int bufferSize, sem_t* readerSem) {
+ mWrite = mRead = mReadCommit = mRaggedEnd = 0;
+ mWrapThreshold = bufferSize;
+ mSingleBufferSize = singleBufferSize;
+ mReaderSem = readerSem;
+ mBuffer = (char*)malloc(bufferSize + singleBufferSize);
+ mEnd = false;
+
+ if (mBuffer == NULL) {
+ logg->logError(__FILE__, __LINE__, "failed to allocate %d bytes", bufferSize + singleBufferSize);
+ handleException();
+ }
+
+ if (sem_init(&mWaitForSpaceSem, 0, 0)) {
+ logg->logError(__FILE__, __LINE__, "sem_init() failed");
+ handleException();
+ }
+}
+
+Fifo::~Fifo() {
+ free(mBuffer);
+ sem_destroy(&mWaitForSpaceSem);
+}
+
+int Fifo::numBytesFilled() const {
+ return mWrite - mRead + mRaggedEnd;
+}
+
+char* Fifo::start() const {
+ return mBuffer;
+}
+
+bool Fifo::isEmpty() const {
+ return mRead == mWrite && mRaggedEnd == 0;
+}
+
+bool Fifo::isFull() const {
+ return willFill(0);
+}
+
+// Determines if the buffer will fill assuming 'additional' bytes will be added to the buffer
+// 'full' means there is less than singleBufferSize bytes available contiguously; it does not mean there are zero bytes available
+bool Fifo::willFill(int additional) const {
+ if (mWrite > mRead) {
+ if (numBytesFilled() + additional < mWrapThreshold) {
+ return false;
+ }
+ } else {
+ if (numBytesFilled() + additional < mWrapThreshold - mSingleBufferSize) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// This function will stall until contiguous singleBufferSize bytes are available
+char* Fifo::write(int length) {
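+	// A non-positive length marks the end of the data; readers drain what remains and then observe mEnd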
+ if (length <= 0) {
+ length = 0;
+ mEnd = true;
+ }
+
+ // update the write pointer
+ mWrite += length;
+
+ // handle the wrap-around
+ if (mWrite >= mWrapThreshold) {
+ mRaggedEnd = mWrite;
+ mWrite = 0;
+ }
+
+ // send a notification that data is ready
+ sem_post(mReaderSem);
+
+ // wait for space
+ while (isFull()) {
+ sem_wait(&mWaitForSpaceSem);
+ }
+
+ return &mBuffer[mWrite];
+}
+
+void Fifo::release() {
+ // update the read pointer now that the data has been handled
+ mRead = mReadCommit;
+
+ // handle the wrap-around
+ if (mRead >= mWrapThreshold) {
+ mRaggedEnd = mRead = mReadCommit = 0;
+ }
+
+ // send a notification that data is free (space is available)
+ sem_post(&mWaitForSpaceSem);
+}
+
+// This function will return null if no data is available
+char* Fifo::read(int *const length) {
+ // wait for data
+ if (isEmpty() && !mEnd) {
+ return NULL;
+ }
+
+ // obtain the length
+ do {
+ mReadCommit = mRaggedEnd ? mRaggedEnd : mWrite;
+ *length = mReadCommit - mRead;
+	} while (*length < 0); // the writer may update mWrite/mRaggedEnd concurrently; retry on a torn read instead of taking a lock
+
+ return &mBuffer[mRead];
+}
diff --git a/tools/gator/daemon/Fifo.h b/tools/gator/daemon/Fifo.h
new file mode 100644
index 000000000000..21c8d8580391
--- /dev/null
+++ b/tools/gator/daemon/Fifo.h
@@ -0,0 +1,48 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __FIFO_H__
+#define __FIFO_H__
+
+#ifdef WIN32
+#include <windows.h>
+#define sem_t HANDLE
+#define sem_init(sem, pshared, value) ((*(sem) = CreateSemaphore(NULL, value, LONG_MAX, NULL)) == NULL)
+#define sem_wait(sem) WaitForSingleObject(*(sem), INFINITE)
+#define sem_post(sem) ReleaseSemaphore(*(sem), 1, NULL)
+#define sem_destroy(sem) CloseHandle(*(sem))
+#else
+#include <semaphore.h>
+#endif
+
+class Fifo {
+public:
+ Fifo(int singleBufferSize, int totalBufferSize, sem_t* readerSem);
+ ~Fifo();
+ int numBytesFilled() const;
+ bool isEmpty() const;
+ bool isFull() const;
+ bool willFill(int additional) const;
+ char* start() const;
+ char* write(int length);
+ void release();
+ char* read(int *const length);
+
+private:
+ int mSingleBufferSize, mWrite, mRead, mReadCommit, mRaggedEnd, mWrapThreshold;
+ sem_t mWaitForSpaceSem;
+ sem_t* mReaderSem;
+ char* mBuffer;
+ bool mEnd;
+
+ // Intentionally unimplemented
+ Fifo(const Fifo &);
+ Fifo &operator=(const Fifo &);
+};
+
+#endif //__FIFO_H__
diff --git a/tools/gator/daemon/FtraceDriver.cpp b/tools/gator/daemon/FtraceDriver.cpp
new file mode 100644
index 000000000000..b156f1c0b8b4
--- /dev/null
+++ b/tools/gator/daemon/FtraceDriver.cpp
@@ -0,0 +1,118 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "FtraceDriver.h"
+
+#include <regex.h>
+
+#include "Logging.h"
+
+class FtraceCounter : public DriverCounter {
+public:
+ FtraceCounter(DriverCounter *next, char *name, const char *regex);
+ ~FtraceCounter();
+
+ int read(const char *const line, int64_t *values);
+
+private:
+ regex_t reg;
+
+ // Intentionally unimplemented
+ FtraceCounter(const FtraceCounter &);
+ FtraceCounter &operator=(const FtraceCounter &);
+};
+
+FtraceCounter::FtraceCounter(DriverCounter *next, char *name, const char *regex) : DriverCounter(next, name) {
+ int result = regcomp(&reg, regex, REG_EXTENDED);
+ if (result != 0) {
+ char buf[128];
+ regerror(result, &reg, buf, sizeof(buf));
+ logg->logError(__FILE__, __LINE__, "Invalid regex '%s': %s", regex, buf);
+ handleException();
+ }
+}
+
+FtraceCounter::~FtraceCounter() {
+ regfree(&reg);
+}
+
+int FtraceCounter::read(const char *const line, int64_t *values) {
+ regmatch_t match[2];
+ int result = regexec(&reg, line, 2, match, 0);
+ if (result != 0) {
+ // No match
+ return 0;
+ }
+
+ if (match[1].rm_so < 0) {
+ logg->logError(__FILE__, __LINE__, "Parsing %s failed", getName());
+ handleException();
+ }
+
+ errno = 0;
+ int64_t value = strtoll(line + match[1].rm_so, NULL, 0);
+ if (errno != 0) {
+ logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", getName(), strerror(errno));
+ handleException();
+ }
+
+ values[0] = getKey();
+ values[1] = value;
+
+ return 1;
+}
+
+FtraceDriver::FtraceDriver() : mValues(NULL) {
+}
+
+FtraceDriver::~FtraceDriver() {
+ delete [] mValues;
+}
+
+void FtraceDriver::readEvents(mxml_node_t *const xml) {
+ mxml_node_t *node = xml;
+ int count = 0;
+ while (true) {
+ node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
+ if (node == NULL) {
+ break;
+ }
+ const char *counter = mxmlElementGetAttr(node, "counter");
+ if (counter == NULL) {
+ continue;
+ }
+
+ if (strncmp(counter, "ftrace_", 7) != 0) {
+ continue;
+ }
+
+ const char *regex = mxmlElementGetAttr(node, "regex");
+ if (regex == NULL) {
+ logg->logError(__FILE__, __LINE__, "The regex counter %s is missing the required regex attribute", counter);
+ handleException();
+ }
+ setCounters(new FtraceCounter(getCounters(), strdup(counter), regex));
+ ++count;
+ }
+
+ mValues = new int64_t[2*count];
+}
+
+int FtraceDriver::read(const char *line, int64_t **buf) {
+ int count = 0;
+
+ for (FtraceCounter *counter = static_cast<FtraceCounter *>(getCounters()); counter != NULL; counter = static_cast<FtraceCounter *>(counter->getNext())) {
+ if (!counter->isEnabled()) {
+ continue;
+ }
+ count += counter->read(line, mValues + 2*count);
+ }
+
+ *buf = mValues;
+ return count;
+}
diff --git a/tools/gator/daemon/FtraceDriver.h b/tools/gator/daemon/FtraceDriver.h
new file mode 100644
index 000000000000..5f958bec672c
--- /dev/null
+++ b/tools/gator/daemon/FtraceDriver.h
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef FTRACEDRIVER_H
+#define FTRACEDRIVER_H
+
+#include "Driver.h"
+
+class FtraceDriver : public SimpleDriver {
+public:
+ FtraceDriver();
+ ~FtraceDriver();
+
+ void readEvents(mxml_node_t *const xml);
+
+ int read(const char *line, int64_t **buf);
+
+private:
+ int64_t *mValues;
+
+ // Intentionally unimplemented
+ FtraceDriver(const FtraceDriver &);
+ FtraceDriver &operator=(const FtraceDriver &);
+};
+
+#endif // FTRACEDRIVER_H
diff --git a/tools/gator/daemon/FtraceSource.cpp b/tools/gator/daemon/FtraceSource.cpp
new file mode 100644
index 000000000000..521633357417
--- /dev/null
+++ b/tools/gator/daemon/FtraceSource.cpp
@@ -0,0 +1,160 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "FtraceSource.h"
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "DriverSource.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+static void handler(int signum)
+{
+ (void)signum;
+}
+
+FtraceSource::FtraceSource(sem_t *senderSem) : mFtraceFh(NULL), mBuffer(0, FRAME_BLOCK_COUNTER, 128*1024, senderSem), mTid(-1), mTracingOn(0) {
+}
+
+FtraceSource::~FtraceSource() {
+}
+
+bool FtraceSource::prepare() {
+ {
+		struct sigaction act;
+		sigemptyset(&act.sa_mask); // the struct is not zero-initialized, so clear the signal mask explicitly
+		act.sa_handler = handler;
+		act.sa_flags = (int)SA_RESETHAND;
+ if (sigaction(SIGUSR1, &act, NULL) != 0) {
+ logg->logError(__FILE__, __LINE__, "sigaction failed: %s\n", strerror(errno));
+ handleException();
+ }
+ }
+
+ if (DriverSource::readIntDriver("/sys/kernel/debug/tracing/tracing_on", &mTracingOn)) {
+ logg->logError(__FILE__, __LINE__, "Unable to read if ftrace is enabled");
+ handleException();
+ }
+
+ if (DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", "0") != 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to turn ftrace off before truncating the buffer");
+ handleException();
+ }
+
+ {
+ int fd;
+ fd = open("/sys/kernel/debug/tracing/trace", O_WRONLY | O_TRUNC | O_CLOEXEC, 0666);
+ if (fd < 0) {
+ logg->logError(__FILE__, __LINE__, "Unable truncate ftrace buffer: %s", strerror(errno));
+ handleException();
+ }
+ close(fd);
+ }
+
+ if (DriverSource::writeDriver("/sys/kernel/debug/tracing/trace_clock", "perf") != 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to switch ftrace to the perf clock, please ensure you are running Linux 3.10 or later");
+ handleException();
+ }
+
+ mFtraceFh = fopen_cloexec("/sys/kernel/debug/tracing/trace_pipe", "rb");
+ if (mFtraceFh == NULL) {
+ logg->logError(__FILE__, __LINE__, "Unable to open trace_pipe");
+ handleException();
+ }
+
+ return true;
+}
+
+void FtraceSource::run() {
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-ftrace", 0, 0, 0);
+ mTid = syscall(__NR_gettid);
+
+ if (DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", "1") != 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to turn ftrace on");
+ handleException();
+ }
+
+ while (gSessionData->mSessionIsActive) {
+ char buf[1<<12];
+
+ if (fgets(buf, sizeof(buf), mFtraceFh) == NULL) {
+ if (errno == EINTR) {
+				// Interrupted by a signal - likely a user request to terminate
+ break;
+ }
+ logg->logError(__FILE__, __LINE__, "Unable read trace data: %s", strerror(errno));
+ handleException();
+ }
+
+ const uint64_t currTime = getTime();
+
+ char *const colon = strstr(buf, ": ");
+ if (colon == NULL) {
+ logg->logError(__FILE__, __LINE__, "Unable find colon: %s", buf);
+ handleException();
+ }
+ *colon = '\0';
+
+ char *const space = strrchr(buf, ' ');
+ if (space == NULL) {
+ logg->logError(__FILE__, __LINE__, "Unable find space: %s", buf);
+ handleException();
+ }
+ *colon = ':';
+
+ int64_t *data = NULL;
+ int count = gSessionData->ftraceDriver.read(colon + 2, &data);
+ if (count > 0) {
+ errno = 0;
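+			// the trace timestamp is in fractional seconds; scale to nanoseconds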
+ const long long time = strtod(space, NULL) * 1000000000;
+ if (errno != 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to parse time: %s", strerror(errno));
+ handleException();
+ }
+ mBuffer.event64(-1, time);
+
+ for (int i = 0; i < count; ++i) {
+ mBuffer.event64(data[2*i + 0], data[2*i + 1]);
+ }
+
+ mBuffer.check(currTime);
+ }
+
+ }
+
+ mBuffer.setDone();
+
+ DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", mTracingOn);
+ fclose(mFtraceFh);
+ DriverSource::writeDriver("/sys/kernel/debug/tracing/trace_clock", "local");
+}
+
+void FtraceSource::interrupt() {
+	// Closing the underlying file handle does not cause the blocked read on trace_pipe to return, so signal the thread instead
+ syscall(__NR_tgkill, getpid(), mTid, SIGUSR1);
+}
+
+bool FtraceSource::isDone() {
+ return mBuffer.isDone();
+}
+
+void FtraceSource::write(Sender *sender) {
+ // Don't send ftrace data until the summary packet is sent so that monotonic delta is available
+ if (!gSessionData->mSentSummary) {
+ return;
+ }
+ if (!mBuffer.isDone()) {
+ mBuffer.write(sender);
+ }
+}
diff --git a/tools/gator/daemon/FtraceSource.h b/tools/gator/daemon/FtraceSource.h
new file mode 100644
index 000000000000..2391b881494e
--- /dev/null
+++ b/tools/gator/daemon/FtraceSource.h
@@ -0,0 +1,43 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef FTRACESOURCE_H
+#define FTRACESOURCE_H
+
+#include <semaphore.h>
+#include <stdio.h>
+
+#include "Buffer.h"
+#include "Source.h"
+
+class FtraceSource : public Source {
+public:
+ FtraceSource(sem_t *senderSem);
+ ~FtraceSource();
+
+ bool prepare();
+ void run();
+ void interrupt();
+
+ bool isDone();
+ void write(Sender *sender);
+
+private:
+ void waitFor(const int bytes);
+
+ FILE *mFtraceFh;
+ Buffer mBuffer;
+ int mTid;
+ int mTracingOn;
+
+ // Intentionally unimplemented
+ FtraceSource(const FtraceSource &);
+ FtraceSource &operator=(const FtraceSource &);
+};
+
+#endif // FTRACESOURCE_H
diff --git a/tools/gator/daemon/HwmonDriver.cpp b/tools/gator/daemon/HwmonDriver.cpp
new file mode 100644
index 000000000000..9d161ae5ac56
--- /dev/null
+++ b/tools/gator/daemon/HwmonDriver.cpp
@@ -0,0 +1,247 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "HwmonDriver.h"
+
+#include "libsensors/sensors.h"
+
+#include "Logging.h"
+
+// feature->type to input map
+static sensors_subfeature_type getInput(const sensors_feature_type type) {
+ switch (type) {
+ case SENSORS_FEATURE_IN: return SENSORS_SUBFEATURE_IN_INPUT;
+ case SENSORS_FEATURE_FAN: return SENSORS_SUBFEATURE_FAN_INPUT;
+ case SENSORS_FEATURE_TEMP: return SENSORS_SUBFEATURE_TEMP_INPUT;
+ case SENSORS_FEATURE_POWER: return SENSORS_SUBFEATURE_POWER_INPUT;
+ case SENSORS_FEATURE_ENERGY: return SENSORS_SUBFEATURE_ENERGY_INPUT;
+ case SENSORS_FEATURE_CURR: return SENSORS_SUBFEATURE_CURR_INPUT;
+ case SENSORS_FEATURE_HUMIDITY: return SENSORS_SUBFEATURE_HUMIDITY_INPUT;
+ default:
+ logg->logError(__FILE__, __LINE__, "Unsupported hwmon feature %i", type);
+ handleException();
+ }
+}
+
+class HwmonCounter : public DriverCounter {
+public:
+ HwmonCounter(DriverCounter *next, char *const name, const sensors_chip_name *chip, const sensors_feature *feature);
+ ~HwmonCounter();
+
+ const char *getLabel() const { return label; }
+ const char *getTitle() const { return title; }
+ bool isDuplicate() const { return duplicate; }
+ const char *getDisplay() const { return display; }
+ const char *getCounterClass() const { return counter_class; }
+ const char *getUnit() const { return unit; }
+ int getModifier() const { return modifier; }
+
+ int64_t read();
+
+private:
+ void init(const sensors_chip_name *chip, const sensors_feature *feature);
+
+ const sensors_chip_name *chip;
+ const sensors_feature *feature;
+ char *label;
+ const char *title;
+ const char *display;
+ const char *counter_class;
+ const char *unit;
+ double previous_value;
+ int modifier;
+ int monotonic: 1,
+ duplicate : 1;
+
+ // Intentionally unimplemented
+ HwmonCounter(const HwmonCounter &);
+ HwmonCounter &operator=(const HwmonCounter &);
+};
+
+HwmonCounter::HwmonCounter(DriverCounter *next, char *const name, const sensors_chip_name *chip, const sensors_feature *feature) : DriverCounter(next, name), chip(chip), feature(feature), previous_value(0), duplicate(false) {
+ label = sensors_get_label(chip, feature);
+
+ switch (feature->type) {
+ case SENSORS_FEATURE_IN:
+ title = "Voltage";
+ display = "maximum";
+ counter_class = "absolute";
+ unit = "V";
+ modifier = 1000;
+ monotonic = false;
+ break;
+ case SENSORS_FEATURE_FAN:
+ title = "Fan";
+ display = "average";
+ counter_class = "absolute";
+ unit = "RPM";
+ modifier = 1;
+ monotonic = false;
+ break;
+ case SENSORS_FEATURE_TEMP:
+ title = "Temperature";
+ display = "maximum";
+ counter_class = "absolute";
+ unit = "°C";
+ modifier = 1000;
+ monotonic = false;
+ break;
+ case SENSORS_FEATURE_POWER:
+ title = "Power";
+ display = "maximum";
+ counter_class = "absolute";
+ unit = "W";
+ modifier = 1000000;
+ monotonic = false;
+ break;
+ case SENSORS_FEATURE_ENERGY:
+ title = "Energy";
+ display = "accumulate";
+ counter_class = "delta";
+ unit = "J";
+ modifier = 1000000;
+ monotonic = true;
+ break;
+ case SENSORS_FEATURE_CURR:
+ title = "Current";
+ display = "maximum";
+ counter_class = "absolute";
+ unit = "A";
+ modifier = 1000;
+ monotonic = false;
+ break;
+ case SENSORS_FEATURE_HUMIDITY:
+ title = "Humidity";
+ display = "average";
+ counter_class = "absolute";
+ unit = "%";
+ modifier = 1000;
+ monotonic = false;
+ break;
+ default:
+ logg->logError(__FILE__, __LINE__, "Unsupported hwmon feature %i", feature->type);
+ handleException();
+ }
+
+ for (HwmonCounter * counter = static_cast<HwmonCounter *>(next); counter != NULL; counter = static_cast<HwmonCounter *>(counter->getNext())) {
+ if (strcmp(label, counter->getLabel()) == 0 && strcmp(title, counter->getTitle()) == 0) {
+ duplicate = true;
+ counter->duplicate = true;
+ break;
+ }
+ }
+}
+
+HwmonCounter::~HwmonCounter() {
+ free((void *)label);
+}
+
+int64_t HwmonCounter::read() {
+ double value;
+ double result;
+ const sensors_subfeature *subfeature;
+
+ // Keep in sync with the read check in HwmonDriver::readEvents
+ subfeature = sensors_get_subfeature(chip, feature, getInput(feature->type));
+ if (!subfeature) {
+ logg->logError(__FILE__, __LINE__, "No input value for hwmon sensor %s", label);
+ handleException();
+ }
+
+ if (sensors_get_value(chip, subfeature->number, &value) != 0) {
+ logg->logError(__FILE__, __LINE__, "Can't get input value for hwmon sensor %s", label);
+ handleException();
+ }
+
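+	// Energy accumulates (monotonic), so report the delta since the previous read; other sensors are instantaneous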
+ result = (monotonic ? value - previous_value : value);
+ previous_value = value;
+
+ return result;
+}
+
+HwmonDriver::HwmonDriver() {
+}
+
+HwmonDriver::~HwmonDriver() {
+ sensors_cleanup();
+}
+
+void HwmonDriver::readEvents(mxml_node_t *const) {
+ int err = sensors_init(NULL);
+ if (err) {
+ logg->logMessage("Failed to initialize libsensors! (%d)", err);
+ return;
+ }
+ sensors_sysfs_no_scaling = 1;
+
+ int chip_nr = 0;
+ const sensors_chip_name *chip;
+ while ((chip = sensors_get_detected_chips(NULL, &chip_nr))) {
+ int feature_nr = 0;
+ const sensors_feature *feature;
+ while ((feature = sensors_get_features(chip, &feature_nr))) {
+ // Keep in sync with HwmonCounter::read
+ // Can this counter be read?
+ double value;
+ const sensors_subfeature *const subfeature = sensors_get_subfeature(chip, feature, getInput(feature->type));
+ if ((subfeature == NULL) || (sensors_get_value(chip, subfeature->number, &value) != 0)) {
+ continue;
+ }
+
+ // Get the name of the counter
+ int len = sensors_snprintf_chip_name(NULL, 0, chip) + 1;
+ char *chip_name = new char[len];
+ sensors_snprintf_chip_name(chip_name, len, chip);
+ len = snprintf(NULL, 0, "hwmon_%s_%d_%d", chip_name, chip_nr, feature->number) + 1;
+ char *const name = new char[len];
+ snprintf(name, len, "hwmon_%s_%d_%d", chip_name, chip_nr, feature->number);
+ delete [] chip_name;
+
+ setCounters(new HwmonCounter(getCounters(), name, chip, feature));
+ }
+ }
+}
+
+void HwmonDriver::writeEvents(mxml_node_t *root) const {
+ root = mxmlNewElement(root, "category");
+ mxmlElementSetAttr(root, "name", "hwmon");
+
+ char buf[1024];
+ for (HwmonCounter *counter = static_cast<HwmonCounter *>(getCounters()); counter != NULL; counter = static_cast<HwmonCounter *>(counter->getNext())) {
+ mxml_node_t *node = mxmlNewElement(root, "event");
+ mxmlElementSetAttr(node, "counter", counter->getName());
+ mxmlElementSetAttr(node, "title", counter->getTitle());
+ if (counter->isDuplicate()) {
+ mxmlElementSetAttrf(node, "name", "%s (0x%x)", counter->getLabel(), counter->getKey());
+ } else {
+ mxmlElementSetAttr(node, "name", counter->getLabel());
+ }
+ mxmlElementSetAttr(node, "display", counter->getDisplay());
+ mxmlElementSetAttr(node, "class", counter->getCounterClass());
+ mxmlElementSetAttr(node, "units", counter->getUnit());
+ if (counter->getModifier() != 1) {
+ mxmlElementSetAttrf(node, "modifier", "%d", counter->getModifier());
+ }
+ if (strcmp(counter->getDisplay(), "average") == 0 || strcmp(counter->getDisplay(), "maximum") == 0) {
+ mxmlElementSetAttr(node, "average_selection", "yes");
+ }
+ snprintf(buf, sizeof(buf), "libsensors %s sensor %s (%s)", counter->getTitle(), counter->getLabel(), counter->getName());
+ mxmlElementSetAttr(node, "description", buf);
+ }
+}
+
+void HwmonDriver::start() {
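+	// Read each enabled counter once to prime previous_value, so the first delta of a monotonic counter is measured from capture start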
+ for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
+ if (!counter->isEnabled()) {
+ continue;
+ }
+ counter->read();
+ }
+}
diff --git a/tools/gator/daemon/HwmonDriver.h b/tools/gator/daemon/HwmonDriver.h
new file mode 100644
index 000000000000..f28d825e3b7b
--- /dev/null
+++ b/tools/gator/daemon/HwmonDriver.h
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef HWMONDRIVER_H
+#define HWMONDRIVER_H
+
+#include "Driver.h"
+
+class HwmonDriver : public PolledDriver {
+public:
+ HwmonDriver();
+ ~HwmonDriver();
+
+ void readEvents(mxml_node_t *const root);
+
+ void writeEvents(mxml_node_t *root) const;
+
+ void start();
+
+private:
+ // Intentionally unimplemented
+ HwmonDriver(const HwmonDriver &);
+ HwmonDriver &operator=(const HwmonDriver &);
+};
+
+#endif // HWMONDRIVER_H
diff --git a/tools/gator/daemon/KMod.cpp b/tools/gator/daemon/KMod.cpp
new file mode 100644
index 000000000000..fe9dc6a7e4f7
--- /dev/null
+++ b/tools/gator/daemon/KMod.cpp
@@ -0,0 +1,119 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "KMod.h"
+
+#include <sys/types.h>
+#include <dirent.h>
+#include <unistd.h>
+
+#include "ConfigurationXML.h"
+#include "Counter.h"
+#include "DriverSource.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+// Claim all the counters in /dev/gator/events
+bool KMod::claimCounter(const Counter &counter) const {
+ char text[128];
+ snprintf(text, sizeof(text), "/dev/gator/events/%s", counter.getType());
+ return access(text, F_OK) == 0;
+}
+
+void KMod::resetCounters() {
+ char base[128];
+ char text[128];
+
+ // Initialize all perf counters in the driver, i.e. set enabled to zero
+ struct dirent *ent;
+ DIR* dir = opendir("/dev/gator/events");
+ if (dir) {
+ while ((ent = readdir(dir)) != NULL) {
+ // skip hidden files, current dir, and parent dir
+ if (ent->d_name[0] == '.')
+ continue;
+ snprintf(base, sizeof(base), "/dev/gator/events/%s", ent->d_name);
+ snprintf(text, sizeof(text), "%s/enabled", base);
+ DriverSource::writeDriver(text, 0);
+ snprintf(text, sizeof(text), "%s/count", base);
+ DriverSource::writeDriver(text, 0);
+ }
+ closedir(dir);
+ }
+}
+
+static const char ARM_MALI_MIDGARD[] = "ARM_Mali-Midgard_";
+static const char ARM_MALI_T[] = "ARM_Mali-T";
+
+void KMod::setupCounter(Counter &counter) {
+ char base[128];
+ char text[128];
+ snprintf(base, sizeof(base), "/dev/gator/events/%s", counter.getType());
+
+ if ((strncmp(counter.getType(), ARM_MALI_MIDGARD, sizeof(ARM_MALI_MIDGARD) - 1) == 0 ||
+ strncmp(counter.getType(), ARM_MALI_T, sizeof(ARM_MALI_T) - 1) == 0)) {
+ mIsMaliCapture = true;
+ }
+
+ snprintf(text, sizeof(text), "%s/enabled", base);
+ int enabled = true;
+ if (DriverSource::writeReadDriver(text, &enabled) || !enabled) {
+ counter.setEnabled(false);
+ return;
+ }
+
+ int value = 0;
+ snprintf(text, sizeof(text), "%s/key", base);
+ DriverSource::readIntDriver(text, &value);
+ counter.setKey(value);
+
+ snprintf(text, sizeof(text), "%s/cores", base);
+ if (DriverSource::readIntDriver(text, &value) == 0) {
+ counter.setCores(value);
+ }
+
+ snprintf(text, sizeof(text), "%s/event", base);
+ DriverSource::writeDriver(text, counter.getEvent());
+ snprintf(text, sizeof(text), "%s/count", base);
+ if (access(text, F_OK) == 0) {
+ int count = counter.getCount();
+ if (DriverSource::writeReadDriver(text, &count) && counter.getCount() > 0) {
+ logg->logError(__FILE__, __LINE__, "Cannot enable EBS for %s:%i with a count of %d\n", counter.getType(), counter.getEvent(), counter.getCount());
+ handleException();
+ }
+ counter.setCount(count);
+ } else if (counter.getCount() > 0) {
+ ConfigurationXML::remove();
+ logg->logError(__FILE__, __LINE__, "Event Based Sampling is only supported with kernel versions 3.0.0 and higher with CONFIG_PERF_EVENTS=y, and CONFIG_HW_PERF_EVENTS=y. The invalid configuration.xml has been removed.\n");
+ handleException();
+ }
+}
+
+int KMod::writeCounters(mxml_node_t *root) const {
+ struct dirent *ent;
+ mxml_node_t *counter;
+
+ // counters.xml is simply a file listing of /dev/gator/events
+ DIR* dir = opendir("/dev/gator/events");
+ if (dir == NULL) {
+ return 0;
+ }
+
+ int count = 0;
+ while ((ent = readdir(dir)) != NULL) {
+ // skip hidden files, current dir, and parent dir
+ if (ent->d_name[0] == '.')
+ continue;
+ counter = mxmlNewElement(root, "counter");
+ mxmlElementSetAttr(counter, "name", ent->d_name);
+ ++count;
+ }
+ closedir(dir);
+
+ return count;
+}
diff --git a/tools/gator/daemon/KMod.h b/tools/gator/daemon/KMod.h
new file mode 100644
index 000000000000..900a60e87d24
--- /dev/null
+++ b/tools/gator/daemon/KMod.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef KMOD_H
+#define KMOD_H
+
+#include "Driver.h"
+
+// Driver for the gator kernel module
+class KMod : public Driver {
+public:
+ KMod() : mIsMaliCapture(false) {}
+ ~KMod() {}
+
+ bool claimCounter(const Counter &counter) const;
+ void resetCounters();
+ void setupCounter(Counter &counter);
+
+ int writeCounters(mxml_node_t *root) const;
+
+ bool isMaliCapture() const { return mIsMaliCapture; }
+
+private:
+ bool mIsMaliCapture;
+};
+
+#endif // KMOD_H
diff --git a/tools/gator/daemon/LocalCapture.cpp b/tools/gator/daemon/LocalCapture.cpp
new file mode 100644
index 000000000000..d2a4b799d7ac
--- /dev/null
+++ b/tools/gator/daemon/LocalCapture.cpp
@@ -0,0 +1,131 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "LocalCapture.h"
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "SessionData.h"
+#include "Logging.h"
+#include "OlyUtility.h"
+#include "EventsXML.h"
+
+LocalCapture::LocalCapture() {}
+
+LocalCapture::~LocalCapture() {}
+
+void LocalCapture::createAPCDirectory(char* target_path) {
+ gSessionData->mAPCDir = createUniqueDirectory(target_path, ".apc");
+ if ((removeDirAndAllContents(gSessionData->mAPCDir) != 0 || mkdir(gSessionData->mAPCDir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) != 0)) {
+ logg->logError(__FILE__, __LINE__, "Unable to create directory %s", gSessionData->mAPCDir);
+ handleException();
+ }
+}
+
+void LocalCapture::write(char* string) {
+ char file[PATH_MAX];
+
+ // Set full path
+ snprintf(file, PATH_MAX, "%s/session.xml", gSessionData->mAPCDir);
+
+ // Write the file
+ if (util->writeToDisk(file, string) < 0) {
+ logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify the path.", file);
+ handleException();
+ }
+
+ // Write events XML
+ EventsXML eventsXML;
+ eventsXML.write(gSessionData->mAPCDir);
+}
+
+char* LocalCapture::createUniqueDirectory(const char* initialPath, const char* ending) {
+ char* output;
+ char path[PATH_MAX];
+
+ // Ensure the path is an absolute path, i.e. starts with a slash
+ if (initialPath == 0 || strlen(initialPath) == 0) {
+ logg->logError(__FILE__, __LINE__, "Missing -o command line option required for a local capture.");
+ handleException();
+ } else if (initialPath[0] != '/') {
+ if (getcwd(path, PATH_MAX) == 0) {
+ logg->logMessage("Unable to retrieve the current working directory");
+ }
+ strncat(path, "/", PATH_MAX - strlen(path) - 1);
+ strncat(path, initialPath, PATH_MAX - strlen(path) - 1);
+ } else {
+ strncpy(path, initialPath, PATH_MAX);
+ path[PATH_MAX - 1] = 0; // strncpy does not guarantee a null-terminated string
+ }
+
+ // Add ending if it is not already there (guard against paths shorter than the ending)
+ if (strlen(path) < strlen(ending) || strcmp(&path[strlen(path) - strlen(ending)], ending) != 0) {
+ strncat(path, ending, PATH_MAX - strlen(path) - 1);
+ }
+
+ output = strdup(path);
+
+ return output;
+}
+
+int LocalCapture::removeDirAndAllContents(char* path) {
+ int error = 0;
+ struct stat mFileInfo;
+ // Does the path exist?
+ if (stat(path, &mFileInfo) == 0) {
+ // Is it a directory?
+ if (mFileInfo.st_mode & S_IFDIR) {
+ DIR* dir = opendir(path);
+ if (dir == NULL) {
+ return -1;
+ }
+ dirent* entry = readdir(dir);
+ while (entry) {
+ if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
+ char* newpath = (char*)malloc(strlen(path) + strlen(entry->d_name) + 2);
+ sprintf(newpath, "%s/%s", path, entry->d_name);
+ error = removeDirAndAllContents(newpath);
+ free(newpath);
+ if (error) {
+ break;
+ }
+ }
+ entry = readdir(dir);
+ }
+ closedir(dir);
+ if (error == 0) {
+ error = rmdir(path);
+ }
+ } else {
+ error = remove(path);
+ }
+ }
+ return error;
+}
+
+void LocalCapture::copyImages(ImageLinkList* ptr) {
+ char dstfilename[PATH_MAX];
+
+ while (ptr) {
+ strncpy(dstfilename, gSessionData->mAPCDir, PATH_MAX);
+ dstfilename[PATH_MAX - 1] = 0; // strncpy does not guarantee a null-terminated string
+ if (gSessionData->mAPCDir[strlen(gSessionData->mAPCDir) - 1] != '/') {
+ strncat(dstfilename, "/", PATH_MAX - strlen(dstfilename) - 1);
+ }
+ strncat(dstfilename, util->getFilePart(ptr->path), PATH_MAX - strlen(dstfilename) - 1);
+ if (util->copyFile(ptr->path, dstfilename)) {
+ logg->logMessage("copied file %s to %s", ptr->path, dstfilename);
+ } else {
+ logg->logMessage("copy of file %s to %s failed", ptr->path, dstfilename);
+ }
+
+ ptr = ptr->next;
+ }
+}
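
The directory-name handling in createUniqueDirectory() reduces to: make the path absolute, then append the .apc suffix if it is not already present. A condensed sketch of just that normalization step, under the same PATH_MAX-style buffer assumptions as the code above:

    #include <cstdio>
    #include <cstring>
    #include <unistd.h>

    // Normalize a capture path: absolute, ending in ".apc".
    // Mirrors LocalCapture::createUniqueDirectory without the allocation.
    static void normalizeApcPath(const char *in, char *out, size_t outSize) {
        const char ending[] = ".apc";
        if (in[0] != '/') {
            // Relative path: prefix the current working directory
            if (getcwd(out, outSize) == NULL) {
                out[0] = '\0';
            }
            strncat(out, "/", outSize - strlen(out) - 1);
            strncat(out, in, outSize - strlen(out) - 1);
        } else {
            snprintf(out, outSize, "%s", in);
        }
        // Append the suffix only if the path does not already end with it
        const size_t len = strlen(out);
        const size_t elen = strlen(ending);
        if (len < elen || strcmp(out + len - elen, ending) != 0) {
            strncat(out, ending, outSize - len - 1);
        }
    }
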
diff --git a/tools/gator/daemon/LocalCapture.h b/tools/gator/daemon/LocalCapture.h
new file mode 100644
index 000000000000..25d281f8328b
--- /dev/null
+++ b/tools/gator/daemon/LocalCapture.h
@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LOCAL_CAPTURE_H__
+#define __LOCAL_CAPTURE_H__
+
+struct ImageLinkList;
+
+class LocalCapture {
+public:
+ LocalCapture();
+ ~LocalCapture();
+ void write(char* string);
+ void copyImages(ImageLinkList* ptr);
+ void createAPCDirectory(char* target_path);
+private:
+ char* createUniqueDirectory(const char* path, const char* ending);
+ int removeDirAndAllContents(char* path);
+};
+
+#endif //__LOCAL_CAPTURE_H__
diff --git a/tools/gator/daemon/Logging.cpp b/tools/gator/daemon/Logging.cpp
new file mode 100644
index 000000000000..41ffa1a45151
--- /dev/null
+++ b/tools/gator/daemon/Logging.cpp
@@ -0,0 +1,78 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Logging.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#ifdef WIN32
+#define MUTEX_INIT() mLoggingMutex = CreateMutex(NULL, false, NULL);
+#define MUTEX_LOCK() WaitForSingleObject(mLoggingMutex, 0xFFFFFFFF);
+#define MUTEX_UNLOCK() ReleaseMutex(mLoggingMutex);
+#define snprintf _snprintf
+#else
+#include <pthread.h>
+#define MUTEX_INIT() pthread_mutex_init(&mLoggingMutex, NULL)
+#define MUTEX_LOCK() pthread_mutex_lock(&mLoggingMutex)
+#define MUTEX_UNLOCK() pthread_mutex_unlock(&mLoggingMutex)
+#endif
+
+// Global thread-safe logging
+Logging* logg = NULL;
+
+Logging::Logging(bool debug) {
+ mDebug = debug;
+ MUTEX_INIT();
+
+ strcpy(mErrBuf, "Unknown Error");
+ strcpy(mLogBuf, "Unknown Message");
+}
+
+Logging::~Logging() {
+}
+
+void Logging::logError(const char* file, int line, const char* fmt, ...) {
+ va_list args;
+
+ MUTEX_LOCK();
+ if (mDebug) {
+ snprintf(mErrBuf, sizeof(mErrBuf), "ERROR[%s:%d]: ", file, line);
+ } else {
+ mErrBuf[0] = 0;
+ }
+
+ va_start(args, fmt);
+ vsnprintf(mErrBuf + strlen(mErrBuf), sizeof(mErrBuf) - 2 - strlen(mErrBuf), fmt, args); // subtract 2 for \n and \0
+ va_end(args);
+
+ if (strlen(mErrBuf) > 0) {
+ strcat(mErrBuf, "\n");
+ }
+ MUTEX_UNLOCK();
+}
+
+void Logging::logMessage(const char* fmt, ...) {
+ if (mDebug) {
+ va_list args;
+
+ MUTEX_LOCK();
+ strcpy(mLogBuf, "INFO: ");
+
+ va_start(args, fmt);
+ vsnprintf(mLogBuf + strlen(mLogBuf), sizeof(mLogBuf) - 2 - strlen(mLogBuf), fmt, args); // subtract 2 for \n and \0
+ va_end(args);
+ strcat(mLogBuf, "\n");
+
+ fprintf(stdout, "%s", mLogBuf);
+ fflush(stdout);
+ MUTEX_UNLOCK();
+ }
+}
diff --git a/tools/gator/daemon/Logging.h b/tools/gator/daemon/Logging.h
new file mode 100644
index 000000000000..09e93ff13f7a
--- /dev/null
+++ b/tools/gator/daemon/Logging.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LOGGING_H__
+#define __LOGGING_H__
+
+#include <pthread.h>
+
+#define DRIVER_ERROR "\n Driver issue:\n >> gator.ko must be built against the current kernel version & configuration\n >> gator.ko should be co-located with gatord in the same directory\n >> OR insmod gator.ko prior to launching gatord"
+
+class Logging {
+public:
+ Logging(bool debug);
+ ~Logging();
+ void logError(const char* file, int line, const char* fmt, ...);
+ void logMessage(const char* fmt, ...);
+ char* getLastError() {return mErrBuf;}
+ char* getLastMessage() {return mLogBuf;}
+
+private:
+ char mErrBuf[4096]; // Arbitrarily large buffer to hold a string
+ char mLogBuf[4096]; // Arbitrarily large buffer to hold a string
+ bool mDebug;
+ pthread_mutex_t mLoggingMutex;
+};
+
+extern Logging* logg;
+
+extern void handleException() __attribute__ ((noreturn));
+
+#endif //__LOGGING_H__
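
Callers use the global logg pointer for both paths: logError() plus handleException() for fatal errors, logMessage() for debug-only chatter. A minimal usage sketch; the helper and its failure flag are hypothetical, and logg itself is created during daemon startup outside this hunk:

    #include "Logging.h"

    // Hypothetical helper showing the two logging paths used throughout gatord
    static void openDevice(const char *path, bool required) {
        const bool failed = true; // placeholder result for illustration
        if (failed) {
            if (required) {
                // Fatal: record the error and unwind via handleException()
                logg->logError(__FILE__, __LINE__, "Unable to open %s", path);
                handleException(); // declared noreturn above
            }
            // Non-fatal: only printed when gatord runs in debug mode
            logg->logMessage("Optional device %s not present", path);
        }
    }
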
diff --git a/tools/gator/daemon/Makefile b/tools/gator/daemon/Makefile
new file mode 100644
index 000000000000..27531b438b63
--- /dev/null
+++ b/tools/gator/daemon/Makefile
@@ -0,0 +1,22 @@
+#
+# Makefile for ARM Streamline - Gator Daemon
+#
+
+# Uncomment and define CROSS_COMPILE if it is not already defined
+# CROSS_COMPILE=/path/to/cross-compiler/arm-linux-gnueabihf-
+# NOTE: This toolchain uses the hardfloat abi by default. For non-hardfloat
+# targets run 'make SOFTFLOAT=1 SYSROOT=/path/to/sysroot', see
+# README_Streamline.txt for more details
+
+CC = $(CROSS_COMPILE)gcc
+CXX = $(CROSS_COMPILE)g++
+
+ifeq ($(SOFTFLOAT),1)
+ CPPFLAGS += -marm -mthumb-interwork -march=armv4t -mfloat-abi=soft
+ LDFLAGS += -marm -march=armv4t -mfloat-abi=soft
+endif
+ifneq ($(SYSROOT),)
+ LDFLAGS += --sysroot=$(SYSROOT)
+endif
+
+include common.mk
diff --git a/tools/gator/daemon/Makefile_aarch64 b/tools/gator/daemon/Makefile_aarch64
new file mode 100644
index 000000000000..efd1fa002182
--- /dev/null
+++ b/tools/gator/daemon/Makefile_aarch64
@@ -0,0 +1,12 @@
+#
+# Makefile for ARM Streamline - Gator Daemon
+# make -f Makefile_aarch64
+#
+
+# Uncomment and define CROSS_COMPILE if it is not already defined
+# CROSS_COMPILE=/path/to/cross-compiler/aarch64-linux-gnu-
+
+CC = $(CROSS_COMPILE)gcc
+CXX = $(CROSS_COMPILE)g++
+
+include common.mk
diff --git a/tools/gator/daemon/MaliVideoDriver.cpp b/tools/gator/daemon/MaliVideoDriver.cpp
new file mode 100644
index 000000000000..5eef2643ab15
--- /dev/null
+++ b/tools/gator/daemon/MaliVideoDriver.cpp
@@ -0,0 +1,191 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "MaliVideoDriver.h"
+
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "Counter.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+// From instr/src/mve_instr_comm_protocol.h
+typedef enum mve_instr_configuration_type {
+ MVE_INSTR_RAW = 1 << 0,
+ MVE_INSTR_COUNTERS = 1 << 1,
+ MVE_INSTR_EVENTS = 1 << 2,
+ MVE_INSTR_ACTIVITIES = 1 << 3,
+
+ // Raw always pushed regardless
+ MVE_INSTR_PULL = 1 << 12,
+ // Raw always unpacked regardless
+ MVE_INSTR_PACKED_COMM = 1 << 13,
+ // Don't send ACK response automatically
+ MVE_INSTR_NO_AUTO_ACK = 1 << 14,
+} mve_instr_configuration_type_t;
+
+static const char COUNTER[] = "ARM_Mali-V500_cnt";
+static const char EVENT[] = "ARM_Mali-V500_evn";
+static const char ACTIVITY[] = "ARM_Mali-V500_act";
+
+class MaliVideoCounter : public DriverCounter {
+public:
+ MaliVideoCounter(DriverCounter *next, const char *name, const MaliVideoCounterType type, const int id) : DriverCounter(next, name), mType(type), mId(id) {
+ }
+
+ ~MaliVideoCounter() {
+ }
+
+ MaliVideoCounterType getType() const { return mType; }
+ int getId() const { return mId; }
+
+private:
+ const MaliVideoCounterType mType;
+ // Mali Video id
+ const int mId;
+};
+
+MaliVideoDriver::MaliVideoDriver() {
+}
+
+MaliVideoDriver::~MaliVideoDriver() {
+}
+
+void MaliVideoDriver::readEvents(mxml_node_t *const xml) {
+ mxml_node_t *node = xml;
+ while (true) {
+ node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
+ if (node == NULL) {
+ break;
+ }
+ const char *counter = mxmlElementGetAttr(node, "counter");
+ if (counter == NULL) {
+ // Ignore
+ } else if (strncmp(counter, COUNTER, sizeof(COUNTER) - 1) == 0) {
+ const int i = strtol(counter + sizeof(COUNTER) - 1, NULL, 10);
+ setCounters(new MaliVideoCounter(getCounters(), strdup(counter), MVCT_COUNTER, i));
+ } else if (strncmp(counter, EVENT, sizeof(EVENT) - 1) == 0) {
+ const int i = strtol(counter + sizeof(EVENT) - 1, NULL, 10);
+ setCounters(new MaliVideoCounter(getCounters(), strdup(counter), MVCT_EVENT, i));
+ } else if (strncmp(counter, ACTIVITY, sizeof(ACTIVITY) - 1) == 0) {
+ const int i = strtol(counter + sizeof(ACTIVITY) - 1, NULL, 10);
+ setCounters(new MaliVideoCounter(getCounters(), strdup(counter), MVCT_ACTIVITY, i));
+ }
+ }
+}
+
+int MaliVideoDriver::writeCounters(mxml_node_t *root) const {
+ if (access("/dev/mv500", F_OK) != 0) {
+ return 0;
+ }
+
+ return super::writeCounters(root);
+}
+
+void MaliVideoDriver::marshalEnable(const MaliVideoCounterType type, char *const buf, const size_t bufsize, int &pos) {
+ // size
+ int numEnabled = 0;
+ for (MaliVideoCounter *counter = static_cast<MaliVideoCounter *>(getCounters()); counter != NULL; counter = static_cast<MaliVideoCounter *>(counter->getNext())) {
+ if (counter->isEnabled() && (counter->getType() == type)) {
+ ++numEnabled;
+ }
+ }
+ Buffer::packInt(buf, bufsize, pos, numEnabled*sizeof(uint32_t));
+ for (MaliVideoCounter *counter = static_cast<MaliVideoCounter *>(getCounters()); counter != NULL; counter = static_cast<MaliVideoCounter *>(counter->getNext())) {
+ if (counter->isEnabled() && (counter->getType() == type)) {
+ Buffer::packInt(buf, bufsize, pos, counter->getId());
+ }
+ }
+}
+
+static bool writeAll(const int mveUds, const char *const buf, const int pos) {
+ int written = 0;
+ while (written < pos) {
+ ssize_t bytes = ::write(mveUds, buf + written, pos - written);
+ if (bytes <= 0) {
+ logg->logMessage("%s(%s:%i): write failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ written += bytes;
+ }
+
+ return true;
+}
+
+bool MaliVideoDriver::start(const int mveUds) {
+ char buf[256];
+ int pos = 0;
+
+ // code - MVE_INSTR_STARTUP
+ buf[pos++] = 'C';
+ buf[pos++] = 'L';
+ buf[pos++] = 'N';
+ buf[pos++] = 'T';
+ // size
+ Buffer::packInt(buf, sizeof(buf), pos, sizeof(uint32_t));
+ // client_version_number
+ Buffer::packInt(buf, sizeof(buf), pos, 1);
+
+ // code - MVE_INSTR_CONFIGURE
+ buf[pos++] = 'C';
+ buf[pos++] = 'N';
+ buf[pos++] = 'F';
+ buf[pos++] = 'G';
+ // size
+ Buffer::packInt(buf, sizeof(buf), pos, 5*sizeof(uint32_t));
+ // configuration
+ Buffer::packInt(buf, sizeof(buf), pos, MVE_INSTR_COUNTERS | MVE_INSTR_EVENTS | MVE_INSTR_ACTIVITIES | MVE_INSTR_PACKED_COMM);
+ // communication_protocol_version
+ Buffer::packInt(buf, sizeof(buf), pos, 1);
+ // data_protocol_version
+ Buffer::packInt(buf, sizeof(buf), pos, 1);
+ // sample_rate - convert samples/second to ms/sample
+ Buffer::packInt(buf, sizeof(buf), pos, 1000/gSessionData->mSampleRate);
+ // live_rate - convert ns/flush to ms/flush
+ Buffer::packInt(buf, sizeof(buf), pos, gSessionData->mLiveRate/1000000);
+
+ // code - MVE_INSTR_ENABLE_COUNTERS
+ buf[pos++] = 'C';
+ buf[pos++] = 'F';
+ buf[pos++] = 'G';
+ buf[pos++] = 'c';
+ marshalEnable(MVCT_COUNTER, buf, sizeof(buf), pos);
+
+ // code - MVE_INSTR_ENABLE_EVENTS
+ buf[pos++] = 'C';
+ buf[pos++] = 'F';
+ buf[pos++] = 'G';
+ buf[pos++] = 'e';
+ marshalEnable(MVCT_EVENT, buf, sizeof(buf), pos);
+
+ // code - MVE_INSTR_ENABLE_ACTIVITIES
+ buf[pos++] = 'C';
+ buf[pos++] = 'F';
+ buf[pos++] = 'G';
+ buf[pos++] = 'a';
+ marshalEnable(MVCT_ACTIVITY, buf, sizeof(buf), pos);
+
+ return writeAll(mveUds, buf, pos);
+}
+
+void MaliVideoDriver::stop(const int mveUds) {
+ char buf[8];
+ int pos = 0;
+
+ // code - MVE_INSTR_STOP
+ buf[pos++] = 'S';
+ buf[pos++] = 'T';
+ buf[pos++] = 'O';
+ buf[pos++] = 'P';
+ marshalEnable(MVCT_COUNTER, buf, sizeof(buf), pos);
+
+ writeAll(mveUds, buf, pos);
+
+ close(mveUds);
+}
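
Every message start() assembles follows the same frame shape: a four-character code, a packed payload length, then the payload, all written to the MVE instrumentation socket in one go. A simplified sketch of that framing, assuming a plain little-endian 32-bit length where the code above uses Buffer::packInt's encoding (defined in Buffer.cpp, not shown here):

    #include <cstdint>
    #include <cstring>

    // Append one framed message: 4-char code, 32-bit length, payload.
    // Returns the new write position.
    static int frameMessage(char *buf, int pos, const char code[4],
                            const void *payload, uint32_t len) {
        memcpy(buf + pos, code, 4);
        pos += 4;
        for (int i = 0; i < 4; ++i) { // little-endian length
            buf[pos++] = (len >> (8 * i)) & 0xff;
        }
        memcpy(buf + pos, payload, len);
        return pos + len;
    }

    int main() {
        char buf[64];
        const uint32_t version = 1;
        // The startup message carries only the client version number
        int pos = frameMessage(buf, 0, "CLNT", &version, sizeof(version));
        (void)pos; // would then be sent with writeAll() as above
        return 0;
    }
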
diff --git a/tools/gator/daemon/MaliVideoDriver.h b/tools/gator/daemon/MaliVideoDriver.h
new file mode 100644
index 000000000000..204a57a447ac
--- /dev/null
+++ b/tools/gator/daemon/MaliVideoDriver.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MALIVIDEODRIVER_H
+#define MALIVIDEODRIVER_H
+
+#include "Driver.h"
+
+class MaliVideoCounter;
+
+enum MaliVideoCounterType {
+ MVCT_COUNTER,
+ MVCT_EVENT,
+ MVCT_ACTIVITY,
+};
+
+class MaliVideoDriver : public SimpleDriver {
+private:
+ typedef SimpleDriver super;
+
+public:
+ MaliVideoDriver();
+ ~MaliVideoDriver();
+
+ void readEvents(mxml_node_t *const root);
+
+ int writeCounters(mxml_node_t *root) const;
+
+ bool start(const int mveUds);
+ void stop(const int mveUds);
+
+private:
+ void marshalEnable(const MaliVideoCounterType type, char *const buf, const size_t bufsize, int &pos);
+
+ // Intentionally unimplemented
+ MaliVideoDriver(const MaliVideoDriver &);
+ MaliVideoDriver &operator=(const MaliVideoDriver &);
+};
+
+#endif // MALIVIDEODRIVER_H
diff --git a/tools/gator/daemon/MemInfoDriver.cpp b/tools/gator/daemon/MemInfoDriver.cpp
new file mode 100644
index 000000000000..cce15c16fcdc
--- /dev/null
+++ b/tools/gator/daemon/MemInfoDriver.cpp
@@ -0,0 +1,93 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "MemInfoDriver.h"
+
+#include "Logging.h"
+#include "SessionData.h"
+
+class MemInfoCounter : public DriverCounter {
+public:
+ MemInfoCounter(DriverCounter *next, char *const name, int64_t *const value);
+ ~MemInfoCounter();
+
+ int64_t read();
+
+private:
+ int64_t *const mValue;
+
+ // Intentionally unimplemented
+ MemInfoCounter(const MemInfoCounter &);
+ MemInfoCounter &operator=(const MemInfoCounter &);
+};
+
+MemInfoCounter::MemInfoCounter(DriverCounter *next, char *const name, int64_t *const value) : DriverCounter(next, name), mValue(value) {
+}
+
+MemInfoCounter::~MemInfoCounter() {
+}
+
+int64_t MemInfoCounter::read() {
+ return *mValue;
+}
+
+MemInfoDriver::MemInfoDriver() : mBuf(), mMemUsed(0), mMemFree(0), mBuffers(0) {
+}
+
+MemInfoDriver::~MemInfoDriver() {
+}
+
+void MemInfoDriver::readEvents(mxml_node_t *const) {
+ // Only for use with perf
+ if (!gSessionData->perf.isSetup()) {
+ return;
+ }
+
+ setCounters(new MemInfoCounter(getCounters(), strdup("Linux_meminfo_memused2"), &mMemUsed));
+ setCounters(new MemInfoCounter(getCounters(), strdup("Linux_meminfo_memfree"), &mMemFree));
+ setCounters(new MemInfoCounter(getCounters(), strdup("Linux_meminfo_bufferram"), &mBuffers));
+}
+
+void MemInfoDriver::read(Buffer *const buffer) {
+ if (!countersEnabled()) {
+ return;
+ }
+
+ if (!mBuf.read("/proc/meminfo")) {
+ logg->logError(__FILE__, __LINE__, "Failed to read /proc/meminfo");
+ handleException();
+ }
+
+ char *key = mBuf.getBuf();
+ char *colon;
+ int64_t memTotal = 0;
+ while ((colon = strchr(key, ':')) != NULL) {
+ char *end = strchr(colon + 1, '\n');
+ if (end != NULL) {
+ *end = '\0';
+ }
+ *colon = '\0';
+
+ if (strcmp(key, "MemTotal") == 0) {
+ memTotal = strtoll(colon + 1, NULL, 10) << 10;
+ } else if (strcmp(key, "MemFree") == 0) {
+ mMemFree = strtoll(colon + 1, NULL, 10) << 10;
+ } else if (strcmp(key, "Buffers") == 0) {
+ mBuffers = strtoll(colon + 1, NULL, 10) << 10;
+ }
+
+ if (end == NULL) {
+ break;
+ }
+ key = end + 1;
+ }
+
+ mMemUsed = memTotal - mMemFree;
+
+ super::read(buffer);
+}
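
The parser above relies on /proc/meminfo's fixed "Key: value kB" layout, and the << 10 shifts convert the kilobyte figures to bytes. A compact sketch of the same extraction using line-at-a-time stdio instead of the DynBuf helper:

    #include <cstdio>
    #include <cstring>

    int main() {
        FILE *f = std::fopen("/proc/meminfo", "r");
        if (f == NULL) {
            return 1;
        }
        char line[256];
        long long memTotal = 0, memFree = 0;
        while (std::fgets(line, sizeof(line), f) != NULL) {
            char key[64];
            long long valueKb;
            // Lines look like "MemTotal:       16316412 kB"
            if (std::sscanf(line, "%63[^:]: %lld", key, &valueKb) != 2) {
                continue;
            }
            if (std::strcmp(key, "MemTotal") == 0) {
                memTotal = valueKb << 10; // kB to bytes, as in the driver above
            } else if (std::strcmp(key, "MemFree") == 0) {
                memFree = valueKb << 10;
            }
        }
        std::fclose(f);
        std::printf("memused = %lld bytes\n", memTotal - memFree);
        return 0;
    }
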
diff --git a/tools/gator/daemon/MemInfoDriver.h b/tools/gator/daemon/MemInfoDriver.h
new file mode 100644
index 000000000000..eb1b0417f309
--- /dev/null
+++ b/tools/gator/daemon/MemInfoDriver.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MEMINFODRIVER_H
+#define MEMINFODRIVER_H
+
+#include "Driver.h"
+#include "DynBuf.h"
+
+class MemInfoDriver : public PolledDriver {
+private:
+ typedef PolledDriver super;
+
+public:
+ MemInfoDriver();
+ ~MemInfoDriver();
+
+ void readEvents(mxml_node_t *const root);
+ void read(Buffer *const buffer);
+
+private:
+ DynBuf mBuf;
+ int64_t mMemUsed;
+ int64_t mMemFree;
+ int64_t mBuffers;
+
+ // Intentionally unimplemented
+ MemInfoDriver(const MemInfoDriver &);
+ MemInfoDriver &operator=(const MemInfoDriver &);
+};
+
+#endif // MEMINFODRIVER_H
diff --git a/tools/gator/daemon/Monitor.cpp b/tools/gator/daemon/Monitor.cpp
new file mode 100644
index 000000000000..74f22ee29fec
--- /dev/null
+++ b/tools/gator/daemon/Monitor.cpp
@@ -0,0 +1,82 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Monitor.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "Logging.h"
+
+Monitor::Monitor() : mFd(-1) {
+}
+
+Monitor::~Monitor() {
+ if (mFd >= 0) {
+ ::close(mFd);
+ }
+}
+
+void Monitor::close() {
+ if (mFd >= 0) {
+ ::close(mFd);
+ mFd = -1;
+ }
+}
+
+bool Monitor::init() {
+#ifdef EPOLL_CLOEXEC
+ mFd = epoll_create1(EPOLL_CLOEXEC);
+#else
+ mFd = epoll_create(16);
+#endif
+ if (mFd < 0) {
+ logg->logMessage("%s(%s:%i): epoll_create1 failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+#ifndef EPOLL_CLOEXEC
+ int fdf = fcntl(mFd, F_GETFD);
+ if ((fdf == -1) || (fcntl(mFd, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+ logg->logMessage("%s(%s:%i): fcntl failed", __FUNCTION__, __FILE__, __LINE__);
+ ::close(mFd);
+ return -1;
+ }
+#endif
+
+ return true;
+}
+
+bool Monitor::add(const int fd) {
+ struct epoll_event event;
+ memset(&event, 0, sizeof(event));
+ event.data.fd = fd;
+ event.events = EPOLLIN;
+ if (epoll_ctl(mFd, EPOLL_CTL_ADD, fd, &event) != 0) {
+ logg->logMessage("%s(%s:%i): epoll_ctl failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ return true;
+}
+
+int Monitor::wait(struct epoll_event *const events, int maxevents, int timeout) {
+ int result = epoll_wait(mFd, events, maxevents, timeout);
+ if (result < 0) {
+ // Ignore if the call was interrupted as this will happen when SIGINT is received
+ if (errno == EINTR) {
+ result = 0;
+ } else {
+ logg->logMessage("%s(%s:%i): epoll_wait failed", __FUNCTION__, __FILE__, __LINE__);
+ }
+ }
+
+ return result;
+}
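
Monitor is a thin epoll wrapper: descriptors are registered once with add() and callers then block in wait(). A minimal usage sketch; stdin stands in for whatever descriptor a real caller would register:

    #include <unistd.h>

    #include "Monitor.h"

    int main() {
        Monitor monitor;
        if (!monitor.init() || !monitor.add(STDIN_FILENO)) {
            return 1;
        }
        struct epoll_event events[16];
        // Block for up to 1000ms; returns 0 on timeout or EINTR, <0 on error
        const int ready = monitor.wait(events, 16, 1000);
        for (int i = 0; i < ready; ++i) {
            // events[i].data.fd identifies which registered fd is readable
            (void)events[i];
        }
        return 0;
    }
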
diff --git a/tools/gator/daemon/Monitor.h b/tools/gator/daemon/Monitor.h
new file mode 100644
index 000000000000..7194e0e4ca50
--- /dev/null
+++ b/tools/gator/daemon/Monitor.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MONITOR_H
+#define MONITOR_H
+
+#include <sys/epoll.h>
+
+class Monitor {
+public:
+ Monitor();
+ ~Monitor();
+
+ void close();
+ bool init();
+ bool add(const int fd);
+ int wait(struct epoll_event *const events, int maxevents, int timeout);
+
+private:
+
+ int mFd;
+
+ // Intentionally unimplemented
+ Monitor(const Monitor &);
+ Monitor &operator=(const Monitor &);
+};
+
+#endif // MONITOR_H
diff --git a/tools/gator/daemon/NetDriver.cpp b/tools/gator/daemon/NetDriver.cpp
new file mode 100644
index 000000000000..e75c0695779a
--- /dev/null
+++ b/tools/gator/daemon/NetDriver.cpp
@@ -0,0 +1,129 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+// Define to get format macros from inttypes.h
+#define __STDC_FORMAT_MACROS
+
+#include "NetDriver.h"
+
+#include <inttypes.h>
+
+#include "Logging.h"
+#include "SessionData.h"
+
+class NetCounter : public DriverCounter {
+public:
+ NetCounter(DriverCounter *next, char *const name, int64_t *const value);
+ ~NetCounter();
+
+ int64_t read();
+
+private:
+ int64_t *const mValue;
+ int64_t mPrev;
+
+ // Intentionally unimplemented
+ NetCounter(const NetCounter &);
+ NetCounter &operator=(const NetCounter &);
+};
+
+NetCounter::NetCounter(DriverCounter *next, char *const name, int64_t *const value) : DriverCounter(next, name), mValue(value), mPrev(0) {
+}
+
+NetCounter::~NetCounter() {
+}
+
+int64_t NetCounter::read() {
+ int64_t result = *mValue - mPrev;
+ mPrev = *mValue;
+ return result;
+}
+
+NetDriver::NetDriver() : mBuf(), mReceiveBytes(0), mTransmitBytes(0) {
+}
+
+NetDriver::~NetDriver() {
+}
+
+void NetDriver::readEvents(mxml_node_t *const) {
+ // Only for use with perf
+ if (!gSessionData->perf.isSetup()) {
+ return;
+ }
+
+ setCounters(new NetCounter(getCounters(), strdup("Linux_net_rx"), &mReceiveBytes));
+ setCounters(new NetCounter(getCounters(), strdup("Linux_net_tx"), &mTransmitBytes));
+}
+
+bool NetDriver::doRead() {
+ if (!countersEnabled()) {
+ return true;
+ }
+
+ if (!mBuf.read("/proc/net/dev")) {
+ return false;
+ }
+
+ // Skip the header
+ char *key;
+ if (((key = strchr(mBuf.getBuf(), '\n')) == NULL) ||
+ ((key = strchr(key + 1, '\n')) == NULL)) {
+ return false;
+ }
+ key = key + 1;
+
+ mReceiveBytes = 0;
+ mTransmitBytes = 0;
+
+ char *colon;
+ while ((colon = strchr(key, ':')) != NULL) {
+ char *end = strchr(colon + 1, '\n');
+ if (end != NULL) {
+ *end = '\0';
+ }
+ *colon = '\0';
+
+ int64_t receiveBytes;
+ int64_t transmitBytes;
+ const int count = sscanf(colon + 1, " %" SCNd64 " %*u %*u %*u %*u %*u %*u %*u %" SCNd64, &receiveBytes, &transmitBytes);
+ if (count != 2) {
+ return false;
+ }
+ mReceiveBytes += receiveBytes;
+ mTransmitBytes += transmitBytes;
+
+ if (end == NULL) {
+ break;
+ }
+ key = end + 1;
+ }
+
+ return true;
+}
+
+void NetDriver::start() {
+ if (!doRead()) {
+ logg->logError(__FILE__, __LINE__, "Unable to read network stats");
+ handleException();
+ }
+ // Initialize previous values
+ for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
+ if (!counter->isEnabled()) {
+ continue;
+ }
+ counter->read();
+ }
+}
+
+void NetDriver::read(Buffer *const buffer) {
+ if (!doRead()) {
+ logg->logError(__FILE__, __LINE__, "Unable to read network stats");
+ handleException();
+ }
+ super::read(buffer);
+}
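
NetCounter::read() reports deltas rather than absolutes: each call returns the bytes accumulated since the previous call, which is why start() primes every enabled counter with a throwaway read. A reduced sketch of that cumulative-to-delta pattern in isolation:

    #include <cstdint>
    #include <cstdio>

    // Cumulative-to-delta conversion, as in NetCounter::read()
    class DeltaCounter {
    public:
        explicit DeltaCounter(const int64_t *value) : mValue(value), mPrev(0) {}
        int64_t read() {
            const int64_t delta = *mValue - mPrev;
            mPrev = *mValue; // remember the cumulative total for next time
            return delta;
        }
    private:
        const int64_t *mValue;
        int64_t mPrev;
    };

    int main() {
        int64_t rxBytes = 1000; // stands in for the /proc/net/dev total
        DeltaCounter counter(&rxBytes);
        counter.read();          // priming read, as NetDriver::start() does
        rxBytes += 250;          // more traffic arrives
        std::printf("%lld\n", (long long)counter.read()); // prints 250
        return 0;
    }
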
diff --git a/tools/gator/daemon/NetDriver.h b/tools/gator/daemon/NetDriver.h
new file mode 100644
index 000000000000..50ff850bfc6d
--- /dev/null
+++ b/tools/gator/daemon/NetDriver.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef NETDRIVER_H
+#define NETDRIVER_H
+
+#include "Driver.h"
+#include "DynBuf.h"
+
+class NetDriver : public PolledDriver {
+private:
+ typedef PolledDriver super;
+
+public:
+ NetDriver();
+ ~NetDriver();
+
+ void readEvents(mxml_node_t *const root);
+ void start();
+ void read(Buffer *const buffer);
+
+private:
+ bool doRead();
+
+ DynBuf mBuf;
+ int64_t mReceiveBytes;
+ int64_t mTransmitBytes;
+
+ // Intentionally unimplemented
+ NetDriver(const NetDriver &);
+ NetDriver &operator=(const NetDriver &);
+};
+
+#endif // NETDRIVER_H
diff --git a/tools/gator/daemon/OlySocket.cpp b/tools/gator/daemon/OlySocket.cpp
new file mode 100644
index 000000000000..aa0ce4929916
--- /dev/null
+++ b/tools/gator/daemon/OlySocket.cpp
@@ -0,0 +1,324 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "OlySocket.h"
+
+#include <stdio.h>
+#include <string.h>
+#ifdef WIN32
+#include <Winsock2.h>
+#include <ws2tcpip.h>
+#else
+#include <netinet/in.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include <netdb.h>
+#include <fcntl.h>
+#endif
+
+#include "Logging.h"
+
+#ifdef WIN32
+#define CLOSE_SOCKET(x) closesocket(x)
+#define SHUTDOWN_RX_TX SD_BOTH
+#define snprintf _snprintf
+#else
+#define CLOSE_SOCKET(x) close(x)
+#define SHUTDOWN_RX_TX SHUT_RDWR
+#endif
+
+int socket_cloexec(int domain, int type, int protocol) {
+#ifdef SOCK_CLOEXEC
+ return socket(domain, type | SOCK_CLOEXEC, protocol);
+#else
+ int sock = socket(domain, type, protocol);
+#ifdef FD_CLOEXEC
+ if (sock < 0) {
+ return -1;
+ }
+ int fdf = fcntl(sock, F_GETFD);
+ if ((fdf == -1) || (fcntl(sock, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+ close(sock);
+ return -1;
+ }
+#endif
+ return sock;
+#endif
+}
+
+int accept_cloexec(int sockfd, struct sockaddr *addr, socklen_t *addrlen) {
+ int sock;
+#ifdef SOCK_CLOEXEC
+ sock = accept4(sockfd, addr, addrlen, SOCK_CLOEXEC);
+ if (sock >= 0) {
+ return sock;
+ }
+ // accept4 with SOCK_CLOEXEC may not work on all kernels, so fallback
+#endif
+ sock = accept(sockfd, addr, addrlen);
+#ifdef FD_CLOEXEC
+ if (sock < 0) {
+ return -1;
+ }
+ int fdf = fcntl(sock, F_GETFD);
+ if ((fdf == -1) || (fcntl(sock, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+ close(sock);
+ return -1;
+ }
+#endif
+ return sock;
+}
+
+OlyServerSocket::OlyServerSocket(int port) {
+#ifdef WIN32
+ WSADATA wsaData;
+ if (WSAStartup(0x0202, &wsaData) != 0) {
+ logg->logError(__FILE__, __LINE__, "Windows socket initialization failed");
+ handleException();
+ }
+#endif
+
+ createServerSocket(port);
+}
+
+OlySocket::OlySocket(int socketID) : mSocketID(socketID) {
+}
+
+#ifndef WIN32
+
+#define MIN(A, B) ({ \
+ const __typeof__(A) __a = A; \
+ const __typeof__(B) __b = B; \
+ __a > __b ? __b : __a; \
+})
+
+OlyServerSocket::OlyServerSocket(const char* path, const size_t pathSize) {
+ // Create socket
+ mFDServer = socket_cloexec(PF_UNIX, SOCK_STREAM, 0);
+ if (mFDServer < 0) {
+ logg->logError(__FILE__, __LINE__, "Error creating server socket");
+ handleException();
+ }
+
+ // Create sockaddr_in structure, ensuring non-populated fields are zero
+ struct sockaddr_un sockaddr;
+ memset((void*)&sockaddr, 0, sizeof(sockaddr));
+ sockaddr.sun_family = AF_UNIX;
+ memcpy(sockaddr.sun_path, path, MIN(pathSize, sizeof(sockaddr.sun_path)));
+ sockaddr.sun_path[sizeof(sockaddr.sun_path) - 1] = '\0';
+
+ // Bind the socket to an address
+ if (bind(mFDServer, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) {
+ logg->logError(__FILE__, __LINE__, "Binding of server socket failed.");
+ handleException();
+ }
+
+ // Listen for connections on this socket
+ if (listen(mFDServer, 1) < 0) {
+ logg->logError(__FILE__, __LINE__, "Listening of server socket failed");
+ handleException();
+ }
+}
+
+int OlySocket::connect(const char* path, const size_t pathSize) {
+ int fd = socket_cloexec(PF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0) {
+ return -1;
+ }
+
+ // Create sockaddr_in structure, ensuring non-populated fields are zero
+ struct sockaddr_un sockaddr;
+ memset((void*)&sockaddr, 0, sizeof(sockaddr));
+ sockaddr.sun_family = AF_UNIX;
+ memcpy(sockaddr.sun_path, path, MIN(pathSize, sizeof(sockaddr.sun_path)));
+ sockaddr.sun_path[sizeof(sockaddr.sun_path) - 1] = '\0';
+
+ if (::connect(fd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+#endif
+
+OlySocket::~OlySocket() {
+ if (mSocketID > 0) {
+ CLOSE_SOCKET(mSocketID);
+ }
+}
+
+OlyServerSocket::~OlyServerSocket() {
+ if (mFDServer > 0) {
+ CLOSE_SOCKET(mFDServer);
+ }
+}
+
+void OlySocket::shutdownConnection() {
+ // Shutdown is primarily used to unblock other threads that are blocking on send/receive functions
+ shutdown(mSocketID, SHUTDOWN_RX_TX);
+}
+
+void OlySocket::closeSocket() {
+ // Used for closing an accepted socket but keeping the server socket active
+ if (mSocketID > 0) {
+ CLOSE_SOCKET(mSocketID);
+ mSocketID = -1;
+ }
+}
+
+void OlyServerSocket::closeServerSocket() {
+ if (CLOSE_SOCKET(mFDServer) != 0) {
+ logg->logError(__FILE__, __LINE__, "Failed to close server socket.");
+ handleException();
+ }
+ mFDServer = 0;
+}
+
+void OlyServerSocket::createServerSocket(int port) {
+ int family = AF_INET6;
+
+ // Create socket
+ mFDServer = socket_cloexec(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
+ if (mFDServer < 0) {
+ family = AF_INET;
+ mFDServer = socket_cloexec(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (mFDServer < 0) {
+ logg->logError(__FILE__, __LINE__, "Error creating server socket");
+ handleException();
+ }
+ }
+
+ // Enable address reuse; an alternative would be to create the server socket once and only close it when the object exits
+ int on = 1;
+ if (setsockopt(mFDServer, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on)) != 0) {
+ logg->logError(__FILE__, __LINE__, "Setting server socket options failed");
+ handleException();
+ }
+
+ // Create sockaddr_in structure, ensuring non-populated fields are zero
+ struct sockaddr_in6 sockaddr;
+ memset((void*)&sockaddr, 0, sizeof(sockaddr));
+ sockaddr.sin6_family = family;
+ sockaddr.sin6_port = htons(port);
+ sockaddr.sin6_addr = in6addr_any;
+
+ // Bind the socket to an address
+ if (bind(mFDServer, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) {
+ logg->logError(__FILE__, __LINE__, "Binding of server socket failed.\nIs an instance already running?");
+ handleException();
+ }
+
+ // Listen for connections on this socket
+ if (listen(mFDServer, 1) < 0) {
+ logg->logError(__FILE__, __LINE__, "Listening of server socket failed");
+ handleException();
+ }
+}
+
+// mSocketID is always set to the most recently accepted connection
+// The user of this class should maintain the different socket connections, e.g. by forking the process
+int OlyServerSocket::acceptConnection() {
+ int socketID;
+ if (mFDServer <= 0) {
+ logg->logError(__FILE__, __LINE__, "Attempting multiple connections on a single connection server socket or attempting to accept on a client socket");
+ handleException();
+ }
+
+ // Accept a connection, note that this call blocks until a client connects
+ socketID = accept_cloexec(mFDServer, NULL, NULL);
+ if (socketID < 0) {
+ logg->logError(__FILE__, __LINE__, "Socket acceptance failed");
+ handleException();
+ }
+ return socketID;
+}
+
+void OlySocket::send(const char* buffer, int size) {
+ if (size <= 0 || buffer == NULL) {
+ return;
+ }
+
+ while (size > 0) {
+ int n = ::send(mSocketID, buffer, size, 0);
+ if (n < 0) {
+ logg->logError(__FILE__, __LINE__, "Socket send error");
+ handleException();
+ }
+ size -= n;
+ buffer += n;
+ }
+}
+
+// Returns the number of bytes received
+int OlySocket::receive(char* buffer, int size) {
+ if (size <= 0 || buffer == NULL) {
+ return 0;
+ }
+
+ int bytes = recv(mSocketID, buffer, size, 0);
+ if (bytes < 0) {
+ logg->logError(__FILE__, __LINE__, "Socket receive error");
+ handleException();
+ } else if (bytes == 0) {
+ logg->logMessage("Socket disconnected");
+ return -1;
+ }
+ return bytes;
+}
+
+// Receive exactly size bytes of data. Note, this function will block until all bytes are received
+int OlySocket::receiveNBytes(char* buffer, int size) {
+ int bytes = 0;
+ while (size > 0 && buffer != NULL) {
+ bytes = recv(mSocketID, buffer, size, 0);
+ if (bytes < 0) {
+ logg->logError(__FILE__, __LINE__, "Socket receive error");
+ handleException();
+ } else if (bytes == 0) {
+ logg->logMessage("Socket disconnected");
+ return -1;
+ }
+ buffer += bytes;
+ size -= bytes;
+ }
+ return bytes;
+}
+
+// Receive data until a carriage return, line feed, or null is encountered, or the buffer fills
+int OlySocket::receiveString(char* buffer, int size) {
+ int bytes_received = 0;
+ bool found = false;
+
+ if (buffer == 0) {
+ return 0;
+ }
+
+ while (!found && bytes_received < size) {
+ // Receive a single character
+ int bytes = recv(mSocketID, &buffer[bytes_received], 1, 0);
+ if (bytes < 0) {
+ logg->logError(__FILE__, __LINE__, "Socket receive error");
+ handleException();
+ } else if (bytes == 0) {
+ logg->logMessage("Socket disconnected");
+ return -1;
+ }
+
+ // Replace carriage returns and line feeds with zero
+ if (buffer[bytes_received] == '\n' || buffer[bytes_received] == '\r' || buffer[bytes_received] == '\0') {
+ buffer[bytes_received] = '\0';
+ found = true;
+ }
+
+ bytes_received++;
+ }
+
+ return bytes_received;
+}
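
Typical use pairs the two classes: OlyServerSocket owns the listening descriptor, and each accepted connection is wrapped in its own OlySocket. A short hedged sketch; the port number and protocol exchange are illustrative:

    #include "OlySocket.h"

    int main() {
        OlyServerSocket server(8080);                 // illustrative port
        OlySocket client(server.acceptConnection());  // blocks until a peer connects

        char line[128];
        // Reads until '\n', '\r' or '\0'; returns -1 on disconnect
        if (client.receiveString(line, sizeof(line)) > 0) {
            client.send("OK\n", 3);
        }
        client.closeSocket();        // the server socket keeps listening
        server.closeServerSocket();
        return 0;
    }
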
diff --git a/tools/gator/daemon/OlySocket.h b/tools/gator/daemon/OlySocket.h
new file mode 100644
index 000000000000..6b53b01fc3ee
--- /dev/null
+++ b/tools/gator/daemon/OlySocket.h
@@ -0,0 +1,64 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OLY_SOCKET_H__
+#define __OLY_SOCKET_H__
+
+#include <stddef.h>
+
+#ifdef WIN32
+typedef int socklen_t;
+#else
+#include <sys/socket.h>
+#endif
+
+class OlySocket {
+public:
+#ifndef WIN32
+ static int connect(const char* path, const size_t pathSize);
+#endif
+
+ OlySocket(int socketID);
+ ~OlySocket();
+
+ void closeSocket();
+ void shutdownConnection();
+ void send(const char* buffer, int size);
+ int receive(char* buffer, int size);
+ int receiveNBytes(char* buffer, int size);
+ int receiveString(char* buffer, int size);
+
+ bool isValid() const { return mSocketID >= 0; }
+
+private:
+ int mSocketID;
+};
+
+class OlyServerSocket {
+public:
+ OlyServerSocket(int port);
+#ifndef WIN32
+ OlyServerSocket(const char* path, const size_t pathSize);
+#endif
+ ~OlyServerSocket();
+
+ int acceptConnection();
+ void closeServerSocket();
+
+ int getFd() { return mFDServer; }
+
+private:
+ int mFDServer;
+
+ void createServerSocket(int port);
+};
+
+int socket_cloexec(int domain, int type, int protocol);
+int accept_cloexec(int sockfd, struct sockaddr *addr, socklen_t *addrlen);
+
+#endif //__OLY_SOCKET_H__
diff --git a/tools/gator/daemon/OlyUtility.cpp b/tools/gator/daemon/OlyUtility.cpp
new file mode 100644
index 000000000000..45340a27d9fa
--- /dev/null
+++ b/tools/gator/daemon/OlyUtility.cpp
@@ -0,0 +1,227 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "OlyUtility.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if defined(WIN32)
+#include <windows.h>
+#elif defined(__linux__)
+#include <unistd.h>
+#elif defined(DARWIN)
+#include <mach-o/dyld.h>
+#endif
+
+OlyUtility* util = NULL;
+
+bool OlyUtility::stringToBool(const char* string, bool defValue) {
+ char value[32];
+
+ if (string == NULL) {
+ return defValue;
+ }
+
+ strncpy(value, string, sizeof(value));
+ if (value[0] == 0) {
+ return defValue;
+ }
+ value[sizeof(value) - 1] = 0; // strncpy does not guarantee a null-terminated string
+
+ // Convert to lowercase
+ int i = 0;
+ while (value[i]) {
+ value[i] = tolower(value[i]);
+ i++;
+ }
+
+ if (strcmp(value, "true") == 0 || strcmp(value, "yes") == 0 || strcmp(value, "1") == 0 || strcmp(value, "on") == 0) {
+ return true;
+ } else if (strcmp(value, "false") == 0 || strcmp(value, "no") == 0 || strcmp(value, "0") == 0 || strcmp(value, "off") == 0) {
+ return false;
+ } else {
+ return defValue;
+ }
+}
+
+void OlyUtility::stringToLower(char* string) {
+ if (string == NULL) {
+ return;
+ }
+
+ while (*string) {
+ *string = tolower(*string);
+ string++;
+ }
+}
+
+// Modifies fullpath with the path part including the trailing path separator
+int OlyUtility::getApplicationFullPath(char* fullpath, int sizeOfPath) {
+ memset(fullpath, 0, sizeOfPath);
+#if defined(WIN32)
+ int length = GetModuleFileName(NULL, fullpath, sizeOfPath);
+#elif defined(__linux__)
+ int length = readlink("/proc/self/exe", fullpath, sizeOfPath);
+#elif defined(DARWIN)
+ uint32_t length_u = (uint32_t)sizeOfPath;
+ int length = sizeOfPath;
+ if (_NSGetExecutablePath(fullpath, &length_u) == 0) {
+ length = strlen(fullpath);
+ }
+#endif
+
+ if (length < 0 || length >= sizeOfPath) {
+ return -1;
+ }
+
+ fullpath[length] = 0;
+ getPathPart(fullpath);
+
+ return 0;
+}
+
+char* OlyUtility::readFromDisk(const char* file, unsigned int *size, bool appendNull) {
+ // Open the file
+ FILE* pFile = fopen(file, "rb");
+ if (pFile==NULL) {
+ return NULL;
+ }
+
+ // Obtain file size
+ fseek(pFile , 0 , SEEK_END);
+ unsigned int lSize = ftell(pFile);
+ rewind(pFile);
+
+ // Allocate memory to contain the whole file
+ char* buffer = (char*)malloc(lSize + (int)appendNull);
+ if (buffer == NULL) {
+ fclose(pFile);
+ return NULL;
+ }
+
+ // Copy the file into the buffer
+ if (fread(buffer, 1, lSize, pFile) != lSize) {
+ free(buffer);
+ fclose(pFile);
+ return NULL;
+ }
+
+ // Terminate
+ fclose(pFile);
+
+ if (appendNull) {
+ buffer[lSize] = 0;
+ }
+
+ if (size) {
+ *size = lSize;
+ }
+
+ return buffer;
+}
+
+int OlyUtility::writeToDisk(const char* path, const char* data) {
+ // Open the file
+ FILE* pFile = fopen(path, "wb");
+ if (pFile == NULL) {
+ return -1;
+ }
+
+ // Write the data to disk
+ if (fwrite(data, 1, strlen(data), pFile) != strlen(data)) {
+ fclose(pFile);
+ return -1;
+ }
+
+ // Terminate
+ fclose(pFile);
+ return 0;
+}
+
+int OlyUtility::appendToDisk(const char* path, const char* data) {
+ // Open the file
+ FILE* pFile = fopen(path, "a");
+ if (pFile == NULL) {
+ return -1;
+ }
+
+ // Write the data to disk
+ if (fwrite(data, 1, strlen(data), pFile) != strlen(data)) {
+ fclose(pFile);
+ return -1;
+ }
+
+ // Terminate
+ fclose(pFile);
+ return 0;
+}
+
+/**
+ * Copies the srcFile into dstFile in 1kB chunks.
+ * The dstFile will be overwritten if it exists.
+ * 0 is returned on an error; otherwise 1.
+ */
+#define TRANSFER_SIZE 1024
+int OlyUtility::copyFile(const char* srcFile, const char* dstFile) {
+ char buffer[TRANSFER_SIZE];
+ FILE * f_src = fopen(srcFile,"rb");
+ if (!f_src) {
+ return 0;
+ }
+ FILE * f_dst = fopen(dstFile,"wb");
+ if (!f_dst) {
+ fclose(f_src);
+ return 0;
+ }
+ while (!feof(f_src)) {
+ int num_bytes_read = fread(buffer, 1, TRANSFER_SIZE, f_src);
+ if (num_bytes_read < TRANSFER_SIZE && !feof(f_src)) {
+ fclose(f_src);
+ fclose(f_dst);
+ return 0;
+ }
+ int num_bytes_written = fwrite(buffer, 1, num_bytes_read, f_dst);
+ if (num_bytes_written != num_bytes_read) {
+ fclose(f_src);
+ fclose(f_dst);
+ return 0;
+ }
+ }
+ fclose(f_src);
+ fclose(f_dst);
+ return 1;
+}
+
+const char* OlyUtility::getFilePart(const char* path) {
+ const char* last_sep = strrchr(path, PATH_SEPARATOR);
+
+ // in case path is not a full path
+ if (last_sep == NULL) {
+ return path;
+ }
+
+ return last_sep + 1;
+}
+
+// getPathPart may modify the contents of path
+// returns the path including the trailing path separator
+char* OlyUtility::getPathPart(char* path) {
+ char* last_sep = strrchr(path, PATH_SEPARATOR);
+
+ // in case path is not a full path
+ if (last_sep == NULL) {
+ return 0;
+ }
+ last_sep++;
+ *last_sep = 0;
+
+ return (path);
+}
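
getFilePart() and getPathPart() split a path around its final separator; note that getPathPart() truncates its argument in place, so the file part must be taken first. A small demonstration (path is illustrative):

    #include <cstdio>

    #include "OlyUtility.h"

    int main() {
        OlyUtility utility;
        char path[] = "/data/local/tmp/capture.apc";

        // Points just past the final '/': "capture.apc"
        std::printf("file: %s\n", utility.getFilePart(path));

        // Truncates path after the final '/': "/data/local/tmp/"
        std::printf("dir:  %s\n", utility.getPathPart(path));
        return 0;
    }
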
diff --git a/tools/gator/daemon/OlyUtility.h b/tools/gator/daemon/OlyUtility.h
new file mode 100644
index 000000000000..1d26beb596fa
--- /dev/null
+++ b/tools/gator/daemon/OlyUtility.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef OLY_UTILITY_H
+#define OLY_UTILITY_H
+
+#include <stddef.h>
+
+#ifdef WIN32
+#define PATH_SEPARATOR '\\'
+#define CAIMAN_PATH_MAX MAX_PATH
+#define snprintf _snprintf
+#else
+#include <limits.h>
+#define PATH_SEPARATOR '/'
+#define CAIMAN_PATH_MAX PATH_MAX
+#endif
+
+class OlyUtility {
+public:
+ OlyUtility() {};
+ ~OlyUtility() {};
+ bool stringToBool(const char* string, bool defValue);
+ void stringToLower(char* string);
+ int getApplicationFullPath(char* path, int sizeOfPath);
+ char* readFromDisk(const char* file, unsigned int *size = NULL, bool appendNull = true);
+ int writeToDisk(const char* path, const char* file);
+ int appendToDisk(const char* path, const char* file);
+ int copyFile(const char* srcFile, const char* dstFile);
+ const char* getFilePart(const char* path);
+ char* getPathPart(char* path);
+private:
+};
+
+extern OlyUtility* util;
+
+#endif // OLY_UTILITY_H
diff --git a/tools/gator/daemon/PerfBuffer.cpp b/tools/gator/daemon/PerfBuffer.cpp
new file mode 100644
index 000000000000..f127c996d43b
--- /dev/null
+++ b/tools/gator/daemon/PerfBuffer.cpp
@@ -0,0 +1,158 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "PerfBuffer.h"
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "Buffer.h"
+#include "Logging.h"
+#include "Sender.h"
+#include "SessionData.h"
+
+PerfBuffer::PerfBuffer() {
+ for (int cpu = 0; cpu < ARRAY_LENGTH(mBuf); ++cpu) {
+ mBuf[cpu] = MAP_FAILED;
+ mDiscard[cpu] = false;
+ mFds[cpu] = -1;
+ }
+}
+
+PerfBuffer::~PerfBuffer() {
+ for (int cpu = ARRAY_LENGTH(mBuf) - 1; cpu >= 0; --cpu) {
+ if (mBuf[cpu] != MAP_FAILED) {
+ munmap(mBuf[cpu], gSessionData->mPageSize + BUF_SIZE);
+ }
+ }
+}
+
+bool PerfBuffer::useFd(const int cpu, const int fd) {
+ if (mFds[cpu] < 0) {
+ if (mBuf[cpu] != MAP_FAILED) {
+ logg->logMessage("%s(%s:%i): cpu %i already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__, cpu);
+ return false;
+ }
+
+ // The buffer isn't mapped yet
+ mBuf[cpu] = mmap(NULL, gSessionData->mPageSize + BUF_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (mBuf[cpu] == MAP_FAILED) {
+ logg->logMessage("%s(%s:%i): mmap failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ mFds[cpu] = fd;
+
+ // Check the version
+ struct perf_event_mmap_page *pemp = static_cast<struct perf_event_mmap_page *>(mBuf[cpu]);
+ if (pemp->compat_version != 0) {
+ logg->logMessage("%s(%s:%i): Incompatible perf_event_mmap_page compat_version", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ } else {
+ if (mBuf[cpu] == MAP_FAILED) {
+ logg->logMessage("%s(%s:%i): cpu already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, mFds[cpu]) < 0) {
+ logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void PerfBuffer::discard(const int cpu) {
+ if (mBuf[cpu] != MAP_FAILED) {
+ mDiscard[cpu] = true;
+ }
+}
+
+bool PerfBuffer::isEmpty() {
+ for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+ if (mBuf[cpu] != MAP_FAILED) {
+ // Take a snapshot of the positions
+ struct perf_event_mmap_page *pemp = static_cast<struct perf_event_mmap_page *>(mBuf[cpu]);
+ const __u64 head = pemp->data_head;
+ const __u64 tail = pemp->data_tail;
+
+ if (head != tail) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static void compressAndSend(const int cpu, const __u64 head, __u64 tail, const uint8_t *const b, Sender *const sender) {
+ // Pick a big size but something smaller than the chunkSize in Sender::writeData which is 100k
+ char buf[1<<16];
+ int writePos = 0;
+ const int typeLength = gSessionData->mLocalCapture ? 0 : 1;
+
+ while (head > tail) {
+ writePos = 0;
+ if (!gSessionData->mLocalCapture) {
+ buf[writePos++] = RESPONSE_APC_DATA;
+ }
+ // Reserve space for size
+ writePos += sizeof(uint32_t);
+ Buffer::packInt(buf, sizeof(buf), writePos, FRAME_PERF);
+ Buffer::packInt(buf, sizeof(buf), writePos, cpu);
+
+ while (head > tail) {
+ const int count = reinterpret_cast<const struct perf_event_header *>(b + (tail & BUF_MASK))->size/sizeof(uint64_t);
+ // Can this whole record be written? Streamline assumes events are not split between frames
+ if (sizeof(buf) <= writePos + count*Buffer::MAXSIZE_PACK64) {
+ break;
+ }
+ for (int i = 0; i < count; ++i) {
+ // Must account for message size
+ Buffer::packInt64(buf, sizeof(buf), writePos, *reinterpret_cast<const uint64_t *>(b + (tail & BUF_MASK)));
+ tail += sizeof(uint64_t);
+ }
+ }
+
+ // Write size
+ Buffer::writeLEInt(reinterpret_cast<unsigned char *>(buf + typeLength), writePos - typeLength - sizeof(uint32_t));
+ sender->writeData(buf, writePos, RESPONSE_APC_DATA);
+ }
+}
+
+bool PerfBuffer::send(Sender *const sender) {
+ for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+ if (mBuf[cpu] == MAP_FAILED) {
+ continue;
+ }
+
+ // Take a snapshot of the positions
+ struct perf_event_mmap_page *pemp = static_cast<struct perf_event_mmap_page *>(mBuf[cpu]);
+ const __u64 head = pemp->data_head;
+ const __u64 tail = pemp->data_tail;
+
+ if (head > tail) {
+ const uint8_t *const b = static_cast<uint8_t *>(mBuf[cpu]) + gSessionData->mPageSize;
+ compressAndSend(cpu, head, tail, b, sender);
+
+ // Update tail with the data read
+ pemp->data_tail = head;
+ }
+
+ if (mDiscard[cpu]) {
+ munmap(mBuf[cpu], gSessionData->mPageSize + BUF_SIZE);
+ mBuf[cpu] = MAP_FAILED;
+ mDiscard[cpu] = false;
+ mFds[cpu] = -1;
+ logg->logMessage("%s(%s:%i): Unmaped cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu);
+ }
+ }
+
+ return true;
+}
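
The send path above follows the standard perf mmap ring-buffer protocol: the kernel advances data_head as it writes records, the consumer reads everything between data_tail and data_head (masking offsets by the buffer size), then stores the new data_tail to release the space. A sketch of that consume loop in isolation; record decoding is elided, and a production consumer also needs the appropriate memory barriers around data_head/data_tail, which this sketch omits:

    #include <cstddef>
    #include <cstdint>

    #include <linux/perf_event.h>

    // Drain all complete records from one perf mmap ring.
    // base points at the perf_event_mmap_page; record data follows after pageSize.
    // bufMask is bufSize - 1 (buffer sizes are powers of two).
    static void drainRing(void *base, size_t pageSize, uint64_t bufMask) {
        struct perf_event_mmap_page *meta =
            static_cast<struct perf_event_mmap_page *>(base);
        const uint8_t *data = static_cast<const uint8_t *>(base) + pageSize;

        const uint64_t head = meta->data_head; // snapshot the producer position
        uint64_t tail = meta->data_tail;

        while (tail < head) {
            const struct perf_event_header *record =
                reinterpret_cast<const struct perf_event_header *>(
                    data + (tail & bufMask));
            // ... decode record->type over record->size bytes here ...
            tail += record->size;
        }

        meta->data_tail = head; // release the consumed space to the kernel
    }
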
diff --git a/tools/gator/daemon/PerfBuffer.h b/tools/gator/daemon/PerfBuffer.h
new file mode 100644
index 000000000000..25a10625a9e8
--- /dev/null
+++ b/tools/gator/daemon/PerfBuffer.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef PERF_BUFFER
+#define PERF_BUFFER
+
+#include "Config.h"
+
+#define BUF_SIZE (gSessionData->mTotalBufferSize * 1024 * 1024)
+#define BUF_MASK (BUF_SIZE - 1)
+
+class Sender;
+
+class PerfBuffer {
+public:
+ PerfBuffer();
+ ~PerfBuffer();
+
+ bool useFd(const int cpu, const int fd);
+ void discard(const int cpu);
+ bool isEmpty();
+ bool send(Sender *const sender);
+
+private:
+ void *mBuf[NR_CPUS];
+ // After the buffer is flushed it should be unmapped
+ bool mDiscard[NR_CPUS];
+ // fd that corresponds to the mBuf
+ int mFds[NR_CPUS];
+
+ // Intentionally undefined
+ PerfBuffer(const PerfBuffer &);
+ PerfBuffer &operator=(const PerfBuffer &);
+};
+
+#endif // PERF_BUFFER
diff --git a/tools/gator/daemon/PerfDriver.cpp b/tools/gator/daemon/PerfDriver.cpp
new file mode 100644
index 000000000000..ee90284cee41
--- /dev/null
+++ b/tools/gator/daemon/PerfDriver.cpp
@@ -0,0 +1,351 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "PerfDriver.h"
+
+#include <dirent.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "Config.h"
+#include "ConfigurationXML.h"
+#include "Counter.h"
+#include "DriverSource.h"
+#include "DynBuf.h"
+#include "Logging.h"
+#include "PerfGroup.h"
+#include "SessionData.h"
+#include "Setup.h"
+
+#define PERF_DEVICES "/sys/bus/event_source/devices"
+
+#define TYPE_DERIVED ~0U
+
+// From gator.h
+struct gator_cpu {
+ const int cpuid;
+ // Human readable name
+ const char *const core_name;
+ // gatorfs event and Perf PMU name
+ const char *const pmnc_name;
+ const int pmnc_counters;
+};
+
+// From gator_main.c
+static const struct gator_cpu gator_cpus[] = {
+ { 0xb36, "ARM1136", "ARM_ARM11", 3 },
+ { 0xb56, "ARM1156", "ARM_ARM11", 3 },
+ { 0xb76, "ARM1176", "ARM_ARM11", 3 },
+ { 0xb02, "ARM11MPCore", "ARM_ARM11MPCore", 3 },
+ { 0xc05, "Cortex-A5", "ARMv7_Cortex_A5", 2 },
+ { 0xc07, "Cortex-A7", "ARMv7_Cortex_A7", 4 },
+ { 0xc08, "Cortex-A8", "ARMv7_Cortex_A8", 4 },
+ { 0xc09, "Cortex-A9", "ARMv7_Cortex_A9", 6 },
+ { 0xc0f, "Cortex-A15", "ARMv7_Cortex_A15", 6 },
+ { 0xc0e, "Cortex-A17", "ARMv7_Cortex_A17", 6 },
+ { 0x00f, "Scorpion", "Scorpion", 4 },
+ { 0x02d, "ScorpionMP", "ScorpionMP", 4 },
+ { 0x049, "KraitSIM", "Krait", 4 },
+ { 0x04d, "Krait", "Krait", 4 },
+ { 0x06f, "Krait S4 Pro", "Krait", 4 },
+ { 0xd03, "Cortex-A53", "ARM_Cortex-A53", 6 },
+ { 0xd07, "Cortex-A57", "ARM_Cortex-A57", 6 },
+ { 0xd0f, "AArch64", "ARM_AArch64", 6 },
+};
+
+static const char OLD_PMU_PREFIX[] = "ARMv7 Cortex-";
+static const char NEW_PMU_PREFIX[] = "ARMv7_Cortex_";
+
+struct uncore_counter {
+ // Perf PMU name
+ const char *const perfName;
+ // gatorfs event name
+ const char *const gatorName;
+ const int count;
+};
+
+static const struct uncore_counter uncore_counters[] = {
+ { "CCI_400", "CCI_400", 4 },
+ { "CCI_400-r1", "CCI_400-r1", 4 },
+ { "ccn", "ARM_CCN_5XX", 8 },
+};
+
+class PerfCounter : public DriverCounter {
+public:
+ PerfCounter(DriverCounter *next, const char *name, uint32_t type, uint64_t config, bool perCpu) : DriverCounter(next, name), mType(type), mCount(0), mConfig(config), mPerCpu(perCpu) {}
+
+ ~PerfCounter() {
+ }
+
+ uint32_t getType() const { return mType; }
+ int getCount() const { return mCount; }
+ void setCount(const int count) { mCount = count; }
+ uint64_t getConfig() const { return mConfig; }
+ void setConfig(const uint64_t config) { mConfig = config; }
+ bool isPerCpu() const { return mPerCpu; }
+
+private:
+ const uint32_t mType;
+ int mCount;
+ uint64_t mConfig;
+ bool mPerCpu;
+};
+
+PerfDriver::PerfDriver() : mIsSetup(false), mLegacySupport(false) {
+}
+
+PerfDriver::~PerfDriver() {
+}
+
+void PerfDriver::addCpuCounters(const char *const counterName, const int type, const int numCounters) {
+ int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1;
+ char *name = new char[len];
+ snprintf(name, len, "%s_ccnt", counterName);
+ setCounters(new PerfCounter(getCounters(), name, type, -1, true));
+
+ for (int j = 0; j < numCounters; ++j) {
+ len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1;
+ name = new char[len];
+ snprintf(name, len, "%s_cnt%d", counterName, j);
+ setCounters(new PerfCounter(getCounters(), name, type, -1, true));
+ }
+}
+
+void PerfDriver::addUncoreCounters(const char *const counterName, const int type, const int numCounters) {
+ int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1;
+ char *name = new char[len];
+ snprintf(name, len, "%s_ccnt", counterName);
+ setCounters(new PerfCounter(getCounters(), name, type, -1, false));
+
+ for (int j = 0; j < numCounters; ++j) {
+ len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1;
+ name = new char[len];
+ snprintf(name, len, "%s_cnt%d", counterName, j);
+ setCounters(new PerfCounter(getCounters(), name, type, -1, false));
+ }
+}
+
+bool PerfDriver::setup() {
+ // Check the kernel version
+ int release[3];
+ if (!getLinuxVersion(release)) {
+ logg->logMessage("%s(%s:%i): getLinuxVersion failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ if (KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 4, 0)) {
+ logg->logMessage("%s(%s:%i): Unsupported kernel version", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ mLegacySupport = KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 12, 0);
+
+ if (access(EVENTS_PATH, R_OK) != 0) {
+ logg->logMessage("%s(%s:%i): " EVENTS_PATH " does not exist, is CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER enabled?", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ // Add supported PMUs
+ bool foundCpu = false;
+ DIR *dir = opendir(PERF_DEVICES);
+ if (dir == NULL) {
+ logg->logMessage("%s(%s:%i): opendif failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ struct dirent *dirent;
+ while ((dirent = readdir(dir)) != NULL) {
+ for (int i = 0; i < ARRAY_LENGTH(gator_cpus); ++i) {
+ const struct gator_cpu *const gator_cpu = &gator_cpus[i];
+
+ // Do the names match exactly?
+ if (strcasecmp(gator_cpu->pmnc_name, dirent->d_name) != 0 &&
+ // Do these names match but have the old vs new prefix?
+ ((strncasecmp(dirent->d_name, OLD_PMU_PREFIX, sizeof(OLD_PMU_PREFIX) - 1) != 0 ||
+ strncasecmp(gator_cpu->pmnc_name, NEW_PMU_PREFIX, sizeof(NEW_PMU_PREFIX) - 1) != 0 ||
+ strcasecmp(dirent->d_name + sizeof(OLD_PMU_PREFIX) - 1, gator_cpu->pmnc_name + sizeof(NEW_PMU_PREFIX) - 1) != 0))) {
+ continue;
+ }
+
+ int type;
+ char buf[256];
+ snprintf(buf, sizeof(buf), PERF_DEVICES "/%s/type", dirent->d_name);
+ if (DriverSource::readIntDriver(buf, &type) != 0) {
+ continue;
+ }
+
+ foundCpu = true;
+ logg->logMessage("Adding cpu counters for %s", gator_cpu->pmnc_name);
+ addCpuCounters(gator_cpu->pmnc_name, type, gator_cpu->pmnc_counters);
+ }
+
+ for (int i = 0; i < ARRAY_LENGTH(uncore_counters); ++i) {
+ if (strcmp(dirent->d_name, uncore_counters[i].perfName) != 0) {
+ continue;
+ }
+
+ int type;
+ char buf[256];
+ snprintf(buf, sizeof(buf), PERF_DEVICES "/%s/type", dirent->d_name);
+ if (DriverSource::readIntDriver(buf, &type) != 0) {
+ continue;
+ }
+
+ logg->logMessage("Adding uncore counters for %s", uncore_counters[i].gatorName);
+ addUncoreCounters(uncore_counters[i].gatorName, type, uncore_counters[i].count);
+ }
+ }
+ closedir(dir);
+
+ if (!foundCpu) {
+ // If no cpu was found based on pmu names, try by cpuid
+ for (int i = 0; i < ARRAY_LENGTH(gator_cpus); ++i) {
+ if (gSessionData->mMaxCpuId != gator_cpus[i].cpuid) {
+ continue;
+ }
+
+ foundCpu = true;
+ logg->logMessage("Adding cpu counters (based on cpuid) for %s", gator_cpus[i].pmnc_name);
+ addCpuCounters(gator_cpus[i].pmnc_name, PERF_TYPE_RAW, gator_cpus[i].pmnc_counters);
+ }
+ }
+
+ /*
+ if (!foundCpu) {
+ // If all else fails, use the perf architected counters
+ // 9 because that's how many are in events-Perf-Hardware.xml - assume they can all be enabled at once
+ addCpuCounters("Perf_Hardware", PERF_TYPE_HARDWARE, 9);
+ }
+ */
+
+ // Add supported software counters
+ long long id;
+ DynBuf printb;
+
+ id = getTracepointId("irq/softirq_exit", &printb);
+ if (id >= 0) {
+ setCounters(new PerfCounter(getCounters(), "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id, true));
+ }
+
+ id = getTracepointId("irq/irq_handler_exit", &printb);
+ if (id >= 0) {
+ setCounters(new PerfCounter(getCounters(), "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id, true));
+ }
+
+ id = getTracepointId(SCHED_SWITCH, &printb);
+ if (id >= 0) {
+ setCounters(new PerfCounter(getCounters(), "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id, true));
+ }
+
+ setCounters(new PerfCounter(getCounters(), "Linux_cpu_wait_contention", TYPE_DERIVED, -1, false));
+
+ //Linux_cpu_wait_io
+
+ mIsSetup = true;
+ return true;
+}
+
+bool PerfDriver::summary(Buffer *const buffer) {
+ struct utsname utsname;
+ if (uname(&utsname) != 0) {
+ logg->logMessage("%s(%s:%i): uname failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ char buf[512];
+ snprintf(buf, sizeof(buf), "%s %s %s %s %s GNU/Linux", utsname.sysname, utsname.nodename, utsname.release, utsname.version, utsname.machine);
+
+ struct timespec ts;
+ if (clock_gettime(CLOCK_REALTIME, &ts) != 0) {
+ logg->logMessage("%s(%s:%i): clock_gettime failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ const int64_t timestamp = (int64_t)ts.tv_sec * NS_PER_S + ts.tv_nsec;
+
+ const uint64_t monotonicStarted = getTime();
+ gSessionData->mMonotonicStarted = monotonicStarted;
+
+ buffer->summary(monotonicStarted, timestamp, monotonicStarted, monotonicStarted, buf);
+
+ for (int i = 0; i < gSessionData->mCores; ++i) {
+ coreName(monotonicStarted, buffer, i);
+ }
+ buffer->commit(monotonicStarted);
+
+ return true;
+}
+
+void PerfDriver::coreName(const uint32_t startTime, Buffer *const buffer, const int cpu) {
+ // Don't send information on a cpu we know nothing about
+ if (gSessionData->mCpuIds[cpu] == -1) {
+ return;
+ }
+
+ int j;
+ for (j = 0; j < ARRAY_LENGTH(gator_cpus); ++j) {
+ if (gator_cpus[j].cpuid == gSessionData->mCpuIds[cpu]) {
+ break;
+ }
+ }
+ // j == ARRAY_LENGTH(gator_cpus) means the loop above found no match
+ if (j < ARRAY_LENGTH(gator_cpus)) {
+ buffer->coreName(startTime, cpu, gSessionData->mCpuIds[cpu], gator_cpus[j].core_name);
+ } else {
+ char buf[32];
+ if (gSessionData->mCpuIds[cpu] == -1) {
+ snprintf(buf, sizeof(buf), "Unknown");
+ } else {
+ snprintf(buf, sizeof(buf), "Unknown (0x%.3x)", gSessionData->mCpuIds[cpu]);
+ }
+ buffer->coreName(startTime, cpu, gSessionData->mCpuIds[cpu], buf);
+ }
+}
+
+void PerfDriver::setupCounter(Counter &counter) {
+ PerfCounter *const perfCounter = static_cast<PerfCounter *>(findCounter(counter));
+ if (perfCounter == NULL) {
+ counter.setEnabled(false);
+ return;
+ }
+
+ // Don't use the config from counters XML if it's not set, e.g. for software counters
+ if (counter.getEvent() != -1) {
+ perfCounter->setConfig(counter.getEvent());
+ }
+ perfCounter->setCount(counter.getCount());
+ perfCounter->setEnabled(true);
+ counter.setKey(perfCounter->getKey());
+}
+
+bool PerfDriver::enable(const uint64_t currTime, PerfGroup *const group, Buffer *const buffer) const {
+ for (PerfCounter *counter = static_cast<PerfCounter *>(getCounters()); counter != NULL; counter = static_cast<PerfCounter *>(counter->getNext())) {
+ if (counter->isEnabled() && (counter->getType() != TYPE_DERIVED)) {
+ if (!group->add(currTime, buffer, counter->getKey(), counter->getType(), counter->getConfig(), counter->getCount(), counter->getCount() > 0 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP : 0, counter->isPerCpu() ? PERF_GROUP_PER_CPU : 0)) {
+ logg->logMessage("%s(%s:%i): PerfGroup::add failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+long long PerfDriver::getTracepointId(const char *const name, DynBuf *const printb) {
+ if (!printb->printf(EVENTS_PATH "/%s/id", name)) {
+ logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+ return -1;
+ }
+
+ int64_t result;
+ if (DriverSource::readInt64Driver(printb->getBuf(), &result) != 0) {
+ logg->logMessage("%s(%s:%i): DriverSource::readInt64Driver failed", __FUNCTION__, __FILE__, __LINE__);
+ return -1;
+ }
+
+ return result;
+}
diff --git a/tools/gator/daemon/PerfDriver.h b/tools/gator/daemon/PerfDriver.h
new file mode 100644
index 000000000000..846203a9e18b
--- /dev/null
+++ b/tools/gator/daemon/PerfDriver.h
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef PERFDRIVER_H
+#define PERFDRIVER_H
+
+#include <stdint.h>
+
+#include "Driver.h"
+
+// If debugfs is not mounted at /sys/kernel/debug, update DEBUGFS_PATH
+#define DEBUGFS_PATH "/sys/kernel/debug"
+#define EVENTS_PATH DEBUGFS_PATH "/tracing/events"
+
+#define SCHED_SWITCH "sched/sched_switch"
+#define CPU_IDLE "power/cpu_idle"
+
+class Buffer;
+class DynBuf;
+class PerfGroup;
+
+class PerfDriver : public SimpleDriver {
+public:
+ PerfDriver();
+ ~PerfDriver();
+
+ bool getLegacySupport() const { return mLegacySupport; }
+
+ bool setup();
+ bool summary(Buffer *const buffer);
+ void coreName(const uint32_t startTime, Buffer *const buffer, const int cpu);
+ bool isSetup() const { return mIsSetup; }
+
+ void setupCounter(Counter &counter);
+
+ bool enable(const uint64_t currTime, PerfGroup *const group, Buffer *const buffer) const;
+
+ static long long getTracepointId(const char *const name, DynBuf *const printb);
+
+private:
+ void addCpuCounters(const char *const counterName, const int type, const int numCounters);
+ void addUncoreCounters(const char *const counterName, const int type, const int numCounters);
+
+ bool mIsSetup;
+ bool mLegacySupport;
+
+ // Intentionally undefined
+ PerfDriver(const PerfDriver &);
+ PerfDriver &operator=(const PerfDriver &);
+};
+
+#endif // PERFDRIVER_H
diff --git a/tools/gator/daemon/PerfGroup.cpp b/tools/gator/daemon/PerfGroup.cpp
new file mode 100644
index 000000000000..4fd960a9058c
--- /dev/null
+++ b/tools/gator/daemon/PerfGroup.cpp
@@ -0,0 +1,244 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "PerfGroup.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "Logging.h"
+#include "Monitor.h"
+#include "PerfBuffer.h"
+#include "SessionData.h"
+
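+// Common perf_event_attr setup. On 3.12+ kernels PERF_SAMPLE_IDENTIFIER places
+// the sample id at a fixed position in every record so it can be found
+// regardless of the other sample flags; the legacy path instead fixes the set
+// of leading sample flags so the id still lands at a known offset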
+#define DEFAULT_PEA_ARGS(pea, additionalSampleType) \
+ pea.size = sizeof(pea); \
+ /* Emit time, read_format below, group leader id, and raw tracepoint info */ \
+ pea.sample_type = (gSessionData->perf.getLegacySupport() \
+ ? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_ID \
+ : PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_IDENTIFIER ) | additionalSampleType; \
+ /* Emit the counter values in group format */ \
+ pea.read_format = PERF_FORMAT_ID | PERF_FORMAT_GROUP; \
+ /* start out disabled */ \
+ pea.disabled = 1; \
+ /* have a sampling interrupt happen when we cross the wakeup_watermark boundary */ \
+ pea.watermark = 1; \
+ /* Be conservative in flush size as only one buffer set is monitored */ \
+ pea.wakeup_watermark = BUF_SIZE / 2
+
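+// glibc provides no wrapper for perf_event_open, so use the raw syscall.
+// FD_CLOEXEC is set with fcntl afterwards as PERF_FLAG_FD_CLOEXEC is not
+// available on the older kernels supported here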
+static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t pid, const int cpu, const int group_fd, const unsigned long flags) {
+ int fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+ if (fd < 0) {
+ return -1;
+ }
+ int fdf = fcntl(fd, F_GETFD);
+ if ((fdf == -1) || (fcntl(fd, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+ close(fd);
+ return -1;
+ }
+ return fd;
+}
+
+PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb) {
+ memset(&mAttrs, 0, sizeof(mAttrs));
+ memset(&mPerCpu, 0, sizeof(mPerCpu));
+ memset(&mKeys, -1, sizeof(mKeys));
+ memset(&mFds, -1, sizeof(mFds));
+}
+
+PerfGroup::~PerfGroup() {
+ for (int pos = ARRAY_LENGTH(mFds) - 1; pos >= 0; --pos) {
+ if (mFds[pos] >= 0) {
+ close(mFds[pos]);
+ }
+ }
+}
+
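+// Stage the attributes for one event in the group; no fds are opened here,
+// that happens per core in prepareCPU()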
+bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) {
+ int i;
+ for (i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+ if (mKeys[i] < 0) {
+ break;
+ }
+ }
+
+ if (i >= ARRAY_LENGTH(mKeys)) {
+ logg->logMessage("%s(%s:%i): Too many counters", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ DEFAULT_PEA_ARGS(mAttrs[i], sampleType);
+ mAttrs[i].type = type;
+ mAttrs[i].config = config;
+ mAttrs[i].sample_period = sample;
+ // always be on the CPU but only a group leader can be pinned
+ mAttrs[i].pinned = (i == 0 ? 1 : 0);
+ mAttrs[i].mmap = (flags & PERF_GROUP_MMAP ? 1 : 0);
+ mAttrs[i].comm = (flags & PERF_GROUP_COMM ? 1 : 0);
+ mAttrs[i].freq = (flags & PERF_GROUP_FREQ ? 1 : 0);
+ mAttrs[i].task = (flags & PERF_GROUP_TASK ? 1 : 0);
+ mAttrs[i].sample_id_all = (flags & PERF_GROUP_SAMPLE_ID_ALL ? 1 : 0);
+ mPerCpu[i] = (flags & PERF_GROUP_PER_CPU);
+
+ mKeys[i] = key;
+
+ buffer->pea(currTime, &mAttrs[i], key);
+
+ return true;
+}
+
+int PerfGroup::prepareCPU(const int cpu, Monitor *const monitor) {
+ logg->logMessage("%s(%s:%i): Onlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu);
+
+ for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+ if (mKeys[i] < 0) {
+ continue;
+ }
+
+ if ((cpu != 0) && !mPerCpu[i]) {
+ continue;
+ }
+
+ const int offset = i * gSessionData->mCores;
+ if (mFds[cpu + offset] >= 0) {
+ logg->logMessage("%s(%s:%i): cpu already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__);
+ return PG_FAILURE;
+ }
+
+ logg->logMessage("%s(%s:%i): perf_event_open cpu: %i type: %lli config: %lli sample: %lli sample_type: 0x%llx pinned: %i mmap: %i comm: %i freq: %i task: %i sample_id_all: %i", __FUNCTION__, __FILE__, __LINE__, cpu, (long long)mAttrs[i].type, (long long)mAttrs[i].config, (long long)mAttrs[i].sample_period, (long long)mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all);
+ mFds[cpu + offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, i == 0 ? -1 : mFds[cpu], i == 0 ? 0 : PERF_FLAG_FD_OUTPUT);
+ if (mFds[cpu + offset] < 0) {
+ logg->logMessage("%s(%s:%i): failed %s", __FUNCTION__, __FILE__, __LINE__, strerror(errno));
+ if (errno == ENODEV) {
+ return PG_CPU_OFFLINE;
+ }
+ continue;
+ }
+
+ if (!mPb->useFd(cpu, mFds[cpu + offset])) {
+ logg->logMessage("%s(%s:%i): PerfBuffer::useFd failed", __FUNCTION__, __FILE__, __LINE__);
+ return PG_FAILURE;
+ }
+
+
+ logg->logMessage("%s(%s:%i): Monitor::add failed", __FUNCTION__, __FILE__, __LINE__);
+ return PG_FAILURE;
+ }
+ }
+
+ return PG_SUCCESS;
+}
+
+int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool start, Buffer *const buffer) {
+ __u64 ids[ARRAY_LENGTH(mKeys)];
+ int coreKeys[ARRAY_LENGTH(mKeys)];
+ int idCount = 0;
+
+ for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+ const int fd = mFds[cpu + i * gSessionData->mCores];
+ if (fd < 0) {
+ continue;
+ }
+
+ coreKeys[idCount] = mKeys[i];
+ if (!gSessionData->perf.getLegacySupport() && ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0 &&
+ // Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works
+ ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) {
+ logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
+ return 0;
+ }
+ ++idCount;
+ }
+
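+ // On 3.12+ kernels the ids were read above with PERF_EVENT_IOC_ID; on older
+ // kernels they are recovered by reading the group leader, whose
+ // PERF_FORMAT_GROUP layout pairs each counter value with its id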
+ if (!gSessionData->perf.getLegacySupport()) {
+ buffer->keys(currTime, idCount, ids, coreKeys);
+ } else {
+ char buf[1024];
+ ssize_t bytes = read(mFds[cpu], buf, sizeof(buf));
+ if (bytes < 0) {
+ logg->logMessage("read failed");
+ return 0;
+ }
+ buffer->keysOld(currTime, idCount, coreKeys, bytes, buf);
+ }
+
+ if (start) {
+ for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+ int offset = i * gSessionData->mCores + cpu;
+ if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_ENABLE, 0) < 0) {
+ logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
+ return 0;
+ }
+ }
+ }
+
+ if (idCount == 0) {
+ logg->logMessage("%s(%s:%i): no events came online", __FUNCTION__, __FILE__, __LINE__);
+ }
+
+ return idCount;
+}
+
+bool PerfGroup::offlineCPU(const int cpu) {
+ logg->logMessage("%s(%s:%i): Offlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu);
+
+ for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+ int offset = i * gSessionData->mCores + cpu;
+ if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_DISABLE, 0) < 0) {
+ logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ }
+
+ // Mark the buffer so that it will be released next time it's read
+ mPb->discard(cpu);
+
+ for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+ if (mKeys[i] < 0) {
+ continue;
+ }
+
+ int offset = i * gSessionData->mCores + cpu;
+ if (mFds[offset] >= 0) {
+ close(mFds[offset]);
+ mFds[offset] = -1;
+ }
+ }
+
+ return true;
+}
+
+bool PerfGroup::start() {
+ for (int pos = 0; pos < ARRAY_LENGTH(mFds); ++pos) {
+ if (mFds[pos] >= 0 && ioctl(mFds[pos], PERF_EVENT_IOC_ENABLE, 0) < 0) {
+ logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+ }
+
+ return true;
+
+ fail:
+ stop();
+
+ return false;
+}
+
+void PerfGroup::stop() {
+ for (int pos = ARRAY_LENGTH(mFds) - 1; pos >= 0; --pos) {
+ if (mFds[pos] >= 0) {
+ ioctl(mFds[pos], PERF_EVENT_IOC_DISABLE, 0);
+ }
+ }
+}
diff --git a/tools/gator/daemon/PerfGroup.h b/tools/gator/daemon/PerfGroup.h
new file mode 100644
index 000000000000..f7b3d725bac7
--- /dev/null
+++ b/tools/gator/daemon/PerfGroup.h
@@ -0,0 +1,65 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef PERF_GROUP
+#define PERF_GROUP
+
+#include <stdint.h>
+
+// Use a snapshot of perf_event.h as it may be more recent than what is on the target; if the target's copy is newer, its extra features would not be used here anyway
+#include "k/perf_event.h"
+
+#include "Config.h"
+
+class Buffer;
+class Monitor;
+class PerfBuffer;
+
+enum PerfGroupFlags {
+ PERF_GROUP_MMAP = 1 << 0,
+ PERF_GROUP_COMM = 1 << 1,
+ PERF_GROUP_FREQ = 1 << 2,
+ PERF_GROUP_TASK = 1 << 3,
+ PERF_GROUP_SAMPLE_ID_ALL = 1 << 4,
+ PERF_GROUP_PER_CPU = 1 << 5,
+};
+
+enum {
+ PG_SUCCESS = 0,
+ PG_FAILURE,
+ PG_CPU_OFFLINE,
+};
+
+class PerfGroup {
+public:
+ PerfGroup(PerfBuffer *const pb);
+ ~PerfGroup();
+
+ bool add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags);
+ // Safe to call concurrently
+ int prepareCPU(const int cpu, Monitor *const monitor);
+ // Not safe to call concurrently. Returns the number of events enabled
+ int onlineCPU(const uint64_t currTime, const int cpu, const bool start, Buffer *const buffer);
+ bool offlineCPU(int cpu);
+ bool start();
+ void stop();
+
+private:
+ // +1 for the group leader
+ struct perf_event_attr mAttrs[MAX_PERFORMANCE_COUNTERS + 1];
+ bool mPerCpu[MAX_PERFORMANCE_COUNTERS + 1];
+ int mKeys[MAX_PERFORMANCE_COUNTERS + 1];
+ int mFds[NR_CPUS * (MAX_PERFORMANCE_COUNTERS + 1)];
+ PerfBuffer *const mPb;
+
+ // Intentionally undefined
+ PerfGroup(const PerfGroup &);
+ PerfGroup &operator=(const PerfGroup &);
+};
+
+#endif // PERF_GROUP
diff --git a/tools/gator/daemon/PerfSource.cpp b/tools/gator/daemon/PerfSource.cpp
new file mode 100644
index 000000000000..193b7789a290
--- /dev/null
+++ b/tools/gator/daemon/PerfSource.cpp
@@ -0,0 +1,449 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "PerfSource.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "Child.h"
+#include "DynBuf.h"
+#include "Logging.h"
+#include "PerfDriver.h"
+#include "Proc.h"
+#include "SessionData.h"
+
+#ifndef SCHED_RESET_ON_FORK
+#define SCHED_RESET_ON_FORK 0x40000000
+#endif
+
+extern Child *child;
+
+static bool sendTracepointFormat(const uint64_t currTime, Buffer *const buffer, const char *const name, DynBuf *const printb, DynBuf *const b) {
+ if (!printb->printf(EVENTS_PATH "/%s/format", name)) {
+ logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ if (!b->read(printb->getBuf())) {
+ logg->logMessage("%s(%s:%i): DynBuf::read failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ buffer->format(currTime, b->getLength(), b->getBuf());
+
+ return true;
+}
+
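+// High-priority thread that wakes once a second; see PerfSource::prepare - it
+// is used to help keep perf time and CLOCK_MONOTONIC_RAW correlated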
+static void *syncFunc(void *arg)
+{
+ struct timespec ts;
+ int64_t nextTime = gSessionData->mMonotonicStarted;
+ int err;
+ (void)arg;
+
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-sync", 0, 0, 0);
+
+ // Mask all signals so that this thread will not be woken up
+ {
+ sigset_t set;
+ if (sigfillset(&set) != 0) {
+ logg->logError(__FILE__, __LINE__, "sigfillset failed");
+ handleException();
+ }
+ if ((err = pthread_sigmask(SIG_SETMASK, &set, NULL)) != 0) {
+ logg->logError(__FILE__, __LINE__, "pthread_sigmask failed");
+ handleException();
+ }
+ }
+
+ for (;;) {
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) {
+ logg->logError(__FILE__, __LINE__, "clock_gettime failed");
+ handleException();
+ }
+ const int64_t currTime = ts.tv_sec * NS_PER_S + ts.tv_nsec;
+
+ // Wake up once a second
+ nextTime += NS_PER_S;
+
+ // Always sleep more than 1 ms, hopefully things will line up better next time
+ const int64_t sleepTime = max(nextTime - currTime, (int64_t)(NS_PER_MS + 1));
+ ts.tv_sec = sleepTime/NS_PER_S;
+ ts.tv_nsec = sleepTime % NS_PER_S;
+
+ err = nanosleep(&ts, NULL);
+ if (err != 0) {
+ fprintf(stderr, "clock_nanosleep failed: %s\n", strerror(err));
+ return NULL;
+ }
+ }
+
+ return NULL;
+}
+
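+// Derive the core count by scanning /sys/devices/system/cpu for cpu<N>
+// entries; using the highest index + 1 means holes in the numbering do not
+// cause cores to be missed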
+static long getMaxCoreNum() {
+ DIR *dir = opendir("/sys/devices/system/cpu");
+ if (dir == NULL) {
+ logg->logError(__FILE__, __LINE__, "Unable to determine the number of cores on the target, opendir failed");
+ handleException();
+ }
+
+ long maxCoreNum = -1;
+ struct dirent *dirent;
+ while ((dirent = readdir(dir)) != NULL) {
+ if (strncmp(dirent->d_name, "cpu", 3) == 0) {
+ char *endptr;
+ errno = 0;
+ long coreNum = strtol(dirent->d_name + 3, &endptr, 10);
+ if ((errno == 0) && (*endptr == '\0') && (coreNum >= maxCoreNum)) {
+ maxCoreNum = coreNum + 1;
+ }
+ }
+ }
+ closedir(dir);
+
+ if (maxCoreNum < 1) {
+ logg->logError(__FILE__, __LINE__, "Unable to determine the number of cores on the target, no cpu# directories found");
+ handleException();
+ }
+
+ if (maxCoreNum >= NR_CPUS) {
+ logg->logError(__FILE__, __LINE__, "Too many cores on the target, please increase NR_CPUS in Config.h");
+ handleException();
+ }
+
+ return maxCoreNum;
+}
+
+PerfSource::PerfSource(sem_t *senderSem, sem_t *startProfile) : mSummary(0, FRAME_SUMMARY, 1024, senderSem), mBuffer(0, FRAME_PERF_ATTRS, 1024*1024, senderSem), mCountersBuf(), mCountersGroup(&mCountersBuf), mIdleGroup(&mCountersBuf), mMonitor(), mUEvent(), mSenderSem(senderSem), mStartProfile(startProfile), mInterruptFd(-1), mIsDone(false) {
+ long l = sysconf(_SC_PAGE_SIZE);
+ if (l < 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to obtain the page size");
+ handleException();
+ }
+ gSessionData->mPageSize = static_cast<int>(l);
+ gSessionData->mCores = static_cast<int>(getMaxCoreNum());
+}
+
+PerfSource::~PerfSource() {
+}
+
+bool PerfSource::prepare() {
+ DynBuf printb;
+ DynBuf b1;
+ long long schedSwitchId;
+ long long cpuIdleId;
+
+ const uint64_t currTime = getTime();
+
+ // Reread cpuinfo since cores may have changed since startup
+ gSessionData->readCpuInfo();
+
+ if (0
+ || !mMonitor.init()
+ || !mUEvent.init()
+ || !mMonitor.add(mUEvent.getFd())
+
+ || (schedSwitchId = PerfDriver::getTracepointId(SCHED_SWITCH, &printb)) < 0
+ || !sendTracepointFormat(currTime, &mBuffer, SCHED_SWITCH, &printb, &b1)
+
+ || (cpuIdleId = PerfDriver::getTracepointId(CPU_IDLE, &printb)) < 0
+ || !sendTracepointFormat(currTime, &mBuffer, CPU_IDLE, &printb, &b1)
+
+ // Only want RAW but not IP on sched_switch and don't want TID on SAMPLE_ID
+ || !mCountersGroup.add(currTime, &mBuffer, 100/**/, PERF_TYPE_TRACEPOINT, schedSwitchId, 1, PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL | PERF_GROUP_PER_CPU)
+ || !mIdleGroup.add(currTime, &mBuffer, 101/**/, PERF_TYPE_TRACEPOINT, cpuIdleId, 1, PERF_SAMPLE_RAW, PERF_GROUP_PER_CPU)
+
+ // Only want TID and IP but not RAW on timer
+ || (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && !mCountersGroup.add(currTime, &mBuffer, 102/**/, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP, PERF_GROUP_PER_CPU))
+
+ || !gSessionData->perf.enable(currTime, &mCountersGroup, &mBuffer)
+ || 0) {
+ logg->logMessage("%s(%s:%i): perf setup failed, are you running Linux 3.4 or later?", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+ const int result = mCountersGroup.prepareCPU(cpu, &mMonitor);
+ if ((result != PG_SUCCESS) && (result != PG_CPU_OFFLINE)) {
+ logg->logError(__FILE__, __LINE__, "PerfGroup::prepareCPU on mCountersGroup failed");
+ handleException();
+ }
+ }
+ for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+ const int result = mIdleGroup.prepareCPU(cpu, &mMonitor);
+ if ((result != PG_SUCCESS) && (result != PG_CPU_OFFLINE)) {
+ logg->logError(__FILE__, __LINE__, "PerfGroup::prepareCPU on mIdleGroup failed");
+ handleException();
+ }
+ }
+
+ int numEvents = 0;
+ for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+ numEvents += mCountersGroup.onlineCPU(currTime, cpu, false, &mBuffer);
+ }
+ for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+ numEvents += mIdleGroup.onlineCPU(currTime, cpu, false, &mBuffer);
+ }
+ if (numEvents <= 0) {
+ logg->logMessage("%s(%s:%i): PerfGroup::onlineCPU failed on all cores", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ // Send the summary right before the start so that the monotonic delta is close to the start time
+ if (!gSessionData->perf.summary(&mSummary)) {
+ logg->logError(__FILE__, __LINE__, "PerfDriver::summary failed", __FUNCTION__, __FILE__, __LINE__);
+ handleException();
+ }
+
+ // Start the timer thread used to sync perf and monotonic raw times
+ pthread_t syncThread;
+ if (pthread_create(&syncThread, NULL, syncFunc, NULL)) {
+ logg->logError(__FILE__, __LINE__, "pthread_create failed", __FUNCTION__, __FILE__, __LINE__);
+ handleException();
+ }
+ struct sched_param param;
+ param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+ if (pthread_setschedparam(syncThread, SCHED_FIFO | SCHED_RESET_ON_FORK, &param) != 0) {
+ logg->logError(__FILE__, __LINE__, "pthread_setschedparam failed");
+ handleException();
+ }
+
+ mBuffer.commit(currTime);
+
+ return true;
+}
+
+struct ProcThreadArgs {
+ Buffer *mBuffer;
+ uint64_t mCurrTime;
+ bool mIsDone;
+};
+
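+// Runs on its own thread at default priority so the potentially large
+// /proc/<pid>/maps and /proc/kallsyms reads stay off the profiling fast path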
+void *procFunc(void *arg) {
+ DynBuf printb;
+ DynBuf b;
+ const ProcThreadArgs *const args = (ProcThreadArgs *)arg;
+
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-proc", 0, 0, 0);
+
+ // Gator runs at a high priority, reset the priority to the default
+ if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) {
+ logg->logError(__FILE__, __LINE__, "setpriority failed");
+ handleException();
+ }
+
+ if (!readProcMaps(args->mCurrTime, args->mBuffer, &printb, &b)) {
+ logg->logError(__FILE__, __LINE__, "readProcMaps failed");
+ handleException();
+ }
+ args->mBuffer->commit(args->mCurrTime);
+
+ if (!readKallsyms(args->mCurrTime, args->mBuffer, &args->mIsDone)) {
+ logg->logError(__FILE__, __LINE__, "readKallsyms failed");
+ handleException();
+ }
+ args->mBuffer->commit(args->mCurrTime);
+
+ return NULL;
+}
+
+static const char CPU_DEVPATH[] = "/devices/system/cpu/cpu";
+
+void PerfSource::run() {
+ int pipefd[2];
+ pthread_t procThread;
+ ProcThreadArgs procThreadArgs;
+
+ {
+ DynBuf printb;
+ DynBuf b1;
+ DynBuf b2;
+
+ const uint64_t currTime = getTime();
+
+ // Start events before reading proc to avoid race conditions
+ if (!mCountersGroup.start() || !mIdleGroup.start()) {
+ logg->logError(__FILE__, __LINE__, "PerfGroup::start failed", __FUNCTION__, __FILE__, __LINE__);
+ handleException();
+ }
+
+ if (!readProcComms(currTime, &mBuffer, &printb, &b1, &b2)) {
+ logg->logError(__FILE__, __LINE__, "readProcComms failed");
+ handleException();
+ }
+ mBuffer.commit(currTime);
+
+ // Postpone reading kallsyms; on Android adb can get too backed up and data would be lost
+ procThreadArgs.mBuffer = &mBuffer;
+ procThreadArgs.mCurrTime = currTime;
+ procThreadArgs.mIsDone = false;
+ if (pthread_create(&procThread, NULL, procFunc, &procThreadArgs)) {
+ logg->logError(__FILE__, __LINE__, "pthread_create failed", __FUNCTION__, __FILE__, __LINE__);
+ handleException();
+ }
+ }
+
+ if (pipe_cloexec(pipefd) != 0) {
+ logg->logError(__FILE__, __LINE__, "pipe failed");
+ handleException();
+ }
+ mInterruptFd = pipefd[1];
+
+ if (!mMonitor.add(pipefd[0])) {
+ logg->logError(__FILE__, __LINE__, "Monitor::add failed");
+ handleException();
+ }
+
+ int timeout = -1;
+ if (gSessionData->mLiveRate > 0) {
+ timeout = gSessionData->mLiveRate/NS_PER_MS;
+ }
+
+ sem_post(mStartProfile);
+
+ while (gSessionData->mSessionIsActive) {
+ // +1 for uevents, +1 for pipe
+ struct epoll_event events[NR_CPUS + 2];
+ int ready = mMonitor.wait(events, ARRAY_LENGTH(events), timeout);
+ if (ready < 0) {
+ logg->logError(__FILE__, __LINE__, "Monitor::wait failed");
+ handleException();
+ }
+ const uint64_t currTime = getTime();
+
+ for (int i = 0; i < ready; ++i) {
+ if (events[i].data.fd == mUEvent.getFd()) {
+ if (!handleUEvent(currTime)) {
+ logg->logError(__FILE__, __LINE__, "PerfSource::handleUEvent failed");
+ handleException();
+ }
+ break;
+ }
+ }
+
+ // send a notification that data is ready
+ sem_post(mSenderSem);
+
+ // In one shot mode, stop collection once all the buffers are filled
+ // Assume timeout == 0 in this case
+ if (gSessionData->mOneShot && gSessionData->mSessionIsActive) {
+ logg->logMessage("%s(%s:%i): One shot", __FUNCTION__, __FILE__, __LINE__);
+ child->endSession();
+ }
+ }
+
+ procThreadArgs.mIsDone = true;
+ pthread_join(procThread, NULL);
+ mIdleGroup.stop();
+ mCountersGroup.stop();
+ mBuffer.setDone();
+ mIsDone = true;
+
+ // send a notification that data is ready
+ sem_post(mSenderSem);
+
+ mInterruptFd = -1;
+ close(pipefd[0]);
+ close(pipefd[1]);
+}
+
+bool PerfSource::handleUEvent(const uint64_t currTime) {
+ UEventResult result;
+ if (!mUEvent.read(&result)) {
+ logg->logMessage("%s(%s:%i): UEvent::Read failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ if (strcmp(result.mSubsystem, "cpu") == 0) {
+ if (strncmp(result.mDevPath, CPU_DEVPATH, sizeof(CPU_DEVPATH) - 1) != 0) {
+ logg->logMessage("%s(%s:%i): Unexpected cpu DEVPATH format", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ char *endptr;
+ errno = 0;
+ int cpu = strtol(result.mDevPath + sizeof(CPU_DEVPATH) - 1, &endptr, 10);
+ if (errno != 0 || *endptr != '\0') {
+ logg->logMessage("%s(%s:%i): strtol failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ if (cpu >= gSessionData->mCores) {
+ logg->logError(__FILE__, __LINE__, "Only %i cores are expected but core %i reports %s", gSessionData->mCores, cpu, result.mAction);
+ handleException();
+ }
+
+ if (strcmp(result.mAction, "online") == 0) {
+ mBuffer.onlineCPU(currTime, currTime - gSessionData->mMonotonicStarted, cpu);
+ // Only call onlineCPU if prepareCPU succeeded
+ bool result = false;
+ int err = mCountersGroup.prepareCPU(cpu, &mMonitor);
+ if (err == PG_CPU_OFFLINE) {
+ result = true;
+ } else if (err == PG_SUCCESS) {
+ if (mCountersGroup.onlineCPU(currTime, cpu, true, &mBuffer)) {
+ err = mIdleGroup.prepareCPU(cpu, &mMonitor);
+ if (err == PG_CPU_OFFLINE) {
+ result = true;
+ } else if (err == PG_SUCCESS) {
+ if (mIdleGroup.onlineCPU(currTime, cpu, true, &mBuffer)) {
+ result = true;
+ }
+ }
+ }
+ }
+ mBuffer.commit(currTime);
+
+ gSessionData->readCpuInfo();
+ gSessionData->perf.coreName(currTime, &mSummary, cpu);
+ mSummary.commit(currTime);
+ return result;
+ } else if (strcmp(result.mAction, "offline") == 0) {
+ const bool result = mCountersGroup.offlineCPU(cpu) && mIdleGroup.offlineCPU(cpu);
+ mBuffer.offlineCPU(currTime, currTime - gSessionData->mMonotonicStarted, cpu);
+ return result;
+ }
+ }
+
+ return true;
+}
+
+void PerfSource::interrupt() {
+ if (mInterruptFd >= 0) {
+ int8_t c = 0;
+ // Write to the pipe to wake the monitor which will cause mSessionIsActive to be reread
+ if (::write(mInterruptFd, &c, sizeof(c)) != sizeof(c)) {
+ logg->logError(__FILE__, __LINE__, "write failed");
+ handleException();
+ }
+ }
+}
+
+bool PerfSource::isDone() {
+ return mBuffer.isDone() && mIsDone && mCountersBuf.isEmpty();
+}
+
+void PerfSource::write(Sender *sender) {
+ if (!mSummary.isDone()) {
+ mSummary.write(sender);
+ gSessionData->mSentSummary = true;
+ }
+ if (!mBuffer.isDone()) {
+ mBuffer.write(sender);
+ }
+ if (!mCountersBuf.send(sender)) {
+ logg->logError(__FILE__, __LINE__, "PerfBuffer::send failed");
+ handleException();
+ }
+}
diff --git a/tools/gator/daemon/PerfSource.h b/tools/gator/daemon/PerfSource.h
new file mode 100644
index 000000000000..ce1eafe8e953
--- /dev/null
+++ b/tools/gator/daemon/PerfSource.h
@@ -0,0 +1,55 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef PERFSOURCE_H
+#define PERFSOURCE_H
+
+#include <semaphore.h>
+
+#include "Buffer.h"
+#include "Monitor.h"
+#include "PerfBuffer.h"
+#include "PerfGroup.h"
+#include "Source.h"
+#include "UEvent.h"
+
+class Sender;
+
+class PerfSource : public Source {
+public:
+ PerfSource(sem_t *senderSem, sem_t *startProfile);
+ ~PerfSource();
+
+ bool prepare();
+ void run();
+ void interrupt();
+
+ bool isDone();
+ void write(Sender *sender);
+
+private:
+ bool handleUEvent(const uint64_t currTime);
+
+ Buffer mSummary;
+ Buffer mBuffer;
+ PerfBuffer mCountersBuf;
+ PerfGroup mCountersGroup;
+ PerfGroup mIdleGroup;
+ Monitor mMonitor;
+ UEvent mUEvent;
+ sem_t *const mSenderSem;
+ sem_t *const mStartProfile;
+ int mInterruptFd;
+ bool mIsDone;
+
+ // Intentionally undefined
+ PerfSource(const PerfSource &);
+ PerfSource &operator=(const PerfSource &);
+};
+
+#endif // PERFSOURCE_H
diff --git a/tools/gator/daemon/Proc.cpp b/tools/gator/daemon/Proc.cpp
new file mode 100644
index 000000000000..e6b26b1199fa
--- /dev/null
+++ b/tools/gator/daemon/Proc.cpp
@@ -0,0 +1,312 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Proc.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "DynBuf.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+struct ProcStat {
+ // From linux-dev/include/linux/sched.h
+#define TASK_COMM_LEN 16
+ // TASK_COMM_LEN may grow, so be ready for it to get larger
+ char comm[2*TASK_COMM_LEN];
+ long numThreads;
+};
+
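+// Parse /proc/<pid>/stat. comm is taken as everything between the first '('
+// and the last ')' since it may itself contain spaces or parentheses;
+// num_threads is the 20th field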
+static bool readProcStat(ProcStat *const ps, const char *const pathname, DynBuf *const b) {
+ if (!b->read(pathname)) {
+ logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the thread exited", __FUNCTION__, __FILE__, __LINE__);
+ // This is not a fatal error - the thread just doesn't exist any more
+ return true;
+ }
+
+ char *comm = strchr(b->getBuf(), '(');
+ if (comm == NULL) {
+ logg->logMessage("%s(%s:%i): parsing stat failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ ++comm;
+ char *const str = strrchr(comm, ')');
+ if (str == NULL) {
+ logg->logMessage("%s(%s:%i): parsing stat failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+ *str = '\0';
+ strncpy(ps->comm, comm, sizeof(ps->comm) - 1);
+ ps->comm[sizeof(ps->comm) - 1] = '\0';
+
+ const int count = sscanf(str + 2, " %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &ps->numThreads);
+ if (count != 1) {
+ logg->logMessage("%s(%s:%i): sscanf failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ return true;
+}
+
+static const char APP_PROCESS[] = "app_process";
+
+static const char *readProcExe(DynBuf *const printb, const int pid, const int tid, DynBuf *const b) {
+ if (tid == -1 ? !printb->printf("/proc/%i/exe", pid)
+ : !printb->printf("/proc/%i/task/%i/exe", pid, tid)) {
+ logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+ return NULL;
+ }
+
+ const int err = b->readlink(printb->getBuf());
+ const char *image;
+ if (err == 0) {
+ image = strrchr(b->getBuf(), '/');
+ if (image == NULL) {
+ image = b->getBuf();
+ } else {
+ ++image;
+ }
+ } else if (err == -ENOENT) {
+ // readlink /proc/[pid]/exe returns ENOENT for kernel threads
+ image = "\0";
+ } else {
+ logg->logMessage("%s(%s:%i): DynBuf::readlink failed", __FUNCTION__, __FILE__, __LINE__);
+ return NULL;
+ }
+
+ // Android apps are run by app_process but the cmdline is changed to reference the actual app name
+ // On 64-bit android app_process can be app_process32 or app_process64
+ if (strncmp(image, APP_PROCESS, sizeof(APP_PROCESS) - 1) != 0) {
+ return image;
+ }
+
+ if (tid == -1 ? !printb->printf("/proc/%i/cmdline", pid)
+ : !printb->printf("/proc/%i/task/%i/cmdline", pid, tid)) {
+ logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+ return NULL;
+ }
+
+ if (!b->read(printb->getBuf())) {
+ logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the thread exited", __FUNCTION__, __FILE__, __LINE__);
+ return NULL;
+ }
+
+ return b->getBuf();
+}
+
+static bool readProcTask(const uint64_t currTime, Buffer *const buffer, const int pid, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2) {
+ bool result = false;
+
+ if (!b1->printf("/proc/%i/task", pid)) {
+ logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+ return result;
+ }
+ DIR *task = opendir(b1->getBuf());
+ if (task == NULL) {
+ logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__);
+ // This is not a fatal error - the thread just doesn't exist any more
+ return true;
+ }
+
+ struct dirent *dirent;
+ while ((dirent = readdir(task)) != NULL) {
+ char *endptr;
+ const int tid = strtol(dirent->d_name, &endptr, 10);
+ if (*endptr != '\0') {
+ // Ignore task items that are not integers like ., etc...
+ continue;
+ }
+
+ if (!printb->printf("/proc/%i/task/%i/stat", pid, tid)) {
+ logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+ ProcStat ps;
+ if (!readProcStat(&ps, printb->getBuf(), b1)) {
+ logg->logMessage("%s(%s:%i): readProcStat failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+
+ const char *const image = readProcExe(printb, pid, tid, b2);
+ if (image == NULL) {
+ logg->logMessage("%s(%s:%i): readImage failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+
+ buffer->comm(currTime, pid, tid, image, ps.comm);
+ }
+
+ result = true;
+
+ fail:
+ closedir(task);
+
+ return result;
+}
+
+bool readProcComms(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2) {
+ bool result = false;
+
+ DIR *proc = opendir("/proc");
+ if (proc == NULL) {
+ logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__);
+ return result;
+ }
+
+ struct dirent *dirent;
+ while ((dirent = readdir(proc)) != NULL) {
+ char *endptr;
+ const int pid = strtol(dirent->d_name, &endptr, 10);
+ if (*endptr != '\0') {
+ // Ignore proc items that are not integers like ., cpuinfo, etc...
+ continue;
+ }
+
+ if (!printb->printf("/proc/%i/stat", pid)) {
+ logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+ ProcStat ps;
+ if (!readProcStat(&ps, printb->getBuf(), b1)) {
+ logg->logMessage("%s(%s:%i): readProcStat failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+
+ if (ps.numThreads <= 1) {
+ const char *const image = readProcExe(printb, pid, -1, b1);
+ if (image == NULL) {
+ logg->logMessage("%s(%s:%i): readImage failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+
+ buffer->comm(currTime, pid, pid, image, ps.comm);
+ } else {
+ if (!readProcTask(currTime, buffer, pid, printb, b1, b2)) {
+ logg->logMessage("%s(%s:%i): readProcTask failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+ }
+ }
+
+ result = true;
+
+ fail:
+ closedir(proc);
+
+ return result;
+}
+
+bool readProcMaps(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b) {
+ bool result = false;
+
+ DIR *proc = opendir("/proc");
+ if (proc == NULL) {
+ logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__);
+ return result;
+ }
+
+ struct dirent *dirent;
+ while ((dirent = readdir(proc)) != NULL) {
+ char *endptr;
+ const int pid = strtol(dirent->d_name, &endptr, 10);
+ if (*endptr != '\0') {
+ // Ignore proc items that are not integers like ., cpuinfo, etc...
+ continue;
+ }
+
+ if (!printb->printf("/proc/%i/maps", pid)) {
+ logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+ goto fail;
+ }
+ if (!b->read(printb->getBuf())) {
+ logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the process exited", __FUNCTION__, __FILE__, __LINE__);
+ // This is not a fatal error - the process just doesn't exist any more
+ continue;
+ }
+
+ buffer->maps(currTime, pid, pid, b->getBuf());
+ }
+
+ result = true;
+
+ fail:
+ closedir(proc);
+
+ return result;
+}
+
+bool readKallsyms(const uint64_t currTime, Buffer *const buffer, const bool *const isDone) {
+ int fd = ::open("/proc/kallsyms", O_RDONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ logg->logMessage("%s(%s:%i): open failed", __FUNCTION__, __FILE__, __LINE__);
+ return true;
+ }
+
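+ // Stream /proc/kallsyms in newline-aligned chunks, throttled below so a
+ // slow connection is not flooded with several MB of symbols at once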
+ char buf[1<<12];
+ ssize_t pos = 0;
+ while (gSessionData->mSessionIsActive && !ACCESS_ONCE(*isDone)) {
+ // Assert there is still space in the buffer
+ if (sizeof(buf) - pos - 1 == 0) {
+ logg->logError(__FILE__, __LINE__, "no space left in buffer");
+ handleException();
+ }
+
+ {
+ // -1 to reserve space for \0
+ const ssize_t bytes = ::read(fd, buf + pos, sizeof(buf) - pos - 1);
+ if (bytes < 0) {
+ logg->logError(__FILE__, __LINE__, "read failed", __FUNCTION__, __FILE__, __LINE__);
+ handleException();
+ }
+ if (bytes == 0) {
+ // Assert the buffer is empty
+ if (pos != 0) {
+ logg->logError(__FILE__, __LINE__, "buffer not empty on eof");
+ handleException();
+ }
+ break;
+ }
+ pos += bytes;
+ }
+
+ ssize_t newline;
+ // Find the last '\n'
+ for (newline = pos - 1; newline >= 0; --newline) {
+ if (buf[newline] == '\n') {
+ const char was = buf[newline + 1];
+ buf[newline + 1] = '\0';
+ buffer->kallsyms(currTime, buf);
+ // Sleep 3 ms to avoid sending out too much data too quickly
+ usleep(3000);
+ buf[0] = was;
+ // Assert the memory regions do not overlap
+ if (pos - newline >= newline + 1) {
+ logg->logError(__FILE__, __LINE__, "memcpy src and dst overlap");
+ handleException();
+ }
+ if (pos - newline - 2 > 0) {
+ memcpy(buf + 1, buf + newline + 2, pos - newline - 2);
+ }
+ pos -= newline + 1;
+ break;
+ }
+ }
+ }
+
+ close(fd);
+
+ return true;
+}
diff --git a/tools/gator/daemon/Proc.h b/tools/gator/daemon/Proc.h
new file mode 100644
index 000000000000..2a1a7cbc1e99
--- /dev/null
+++ b/tools/gator/daemon/Proc.h
@@ -0,0 +1,21 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef PROC_H
+#define PROC_H
+
+#include <stdint.h>
+
+class Buffer;
+class DynBuf;
+
+bool readProcComms(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2);
+bool readProcMaps(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b);
+bool readKallsyms(const uint64_t currTime, Buffer *const buffer, const bool *const isDone);
+
+#endif // PROC_H
diff --git a/tools/gator/daemon/Sender.cpp b/tools/gator/daemon/Sender.cpp
new file mode 100644
index 000000000000..8a54a6678974
--- /dev/null
+++ b/tools/gator/daemon/Sender.cpp
@@ -0,0 +1,129 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Sender.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "Logging.h"
+#include "OlySocket.h"
+#include "SessionData.h"
+
+Sender::Sender(OlySocket* socket) {
+ mDataFile = NULL;
+ mDataSocket = NULL;
+
+ // Set up the socket connection
+ if (socket) {
+ char streamline[64] = {0};
+ mDataSocket = socket;
+
+ // Receive magic sequence - can wait forever
+ // Streamline will send data prior to the magic sequence for legacy support, which should be ignored for v4+
+ while (strcmp("STREAMLINE", streamline) != 0) {
+ if (mDataSocket->receiveString(streamline, sizeof(streamline)) == -1) {
+ logg->logError(__FILE__, __LINE__, "Socket disconnected");
+ handleException();
+ }
+ }
+
+ // Send magic sequence - must be done first, after which error messages can be sent
+ char magic[32];
+ snprintf(magic, 32, "GATOR %i\n", PROTOCOL_VERSION);
+ mDataSocket->send(magic, strlen(magic));
+
+ gSessionData->mWaitingOnCommand = true;
+ logg->logMessage("Completed magic sequence");
+ }
+
+ pthread_mutex_init(&mSendMutex, NULL);
+}
+
+Sender::~Sender() {
+ // Just close it as the client socket is on the stack
+ if (mDataSocket != NULL) {
+ mDataSocket->closeSocket();
+ mDataSocket = NULL;
+ }
+ if (mDataFile != NULL) {
+ fclose(mDataFile);
+ }
+}
+
+void Sender::createDataFile(char* apcDir) {
+ if (apcDir == NULL) {
+ return;
+ }
+
+ mDataFileName = (char*)malloc(strlen(apcDir) + 12);
+ sprintf(mDataFileName, "%s/0000000000", apcDir);
+ mDataFile = fopen_cloexec(mDataFileName, "wb");
+ if (!mDataFile) {
+ logg->logError(__FILE__, __LINE__, "Failed to open binary file: %s", mDataFileName);
+ handleException();
+ }
+}
+
+void Sender::writeData(const char* data, int length, int type) {
+ if (length < 0 || (data == NULL && length > 0)) {
+ return;
+ }
+
+ // Multiple threads call writeData()
+ pthread_mutex_lock(&mSendMutex);
+
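+ // The alarm acts as a watchdog: chunks are sized to complete within
+ // alarmDuration even on a slow link, so if a send blocks for longer the
+ // pending SIGALRM fires rather than leaving gatord hung forever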
+ // Send data over the socket connection
+ if (mDataSocket) {
+ // Start alarm
+ const int alarmDuration = 8;
+ alarm(alarmDuration);
+
+ // Send data over the socket, sending the type and size first
+ logg->logMessage("Sending data with length %d", length);
+ if (type != RESPONSE_APC_DATA) {
+ // type and length already added by the Collector for apc data
+ unsigned char header[5];
+ header[0] = type;
+ Buffer::writeLEInt(header + 1, length);
+ mDataSocket->send((char*)&header, sizeof(header));
+ }
+
+ // 100Kbits/sec * alarmDuration sec / 8 bits/byte
+ const int chunkSize = 100*1000 * alarmDuration / 8;
+ int pos = 0;
+ while (true) {
+ mDataSocket->send((const char*)data + pos, min(length - pos, chunkSize));
+ pos += chunkSize;
+ if (pos >= length) {
+ break;
+ }
+
+ // Reset the alarm
+ alarm(alarmDuration);
+ logg->logMessage("Resetting the alarm");
+ }
+
+ // Stop alarm
+ alarm(0);
+ }
+
+ // Write data to disk as long as it is not meta data
+ if (mDataFile && type == RESPONSE_APC_DATA) {
+ logg->logMessage("Writing data with length %d", length);
+ // Send data to the data file
+ if (fwrite(data, 1, length, mDataFile) != (unsigned int)length) {
+ logg->logError(__FILE__, __LINE__, "Failed writing binary file %s", mDataFileName);
+ handleException();
+ }
+ }
+
+ pthread_mutex_unlock(&mSendMutex);
+}
diff --git a/tools/gator/daemon/Sender.h b/tools/gator/daemon/Sender.h
new file mode 100644
index 000000000000..5aa911713820
--- /dev/null
+++ b/tools/gator/daemon/Sender.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __SENDER_H__
+#define __SENDER_H__
+
+#include <stdio.h>
+#include <pthread.h>
+
+class OlySocket;
+
+enum {
+ RESPONSE_XML = 1,
+ RESPONSE_APC_DATA = 3,
+ RESPONSE_ACK = 4,
+ RESPONSE_NAK = 5,
+ RESPONSE_ERROR = 0xFF
+};
+
+class Sender {
+public:
+ Sender(OlySocket* socket);
+ ~Sender();
+ void writeData(const char* data, int length, int type);
+ void createDataFile(char* apcDir);
+private:
+ OlySocket* mDataSocket;
+ FILE* mDataFile;
+ char* mDataFileName;
+ pthread_mutex_t mSendMutex;
+
+ // Intentionally unimplemented
+ Sender(const Sender &);
+ Sender &operator=(const Sender &);
+};
+
+#endif //__SENDER_H__
diff --git a/tools/gator/daemon/SessionData.cpp b/tools/gator/daemon/SessionData.cpp
new file mode 100644
index 000000000000..0e65d7842647
--- /dev/null
+++ b/tools/gator/daemon/SessionData.cpp
@@ -0,0 +1,261 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "SessionData.h"
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "CPUFreqDriver.h"
+#include "DiskIODriver.h"
+#include "FSDriver.h"
+#include "HwmonDriver.h"
+#include "Logging.h"
+#include "MemInfoDriver.h"
+#include "NetDriver.h"
+#include "SessionXML.h"
+
+#define CORE_NAME_UNKNOWN "unknown"
+
+SessionData* gSessionData = NULL;
+
+SessionData::SessionData() {
+ usDrivers[0] = new HwmonDriver();
+ usDrivers[1] = new FSDriver();
+ usDrivers[2] = new MemInfoDriver();
+ usDrivers[3] = new NetDriver();
+ usDrivers[4] = new CPUFreqDriver();
+ usDrivers[5] = new DiskIODriver();
+ initialize();
+}
+
+SessionData::~SessionData() {
+}
+
+void SessionData::initialize() {
+ mWaitingOnCommand = false;
+ mSessionIsActive = false;
+ mLocalCapture = false;
+ mOneShot = false;
+ mSentSummary = false;
+ mAllowCommands = false;
+ const size_t cpuIdSize = sizeof(int)*NR_CPUS;
+ // Share mCpuIds across all instances of gatord
+ mCpuIds = (int *)mmap(NULL, cpuIdSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (mCpuIds == MAP_FAILED) {
+ logg->logError(__FILE__, __LINE__, "Unable to mmap shared memory for cpuids");
+ handleException();
+ }
+ memset(mCpuIds, -1, cpuIdSize);
+ strcpy(mCoreName, CORE_NAME_UNKNOWN);
+ readModel();
+ readCpuInfo();
+ mImages = NULL;
+ mConfigurationXMLPath = NULL;
+ mSessionXMLPath = NULL;
+ mEventsXMLPath = NULL;
+ mTargetPath = NULL;
+ mAPCDir = NULL;
+ mCaptureWorkingDir = NULL;
+ mCaptureCommand = NULL;
+ mCaptureUser = NULL;
+ mSampleRate = 0;
+ mLiveRate = 0;
+ mDuration = 0;
+ mMonotonicStarted = -1;
+ mBacktraceDepth = 0;
+ mTotalBufferSize = 0;
+ // sysconf(_SC_NPROCESSORS_CONF) is unreliable on 2.6 Android, get the value from the kernel module
+ mCores = 1;
+ mPageSize = 0;
+}
+
+void SessionData::parseSessionXML(char* xmlString) {
+ SessionXML session(xmlString);
+ session.parse();
+
+ // Set session data values - use prime numbers just below the desired value to reduce the chance of events firing at the same time
+ if (strcmp(session.parameters.sample_rate, "high") == 0) {
+ mSampleRate = 9973; // 10000
+ } else if (strcmp(session.parameters.sample_rate, "normal") == 0) {
+ mSampleRate = 997; // 1000
+ } else if (strcmp(session.parameters.sample_rate, "low") == 0) {
+ mSampleRate = 97; // 100
+ } else if (strcmp(session.parameters.sample_rate, "none") == 0) {
+ mSampleRate = 0;
+ } else {
+ logg->logError(__FILE__, __LINE__, "Invalid sample rate (%s) in session xml.", session.parameters.sample_rate);
+ handleException();
+ }
+ mBacktraceDepth = session.parameters.call_stack_unwinding == true ? 128 : 0;
+
+ // Determine buffer size (in MB) based on buffer mode
+ mOneShot = true;
+ if (strcmp(session.parameters.buffer_mode, "streaming") == 0) {
+ mOneShot = false;
+ mTotalBufferSize = 1;
+ } else if (strcmp(session.parameters.buffer_mode, "small") == 0) {
+ mTotalBufferSize = 1;
+ } else if (strcmp(session.parameters.buffer_mode, "normal") == 0) {
+ mTotalBufferSize = 4;
+ } else if (strcmp(session.parameters.buffer_mode, "large") == 0) {
+ mTotalBufferSize = 16;
+ } else {
+ logg->logError(__FILE__, __LINE__, "Invalid value for buffer mode in session xml.");
+ handleException();
+ }
+
+ // Convert milli- to nanoseconds
+ mLiveRate = session.parameters.live_rate * (int64_t)1000000;
+ if (mLiveRate > 0 && mLocalCapture) {
+ logg->logMessage("Local capture is not compatable with live, disabling live");
+ mLiveRate = 0;
+ }
+
+ if (!mAllowCommands && (mCaptureCommand != NULL)) {
+ logg->logError(__FILE__, __LINE__, "Running a command during a capture is not currently allowed. Please restart gatord with the -a flag.");
+ handleException();
+ }
+}
+
+void SessionData::readModel() {
+ FILE *fh = fopen("/proc/device-tree/model", "rb");
+ if (fh == NULL) {
+ return;
+ }
+
+ char buf[256];
+ if (fgets(buf, sizeof(buf), fh) != NULL) {
+ // buf may be larger than mCoreName, so bound the copy
+ strncpy(mCoreName, buf, sizeof(mCoreName));
+ mCoreName[sizeof(mCoreName) - 1] = 0; // strncpy does not guarantee a null-terminated string
+ }
+
+ fclose(fh);
+}
+
+void SessionData::readCpuInfo() {
+ char temp[256]; // arbitrarily large amount
+ mMaxCpuId = -1;
+
+ FILE *f = fopen("/proc/cpuinfo", "r");
+ if (f == NULL) {
+ logg->logMessage("Error opening /proc/cpuinfo\n"
+ "The core name in the captured xml file will be 'unknown'.");
+ return;
+ }
+
+ bool foundCoreName = false;
+ int processor = -1;
+ while (fgets(temp, sizeof(temp), f)) {
+ const size_t len = strlen(temp);
+
+ if (len == 1) {
+ // New section, clear the processor so that if the pre-Linux-3.8 format of cpuinfo is encountered Streamline will simply not know the cpus rather than be sent incorrect information.
+ processor = -1;
+ continue;
+ }
+
+ if (len > 0) {
+ // Replace the line feed with a null
+ temp[len - 1] = '\0';
+ }
+
+ const bool foundHardware = strstr(temp, "Hardware") != 0;
+ const bool foundCPUPart = strstr(temp, "CPU part") != 0;
+ const bool foundProcessor = strstr(temp, "processor") != 0;
+ if (foundHardware || foundCPUPart || foundProcessor) {
+ char* position = strchr(temp, ':');
+ if (position == NULL || (unsigned int)(position - temp) + 2 >= strlen(temp)) {
+ logg->logMessage("Unknown format of /proc/cpuinfo\n"
+ "The core name in the captured xml file will be 'unknown'.");
+ return;
+ }
+ position += 2;
+
+ if (foundHardware && (strcmp(mCoreName, CORE_NAME_UNKNOWN) == 0)) {
+ strncpy(mCoreName, position, sizeof(mCoreName));
+ mCoreName[sizeof(mCoreName) - 1] = 0; // strncpy does not guarantee a null-terminated string
+ foundCoreName = true;
+ }
+
+ if (foundCPUPart) {
+ const int cpuId = strtol(position, NULL, 0);
+ // Even if /proc/cpuinfo lacks the full topology and the one CPU part is never stored in mCpuIds[0], tracking the maximum here guarantees it ends up in mMaxCpuId
+ if (cpuId > mMaxCpuId) {
+ mMaxCpuId = cpuId;
+ }
+ if (processor >= NR_CPUS) {
+ logg->logMessage("Too many processors, please increase NR_CPUS");
+ } else if (processor >= 0) {
+ mCpuIds[processor] = cpuId;
+ }
+ }
+
+ if (foundProcessor) {
+ processor = strtol(position, NULL, 0);
+ }
+ }
+ }
+
+ if (!foundCoreName) {
+ logg->logMessage("Could not determine core name from /proc/cpuinfo\n"
+ "The core name in the captured xml file will be 'unknown'.");
+ }
+ fclose(f);
+}
+
+uint64_t getTime() {
+ struct timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) {
+ logg->logError(__FILE__, __LINE__, "Failed to get uptime");
+ handleException();
+ }
+ return (NS_PER_S*ts.tv_sec + ts.tv_nsec);
+}
+
+int getEventKey() {
+ // key 0 is reserved as a timestamp
+ // key 1 is reserved as the marker for thread specific counters
+ // key 2 is reserved as the marker for core
+ // Odd keys are assigned by the driver, even keys by the daemon
+ static int key = 4;
+
+ const int ret = key;
+ key += 2;
+ return ret;
+}
+
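+// pipe2(..., O_CLOEXEC) would set the flag atomically, but it is not available
+// on all supported targets, so fall back to fcntl after the fact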
+int pipe_cloexec(int pipefd[2]) {
+ if (pipe(pipefd) != 0) {
+ return -1;
+ }
+
+ int fdf;
+ if (((fdf = fcntl(pipefd[0], F_GETFD)) == -1) || (fcntl(pipefd[0], F_SETFD, fdf | FD_CLOEXEC) != 0) ||
+ ((fdf = fcntl(pipefd[1], F_GETFD)) == -1) || (fcntl(pipefd[1], F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -1;
+ }
+ return 0;
+}
+
+FILE *fopen_cloexec(const char *path, const char *mode) {
+ FILE *fh = fopen(path, mode);
+ if (fh == NULL) {
+ return NULL;
+ }
+ int fd = fileno(fh);
+ int fdf = fcntl(fd, F_GETFD);
+ if ((fdf == -1) || (fcntl(fd, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+ fclose(fh);
+ return NULL;
+ }
+ return fh;
+}
diff --git a/tools/gator/daemon/SessionData.h b/tools/gator/daemon/SessionData.h
new file mode 100644
index 000000000000..ed282af4a869
--- /dev/null
+++ b/tools/gator/daemon/SessionData.h
@@ -0,0 +1,102 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SESSION_DATA_H
+#define SESSION_DATA_H
+
+#include <stdint.h>
+
+#include "AnnotateListener.h"
+#include "Config.h"
+#include "Counter.h"
+#include "FtraceDriver.h"
+#include "KMod.h"
+#include "MaliVideoDriver.h"
+#include "PerfDriver.h"
+
+#define PROTOCOL_VERSION 20
+// Differentiates development versions (timestamp) from release versions
+#define PROTOCOL_DEV 1000
+
+#define NS_PER_S 1000000000LL
+#define NS_PER_MS 1000000LL
+#define NS_PER_US 1000LL
+
+struct ImageLinkList {
+ char* path;
+ struct ImageLinkList *next;
+};
+
+class SessionData {
+public:
+ static const size_t MAX_STRING_LEN = 80;
+
+ SessionData();
+ ~SessionData();
+ void initialize();
+ void parseSessionXML(char* xmlString);
+ void readModel();
+ void readCpuInfo();
+
+ PolledDriver *usDrivers[6];
+ KMod kmod;
+ PerfDriver perf;
+ MaliVideoDriver maliVideo;
+ FtraceDriver ftraceDriver;
+ AnnotateListener annotateListener;
+
+ char mCoreName[MAX_STRING_LEN];
+ struct ImageLinkList *mImages;
+ char *mConfigurationXMLPath;
+ char *mSessionXMLPath;
+ char *mEventsXMLPath;
+ char *mTargetPath;
+ char *mAPCDir;
+ char *mCaptureWorkingDir;
+ char *mCaptureCommand;
+ char *mCaptureUser;
+
+ bool mWaitingOnCommand;
+ bool mSessionIsActive;
+ bool mLocalCapture;
+ // one-shot mode: end the capture once the collection buffer fills
+ bool mOneShot;
+ bool mIsEBS;
+ bool mSentSummary;
+ bool mAllowCommands;
+
+ int64_t mMonotonicStarted;
+ int mBacktraceDepth;
+ // number of MB to use for the entire collection buffer
+ int mTotalBufferSize;
+ int mSampleRate;
+ int64_t mLiveRate;
+ int mDuration;
+ int mCores;
+ int mPageSize;
+ int *mCpuIds;
+ int mMaxCpuId;
+
+ // PMU Counters
+ int mCounterOverflow;
+ Counter mCounters[MAX_PERFORMANCE_COUNTERS];
+
+private:
+ // Intentionally unimplemented
+ SessionData(const SessionData &);
+ SessionData &operator=(const SessionData &);
+};
+
+extern SessionData* gSessionData;
+
+uint64_t getTime();
+int getEventKey();
+int pipe_cloexec(int pipefd[2]);
+FILE *fopen_cloexec(const char *path, const char *mode);
+
+#endif // SESSION_DATA_H
diff --git a/tools/gator/daemon/SessionXML.cpp b/tools/gator/daemon/SessionXML.cpp
new file mode 100644
index 000000000000..dea4c8f299ec
--- /dev/null
+++ b/tools/gator/daemon/SessionXML.cpp
@@ -0,0 +1,111 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "SessionXML.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include "Logging.h"
+#include "OlyUtility.h"
+#include "SessionData.h"
+
+static const char *TAG_SESSION = "session";
+static const char *TAG_IMAGE = "image";
+
+static const char *ATTR_VERSION = "version";
+static const char *ATTR_CALL_STACK_UNWINDING = "call_stack_unwinding";
+static const char *ATTR_BUFFER_MODE = "buffer_mode";
+static const char *ATTR_SAMPLE_RATE = "sample_rate";
+static const char *ATTR_DURATION = "duration";
+static const char *ATTR_PATH = "path";
+static const char *ATTR_LIVE_RATE = "live_rate";
+static const char *ATTR_CAPTURE_WORKING_DIR = "capture_working_dir";
+static const char *ATTR_CAPTURE_COMMAND = "capture_command";
+static const char *ATTR_CAPTURE_USER = "capture_user";
+
+SessionXML::SessionXML(const char *str) {
+ parameters.buffer_mode[0] = 0;
+ parameters.sample_rate[0] = 0;
+ parameters.call_stack_unwinding = false;
+ parameters.live_rate = 0;
+ mSessionXML = str;
+ logg->logMessage("%s", mSessionXML);
+}
+
+SessionXML::~SessionXML() {
+}
+
+void SessionXML::parse() {
+ mxml_node_t *tree;
+ mxml_node_t *node;
+
+ tree = mxmlLoadString(NULL, mSessionXML, MXML_NO_CALLBACK);
+ node = mxmlFindElement(tree, tree, TAG_SESSION, NULL, NULL, MXML_DESCEND);
+
+ if (node) {
+ sessionTag(tree, node);
+ mxmlDelete(tree);
+ return;
+ }
+
+ logg->logError(__FILE__, __LINE__, "No session tag found in the session.xml file");
+ handleException();
+}
+
+void SessionXML::sessionTag(mxml_node_t *tree, mxml_node_t *node) {
+ int version = 0;
+ if (mxmlElementGetAttr(node, ATTR_VERSION)) version = strtol(mxmlElementGetAttr(node, ATTR_VERSION), NULL, 10);
+ if (version != 1) {
+ logg->logError(__FILE__, __LINE__, "Invalid session.xml version: %d", version);
+ handleException();
+ }
+
+ // copy to pre-allocated strings
+ if (mxmlElementGetAttr(node, ATTR_BUFFER_MODE)) {
+ strncpy(parameters.buffer_mode, mxmlElementGetAttr(node, ATTR_BUFFER_MODE), sizeof(parameters.buffer_mode));
+ parameters.buffer_mode[sizeof(parameters.buffer_mode) - 1] = 0; // strncpy does not guarantee a null-terminated string
+ }
+ if (mxmlElementGetAttr(node, ATTR_SAMPLE_RATE)) {
+ strncpy(parameters.sample_rate, mxmlElementGetAttr(node, ATTR_SAMPLE_RATE), sizeof(parameters.sample_rate));
+ parameters.sample_rate[sizeof(parameters.sample_rate) - 1] = 0; // strncpy does not guarantee a null-terminated string
+ }
+ if (mxmlElementGetAttr(node, ATTR_CAPTURE_WORKING_DIR)) gSessionData->mCaptureWorkingDir = strdup(mxmlElementGetAttr(node, ATTR_CAPTURE_WORKING_DIR));
+ if (mxmlElementGetAttr(node, ATTR_CAPTURE_COMMAND)) gSessionData->mCaptureCommand = strdup(mxmlElementGetAttr(node, ATTR_CAPTURE_COMMAND));
+ if (mxmlElementGetAttr(node, ATTR_CAPTURE_USER)) gSessionData->mCaptureUser = strdup(mxmlElementGetAttr(node, ATTR_CAPTURE_USER));
+
+ // integers/bools
+ parameters.call_stack_unwinding = util->stringToBool(mxmlElementGetAttr(node, ATTR_CALL_STACK_UNWINDING), false);
+ if (mxmlElementGetAttr(node, ATTR_DURATION)) gSessionData->mDuration = strtol(mxmlElementGetAttr(node, ATTR_DURATION), NULL, 10);
+ if (mxmlElementGetAttr(node, ATTR_LIVE_RATE)) parameters.live_rate = strtol(mxmlElementGetAttr(node, ATTR_LIVE_RATE), NULL, 10);
+
+ // parse subtags
+ node = mxmlGetFirstChild(node);
+ while (node) {
+ if (mxmlGetType(node) != MXML_ELEMENT) {
+ node = mxmlWalkNext(node, tree, MXML_NO_DESCEND);
+ continue;
+ }
+ if (strcmp(TAG_IMAGE, mxmlGetElement(node)) == 0) {
+ sessionImage(node);
+ }
+ node = mxmlWalkNext(node, tree, MXML_NO_DESCEND);
+ }
+}
+
+void SessionXML::sessionImage(mxml_node_t *node) {
+ int length = strlen(mxmlElementGetAttr(node, ATTR_PATH));
+ struct ImageLinkList *image;
+
+ image = (struct ImageLinkList *)malloc(sizeof(struct ImageLinkList));
+ image->path = (char *)malloc(length + 1);
+ strcpy(image->path, mxmlElementGetAttr(node, ATTR_PATH));
+ image->next = gSessionData->mImages;
+ gSessionData->mImages = image;
+}
diff --git a/tools/gator/daemon/SessionXML.h b/tools/gator/daemon/SessionXML.h
new file mode 100644
index 000000000000..53965749c74b
--- /dev/null
+++ b/tools/gator/daemon/SessionXML.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SESSION_XML_H
+#define SESSION_XML_H
+
+#include "mxml/mxml.h"
+
+struct ImageLinkList;
+
+struct ConfigParameters {
+ // buffer mode: "streaming", "low", "normal" or "high"; determines one-shot mode and buffer size
+ char buffer_mode[64];
+ // sample rate: "high", "normal" or "low"
+ char sample_rate[64];
+ // whether stack unwinding is performed
+ bool call_stack_unwinding;
+ int live_rate;
+};
+
+class SessionXML {
+public:
+ SessionXML(const char *str);
+ ~SessionXML();
+ void parse();
+ ConfigParameters parameters;
+private:
+ const char *mSessionXML;
+ void sessionTag(mxml_node_t *tree, mxml_node_t *node);
+ void sessionImage(mxml_node_t *node);
+
+ // Intentionally unimplemented
+ SessionXML(const SessionXML &);
+ SessionXML &operator=(const SessionXML &);
+};
+
+#endif // SESSION_XML_H
diff --git a/tools/gator/daemon/Setup.cpp b/tools/gator/daemon/Setup.cpp
new file mode 100644
index 000000000000..d4ce0328c633
--- /dev/null
+++ b/tools/gator/daemon/Setup.cpp
@@ -0,0 +1,232 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Setup.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "Config.h"
+#include "DynBuf.h"
+#include "Logging.h"
+
+bool getLinuxVersion(int version[3]) {
+ // Check the kernel version
+ struct utsname utsname;
+ if (uname(&utsname) != 0) {
+ logg->logMessage("%s(%s:%i): uname failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ version[0] = 0;
+ version[1] = 0;
+ version[2] = 0;
+
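+ // Parse the leading "major.minor.patch" digits of the release string, e.g. "3.10.0-..."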
+ int part = 0;
+ char *ch = utsname.release;
+ while (*ch >= '0' && *ch <= '9' && part < 3) {
+ version[part] = 10*version[part] + *ch - '0';
+
+ ++ch;
+ if (*ch == '.') {
+ ++part;
+ ++ch;
+ }
+ }
+
+ return true;
+}
+
+static int pgrep_gator(DynBuf *const printb) {
+ DynBuf b;
+
+ DIR *proc = opendir("/proc");
+ if (proc == NULL) {
+ logg->logError(__FILE__, __LINE__, "gator: error: opendir failed");
+ handleException();
+ }
+
+ int self = getpid();
+
+ struct dirent *dirent;
+ while ((dirent = readdir(proc)) != NULL) {
+ char *endptr;
+ const int pid = strtol(dirent->d_name, &endptr, 10);
+ if (*endptr != '\0' || (pid == self)) {
+ // Ignore proc items that are not integers like ., cpuinfo, etc...
+ continue;
+ }
+
+ if (!printb->printf("/proc/%i/stat", pid)) {
+ logg->logError(__FILE__, __LINE__, "gator: error: DynBuf::printf failed");
+ handleException();
+ }
+
+ if (!b.read(printb->getBuf())) {
+ // This is not a fatal error - the thread just doesn't exist any more
+ continue;
+ }
+
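+ // In /proc/pid/stat the comm field is the second field and is enclosed in parentheses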
+ char *comm = strchr(b.getBuf(), '(');
+ if (comm == NULL) {
+ logg->logError(__FILE__, __LINE__, "gator: error: parsing stat begin failed");
+ handleException();
+ }
+ ++comm;
+ char *const str = strrchr(comm, ')');
+ if (str == NULL) {
+ logg->logError(__FILE__, __LINE__, "gator: error: parsing stat end failed");
+ handleException();
+ }
+ *str = '\0';
+
+ if (strncmp(comm, "gator", 5) == 0) {
+ // Assume there is only one gator process
+ return pid;
+ }
+ }
+
+ closedir(proc);
+
+ return -1;
+}
+
+int update(const char *const gatorPath) {
+ printf("gator: starting\n");
+
+ int version[3];
+ if (!getLinuxVersion(version)) {
+ logg->logError(__FILE__, __LINE__, "gator: error: getLinuxVersion failed");
+ handleException();
+ }
+
+ if (KERNEL_VERSION(version[0], version[1], version[2]) < KERNEL_VERSION(2, 6, 32)) {
+ logg->logError(__FILE__, __LINE__, "gator: error: Streamline can't automatically setup gator as this kernel version is not supported. Please upgrade the kernel on your device.");
+ handleException();
+ }
+
+ if (KERNEL_VERSION(version[0], version[1], version[2]) < KERNEL_VERSION(3, 4, 0)) {
+ logg->logError(__FILE__, __LINE__, "gator: error: Streamline can't automatically setup gator as gator.ko is required for this version of Linux. Please build gator.ko and gatord and install them on your device.");
+ handleException();
+ }
+
+ if (access("/sys/module/gator", F_OK) == 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: Streamline has detected that the gator kernel module is loaded on your device. Please build an updated version of gator.ko and gatord and install them on your device.");
+ handleException();
+ }
+
+ if (geteuid() != 0) {
+ printf("gator: trying sudo\n");
+ execlp("sudo", "sudo", gatorPath, "-u", NULL);
+ // Streamline will provide the password if needed
+
+ printf("gator: trying su\n");
+ char buf[1<<10];
+ snprintf(buf, sizeof(buf), "%s -u", gatorPath);
+ execlp("su", "su", "-", "-c", buf, NULL);
+ // Streamline will provide the password if needed
+
+ logg->logError(__FILE__, __LINE__, "gator: error: Streamline was unable to sudo to root on your device. Please double check passwords, ensure sudo or su work with this user or try a different username.");
+ handleException();
+ }
+ printf("gator: now root\n");
+
+ // setenforce 0 not needed for userspace gator
+
+ // Kill existing gator
+ DynBuf gatorStatPath;
+ int gator_main = pgrep_gator(&gatorStatPath);
+ if (gator_main > 0) {
+ if (kill(gator_main, SIGTERM) != 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: kill SIGTERM failed");
+ handleException();
+ }
+ for (int i = 0; ; ++i) {
+ if (access(gatorStatPath.getBuf(), F_OK) != 0) {
+ break;
+ }
+ if (i == 5) {
+ if (kill(gator_main, SIGKILL) != 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: kill SIGKILL failed");
+ handleException();
+ }
+ } else if (i >= 10) {
+ logg->logError(__FILE__, __LINE__, "gator: error: unable to kill running gator");
+ handleException();
+ }
+ sleep(1);
+ }
+ }
+ printf("gator: no gatord running\n");
+
+ rename("gatord", "gatord.old");
+ rename("gator.ko", "gator.ko.old");
+
+ // Rename gatord.YYYYMMDDHHMMSSMMMM to gatord
+ char *newGatorPath = strdup(gatorPath);
+ char *dot = strrchr(newGatorPath, '.');
+ if (dot != NULL) {
+ *dot = '\0';
+ if (rename(gatorPath, newGatorPath) != 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: rename failed");
+ handleException();
+ }
+ }
+
+ // Fork and start gatord (redirect stdout and stderr)
+ int child = fork();
+ if (child < 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: fork failed");
+ handleException();
+ } else if (child == 0) {
+ int inFd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ if (inFd < 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: open of /dev/null failed");
+ handleException();
+ }
+ int outFd = open("gatord.out", O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0600);
+ if (outFd < 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: open of gatord.out failed");
+ handleException();
+ }
+ int errFd = open("gatord.err", O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0600);
+ if (errFd < 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: open of gatord.err failed");
+ handleException();
+ }
+ if (dup2(inFd, STDIN_FILENO) < 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stdin failed");
+ handleException();
+ }
+ if (dup2(outFd, STDOUT_FILENO) < 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stdout failed");
+ handleException();
+ }
+ if (dup2(errFd, STDERR_FILENO) < 0) {
+ logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stderr failed");
+ handleException();
+ }
+ execlp(newGatorPath, newGatorPath, "-a", NULL);
+ logg->logError(__FILE__, __LINE__, "gator: error: execlp failed");
+ handleException();
+ }
+
+ printf("gator: done\n");
+
+ return 0;
+}
diff --git a/tools/gator/daemon/Setup.h b/tools/gator/daemon/Setup.h
new file mode 100644
index 000000000000..280d61139784
--- /dev/null
+++ b/tools/gator/daemon/Setup.h
@@ -0,0 +1,18 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SETUP_H
+#define SETUP_H
+
+// From include/generated/uapi/linux/version.h
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+
+bool getLinuxVersion(int version[3]);
+int update(const char *const gatorPath);
+
+#endif // SETUP_H
diff --git a/tools/gator/daemon/Source.cpp b/tools/gator/daemon/Source.cpp
new file mode 100644
index 000000000000..60cf704e599b
--- /dev/null
+++ b/tools/gator/daemon/Source.cpp
@@ -0,0 +1,33 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Source.h"
+
+#include "Logging.h"
+
+Source::Source() : mThreadID() {
+}
+
+Source::~Source() {
+}
+
+void Source::start() {
+ if (pthread_create(&mThreadID, NULL, runStatic, this)) {
+ logg->logError(__FILE__, __LINE__, "Failed to create source thread");
+ handleException();
+ }
+}
+
+void Source::join() {
+ pthread_join(mThreadID, NULL);
+}
+
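+// pthread entry point: forwards to the virtual run() of the Source passed in arg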
+void *Source::runStatic(void *arg) {
+ static_cast<Source *>(arg)->run();
+ return NULL;
+}
diff --git a/tools/gator/daemon/Source.h b/tools/gator/daemon/Source.h
new file mode 100644
index 000000000000..56ac3d6e94f3
--- /dev/null
+++ b/tools/gator/daemon/Source.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SOURCE_H
+#define SOURCE_H
+
+#include <pthread.h>
+
+class Sender;
+
+class Source {
+public:
+ Source();
+ virtual ~Source();
+
+ virtual bool prepare() = 0;
+ void start();
+ virtual void run() = 0;
+ virtual void interrupt() = 0;
+ void join();
+
+ virtual bool isDone() = 0;
+ virtual void write(Sender *sender) = 0;
+
+private:
+ static void *runStatic(void *arg);
+
+ pthread_t mThreadID;
+
+ // Intentionally undefined
+ Source(const Source &);
+ Source &operator=(const Source &);
+};
+
+#endif // SOURCE_H
diff --git a/tools/gator/daemon/StreamlineSetup.cpp b/tools/gator/daemon/StreamlineSetup.cpp
new file mode 100644
index 000000000000..2b61eaeb290d
--- /dev/null
+++ b/tools/gator/daemon/StreamlineSetup.cpp
@@ -0,0 +1,272 @@
+/**
+ * Copyright (C) ARM Limited 2011-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "StreamlineSetup.h"
+
+#include "Buffer.h"
+#include "CapturedXML.h"
+#include "ConfigurationXML.h"
+#include "Driver.h"
+#include "EventsXML.h"
+#include "Logging.h"
+#include "OlySocket.h"
+#include "OlyUtility.h"
+#include "Sender.h"
+#include "SessionData.h"
+
+static const char* TAG_SESSION = "session";
+static const char* TAG_REQUEST = "request";
+static const char* TAG_CONFIGURATIONS = "configurations";
+
+static const char* ATTR_TYPE = "type";
+static const char* VALUE_EVENTS = "events";
+static const char* VALUE_CONFIGURATION = "configuration";
+static const char* VALUE_COUNTERS = "counters";
+static const char* VALUE_CAPTURED = "captured";
+static const char* VALUE_DEFAULTS = "defaults";
+
+StreamlineSetup::StreamlineSetup(OlySocket* s) {
+ bool ready = false;
+ char* data = NULL;
+ int type;
+
+ mSocket = s;
+
+ // Receive commands from Streamline (master)
+ while (!ready) {
+ // receive command over socket
+ gSessionData->mWaitingOnCommand = true;
+ data = readCommand(&type);
+
+ // parse and handle data
+ switch (type) {
+ case COMMAND_REQUEST_XML:
+ handleRequest(data);
+ break;
+ case COMMAND_DELIVER_XML:
+ handleDeliver(data);
+ break;
+ case COMMAND_APC_START:
+ logg->logMessage("Received apc start request");
+ ready = true;
+ break;
+ case COMMAND_APC_STOP:
+ logg->logMessage("Received apc stop request before apc start request");
+ exit(0);
+ break;
+ case COMMAND_DISCONNECT:
+ logg->logMessage("Received disconnect command");
+ exit(0);
+ break;
+ case COMMAND_PING:
+ logg->logMessage("Received ping command");
+ sendData(NULL, 0, RESPONSE_ACK);
+ break;
+ default:
+ logg->logError(__FILE__, __LINE__, "Target error: Unknown command type, %d", type);
+ handleException();
+ }
+
+ free(data);
+ }
+
+ if (gSessionData->mCounterOverflow > 0) {
+ logg->logError(__FILE__, __LINE__, "Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow);
+ handleException();
+ }
+}
+
+StreamlineSetup::~StreamlineSetup() {
+}
+
+char* StreamlineSetup::readCommand(int* command) {
+ unsigned char header[5];
+ char* data;
+ int response;
+
+ // receive type and length
+ response = mSocket->receiveNBytes((char*)&header, sizeof(header));
+
+ // After receiving a single byte, we are no longer waiting on a command
+ gSessionData->mWaitingOnCommand = false;
+
+ if (response < 0) {
+ logg->logError(__FILE__, __LINE__, "Target error: Unexpected socket disconnect");
+ handleException();
+ }
+
+ const char type = header[0];
+ const int length = (header[1] << 0) | (header[2] << 8) | (header[3] << 16) | (header[4] << 24);
+
+ // add artificial limit
+ if ((length < 0) || length > 1024 * 1024) {
+ logg->logError(__FILE__, __LINE__, "Target error: Invalid length received, %d", length);
+ handleException();
+ }
+
+ // allocate memory to contain the xml file, size of zero returns a zero size object
+ data = (char*)calloc(length + 1, 1);
+ if (data == NULL) {
+ logg->logError(__FILE__, __LINE__, "Unable to allocate memory for xml");
+ handleException();
+ }
+
+ // receive data
+ response = mSocket->receiveNBytes(data, length);
+ if (response < 0) {
+ logg->logError(__FILE__, __LINE__, "Target error: Unexpected socket disconnect");
+ handleException();
+ }
+
+ // null terminate the data for string parsing
+ if (length > 0) {
+ data[length] = 0;
+ }
+
+ *command = type;
+ return data;
+}
+
+void StreamlineSetup::handleRequest(char* xml) {
+ mxml_node_t *tree, *node;
+ const char * attr = NULL;
+
+ tree = mxmlLoadString(NULL, xml, MXML_NO_CALLBACK);
+ node = mxmlFindElement(tree, tree, TAG_REQUEST, ATTR_TYPE, NULL, MXML_DESCEND_FIRST);
+ if (node) {
+ attr = mxmlElementGetAttr(node, ATTR_TYPE);
+ }
+ if (attr && strcmp(attr, VALUE_EVENTS) == 0) {
+ sendEvents();
+ logg->logMessage("Sent events xml response");
+ } else if (attr && strcmp(attr, VALUE_CONFIGURATION) == 0) {
+ sendConfiguration();
+ logg->logMessage("Sent configuration xml response");
+ } else if (attr && strcmp(attr, VALUE_COUNTERS) == 0) {
+ sendCounters();
+ logg->logMessage("Sent counters xml response");
+ } else if (attr && strcmp(attr, VALUE_CAPTURED) == 0) {
+ CapturedXML capturedXML;
+ char* capturedText = capturedXML.getXML(false);
+ sendData(capturedText, strlen(capturedText), RESPONSE_XML);
+ free(capturedText);
+ logg->logMessage("Sent captured xml response");
+ } else if (attr && strcmp(attr, VALUE_DEFAULTS) == 0) {
+ sendDefaults();
+ logg->logMessage("Sent default configuration xml response");
+ } else {
+ char error[] = "Unknown request";
+ sendData(error, strlen(error), RESPONSE_NAK);
+ logg->logMessage("Received unknown request:\n%s", xml);
+ }
+
+ mxmlDelete(tree);
+}
+
+void StreamlineSetup::handleDeliver(char* xml) {
+ mxml_node_t *tree;
+
+ // Determine xml type
+ tree = mxmlLoadString(NULL, xml, MXML_NO_CALLBACK);
+ if (mxmlFindElement(tree, tree, TAG_SESSION, NULL, NULL, MXML_DESCEND_FIRST)) {
+ // Session XML
+ gSessionData->parseSessionXML(xml);
+ sendData(NULL, 0, RESPONSE_ACK);
+ logg->logMessage("Received session xml");
+ } else if (mxmlFindElement(tree, tree, TAG_CONFIGURATIONS, NULL, NULL, MXML_DESCEND_FIRST)) {
+ // Configuration XML
+ writeConfiguration(xml);
+ sendData(NULL, 0, RESPONSE_ACK);
+ logg->logMessage("Received configuration xml");
+ } else {
+ // Unknown XML
+ logg->logMessage("Received unknown XML delivery type");
+ sendData(NULL, 0, RESPONSE_NAK);
+ }
+
+ mxmlDelete(tree);
+}
+
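+// Frame each response as a 1-byte type followed by a 32-bit little-endian payload length, mirroring readCommand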
+void StreamlineSetup::sendData(const char* data, uint32_t length, char type) {
+ unsigned char header[5];
+ header[0] = type;
+ Buffer::writeLEInt(header + 1, length);
+ mSocket->send((char*)&header, sizeof(header));
+ mSocket->send((const char*)data, length);
+}
+
+void StreamlineSetup::sendEvents() {
+ EventsXML eventsXML;
+ char* string = eventsXML.getXML();
+ sendString(string, RESPONSE_XML);
+ free(string);
+}
+
+void StreamlineSetup::sendConfiguration() {
+ ConfigurationXML xml;
+
+ const char* string = xml.getConfigurationXML();
+ sendData(string, strlen(string), RESPONSE_XML);
+}
+
+void StreamlineSetup::sendDefaults() {
+ // Send the config built into the binary
+ const char* xml;
+ unsigned int size;
+ ConfigurationXML::getDefaultConfigurationXml(xml, size);
+
+ // Artificial size restriction
+ if (size > 1024*1024) {
+ logg->logError(__FILE__, __LINE__, "Corrupt default configuration file");
+ handleException();
+ }
+
+ sendData(xml, size, RESPONSE_XML);
+}
+
+void StreamlineSetup::sendCounters() {
+ mxml_node_t *xml;
+ mxml_node_t *counters;
+
+ xml = mxmlNewXML("1.0");
+ counters = mxmlNewElement(xml, "counters");
+ int count = 0;
+ for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) {
+ count += driver->writeCounters(counters);
+ }
+
+ if (count == 0) {
+ logg->logError(__FILE__, __LINE__, "No counters found, this could be because /dev/gator/events can not be read or because perf is not working correctly");
+ handleException();
+ }
+
+ char* string = mxmlSaveAllocString(xml, mxmlWhitespaceCB);
+ sendString(string, RESPONSE_XML);
+
+ free(string);
+ mxmlDelete(xml);
+}
+
+void StreamlineSetup::writeConfiguration(char* xml) {
+ char path[PATH_MAX];
+
+ ConfigurationXML::getPath(path);
+
+ if (util->writeToDisk(path, xml) < 0) {
+ logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify write permissions to this path.", path);
+ handleException();
+ }
+
+ // Re-populate gSessionData with the configuration, as it has now changed
+ { ConfigurationXML configuration; }
+
+ if (gSessionData->mCounterOverflow > 0) {
+ logg->logError(__FILE__, __LINE__, "Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow);
+ handleException();
+ }
+}
diff --git a/tools/gator/daemon/StreamlineSetup.h b/tools/gator/daemon/StreamlineSetup.h
new file mode 100644
index 000000000000..623e14f2b64a
--- /dev/null
+++ b/tools/gator/daemon/StreamlineSetup.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __STREAMLINE_SETUP_H__
+#define __STREAMLINE_SETUP_H__
+
+#include <stdint.h>
+#include <string.h>
+
+class OlySocket;
+
+// Commands from Streamline
+enum {
+ COMMAND_REQUEST_XML = 0,
+ COMMAND_DELIVER_XML = 1,
+ COMMAND_APC_START = 2,
+ COMMAND_APC_STOP = 3,
+ COMMAND_DISCONNECT = 4,
+ COMMAND_PING = 5
+};
+
+class StreamlineSetup {
+public:
+ StreamlineSetup(OlySocket *socket);
+ ~StreamlineSetup();
+private:
+ OlySocket* mSocket;
+
+ char* readCommand(int*);
+ void handleRequest(char* xml);
+ void handleDeliver(char* xml);
+ void sendData(const char* data, uint32_t length, char type);
+ void sendString(const char* string, int type) {sendData(string, strlen(string), type);}
+ void sendEvents();
+ void sendConfiguration();
+ void sendDefaults();
+ void sendCounters();
+ void writeConfiguration(char* xml);
+
+ // Intentionally unimplemented
+ StreamlineSetup(const StreamlineSetup &);
+ StreamlineSetup &operator=(const StreamlineSetup &);
+};
+
+#endif //__STREAMLINE_SETUP_H__
diff --git a/tools/gator/daemon/UEvent.cpp b/tools/gator/daemon/UEvent.cpp
new file mode 100644
index 000000000000..f94a995393e8
--- /dev/null
+++ b/tools/gator/daemon/UEvent.cpp
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "UEvent.h"
+
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <linux/netlink.h>
+
+#include "Logging.h"
+#include "OlySocket.h"
+
+static const char EMPTY[] = "";
+static const char ACTION[] = "ACTION=";
+static const char DEVPATH[] = "DEVPATH=";
+static const char SUBSYSTEM[] = "SUBSYSTEM=";
+
+UEvent::UEvent() : mFd(-1) {
+}
+
+UEvent::~UEvent() {
+ if (mFd >= 0) {
+ close(mFd);
+ }
+}
+
+bool UEvent::init() {
+ mFd = socket_cloexec(PF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT);
+ if (mFd < 0) {
+ logg->logMessage("%s(%s:%i): socket failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ struct sockaddr_nl sockaddr;
+ memset(&sockaddr, 0, sizeof(sockaddr));
+ sockaddr.nl_family = AF_NETLINK;
+ sockaddr.nl_groups = 1; // bitmask: (1 << 0) == kernel events, (1 << 1) == udev events
+ sockaddr.nl_pid = 0;
+ if (bind(mFd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) != 0) {
+ logg->logMessage("%s(%s:%i): bind failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ return true;
+}
+
+bool UEvent::read(UEventResult *const result) {
+ ssize_t bytes = recv(mFd, result->mBuf, sizeof(result->mBuf), 0);
+ if (bytes <= 0) {
+ logg->logMessage("%s(%s:%i): recv failed", __FUNCTION__, __FILE__, __LINE__);
+ return false;
+ }
+
+ result->mAction = EMPTY;
+ result->mDevPath = EMPTY;
+ result->mSubsystem = EMPTY;
+
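+ // The uevent payload is a sequence of NUL-terminated KEY=value strings; extract the fields of interest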
+ for (int pos = 0; pos < bytes; pos += strlen(result->mBuf + pos) + 1) {
+ char *const str = result->mBuf + pos;
+ if (strncmp(str, ACTION, sizeof(ACTION) - 1) == 0) {
+ result->mAction = str + sizeof(ACTION) - 1;
+ } else if (strncmp(str, DEVPATH, sizeof(DEVPATH) - 1) == 0) {
+ result->mDevPath = str + sizeof(DEVPATH) - 1;
+ } else if (strncmp(str, SUBSYSTEM, sizeof(SUBSYSTEM) - 1) == 0) {
+ result->mSubsystem = str + sizeof(SUBSYSTEM) - 1;
+ }
+ }
+
+ return true;
+}
diff --git a/tools/gator/daemon/UEvent.h b/tools/gator/daemon/UEvent.h
new file mode 100644
index 000000000000..2f7ef2c93f5d
--- /dev/null
+++ b/tools/gator/daemon/UEvent.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef UEVENT_H
+#define UEVENT_H
+
+struct UEventResult {
+ const char *mAction;
+ const char *mDevPath;
+ const char *mSubsystem;
+ char mBuf[1<<13];
+};
+
+class UEvent {
+public:
+ UEvent();
+ ~UEvent();
+
+ bool init();
+ bool read(UEventResult *const result);
+ int getFd() const { return mFd; }
+
+private:
+ int mFd;
+
+ // Intentionally undefined
+ UEvent(const UEvent &);
+ UEvent &operator=(const UEvent &);
+};
+
+#endif // UEVENT_H
diff --git a/tools/gator/daemon/UserSpaceSource.cpp b/tools/gator/daemon/UserSpaceSource.cpp
new file mode 100644
index 000000000000..4a9b22f4b555
--- /dev/null
+++ b/tools/gator/daemon/UserSpaceSource.cpp
@@ -0,0 +1,94 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "UserSpaceSource.h"
+
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "Child.h"
+#include "DriverSource.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+extern Child *child;
+
+UserSpaceSource::UserSpaceSource(sem_t *senderSem) : mBuffer(0, FRAME_BLOCK_COUNTER, gSessionData->mTotalBufferSize*1024*1024, senderSem) {
+}
+
+UserSpaceSource::~UserSpaceSource() {
+}
+
+bool UserSpaceSource::prepare() {
+ return true;
+}
+
+void UserSpaceSource::run() {
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-counters", 0, 0, 0);
+
+ for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
+ gSessionData->usDrivers[i]->start();
+ }
+
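+ // Wait for the capture start time: published via SessionData when using perf, otherwise read from the gator.ko driver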
+ int64_t monotonic_started = 0;
+ while (monotonic_started <= 0) {
+ usleep(10);
+
+ if (gSessionData->perf.isSetup()) {
+ monotonic_started = gSessionData->mMonotonicStarted;
+ } else {
+ if (DriverSource::readInt64Driver("/dev/gator/started", &monotonic_started) == -1) {
+ logg->logError(__FILE__, __LINE__, "Error reading gator driver start time");
+ handleException();
+ }
+ gSessionData->mMonotonicStarted = monotonic_started;
+ }
+ }
+
+ uint64_t next_time = 0;
+ while (gSessionData->mSessionIsActive) {
+ const uint64_t curr_time = getTime() - monotonic_started;
+ // Sample ten times a second, ignoring gSessionData->mSampleRate
+ next_time += NS_PER_S/10;
+ if (next_time < curr_time) {
+ logg->logMessage("Too slow, curr_time: %lli next_time: %lli", curr_time, next_time);
+ next_time = curr_time;
+ }
+
+ if (mBuffer.eventHeader(curr_time)) {
+ for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
+ gSessionData->usDrivers[i]->read(&mBuffer);
+ }
+ // Only check after writing all counters so that time and corresponding counters appear in the same frame
+ mBuffer.check(curr_time);
+ }
+
+ if (mBuffer.bytesAvailable() <= 0) {
+ logg->logMessage("One shot (counters)");
+ child->endSession();
+ }
+
+ usleep((next_time - curr_time)/NS_PER_US);
+ }
+
+ mBuffer.setDone();
+}
+
+void UserSpaceSource::interrupt() {
+ // Do nothing
+}
+
+bool UserSpaceSource::isDone() {
+ return mBuffer.isDone();
+}
+
+void UserSpaceSource::write(Sender *sender) {
+ if (!mBuffer.isDone()) {
+ mBuffer.write(sender);
+ }
+}
diff --git a/tools/gator/daemon/UserSpaceSource.h b/tools/gator/daemon/UserSpaceSource.h
new file mode 100644
index 000000000000..9b3666016dc5
--- /dev/null
+++ b/tools/gator/daemon/UserSpaceSource.h
@@ -0,0 +1,38 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef USERSPACESOURCE_H
+#define USERSPACESOURCE_H
+
+#include <semaphore.h>
+
+#include "Buffer.h"
+#include "Source.h"
+
+// User space counters
+class UserSpaceSource : public Source {
+public:
+ UserSpaceSource(sem_t *senderSem);
+ ~UserSpaceSource();
+
+ bool prepare();
+ void run();
+ void interrupt();
+
+ bool isDone();
+ void write(Sender *sender);
+
+private:
+ Buffer mBuffer;
+
+ // Intentionally unimplemented
+ UserSpaceSource(const UserSpaceSource &);
+ UserSpaceSource &operator=(const UserSpaceSource &);
+};
+
+#endif // USERSPACESOURCE_H
diff --git a/tools/gator/daemon/c++.cpp b/tools/gator/daemon/c++.cpp
new file mode 100644
index 000000000000..6041e5e96469
--- /dev/null
+++ b/tools/gator/daemon/c++.cpp
@@ -0,0 +1,40 @@
+/**
+ * Minimal set of C++ functions so that libstdc++ is not required
+ *
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+void operator delete(void *ptr) {
+ if (ptr != NULL) {
+ free(ptr);
+ }
+}
+
+void operator delete[](void *ptr) {
+ operator delete(ptr);
+}
+
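+// gatord is built with -fno-exceptions, so allocation failure aborts instead of throwing std::bad_alloc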
+void *operator new(size_t size) {
+ void *ptr = malloc(size == 0 ? 1 : size);
+ if (ptr == NULL) {
+ abort();
+ }
+ return ptr;
+}
+
+void *operator new[](size_t size) {
+ return operator new(size);
+}
+
+extern "C"
+void __cxa_pure_virtual() {
+ printf("pure virtual method called\n");
+ abort();
+}
diff --git a/tools/gator/daemon/common.mk b/tools/gator/daemon/common.mk
new file mode 100644
index 000000000000..769a92e51a35
--- /dev/null
+++ b/tools/gator/daemon/common.mk
@@ -0,0 +1,52 @@
+# -g produces debugging information
+# -O3 maximum optimization
+# -O0 no optimization, used for debugging
+# -Wall enables most warnings
+# -Werror treats warnings as errors
+# -std=c++0x is the planned new c++ standard
+# -std=c++98 is the 1998 c++ standard
+CPPFLAGS += -O3 -Wall -fno-exceptions -pthread -MMD -DETCDIR=\"/etc\" -Ilibsensors
+CXXFLAGS += -fno-rtti -Wextra # -Weffc++
+ifeq ($(WERROR),1)
+ CPPFLAGS += -Werror
+endif
+# -s strips the binary of debug info
+LDFLAGS += -s
+LDLIBS += -lrt -lm -pthread
+TARGET = gatord
+C_SRC = $(wildcard mxml/*.c) $(wildcard libsensors/*.c)
+CXX_SRC = $(wildcard *.cpp)
+
+all: $(TARGET)
+
+events.xml: events_header.xml $(wildcard events-*.xml) events_footer.xml
+ cat $^ > $@
+
+include $(wildcard *.d)
+include $(wildcard mxml/*.d)
+
+EventsXML.cpp: events_xml.h
+ConfigurationXML.cpp: defaults_xml.h
+
+# Don't regenerate conf-lex.c or conf-parse.c
+libsensors/conf-lex.c: ;
+libsensors/conf-parse.c: ;
+
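+# Generate a C header of escaped bytes from each xml file using the native 'escape' tool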
+%_xml.h: %.xml escape
+ ./escape $< > $@
+
+%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+
+%.o: %.cpp
+ $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
+
+$(TARGET): $(CXX_SRC:%.cpp=%.o) $(C_SRC:%.c=%.o)
+ $(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@
+
+# Intentionally ignore CC as a native binary is required
+escape: escape.c
+ gcc $^ -o $@
+
+clean:
+ rm -f *.d *.o mxml/*.d mxml/*.o libsensors/*.d libsensors/*.o $(TARGET) escape events.xml events_xml.h defaults_xml.h
diff --git a/tools/gator/daemon/defaults.xml b/tools/gator/daemon/defaults.xml
new file mode 100644
index 000000000000..086eca1e804e
--- /dev/null
+++ b/tools/gator/daemon/defaults.xml
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configurations revision="3">
+ <configuration counter="ARM_ARM11_ccnt" event="0xff"/>
+ <configuration counter="ARM_ARM11_cnt0" event="0x7"/>
+ <configuration counter="ARM_ARM11_cnt1" event="0xb"/>
+ <configuration counter="ARM_ARM11MPCore_ccnt" event="0xff"/>
+ <configuration counter="ARM_ARM11MPCore_cnt0" event="0x08"/>
+ <configuration counter="ARM_ARM11MPCore_cnt1" event="0x0b"/>
+ <configuration counter="ARMv7_Cortex_A5_ccnt" event="0xff"/>
+ <configuration counter="ARMv7_Cortex_A5_cnt0" event="0x8"/>
+ <configuration counter="ARMv7_Cortex_A5_cnt1" event="0x1"/>
+ <configuration counter="ARMv7_Cortex_A7_ccnt" event="0xff"/>
+ <configuration counter="ARMv7_Cortex_A7_cnt0" event="0x08"/>
+ <configuration counter="ARMv7_Cortex_A7_cnt1" event="0x10"/>
+ <configuration counter="ARMv7_Cortex_A7_cnt2" event="0x16"/>
+ <configuration counter="ARMv7_Cortex_A8_ccnt" event="0xff"/>
+ <configuration counter="ARMv7_Cortex_A8_cnt0" event="0x8"/>
+ <configuration counter="ARMv7_Cortex_A8_cnt1" event="0x44"/>
+ <configuration counter="ARMv7_Cortex_A8_cnt2" event="0x43"/>
+ <configuration counter="ARMv7_Cortex_A8_cnt3" event="0x10"/>
+ <configuration counter="ARMv7_Cortex_A9_ccnt" event="0xff"/>
+ <configuration counter="ARMv7_Cortex_A9_cnt0" event="0x68"/>
+ <configuration counter="ARMv7_Cortex_A9_cnt1" event="0x06"/>
+ <configuration counter="ARMv7_Cortex_A9_cnt2" event="0x07"/>
+ <configuration counter="ARMv7_Cortex_A9_cnt3" event="0x03"/>
+ <configuration counter="ARMv7_Cortex_A9_cnt4" event="0x04"/>
+ <configuration counter="ARMv7_Cortex_A15_ccnt" event="0xff"/>
+ <configuration counter="ARMv7_Cortex_A15_cnt0" event="0x8"/>
+ <configuration counter="ARMv7_Cortex_A15_cnt1" event="0x16"/>
+ <configuration counter="ARMv7_Cortex_A15_cnt2" event="0x10"/>
+ <configuration counter="ARMv7_Cortex_A15_cnt3" event="0x19"/>
+ <configuration counter="ARMv7_Cortex_A17_ccnt" event="0xff"/>
+ <configuration counter="ARMv7_Cortex_A17_cnt0" event="0x08"/>
+ <configuration counter="ARMv7_Cortex_A17_cnt1" event="0x16"/>
+ <configuration counter="ARMv7_Cortex_A17_cnt2" event="0x10"/>
+ <configuration counter="ARMv7_Cortex_A17_cnt3" event="0x19"/>
+ <configuration counter="ARM_Cortex-A53_ccnt" event="0x11"/>
+ <configuration counter="ARM_Cortex-A53_cnt0" event="0x8"/>
+ <configuration counter="ARM_Cortex-A53_cnt1" event="0x16"/>
+ <configuration counter="ARM_Cortex-A53_cnt2" event="0x10"/>
+ <configuration counter="ARM_Cortex-A53_cnt3" event="0x19"/>
+ <configuration counter="ARM_Cortex-A57_ccnt" event="0x11"/>
+ <configuration counter="ARM_Cortex-A57_cnt0" event="0x8"/>
+ <configuration counter="ARM_Cortex-A57_cnt1" event="0x16"/>
+ <configuration counter="ARM_Cortex-A57_cnt2" event="0x10"/>
+ <configuration counter="ARM_Cortex-A57_cnt3" event="0x19"/>
+ <configuration counter="Scorpion_ccnt" event="0xff"/>
+ <configuration counter="Scorpion_cnt0" event="0x08"/>
+ <configuration counter="Scorpion_cnt1" event="0x10"/>
+ <configuration counter="ScorpionMP_ccnt" event="0xff"/>
+ <configuration counter="ScorpionMP_cnt0" event="0x08"/>
+ <configuration counter="ScorpionMP_cnt1" event="0x10"/>
+ <configuration counter="Krait_ccnt" event="0xff"/>
+ <configuration counter="Krait_cnt0" event="0x08"/>
+ <configuration counter="Krait_cnt1" event="0x10"/>
+ <configuration counter="Linux_block_rq_wr"/>
+ <configuration counter="Linux_block_rq_rd"/>
+ <configuration counter="Linux_meminfo_memused"/>
+ <configuration counter="Linux_meminfo_memused2"/>
+ <configuration counter="Linux_meminfo_memfree"/>
+ <configuration counter="Linux_power_cpu_freq"/>
+ <configuration counter="ARM_Mali-4xx_fragment"/>
+ <configuration counter="ARM_Mali-4xx_vertex"/>
+ <configuration counter="ARM_Mali-Midgard_fragment" cores="1"/>
+ <configuration counter="ARM_Mali-Midgard_vertex" cores="1"/>
+ <configuration counter="ARM_Mali-Midgard_opencl" cores="1"/>
+ <configuration counter="ARM_Mali-T60x_GPU_ACTIVE"/>
+ <configuration counter="ARM_Mali-T60x_JS0_ACTIVE"/>
+ <configuration counter="ARM_Mali-T60x_JS1_ACTIVE"/>
+ <configuration counter="ARM_Mali-T60x_JS2_ACTIVE"/>
+ <configuration counter="ARM_Mali-T62x_GPU_ACTIVE"/>
+ <configuration counter="ARM_Mali-T62x_JS0_ACTIVE"/>
+ <configuration counter="ARM_Mali-T62x_JS1_ACTIVE"/>
+ <configuration counter="ARM_Mali-T62x_JS2_ACTIVE"/>
+ <configuration counter="ARM_Mali-T72x_GPU_ACTIVE"/>
+ <configuration counter="ARM_Mali-T72x_JS0_ACTIVE"/>
+ <configuration counter="ARM_Mali-T72x_JS1_ACTIVE"/>
+ <configuration counter="ARM_Mali-T72x_JS2_ACTIVE"/>
+ <configuration counter="ARM_Mali-T76x_GPU_ACTIVE"/>
+ <configuration counter="ARM_Mali-T76x_JS0_ACTIVE"/>
+ <configuration counter="ARM_Mali-T76x_JS1_ACTIVE"/>
+ <configuration counter="ARM_Mali-T76x_JS2_ACTIVE"/>
+ <configuration counter="L2C-310_cnt0" event="0x1"/>
+</configurations>
diff --git a/tools/gator/daemon/escape.c b/tools/gator/daemon/escape.c
new file mode 100644
index 000000000000..2b0863aaf425
--- /dev/null
+++ b/tools/gator/daemon/escape.c
@@ -0,0 +1,75 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * The Makefile in the daemon folder builds and executes 'escape'.
+ * 'escape' creates defaults_xml.h from defaults.xml and events_xml.h from events.xml
+ * (which is itself concatenated from events_header.xml, events-*.xml and events_footer.xml);
+ * these generated headers are then #included and built as part of the gatord binary.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
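+// Emit the path as a valid C identifier: prefix a leading digit and map non-alphanumeric characters to '_'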
+static void print_escaped_path(char *path) {
+ if (isdigit(*path)) {
+ printf("__");
+ }
+ for (; *path != '\0'; ++path) {
+ printf("%c", isalnum(*path) ? *path : '_');
+ }
+}
+
+int main(int argc, char *argv[]) {
+ int i;
+ char *path;
+ FILE *in = NULL;
+ int ch;
+ unsigned int len = 0;
+
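+ // Skip any leading '-' options; the first remaining argument is the input file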
+ for (i = 1; i < argc && argv[i][0] == '-'; ++i) ;
+ if (i == argc) {
+ fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+ path = argv[i];
+
+ errno = 0;
+ if ((in = fopen(path, "r")) == NULL) {
+ fprintf(stderr, "Unable to open '%s': %s\n", path, strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ printf("static const unsigned char ");
+ print_escaped_path(path);
+ printf("[] = {");
+ for (;;) {
+ ch = fgetc(in);
+ if (len != 0) {
+ printf(",");
+ }
+ if (len % 12 == 0) {
+ printf("\n ");
+ }
+ // Write out a null character after the contents of the file but do not increment len
+ printf(" 0x%.2x", (ch == EOF ? 0 : ch));
+ if (ch == EOF) {
+ break;
+ }
+ ++len;
+ }
+ printf("\n};\nstatic const unsigned int ");
+ print_escaped_path(path);
+ printf("_len = %i;\n", len);
+
+ fclose(in);
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/gator/daemon/events-ARM11.xml b/tools/gator/daemon/events-ARM11.xml
new file mode 100644
index 000000000000..57e323546314
--- /dev/null
+++ b/tools/gator/daemon/events-ARM11.xml
@@ -0,0 +1,39 @@
+ <counter_set name="ARM_ARM11_cnt" count="3"/>
+ <category name="ARM11" counter_set="ARM_ARM11_cnt" per_cpu="yes">
+ <event counter="ARM_ARM11_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Cache" name="Inst miss" description="Instruction cache miss to a cacheable location, which requires a fetch from external memory"/>
+ <event event="0x01" title="Pipeline" name="Instruction stall" description="Stall because instruction buffer cannot deliver an instruction"/>
+ <event event="0x02" title="Pipeline" name="Data stall" description="Stall because of a data dependency"/>
+ <event event="0x03" title="Cache" name="Inst micro TLB miss" description="Instruction MicroTLB miss (unused on ARM1156)"/>
+ <event event="0x04" title="Cache" name="Data micro TLB miss" description="Data MicroTLB miss (unused on ARM1156)"/>
+ <event event="0x05" title="Branch" name="Instruction executed" description="Branch instruction executed, branch might or might not have changed program flow"/>
+ <event event="0x06" title="Branch" name="Mispredicted" description="Branch mis-predicted"/>
+ <event event="0x07" title="Instruction" name="Executed" description="Instructions executed"/>
+ <event event="0x09" title="Cache" name="Data access" description="Data cache access, not including Cache operations"/>
+ <event event="0x0a" title="Cache" name="Data all access" description="Data cache access, not including Cache Operations regardless of whether or not the location is cacheable"/>
+ <event event="0x0b" title="Cache" name="Data miss" description="Data cache miss, not including Cache Operations"/>
+ <event event="0x0c" title="Cache" name="Write-back" description="Data cache write-back"/>
+ <event event="0x0d" title="Program Counter" name="SW change" description="Software changed the PC"/>
+ <event event="0x0f" title="Cache " name="TLB miss" description="Main TLB miss (unused on ARM1156)"/>
+ <event event="0x10" title="External" name="Access" description="Explicit external data or peripheral access"/>
+ <event event="0x11" title="Cache" name="Data miss" description="Stall because of Load Store Unit request queue being full"/>
+ <event event="0x12" title="Write Buffer" name="Drains" description="The number of times the Write Buffer was drained because of a Data Synchronization Barrier command or Strongly Ordered operation"/>
+ <event event="0x13" title="Disable Interrupts" name="FIQ" description="The number of cycles which FIQ interrupts are disabled (ARM1156 only)"/>
+ <event event="0x14" title="Disable Interrupts" name="IRQ" description="The number of cycles which IRQ interrupts are disabled (ARM1156 only)"/>
+ <event event="0x20" title="ETM" name="ETMEXTOUT[0]" description="ETMEXTOUT[0] signal was asserted for a cycle"/>
+ <event event="0x21" title="ETM" name="ETMEXTOUT[1]" description="ETMEXTOUT[1] signal was asserted for a cycle"/>
+ <event event="0x22" title="ETM" name="ETMEXTOUT[0,1]" description="ETMEXTOUT[0] or ETMEXTOUT[1] was asserted"/>
+ <event event="0x23" title="Procedure" name="Calls" description="Procedure call instruction executed"/>
+ <event event="0x24" title="Procedure" name="Returns" description="Procedure return instruction executed"/>
+ <event event="0x25" title="Procedure" name="Return and predicted" description="Procedure return instruction executed and return address predicted"/>
+ <event event="0x26" title="Procedure" name="Return and mispredicted" description="Procedure return instruction executed and return address predicted incorrectly"/>
+ <event event="0x30" title="Cache" name="Inst tag or parity error" description="Instruction cache Tag or Valid RAM parity error (ARM1156 only)"/>
+ <event event="0x31" title="Cache" name="Inst parity error" description="Instruction cache RAM parity error (ARM1156 only)"/>
+ <event event="0x32" title="Cache" name="Data tag or parity error" description="Data cache Tag or Valid RAM parity error (ARM1156 only)"/>
+ <event event="0x33" title="Cache" name="Data parity error" description="Data cache RAM parity error (ARM1156 only)"/>
+ <event event="0x34" title="ITCM" name="Error" description="ITCM error (ARM1156 only)"/>
+ <event event="0x35" title="DTCM" name="Error" description="DTCM error (ARM1156 only)"/>
+ <event event="0x36" title="Procedure" name="Return address pop" description="Procedure return address popped off the return stack (ARM1156 only)"/>
+ <event event="0x37" title="Procedure" name="Return address misprediction" description="Procedure return address popped off the return stack has been incorrectly predicted by the PFU (ARM1156 only)"/>
+ <event event="0x38" title="Cache" name="Data dirty parity error" description="Data cache Dirty RAM parity error (ARM1156 only)"/>
+ </category>
diff --git a/tools/gator/daemon/events-ARM11MPCore.xml b/tools/gator/daemon/events-ARM11MPCore.xml
new file mode 100644
index 000000000000..2d5c5e199e66
--- /dev/null
+++ b/tools/gator/daemon/events-ARM11MPCore.xml
@@ -0,0 +1,26 @@
+ <counter_set name="ARM_ARM11MPCore_cnt" count="3"/>
+ <category name="ARM11MPCore" counter_set="ARM_ARM11MPCore_cnt" per_cpu="yes">
+ <event counter="ARM_ARM11MPCore_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Cache" name="Inst miss" description="Instruction cache miss to a cacheable location, which requires a fetch from external memory"/>
+ <event event="0x01" title="Pipeline" name="Instruction stall" description="Stall because instruction buffer cannot deliver an instruction"/>
+ <event event="0x02" title="Pipeline" name="Data stall" description="Stall because of a data dependency"/>
+ <event event="0x03" title="Cache" name="Inst micro TLB miss" description="Instruction MicroTLB miss (unused on ARM1156)"/>
+ <event event="0x04" title="Cache" name="Data micro TLB miss" description="Data MicroTLB miss (unused on ARM1156)"/>
+ <event event="0x05" title="Branch" name="Instruction executed" description="Branch instructions executed, branch might or might not have changed program flow"/>
+ <event event="0x06" title="Branch" name="Not predicted" description="Branch not predicted"/>
+ <event event="0x07" title="Branch" name="Mispredicted" description="Branch mispredicted"/>
+ <event event="0x08" title="Core" name="Instructions" description="Instructions executed"/>
+ <event event="0x09" title="Core" name="Folded Instructions" description="Folded instructions executed"/>
+ <event event="0x0a" title="Cache" name="Data read access" description="Data cache read access, not including cache operations"/>
+ <event event="0x0b" title="Cache" name="Data read miss" description="Data cache miss, not including Cache Operations"/>
+ <event event="0x0c" title="Cache" name="Data write access" description="Data cache write access"/>
+ <event event="0x0d" title="Cache" name="Data write miss" description="Data cache write miss"/>
+ <event event="0x0e" title="Cache" name="Data line eviction" description="Data cache line eviction, not including cache operations"/>
+ <event event="0x0f" title="Branch" name="PC change w/o mode change" description="Software changed the PC and there is not a mode change"/>
+ <event event="0x10" title="Cache " name="TLB miss" description="Main TLB miss"/>
+ <event event="0x11" title="External" name="External Memory request" description="External memory request (cache refill, noncachable, write-back)"/>
+ <event event="0x12" title="Cache" name="Stall" description="Stall because of Load Store Unit request queue being full"/>
+ <event event="0x13" title="Write Buffer" name="Drains" description="The number of times the Write Buffer was drained because of LSU ordering constraints or CP15 operations (Data Synchronization Barrier command) or Strongly Ordered operation"/>
+ <event event="0x14" title="Write Buffer" name="Write Merges" description="Buffered write merged in a store buffer slot"/>
+ <event event="0xFF" title="Core" name="Cycle counter" description="An increment each cycle"/>
+ </category>
diff --git a/tools/gator/daemon/events-CCI-400.xml b/tools/gator/daemon/events-CCI-400.xml
new file mode 100644
index 000000000000..20002efd1543
--- /dev/null
+++ b/tools/gator/daemon/events-CCI-400.xml
@@ -0,0 +1,98 @@
+ <counter_set name="CCI_400_cnt" count="4"/>
+ <category name="CCI-400" counter_set="CCI_400_cnt" per_cpu="no" supports_event_based_sampling="yes">
+ <event counter="CCI_400_ccnt" event="0xff" title="CCI-400 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
+ <option_set name="Slave">
+ <option event_delta="0x00" name="S0" description="Slave interface 0"/>
+ <option event_delta="0x20" name="S1" description="Slave interface 1"/>
+ <option event_delta="0x40" name="S2" description="Slave interface 2"/>
+ <option event_delta="0x60" name="S3" description="Slave interface 3"/>
+ <option event_delta="0x80" name="S4" description="Slave interface 4"/>
+ </option_set>
+ <event event="0x00" option_set="Slave" title="CCI-400" name="Read: any" description="Read request handshake: any"/>
+ <event event="0x01" option_set="Slave" title="CCI-400" name="Read: transaction" description="Read request handshake: device transaction"/>
+ <event event="0x02" option_set="Slave" title="CCI-400" name="Read: normal" description="Read request handshake: normal, non-shareable or system-shareable, but not barrier or cache maintenance operation"/>
+ <event event="0x03" option_set="Slave" title="CCI-400" name="Read: shareable" description="Read request handshake: inner- or outer-shareable, but not barrier, DVM message or cache maintenance operation"/>
+ <event event="0x04" option_set="Slave" title="CCI-400" name="Read: cache" description="Read request handshake: cache maintenance operation, CleanInvalid, CleanShared, MakeInvalid"/>
+ <event event="0x05" option_set="Slave" title="CCI-400" name="Read: memory barrier" description="Read request handshake: memory barrier"/>
+ <event event="0x06" option_set="Slave" title="CCI-400" name="Read: sync barrier" description="Read request handshake: synchronization barrier"/>
+ <event event="0x07" option_set="Slave" title="CCI-400" name="Read: DVM message, no sync" description="Read request handshake: DVM message, not synchronization"/>
+ <event event="0x08" option_set="Slave" title="CCI-400" name="Read: DVM message, sync" description="Read request handshake: DVM message, synchronization"/>
+ <event event="0x09" option_set="Slave" title="CCI-400" name="Read: stall" description="Read request stall cycle because the transaction tracker is full. Increase SIx_R_MAX to avoid this stall"/>
+ <event event="0x0a" option_set="Slave" title="CCI-400" name="Read data last handshake" description="Read data last handshake: data returned from the snoop instead of from downstream"/>
+ <event event="0x0b" option_set="Slave" title="CCI-400" name="Read data stall cycle" description="Read data stall cycle: RVALIDS is HIGH, RREADYS is LOW"/>
+ <event event="0x0c" option_set="Slave" title="CCI-400" name="Write: any" description="Write request handshake: any"/>
+ <event event="0x0d" option_set="Slave" title="CCI-400" name="Write: transaction" description="Write request handshake: device transaction"/>
+ <event event="0x0e" option_set="Slave" title="CCI-400" name="Write: normal" description="Write request handshake: normal, non-shareable, or system-shareable, but not barrier"/>
+ <event event="0x0f" option_set="Slave" title="CCI-400" name="Write: shareable" description="Write request handshake: inner- or outer-shareable, WriteBack or WriteClean"/>
+ <event event="0x10" option_set="Slave" title="CCI-400" name="Write: WriteUnique" description="Write request handshake: WriteUnique"/>
+ <event event="0x11" option_set="Slave" title="CCI-400" name="Write: WriteLineUnique" description="Write request handshake: WriteLineUnique"/>
+ <event event="0x12" option_set="Slave" title="CCI-400" name="Write: Evict" description="Write request handshake: Evict"/>
+ <event event="0x13" option_set="Slave" title="CCI-400" name="Write stall: tracker full" description="Write request stall cycle because the transaction tracker is full. Increase SIx_W_MAX to avoid this stall"/>
+ <option_set name="Master">
+ <option event_delta="0xa0" name="M0" description="Master interface 0"/>
+ <option event_delta="0xc0" name="M1" description="Master interface 1"/>
+ <option event_delta="0xe0" name="M2" description="Master interface 2"/>
+ </option_set>
+ <event event="0x14" option_set="Master" title="CCI-400" name="Retry fetch" description="RETRY of speculative fetch transaction"/>
+ <event event="0x15" option_set="Master" title="CCI-400" name="Read stall: address hazard" description="Read request stall cycle because of an address hazard"/>
+ <event event="0x16" option_set="Master" title="CCI-400" name="Read stall: ID hazard" description="Read request stall cycle because of an ID hazard"/>
+ <event event="0x17" option_set="Master" title="CCI-400" name="Read stall: tracker full" description="Read request stall cycle because the transaction tracker is full. Increase MIx_R_MAX to avoid this stall. See the CoreLink CCI-400 Cache Coherent Interconnect Integration Manual"/>
+ <event event="0x18" option_set="Master" title="CCI-400" name="Read stall: barrier hazard" description="Read request stall cycle because of a barrier hazard"/>
+ <event event="0x19" option_set="Master" title="CCI-400" name="Write stall: barrier hazard" description="Write request stall cycle because of a barrier hazard"/>
+ <event event="0x1a" option_set="Master" title="CCI-400" name="Write stall: tracker full" description="Write request stall cycle because the transaction tracker is full. Increase MIx_W_MAX to avoid this stall. See the CoreLink CCI-400 Cache Coherent Interconnect Integration Manual"/>
+ </category>
+ <counter_set name="CCI_400-r1_cnt" count="4"/>
+ <category name="CCI-400" counter_set="CCI_400-r1_cnt" per_cpu="no" supports_event_based_sampling="yes">
+ <event counter="CCI_400-r1_ccnt" event="0xff" title="CCI-400 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
+ <option_set name="Slave">
+ <option event_delta="0x00" name="S0" description="Slave interface 0"/>
+ <option event_delta="0x20" name="S1" description="Slave interface 1"/>
+ <option event_delta="0x40" name="S2" description="Slave interface 2"/>
+ <option event_delta="0x60" name="S3" description="Slave interface 3"/>
+ <option event_delta="0x80" name="S4" description="Slave interface 4"/>
+ </option_set>
+ <event event="0x00" option_set="Slave" title="CCI-400" name="Read: any" description="Read request handshake: any"/>
+ <event event="0x01" option_set="Slave" title="CCI-400" name="Read: transaction" description="Read request handshake: device transaction"/>
+ <event event="0x02" option_set="Slave" title="CCI-400" name="Read: normal" description="Read request handshake: normal, non-shareable or system-shareable, but not barrier or cache maintenance operation"/>
+ <event event="0x03" option_set="Slave" title="CCI-400" name="Read: shareable" description="Read request handshake: inner- or outer-shareable, but not barrier, DVM message or cache maintenance operation"/>
+ <event event="0x04" option_set="Slave" title="CCI-400" name="Read: cache" description="Read request handshake: cache maintenance operation"/>
+ <event event="0x05" option_set="Slave" title="CCI-400" name="Read: memory barrier" description="Read request handshake: memory barrier"/>
+ <event event="0x06" option_set="Slave" title="CCI-400" name="Read: sync barrier" description="Read request handshake: synchronization barrier"/>
+ <event event="0x07" option_set="Slave" title="CCI-400" name="Read: DVM message, no sync" description="Read request handshake: DVM message, not synchronization"/>
+ <event event="0x08" option_set="Slave" title="CCI-400" name="Read: DVM message, sync" description="Read request handshake: DVM message, synchronization"/>
+ <event event="0x09" option_set="Slave" title="CCI-400" name="Read: stall" description="Read request stall cycle because the transaction tracker is full. Increase SIx_R_MAX to avoid this stall"/>
+ <event event="0x0a" option_set="Slave" title="CCI-400" name="Read data last handshake" description="Read data last handshake: data returned from the snoop instead of from downstream"/>
+ <event event="0x0b" option_set="Slave" title="CCI-400" name="Read data stall cycle" description="Read data stall cycle: RVALIDS is HIGH, RREADYS is LOW"/>
+ <event event="0x0c" option_set="Slave" title="CCI-400" name="Write: any" description="Write request handshake: any"/>
+ <event event="0x0d" option_set="Slave" title="CCI-400" name="Write: transaction" description="Write request handshake: device transaction"/>
+ <event event="0x0e" option_set="Slave" title="CCI-400" name="Write: normal" description="Write request handshake: normal, non-shareable, or system-shareable, but not barrier"/>
+ <event event="0x0f" option_set="Slave" title="CCI-400" name="Write: shareable" description="Write request handshake: inner- or outer-shareable, WriteBack or WriteClean"/>
+ <event event="0x10" option_set="Slave" title="CCI-400" name="Write: WriteUnique" description="Write request handshake: WriteUnique"/>
+ <event event="0x11" option_set="Slave" title="CCI-400" name="Write: WriteLineUnique" description="Write request handshake: WriteLineUnique"/>
+ <event event="0x12" option_set="Slave" title="CCI-400" name="Write: Evict" description="Write request handshake: Evict"/>
+ <event event="0x13" option_set="Slave" title="CCI-400" name="Write stall: tracker full" description="Write request stall cycle because the transaction tracker is full. Increase SIx_W_MAX to avoid this stall"/>
+ <event event="0x14" option_set="Slave" title="CCI-400" name="Read stall: slave hazard" description="Read request stall cycle because of a slave interface ID hazard"/>
+ <option_set name="Master">
+ <option event_delta="0xa0" name="M0" description="Master interface 0"/>
+ <option event_delta="0xc0" name="M1" description="Master interface 1"/>
+ <option event_delta="0xe0" name="M2" description="Master interface 2"/>
+ </option_set>
+ <event event="0x00" option_set="Master" title="CCI-400" name="Retry fetch" description="RETRY of speculative fetch transaction"/>
+ <event event="0x01" option_set="Master" title="CCI-400" name="Read stall: address hazard" description="Stall cycle because of an address hazard. A read or write invalidation is stalled because of an outstanding transaction to an overlapping address"/>
+ <event event="0x02" option_set="Master" title="CCI-400" name="Read stall: ID hazard" description="Read request stall cycle because of a master interface ID hazard"/>
+ <event event="0x03" option_set="Master" title="CCI-400" name="Read stall: tracker full" description="A read request with a QoS value in the high priority group is stalled for a cycle because the read transaction queue is full. Increase MIx_R_MAX to avoid this stall"/>
+ <event event="0x04" option_set="Master" title="CCI-400" name="Read stall: barrier hazard" description="Read request stall cycle because of a barrier hazard"/>
+ <event event="0x05" option_set="Master" title="CCI-400" name="Write stall: barrier hazard" description="Write request stall cycle because of a barrier hazard"/>
+ <event event="0x06" option_set="Master" title="CCI-400" name="Write stall: tracker full" description="A write request is stalled for a cycle because the write transaction tracker is full. Increase MIx_W_MAX to avoid this stall"/>
+ <event event="0x07" option_set="Master" title="CCI-400" name="Read Stall: Low Priority" description="A read request with a QoS value in the low priority group is stalled for a cycle because there are no slots available in the read queue for the low priority group"/>
+ <event event="0x08" option_set="Master" title="CCI-400" name="Read Stall: Medium Priority" description="A read request with a QoS value in the medium priority group is stalled for a cycle because there are no slots available in the read queue for the medium priority group"/>
+ <event event="0x09" option_set="Master" title="CCI-400" name="Read Stall: VN0" description="A read request is stalled for a cycle while it was waiting for a QVN token on VN0"/>
+ <event event="0x0a" option_set="Master" title="CCI-400" name="Read Stall: VN1" description="A read request is stalled for a cycle while it was waiting for a QVN token on VN1"/>
+ <event event="0x0b" option_set="Master" title="CCI-400" name="Read Stall: VN2" description="A read request is stalled for a cycle while it was waiting for a QVN token on VN2"/>
+ <event event="0x0c" option_set="Master" title="CCI-400" name="Read Stall: VN3" description="A read request is stalled for a cycle while it was waiting for a QVN token on VN3"/>
+ <event event="0x0d" option_set="Master" title="CCI-400" name="Write Stall: VN0" description="A write request is stalled for a cycle while it was waiting for a QVN token on VN0"/>
+ <event event="0x0e" option_set="Master" title="CCI-400" name="Write Stall: VN1" description="A write request is stalled for a cycle while it was waiting for a QVN token on VN1"/>
+ <event event="0x0f" option_set="Master" title="CCI-400" name="Write Stall: VN2" description="A write request is stalled for a cycle while it was waiting for a QVN token on VN2"/>
+ <event event="0x10" option_set="Master" title="CCI-400" name="Write Stall: VN" description="A write request is stalled for a cycle while it was waiting for a QVN token on VN"/>
+ <event event="0x11" option_set="Master" title="CCI-400" name="WriteUnique or WriteLineUnique Stall" description="A WriteUnique or WriteLineUnique request is stalled for a cycle because of an address hazard"/>
+ </category>
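
How the option sets above combine with the event numbers: the effective code programmed into a CCI-400 PMU counter is the base event number plus the event_delta of the selected interface, so one table row serves every slave or master port. A minimal C sketch, assuming the additive encoding implied by the XML (the helper name is hypothetical):

    #include <stdio.h>

    /* Combine a CCI-400 base event number with the event_delta of the
     * interface chosen from the option_set; the additive encoding is
     * inferred from the XML above, not from the gator sources. */
    static unsigned int cci400_event_code(unsigned int base_event,
                                          unsigned int iface_delta)
    {
            return base_event + iface_delta;
    }

    int main(void)
    {
            /* "Write stall: tracker full" (0x13) on slave interface S1
             * (event_delta 0x20) yields event code 0x33. */
            printf("0x%02x\n", cci400_event_code(0x13, 0x20));
            return 0;
    }
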
diff --git a/tools/gator/daemon/events-CCN-504.xml b/tools/gator/daemon/events-CCN-504.xml
new file mode 100644
index 000000000000..6ef3e6483717
--- /dev/null
+++ b/tools/gator/daemon/events-CCN-504.xml
@@ -0,0 +1,113 @@
+ <counter_set name="CCN-504_cnt" count="4"/>
+ <category name="CCN-504" counter_set="CCN-504_cnt">
+ <event counter="CCN-504_ccnt" title="CCN-504 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
+ <option_set name="XP_Region">
+ <option event_delta="0x400000" name="XP 0" description="Crosspoint 0"/>
+ <option event_delta="0x410000" name="XP 1" description="Crosspoint 1"/>
+ <option event_delta="0x420000" name="XP 2" description="Crosspoint 2"/>
+ <option event_delta="0x430000" name="XP 3" description="Crosspoint 3"/>
+ <option event_delta="0x440000" name="XP 4" description="Crosspoint 4"/>
+ <option event_delta="0x450000" name="XP 5" description="Crosspoint 5"/>
+ <option event_delta="0x460000" name="XP 6" description="Crosspoint 6"/>
+ <option event_delta="0x470000" name="XP 7" description="Crosspoint 7"/>
+ <option event_delta="0x480000" name="XP 8" description="Crosspoint 8"/>
+ <option event_delta="0x490000" name="XP 9" description="Crosspoint 9"/>
+ <option event_delta="0x4A0000" name="XP 10" description="Crosspoint 10"/>
+ </option_set>
+ <event event="0x0801" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: H-bit" description="Bus 0: REQ: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0802" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: S-bit" description="Bus 0: REQ: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0803" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: P-Cnt" description="Bus 0: REQ: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0804" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: TknV" description="Bus 0: REQ: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0809" option_set="XP_Region" title="CCN-504" name="Bus 1: REQ: H-bit" description="Bus 1: REQ: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x080A" option_set="XP_Region" title="CCN-504" name="Bus 1: REQ: S-bit" description="Bus 1: REQ: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x080B" option_set="XP_Region" title="CCN-504" name="Bus 1: REQ: P-Cnt" description="Bus 1: REQ: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x080C" option_set="XP_Region" title="CCN-504" name="Bus 1: REQ: TknV" description="Bus 1: REQ: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0811" option_set="XP_Region" title="CCN-504" name="Bus 0: RSP: H-bit" description="Bus 0: RSP: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0812" option_set="XP_Region" title="CCN-504" name="Bus 0: RSP: S-bit" description="Bus 0: RSP: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0813" option_set="XP_Region" title="CCN-504" name="Bus 0: RSP: P-Cnt" description="Bus 0: RSP: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0814" option_set="XP_Region" title="CCN-504" name="Bus 0: RSP: TknV" description="Bus 0: RSP: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0819" option_set="XP_Region" title="CCN-504" name="Bus 1: RSP: H-bit" description="Bus 1: RSP: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x081A" option_set="XP_Region" title="CCN-504" name="Bus 1: RSP: S-bit" description="Bus 1: RSP: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x081B" option_set="XP_Region" title="CCN-504" name="Bus 1: RSP: P-Cnt" description="Bus 1: RSP: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x081C" option_set="XP_Region" title="CCN-504" name="Bus 1: RSP: TknV" description="Bus 1: RSP: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0821" option_set="XP_Region" title="CCN-504" name="Bus 0: SNP: H-bit" description="Bus 0: SNP: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0822" option_set="XP_Region" title="CCN-504" name="Bus 0: SNP: S-bit" description="Bus 0: SNP: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0823" option_set="XP_Region" title="CCN-504" name="Bus 0: SNP: P-Cnt" description="Bus 0: SNP: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0824" option_set="XP_Region" title="CCN-504" name="Bus 0: SNP: TknV" description="Bus 0: SNP: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0829" option_set="XP_Region" title="CCN-504" name="Bus 1: SNP: H-bit" description="Bus 1: SNP: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x082A" option_set="XP_Region" title="CCN-504" name="Bus 1: SNP: S-bit" description="Bus 1: SNP: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x082B" option_set="XP_Region" title="CCN-504" name="Bus 1: SNP: P-Cnt" description="Bus 1: SNP: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x082C" option_set="XP_Region" title="CCN-504" name="Bus 1: SNP: TknV" description="Bus 1: SNP: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0831" option_set="XP_Region" title="CCN-504" name="Bus 0: DAT: H-bit" description="Bus 0: DAT: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0832" option_set="XP_Region" title="CCN-504" name="Bus 0: DAT: S-bit" description="Bus 0: DAT: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0833" option_set="XP_Region" title="CCN-504" name="Bus 0: DAT: P-Cnt" description="Bus 0: DAT: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0834" option_set="XP_Region" title="CCN-504" name="Bus 0: DAT: TknV" description="Bus 0: DAT: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0839" option_set="XP_Region" title="CCN-504" name="Bus 1: DAT: H-bit" description="Bus 1: DAT: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x083A" option_set="XP_Region" title="CCN-504" name="Bus 1: DAT: S-bit" description="Bus 1: DAT: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x083B" option_set="XP_Region" title="CCN-504" name="Bus 1: DAT: P-Cnt" description="Bus 1: DAT: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x083C" option_set="XP_Region" title="CCN-504" name="Bus 1: DAT: TknV" description="Bus 1: DAT: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0871" option_set="XP_Region" title="CCN-504" name="Bus 0: DATB: H-bit" description="Bus 0: DATB: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0872" option_set="XP_Region" title="CCN-504" name="Bus 0: DATB: S-bit" description="Bus 0: DATB: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0873" option_set="XP_Region" title="CCN-504" name="Bus 0: DATB: P-Cnt" description="Bus 0: DATB: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0874" option_set="XP_Region" title="CCN-504" name="Bus 0: DATB: TknV" description="Bus 0: DATB: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0879" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: H-bit" description="Bus 1: DATB: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x087A" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: S-bit" description="Bus 1: DATB: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x087B" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: P-Cnt" description="Bus 1: DATB: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x087C" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: TknV" description="Bus 1: DATB: No TknV, signaled when this XP transmits a valid packet."/>
+ <option_set name="HN-F_Region">
+ <option event_delta="0x200000" name="HN-F 3" description="Fully-coherent Home Node 3"/>
+ <option event_delta="0x210000" name="HN-F 5" description="Fully-coherent Home Node 5"/>
+ <option event_delta="0x220000" name="HN-F 7" description="Fully-coherent Home Node 7"/>
+ <option event_delta="0x230000" name="HN-F 8" description="Fully-coherent Home Node 8"/>
+ <option event_delta="0x240000" name="HN-F 13" description="Fully-coherent Home Node 13"/>
+ <option event_delta="0x250000" name="HN-F 15" description="Fully-coherent Home Node 15"/>
+ <option event_delta="0x260000" name="HN-F 17" description="Fully-coherent Home Node 17"/>
+ <option event_delta="0x270000" name="HN-F 18" description="Fully-coherent Home Node 18"/>
+ </option_set>
+ <event event="0x0401" option_set="HN-F_Region" title="CCN-504" name="Cache Miss" description="Counts the total cache misses. This is the first time lookup result, and is high priority."/>
+ <event event="0x0402" option_set="HN-F_Region" title="CCN-504" name="L3 SF Cache Access" description="Counts the number of cache accesses. This is the first time access, and is high priority."/>
+ <event event="0x0403" option_set="HN-F_Region" title="CCN-504" name="Cache Fill" description="Counts the total allocations in the HN L3 cache, and all cache line allocations to the L3 cache."/>
+ <event event="0x0404" option_set="HN-F_Region" title="CCN-504" name="POCQ Retry" description="Counts the number of requests that have been retried."/>
+ <event event="0x0405" option_set="HN-F_Region" title="CCN-504" name="POCQ Reqs Recvd" description="Counts the number of requests received by HN."/>
+ <event event="0x0406" option_set="HN-F_Region" title="CCN-504" name="SF Hit" description="Counts the number of snoop filter hits."/>
+ <event event="0x0407" option_set="HN-F_Region" title="CCN-504" name="SF Evictions" description="Counts the number of snoop filter evictions. Cache invalidations are initiated."/>
+ <event event="0x0408" option_set="HN-F_Region" title="CCN-504" name="Snoops Sent" description="Counts the number of snoops sent. Does not differentiate between broadcast or directed snoops."/>
+ <event event="0x0409" option_set="HN-F_Region" title="CCN-504" name="Snoops Broadcast" description="Counts the number of snoop broadcasts sent."/>
+ <event event="0x040A" option_set="HN-F_Region" title="CCN-504" name="L3 Eviction" description="Counts the number of L3 evictions."/>
+ <event event="0x040B" option_set="HN-F_Region" title="CCN-504" name="L3 Fill Invalid Way" description="Counts the number of L3 fills to an invalid way."/>
+ <event event="0x040C" option_set="HN-F_Region" title="CCN-504" name="MC Retries" description="Counts the number of transactions retried by the memory controller."/>
+ <event event="0x040D" option_set="HN-F_Region" title="CCN-504" name="MC Reqs" description="Counts the number of requests to the memory controller."/>
+ <event event="0x040E" option_set="HN-F_Region" title="CCN-504" name="QOS HH Retry" description="Counts the number of times a highest-priority QoS class was retried at the HN-F."/>
+ <option_set name="RN-I_Region">
+ <option event_delta="0x800000" name="RN-I 0" description="I/O-coherent Requesting Node 0"/>
+ <option event_delta="0x820000" name="RN-I 2" description="I/O-coherent Requesting Node 2"/>
+ <option event_delta="0x860000" name="RN-I 6" description="I/O-coherent Requesting Node 6"/>
+ <option event_delta="0x8C0000" name="RN-I 12" description="I/O-coherent Requesting Node 12"/>
+ <option event_delta="0x900000" name="RN-I 16" description="I/O-coherent Requesting Node 16"/>
+ <option event_delta="0x940000" name="RN-I 20" description="I/O-coherent Requesting Node 20"/>
+ </option_set>
+ <event event="0x1601" option_set="RN-I_Region" title="CCN-504" name="S0 RDataBeats" description="S0 RDataBeats."/>
+ <event event="0x1602" option_set="RN-I_Region" title="CCN-504" name="S1 RDataBeats" description="S1 RDataBeats."/>
+ <event event="0x1603" option_set="RN-I_Region" title="CCN-504" name="S2 RDataBeats" description="S2 RDataBeats."/>
+ <event event="0x1604" option_set="RN-I_Region" title="CCN-504" name="RXDAT Flits received" description="RXDAT Flits received."/>
+ <event event="0x1605" option_set="RN-I_Region" title="CCN-504" name="TXDAT Flits sent" description="TXDAT Flits sent."/>
+ <event event="0x1606" option_set="RN-I_Region" title="CCN-504" name="Total TXREQ Flits sent" description="Total TXREQ Flits sent."/>
+ <event event="0x1607" option_set="RN-I_Region" title="CCN-504" name="Retried TXREQ Flits sent" description="Retried TXREQ Flits sent."/>
+ <event event="0x1608" option_set="RN-I_Region" title="CCN-504" name="RRT full" description="RRT full."/>
+ <event event="0x1609" option_set="RN-I_Region" title="CCN-504" name="WRT full" description="WRT full."/>
+ <event event="0x160A" option_set="RN-I_Region" title="CCN-504" name="Replayed TXREQ Flits" description="Replayed TXREQ Flits."/>
+ <option_set name="SBAS_Region">
+ <option event_delta="0x810000" name="SBAS 1" description="ACE master to CHI protocol bridge 1"/>
+ <option event_delta="0x890000" name="SBAS 9" description="ACE master to CHI protocol bridge 9"/>
+ <option event_delta="0x8B0000" name="SBAS 11" description="ACE master to CHI protocol bridge 11"/>
+ <option event_delta="0x930000" name="SBAS 19" description="ACE master to CHI protocol bridge 19"/>
+ </option_set>
+ <event event="0x1001" option_set="SBAS_Region" title="CCN-504" name="S0 RDataBeats" description="S0 RDataBeats."/>
+ <event event="0x1004" option_set="SBAS_Region" title="CCN-504" name="RXDAT Flits received" description="RXDAT Flits received."/>
+ <event event="0x1005" option_set="SBAS_Region" title="CCN-504" name="TXDAT Flits sent" description="TXDAT Flits sent."/>
+ <event event="0x1006" option_set="SBAS_Region" title="CCN-504" name="Total TXREQ Flits sent" description="Total TXREQ Flits sent."/>
+ <event event="0x1007" option_set="SBAS_Region" title="CCN-504" name="Retried TXREQ Flits sent" description="Retried TXREQ Flits sent."/>
+ <event event="0x1008" option_set="SBAS_Region" title="CCN-504" name="RRT full" description="RRT full."/>
+ <event event="0x1009" option_set="SBAS_Region" title="CCN-504" name="WRT full" description="WRT full."/>
+ <event event="0x100A" option_set="SBAS_Region" title="CCN-504" name="Replayed TXREQ Flits" description="Replayed TXREQ Flits."/>
+ </category>
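
The CCN-504 deltas work the same way but select a node rather than an interface; their 0x10000 spacing suggests a 16-bit event field with the node region index in the bits above it. A C sketch of that split, which is an inference from the option_set spacing rather than a documented CCN-504 register layout:

    #include <stdio.h>

    /* Split a combined CCN-504 code into node region and event number.
     * The 16-bit boundary is assumed from the 0x10000 event_delta
     * spacing in the tables above. */
    static void ccn504_decode(unsigned int code)
    {
            unsigned int region = code >> 16;
            unsigned int event  = code & 0xffff;
            printf("region 0x%02x, event 0x%04x\n", region, event);
    }

    int main(void)
    {
            /* "Cache Miss" (0x0401) on HN-F 5 (event_delta 0x210000). */
            ccn504_decode(0x210000 + 0x0401);
            return 0;
    }
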
diff --git a/tools/gator/daemon/events-Cortex-A15.xml b/tools/gator/daemon/events-Cortex-A15.xml
new file mode 100644
index 000000000000..f50e55d66195
--- /dev/null
+++ b/tools/gator/daemon/events-Cortex-A15.xml
@@ -0,0 +1,68 @@
+ <counter_set name="ARMv7_Cortex_A15_cnt" count="6"/>
+ <category name="Cortex-A15" counter_set="ARMv7_Cortex_A15_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv7_Cortex_A15_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Software increment architecturally executed"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
+ <event event="0x14" title="Cache" name="L1 inst access" description="Instruction cache access"/>
+ <event event="0x15" title="Cache" name="L1 data write" description="Level 1 data cache Write-Back"/>
+ <event event="0x16" title="Cache" name="L2 data access" description="Level 2 data cache access"/>
+ <event event="0x17" title="Cache" name="L2 data refill" description="Level 2 data cache refill"/>
+ <event event="0x18" title="Cache" name="L2 data write" description="Level 2 data cache Write-Back"/>
+ <event event="0x19" title="Bus" name="Access" description="Bus - Access"/>
+ <event event="0x1a" title="Memory" name="Error" description="Local memory error"/>
+ <event event="0x1b" title="Instruction" name="Speculative" description="Instruction speculatively executed"/>
+ <event event="0x1c" title="Memory" name="Translation table" description="Write to translation table base architecturally executed"/>
+ <event event="0x1d" title="Bus" name="Cycle" description="Bus - Cycle"/>
+ <event event="0x40" title="Cache" name="L1 data read" description="Level 1 data cache access - Read"/>
+ <event event="0x41" title="Cache" name="L1 data access write" description="Level 1 data cache access - Write"/>
+ <event event="0x42" title="Cache" name="L1 data refill read" description="Level 1 data cache refill - Read"/>
+ <event event="0x43" title="Cache" name="L1 data refill write" description="Level 1 data cache refill - Write"/>
+ <event event="0x46" title="Cache" name="L1 data victim" description="Level 1 data cache Write-Back - Victim"/>
+ <event event="0x47" title="Cache" name="L1 data clean" description="Level 1 data cache Write-Back - Cleaning and coherency"/>
+ <event event="0x48" title="Cache" name="L1 data invalidate" description="Level 1 data cache invalidate"/>
+ <event event="0x4c" title="TLB" name="L1 data refill read" description="Level 1 data TLB refill - Read"/>
+ <event event="0x4d" title="TLB" name="L1 data refill write" description="Level 1 data TLB refill - Write"/>
+ <event event="0x50" title="Cache" name="L2 data read" description="Level 2 data cache access - Read"/>
+ <event event="0x51" title="Cache" name="L2 data access write" description="Level 2 data cache access - Write"/>
+ <event event="0x52" title="Cache" name="L2 data refill read" description="Level 2 data cache refill - Read"/>
+ <event event="0x53" title="Cache" name="L2 data refill write" description="Level 2 data cache refill - Write"/>
+ <event event="0x56" title="Cache" name="L2 data victim" description="Level 2 data cache Write-Back - Victim"/>
+ <event event="0x57" title="Cache" name="L2 data clean" description="Level 2 data cache Write-Back - Cleaning and coherency"/>
+ <event event="0x58" title="Cache" name="L2 data invalidate" description="Level 2 data cache invalidate"/>
+ <event event="0x60" title="Bus" name="Read" description="Bus access - Read"/>
+ <event event="0x61" title="Bus" name="Write" description="Bus access - Write"/>
+ <event event="0x64" title="Bus" name="Access normal" description="Bus access - Normal"/>
+ <event event="0x65" title="Bus" name="Peripheral" description="Bus access - Peripheral"/>
+ <event event="0x66" title="Memory" name="Read" description="Data memory access - Read"/>
+ <event event="0x67" title="Memory" name="Write" description="Data memory access - Write"/>
+ <event event="0x68" title="Memory" name="Unaligned Read" description="Unaligned access - Read"/>
+ <event event="0x69" title="Memory" name="Unaligned Write" description="Unaligned access - Write"/>
+ <event event="0x6a" title="Memory" name="Unaligned" description="Unaligned access"/>
+ <event event="0x6c" title="Intrinsic" name="LDREX" description="Exclusive instruction speculatively executed - LDREX"/>
+ <event event="0x6d" title="Intrinsic" name="STREX pass" description="Exclusive instruction speculatively executed - STREX pass"/>
+ <event event="0x6e" title="Intrinsic" name="STREX fail" description="Exclusive instruction speculatively executed - STREX fail"/>
+ <event event="0x70" title="Instruction" name="Load" description="Instruction speculatively executed - Load"/>
+ <event event="0x71" title="Instruction" name="Store" description="Instruction speculatively executed - Store"/>
+ <event event="0x72" title="Instruction" name="Load/Store" description="Instruction speculatively executed - Load or store"/>
+ <event event="0x73" title="Instruction" name="Integer" description="Instruction speculatively executed - Integer data processing"/>
+ <event event="0x74" title="Instruction" name="Advanced SIMD" description="Instruction speculatively executed - Advanced SIMD"/>
+ <event event="0x75" title="Instruction" name="VFP" description="Instruction speculatively executed - VFP"/>
+ <event event="0x76" title="Instruction" name="Software change" description="Instruction speculatively executed - Software change of the PC"/>
+ <event event="0x78" title="Instruction" name="Immediate branch" description="Branch speculatively executed - Immediate branch"/>
+ <event event="0x79" title="Instruction" name="Procedure return" description="Branch speculatively executed - Procedure return"/>
+ <event event="0x7a" title="Instruction" name="Indirect branch" description="Branch speculatively executed - Indirect branch"/>
+ <event event="0x7c" title="Instruction" name="ISB" description="Barrier speculatively executed - ISB"/>
+ <event event="0x7d" title="Instruction" name="DSB" description="Barrier speculatively executed - DSB"/>
+ <event event="0x7e" title="Instruction" name="DMB" description="Barrier speculatively executed - DMB"/>
+ </category>
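
A worked example of how these raw Cortex-A15 events are commonly combined: the L1 data cache miss ratio is the refill count (event 0x03) divided by the access count (event 0x04). The counter values below are placeholders, not captured data; in practice they would come from gatord or perf:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long l1d_refill = 120000;   /* event 0x03 (placeholder) */
            unsigned long long l1d_access = 4800000;  /* event 0x04 (placeholder) */

            /* Miss ratio = refills / accesses, guarded against division
             * by zero. */
            printf("L1D miss ratio: %.2f%%\n",
                   l1d_access ? 100.0 * l1d_refill / l1d_access : 0.0);
            return 0;
    }
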
diff --git a/tools/gator/daemon/events-Cortex-A17.xml b/tools/gator/daemon/events-Cortex-A17.xml
new file mode 100644
index 000000000000..4dd08c1f203d
--- /dev/null
+++ b/tools/gator/daemon/events-Cortex-A17.xml
@@ -0,0 +1,86 @@
+ <counter_set name="ARMv7_Cortex_A17_cnt" count="6"/>
+ <category name="Cortex-A17" counter_set="ARMv7_Cortex_A17_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv7_Cortex_A17_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
+ <event event="0x14" title="Cache" name="L1 inst access" description="Instruction cache access"/>
+ <event event="0x15" title="Cache" name="L1 data write" description="Level 1 data cache Write-Back"/>
+ <event event="0x16" title="Cache" name="L2 data access" description="Level 2 data cache access"/>
+ <event event="0x17" title="Cache" name="L2 data refill" description="Level 2 data cache refill"/>
+ <event event="0x18" title="Cache" name="L2 data write" description="Level 2 data cache Write-Back"/>
+ <event event="0x19" title="Bus" name="Access" description="Bus - Access"/>
+ <event event="0x1b" title="Instruction" name="Speculative" description="Instruction speculatively executed"/>
+ <event event="0x1c" title="Memory" name="Translation table" description="Write to translation table base architecturally executed"/>
+ <event event="0x1d" title="Bus" name="Cycle" description="Bus - Cycle"/>
+ <event event="0x40" title="Cache" name="L1 data read" description="Level 1 data cache access - Read"/>
+ <event event="0x41" title="Cache" name="L1 data access write" description="Level 1 data cache access - Write"/>
+ <event event="0x50" title="Cache" name="L2 data read" description="Level 2 data cache access - Read"/>
+ <event event="0x51" title="Cache" name="L2 data access write" description="Level 2 data cache access - Write"/>
+ <event event="0x56" title="Cache" name="L2 data victim" description="Level 2 data cache Write-Back - Victim"/>
+ <event event="0x57" title="Cache" name="L2 data clean" description="Level 2 data cache Write-Back - Cleaning and coherency"/>
+ <event event="0x58" title="Cache" name="L2 data invalidate" description="Level 2 data cache invalidate"/>
+ <event event="0x60" title="Bus" name="Read" description="Bus access - Read"/>
+ <event event="0x62" title="Bus" name="Access shared" description="Bus access - Normal"/>
+ <event event="0x63" title="Bus" name="Access not shared" description="Bus access - Not normal"/>
+ <event event="0x64" title="Bus" name="Access normal" description="Bus access - Normal"/>
+ <event event="0x65" title="Bus" name="Peripheral" description="Bus access - Peripheral"/>
+ <event event="0x66" title="Memory" name="Read" description="Data memory access - Read"/>
+ <event event="0x67" title="Memory" name="Write" description="Data memory access - Write"/>
+ <event event="0x68" title="Memory" name="Unaligned Read" description="Unaligned access - Read"/>
+ <event event="0x69" title="Memory" name="Unaligned Write" description="Unaligned access - Write"/>
+ <event event="0x6a" title="Memory" name="Unaligned" description="Unaligned access"/>
+ <event event="0x6c" title="Intrinsic" name="LDREX" description="Exclusive instruction speculatively executed - LDREX"/>
+ <event event="0x6e" title="Intrinsic" name="STREX fail" description="Exclusive instruction speculatively executed - STREX fail"/>
+ <event event="0x6f" title="Intrinsic" name="STREX" description="Exclusive instruction speculatively executed - STREX"/>
+ <event event="0x70" title="Instruction" name="Load" description="Instruction speculatively executed - Load"/>
+ <event event="0x71" title="Instruction" name="Store" description="Instruction speculatively executed - Store"/>
+ <event event="0x72" title="Instruction" name="Load/Store" description="Instruction speculatively executed - Load or store"/>
+ <event event="0x73" title="Instruction" name="Integer" description="Instruction speculatively executed - Integer data processing"/>
+ <event event="0x74" title="Instruction" name="Advanced SIMD" description="Instruction speculatively executed - Advanced SIMD"/>
+ <event event="0x75" title="Instruction" name="VFP" description="Instruction speculatively executed - VFP"/>
+ <event event="0x76" title="Instruction" name="Software change" description="Instruction speculatively executed - Software change of the PC"/>
+ <event event="0x78" title="Instruction" name="Immediate branch" description="Branch speculatively executed - Immediate branch"/>
+ <event event="0x79" title="Instruction" name="Procedure return" description="Branch speculatively executed - Procedure return"/>
+ <event event="0x7a" title="Instruction" name="Indirect branch" description="Branch speculatively executed - Indirect branch"/>
+ <event event="0x7c" title="Instruction" name="ISB" description="Barrier speculatively executed - ISB"/>
+ <event event="0x7d" title="Instruction" name="DSB" description="Barrier speculatively executed - DSB"/>
+ <event event="0x7e" title="Instruction" name="DMB" description="Barrier speculatively executed - DMB"/>
+ <event event="0x81" title="Exception" name="Undefined" description="Exception taken, other synchronous"/>
+ <event event="0x8a" title="Exception" name="Hypervisor call" description="Exception taken, Hypervisor Call"/>
+ <event event="0xc0" title="Instruction" name="Stalled Linefill" description="Instruction side stalled due to a Linefill"/>
+ <event event="0xc1" title="Instruction" name="Stalled Page Table Walk" description="Instruction Side stalled due to a Page Table Walk"/>
+ <event event="0xc2" title="Cache" name="4 Ways Read" description="Number of set of 4 ways read in the instruction cache - Tag RAM"/>
+ <event event="0xc3" title="Cache" name="Ways Read" description="Number of ways read in the instruction cache - Data RAM"/>
+ <event event="0xc4" title="Cache" name="BATC Read" description="Number of ways read in the instruction BTAC RAM"/>
+ <event event="0xca" title="Memory" name="Snoop" description="Data snooped from other processor. This event counts memory-read operations that read data from another processor within the local Cortex-A17 cluster, rather than accessing the L2 cache or issuing an external read. It increments on each transaction, rather than on each beat of data"/>
+ <event event="0xd3" title="Slots" name="Load-Store Unit" description="Duration during which all slots in the Load-Store Unit are busy"/>
+ <event event="0xd8" title="Slots" name="Load-Store Issue Queue" description="Duration during which all slots in the Load-Store Issue queue are busy"/>
+ <event event="0xd9" title="Slots" name="Data Processing Issue Queue" description="Duration during which all slots in the Data Processing issue queue are busy"/>
+ <event event="0xda" title="Slots" name="Data Engine Issue Queue" description="Duration during which all slots in the Data Engine issue queue are busy"/>
+ <event event="0xdb" title="NEON" name="Flush" description="Number of NEON instruction which fail their condition code and lead to a flush of the DE pipe"/>
+ <event event="0xdc" title="Hypervisor" name="Traps" description="Number of Trap to hypervisor"/>
+ <event event="0xde" title="PTM" name="EXTOUT 0" description="PTM EXTOUT 0"/>
+ <event event="0xdf" title="PTM" name="EXTOUT 1" description="PTM EXTOUT 1"/>
+ <event event="0xe0" title="MMU" name="Table Walk" description="Duration during which the MMU handle a Page table walk"/>
+ <event event="0xe1" title="MMU" name="Stage1 Table Walk" description="Duration during which the MMU handle a Stage1 Page table walk"/>
+ <event event="0xe2" title="MMU" name="Stage2 Table Walk" description="Duration during which the MMU handle a Stage2 Page table walk"/>
+ <event event="0xe3" title="MMU" name="LSU Table Walk" description="Duration during which the MMU handle a Page table walk requested by the Load Store Unit"/>
+ <event event="0xe4" title="MMU" name="Instruction Table Walk" description="Duration during which the MMU handle a Page table walk requested by the Instruction side"/>
+ <event event="0xe5" title="MMU" name="Preload Table Walk" description="Duration during which the MMU handle a Page table walk requested by a Preload instruction or Prefetch request"/>
+ <event event="0xe6" title="MMU" name="cp15 Table Walk" description="Duration during which the MMU handle a Page table walk requested by a cp15 operation (maintenance by MVA and VA-to-PA operation)"/>
+ <event event="0xe7" title="Cache" name="L1 PLD TLB refill" description="Level 1 PLD TLB refill"/>
+ <event event="0xe8" title="Cache" name="L1 CP15 TLB refill" description="Level 1 CP15 TLB refill"/>
+ <event event="0xe9" title="Cache" name="L1 TLB flush" description="Level 1 TLB flush"/>
+ <event event="0xea" title="Cache" name="L2 TLB access" description="Level 2 TLB access"/>
+ <event event="0xeb" title="Cache" name="L2 TLB miss" description="Level 2 TLB miss"/>
+ </category>
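
The Cortex-A17 "Duration" events (0xe0-0xe6) count cycles spent in the named activity, so dividing one of them by the cycle count gives the fraction of time spent there, for example in MMU table walks. The values below are placeholders, not captured data:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long walk_cycles = 35000;   /* event 0xe0 (placeholder) */
            unsigned long long cycles      = 9000000; /* cycle counter (placeholder) */

            /* Fraction of core clock cycles the MMU spent handling
             * table walks. */
            printf("time in table walks: %.2f%%\n",
                   cycles ? 100.0 * walk_cycles / cycles : 0.0);
            return 0;
    }
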
diff --git a/tools/gator/daemon/events-Cortex-A5.xml b/tools/gator/daemon/events-Cortex-A5.xml
new file mode 100644
index 000000000000..d67581d77c08
--- /dev/null
+++ b/tools/gator/daemon/events-Cortex-A5.xml
@@ -0,0 +1,36 @@
+ <counter_set name="ARMv7_Cortex_A5_cnt" count="2"/>
+ <category name="Cortex-A5" counter_set="ARMv7_Cortex_A5_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv7_Cortex_A5_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x06" title="Instruction" name="Memory read" description="Memory-reading instruction architecturally executed"/>
+ <event event="0x07" title="Instruction" name="Memory write" description="Memory-writing instruction architecturally executed"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x0c" title="Branch" name="PC change" description="Software change of the Program Counter, except by an exception, architecturally executed"/>
+ <event event="0x0d" title="Branch" name="Immediate" description="Immediate branch architecturally executed"/>
+ <event event="0x0e" title="Procedure" name="Return" description="Procedure return, other than exception return, architecturally executed"/>
+ <event event="0x0f" title="Memory" name="Unaligned access" description="Unaligned access architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
+ <event event="0x14" title="Cache" name="Instruction access" description="Instruction cache access"/>
+ <event event="0x15" title="Cache" name="Data eviction" description="Data cache eviction"/>
+ <event event="0x86" title="Interrupts" name="IRQ" description="IRQ exception taken"/>
+ <event event="0x87" title="Interrupts" name="FIQ" description="FIQ exception taken"/>
+ <event event="0xC0" title="Memory" name="External request" description="External memory request"/>
+ <event event="0xC1" title="Memory" name="Non-cacheable ext req" description="Non-cacheable external memory request"/>
+ <event event="0xC2" title="Cache" name="Linefill" description="Linefill because of prefetch"/>
+ <event event="0xC3" title="Cache" name="Linefill dropped" description="Prefetch linefill dropped"/>
+ <event event="0xC4" title="Cache" name="Allocate mode enter" description="Entering read allocate mode"/>
+ <event event="0xC5" title="Cache" name="Allocate mode" description="Read allocate mode"/>
+ <event event="0xC7" title="ETM" name="ETM Ext Out[0]" description="ETM - ETM Ext Out[0]"/>
+ <event event="0xC8" title="ETM" name="ETM Ext Out[1]" description="ETM - ETM Ext Out[1]"/>
+ <event event="0xC9" title="Instruction" name="Pipeline stall" description="Data Write operation that stalls the pipeline because the store buffer is full"/>
+ </category>
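
The count="2" on the Cortex-A5 counter_set means only two configurable counters exist alongside the cycle counter, so sampling more than two events requires rotating them through the hardware. A sketch of one naive rotation; the grouping policy is illustrative, not gatord's actual scheduler:

    #include <stdio.h>

    #define NUM_COUNTERS 2 /* from counter_set count="2" above */

    int main(void)
    {
            /* Four events to sample with two counters: one group per
             * sampling window, results scaled offline. */
            const unsigned int events[] = { 0x01, 0x03, 0x10, 0x13 };
            const int n = sizeof(events) / sizeof(events[0]);

            for (int w = 0; w * NUM_COUNTERS < n; w++) {
                    printf("window %d:", w);
                    for (int c = 0; c < NUM_COUNTERS && w * NUM_COUNTERS + c < n; c++)
                            printf(" 0x%02x", events[w * NUM_COUNTERS + c]);
                    printf("\n");
            }
            return 0;
    }
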
diff --git a/tools/gator/daemon/events-Cortex-A53.xml b/tools/gator/daemon/events-Cortex-A53.xml
new file mode 100644
index 000000000000..5ba17907d5ab
--- /dev/null
+++ b/tools/gator/daemon/events-Cortex-A53.xml
@@ -0,0 +1,87 @@
+ <counter_set name="ARM_Cortex-A53_cnt" count="6"/>
+ <category name="Cortex-A53" counter_set="ARM_Cortex-A53_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARM_Cortex-A53_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
+ <event event="0x14" title="Cache" name="L1 inst access" description="Level 1 instruction cache access"/>
+ <event event="0x15" title="Cache" name="L1 data write" description="Level 1 data cache Write-Back"/>
+ <event event="0x16" title="Cache" name="L2 data access" description="Level 2 data cache access"/>
+ <event event="0x17" title="Cache" name="L2 data refill" description="Level 2 data cache refill"/>
+ <event event="0x18" title="Cache" name="L2 data write" description="Level 2 data cache Write-Back"/>
+ <event event="0x19" title="Bus" name="Access" description="Bus access"/>
+ <event event="0x1A" title="Memory" name="Error" description="Local memory error"/>
+ <event event="0x1B" title="Instruction" name="Speculative" description="Operation speculatively executed"/>
+ <event event="0x1C" title="Memory" name="Translation table" description="Instruction architecturally executed (condition check pass) - Write to translation table base"/>
+ <event event="0x1D" title="Bus" name="Cycle" description="Bus cycle"/>
+ <event event="0x1E" title="Counter chain" name="Odd Performance" description="Odd performance counter chain mode"/>
+ <event event="0x40" title="Cache" name="L1 data read" description="Level 1 data cache access - Read"/>
+ <event event="0x41" title="Cache" name="L1 data access write" description="Level 1 data cache access - Write"/>
+ <event event="0x42" title="Cache" name="L1 data refill read" description="Level 1 data cache refill - Read"/>
+ <event event="0x43" title="Cache" name="L1 data refill write" description="Level 1 data cache refill - Write"/>
+ <event event="0x46" title="Cache" name="L1 data victim" description="Level 1 data cache Write-back - Victim"/>
+ <event event="0x47" title="Cache" name="L1 data clean" description="Level 1 data cache Write-back - Cleaning and coherency"/>
+ <event event="0x48" title="Cache" name="L1 data invalidate" description="Level 1 data cache invalidate"/>
+ <event event="0x4C" title="Cache" name="L1 data refill read" description="Level 1 data TLB refill - Read"/>
+ <event event="0x4D" title="Cache" name="L1 data refill write" description="Level 1 data TLB refill - Write"/>
+ <event event="0x50" title="Cache" name="L2 data read" description="Level 2 data cache access - Read"/>
+ <event event="0x51" title="Cache" name="L2 data access write" description="Level 2 data cache access - Write"/>
+ <event event="0x52" title="Cache" name="L2 data refill read" description="Level 2 data cache refill - Read"/>
+ <event event="0x53" title="Cache" name="L2 data refill write" description="Level 2 data cache refill - Write"/>
+ <event event="0x56" title="Cache" name="L2 data victim" description="Level 2 data cache Write-back - Victim"/>
+ <event event="0x57" title="Cache" name="L2 data clean" description="Level 2 data cache Write-back - Cleaning and coherency"/>
+ <event event="0x58" title="Cache" name="L2 data invalidate" description="Level 2 data cache invalidate"/>
+ <event event="0x60" title="Bus" name="Read" description="Bus access - Read"/>
+ <event event="0x61" title="Bus" name="Write" description="Bus access - Write"/>
+ <event event="0x62" title="Bus" name="Access shared" description="Bus access - Normal"/>
+ <event event="0x63" title="Bus" name="Access not shared" description="Bus access - Not normal"/>
+ <event event="0x64" title="Bus" name="Access normal" description="Bus access - Normal"/>
+ <event event="0x65" title="Bus" name="Peripheral" description="Bus access - Peripheral"/>
+ <event event="0x66" title="Memory" name="Read" description="Data memory access - Read"/>
+ <event event="0x67" title="Memory" name="Write" description="Data memory access - Write"/>
+ <event event="0x68" title="Memory" name="Unaligned Read" description="Unaligned access - Read"/>
+ <event event="0x69" title="Memory" name="Unaligned Write" description="Unaligned access - Write"/>
+ <event event="0x6A" title="Memory" name="Unaligned" description="Unaligned access"/>
+ <event event="0x6C" title="Intrinsic" name="LDREX" description="Exclusive operation speculatively executed - LDREX"/>
+ <event event="0x6D" title="Intrinsic" name="STREX pass" description="Exclusive instruction speculatively executed - STREX pass"/>
+ <event event="0x6E" title="Intrinsic" name="STREX fail" description="Exclusive operation speculatively executed - STREX fail"/>
+ <event event="0x70" title="Instruction" name="Load" description="Operation speculatively executed - Load"/>
+ <event event="0x71" title="Instruction" name="Store" description="Operation speculatively executed - Store"/>
+ <event event="0x72" title="Instruction" name="Load/Store" description="Operation speculatively executed - Load or store"/>
+ <event event="0x73" title="Instruction" name="Integer" description="Operation speculatively executed - Integer data processing"/>
+ <event event="0x74" title="Instruction" name="Advanced SIMD" description="Operation speculatively executed - Advanced SIMD"/>
+ <event event="0x75" title="Instruction" name="VFP" description="Operation speculatively executed - VFP"/>
+ <event event="0x76" title="Instruction" name="Software change" description="Operation speculatively executed - Software change of the PC"/>
+ <event event="0x77" title="Instruction" name="Crypto" description="Operation speculatively executed, crypto data processing"/>
+ <event event="0x78" title="Instruction" name="Immediate branch" description="Branch speculatively executed - Immediate branch"/>
+ <event event="0x79" title="Instruction" name="Procedure return" description="Branch speculatively executed - Procedure return"/>
+ <event event="0x7A" title="Instruction" name="Indirect branch" description="Branch speculatively executed - Indirect branch"/>
+ <event event="0x7C" title="Instruction" name="ISB" description="Barrier speculatively executed - ISB"/>
+ <event event="0x7D" title="Instruction" name="DSB" description="Barrier speculatively executed - DSB"/>
+ <event event="0x7E" title="Instruction" name="DMB" description="Barrier speculatively executed - DMB"/>
+ <event event="0x81" title="Exception" name="Undefined" description="Exception taken, other synchronous"/>
+ <event event="0x82" title="Exception" name="Supervisor" description="Exception taken, Supervisor Call"/>
+ <event event="0x83" title="Exception" name="Instruction abort" description="Exception taken, Instruction Abort"/>
+ <event event="0x84" title="Exception" name="Data abort" description="Exception taken, Data Abort or SError"/>
+ <event event="0x86" title="Interrupts" name="IRQ" description="Exception taken, IRQ"/>
+ <event event="0x87" title="Interrupts" name="FIQ" description="Exception taken, FIQ"/>
+ <event event="0x88" title="Exception" name="Secure monitor call" description="Exception taken, Secure Monitor Call"/>
+ <event event="0x8A" title="Exception" name="Hypervisor call" description="Exception taken, Hypervisor Call"/>
+ <event event="0x8B" title="Exception" name="Instruction abort non-local" description="Exception taken, Instruction Abort not taken locally"/>
+ <event event="0x8C" title="Exception" name="Data abort non-local" description="Exception taken, Data Abort or SError not taken locally"/>
+ <event event="0x8D" title="Exception" name="Other non-local" description="Exception taken - Other traps not taken locally"/>
+ <event event="0x8E" title="Exception" name="IRQ non-local" description="Exception taken, IRQ not taken locally"/>
+ <event event="0x8F" title="Exception" name="FIQ non-local" description="Exception taken, FIQ not taken locally"/>
+ <event event="0x90" title="Release Consistency" name="Load" description="Release consistency instruction speculatively executed - Load Acquire"/>
+ <event event="0x91" title="Release Consistency" name="Store" description="Release consistency instruction speculatively executed - Store Release"/>
+ </category>
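
On Cortex-A53 the cycle counter is mapped to event 0x11 rather than the 0xff pseudo-number used in the ARMv7 files above; pairing it with event 0x08 (instructions architecturally executed) gives instructions per cycle. Placeholder values again:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long insts  = 6200000;  /* event 0x08 (placeholder) */
            unsigned long long cycles = 9000000;  /* cycle counter, event 0x11 (placeholder) */

            /* IPC = instructions retired / core clock cycles. */
            printf("IPC: %.2f\n", cycles ? (double)insts / cycles : 0.0);
            return 0;
    }
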
diff --git a/tools/gator/daemon/events-Cortex-A57.xml b/tools/gator/daemon/events-Cortex-A57.xml
new file mode 100644
index 000000000000..fbe96c2d4eb2
--- /dev/null
+++ b/tools/gator/daemon/events-Cortex-A57.xml
@@ -0,0 +1,87 @@
+ <counter_set name="ARM_Cortex-A57_cnt" count="6"/>
+ <category name="Cortex-A57" counter_set="ARM_Cortex-A57_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARM_Cortex-A57_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
+ <event event="0x14" title="Cache" name="L1 inst access" description="Level 1 instruction cache access"/>
+ <event event="0x15" title="Cache" name="L1 data write" description="Level 1 data cache Write-Back"/>
+ <event event="0x16" title="Cache" name="L2 data access" description="Level 2 data cache access"/>
+ <event event="0x17" title="Cache" name="L2 data refill" description="Level 2 data cache refill"/>
+ <event event="0x18" title="Cache" name="L2 data write" description="Level 2 data cache Write-Back"/>
+ <event event="0x19" title="Bus" name="Access" description="Bus access"/>
+ <event event="0x1A" title="Memory" name="Error" description="Local memory error"/>
+ <event event="0x1B" title="Instruction" name="Speculative" description="Operation speculatively executed"/>
+ <event event="0x1C" title="Memory" name="Translation table" description="Instruction architecturally executed (condition check pass) - Write to translation table base"/>
+ <event event="0x1D" title="Bus" name="Cycle" description="Bus cycle"/>
+ <event event="0x1E" title="Counter chain" name="Odd Performance" description="Odd performance counter chain mode"/>
+ <event event="0x40" title="Cache" name="L1 data read" description="Level 1 data cache access - Read"/>
+ <event event="0x41" title="Cache" name="L1 data access write" description="Level 1 data cache access - Write"/>
+ <event event="0x42" title="Cache" name="L1 data refill read" description="Level 1 data cache refill - Read"/>
+ <event event="0x43" title="Cache" name="L1 data refill write" description="Level 1 data cache refill - Write"/>
+ <event event="0x46" title="Cache" name="L1 data victim" description="Level 1 data cache Write-back - Victim"/>
+ <event event="0x47" title="Cache" name="L1 data clean" description="Level 1 data cache Write-back - Cleaning and coherency"/>
+ <event event="0x48" title="Cache" name="L1 data invalidate" description="Level 1 data cache invalidate"/>
+ <event event="0x4C" title="Cache" name="L1 data refill read" description="Level 1 data TLB refill - Read"/>
+ <event event="0x4D" title="Cache" name="L1 data refill write" description="Level 1 data TLB refill - Write"/>
+ <event event="0x50" title="Cache" name="L2 data read" description="Level 2 data cache access - Read"/>
+ <event event="0x51" title="Cache" name="L2 data access write" description="Level 2 data cache access - Write"/>
+ <event event="0x52" title="Cache" name="L2 data refill read" description="Level 2 data cache refill - Read"/>
+ <event event="0x53" title="Cache" name="L2 data refill write" description="Level 2 data cache refill - Write"/>
+ <event event="0x56" title="Cache" name="L2 data victim" description="Level 2 data cache Write-back - Victim"/>
+ <event event="0x57" title="Cache" name="L2 data clean" description="Level 2 data cache Write-back - Cleaning and coherency"/>
+ <event event="0x58" title="Cache" name="L2 data invalidate" description="Level 2 data cache invalidate"/>
+ <event event="0x60" title="Bus" name="Read" description="Bus access - Read"/>
+ <event event="0x61" title="Bus" name="Write" description="Bus access - Write"/>
+ <event event="0x62" title="Bus" name="Access shared" description="Bus access - Normal"/>
+ <event event="0x63" title="Bus" name="Access not shared" description="Bus access - Not normal"/>
+ <event event="0x64" title="Bus" name="Access normal" description="Bus access - Normal"/>
+ <event event="0x65" title="Bus" name="Peripheral" description="Bus access - Peripheral"/>
+ <event event="0x66" title="Memory" name="Read" description="Data memory access - Read"/>
+ <event event="0x67" title="Memory" name="Write" description="Data memory access - Write"/>
+ <event event="0x68" title="Memory" name="Unaligned Read" description="Unaligned access - Read"/>
+ <event event="0x69" title="Memory" name="Unaligned Write" description="Unaligned access - Write"/>
+ <event event="0x6A" title="Memory" name="Unaligned" description="Unaligned access"/>
+ <event event="0x6C" title="Intrinsic" name="LDREX" description="Exclusive operation speculatively executed - LDREX"/>
+ <event event="0x6D" title="Intrinsic" name="STREX pass" description="Exclusive instruction speculatively executed - STREX pass"/>
+ <event event="0x6E" title="Intrinsic" name="STREX fail" description="Exclusive operation speculatively executed - STREX fail"/>
+ <event event="0x70" title="Instruction" name="Load" description="Operation speculatively executed - Load"/>
+ <event event="0x71" title="Instruction" name="Store" description="Operation speculatively executed - Store"/>
+ <event event="0x72" title="Instruction" name="Load/Store" description="Operation speculatively executed - Load or store"/>
+ <event event="0x73" title="Instruction" name="Integer" description="Operation speculatively executed - Integer data processing"/>
+ <event event="0x74" title="Instruction" name="Advanced SIMD" description="Operation speculatively executed - Advanced SIMD"/>
+ <event event="0x75" title="Instruction" name="VFP" description="Operation speculatively executed - VFP"/>
+ <event event="0x76" title="Instruction" name="Software change" description="Operation speculatively executed - Software change of the PC"/>
+ <event event="0x77" title="Instruction" name="Crypto" description="Operation speculatively executed, crypto data processing"/>
+ <event event="0x78" title="Instruction" name="Immediate branch" description="Branch speculatively executed - Immediate branch"/>
+ <event event="0x79" title="Instruction" name="Procedure return" description="Branch speculatively executed - Procedure return"/>
+ <event event="0x7A" title="Instruction" name="Indirect branch" description="Branch speculatively executed - Indirect branch"/>
+ <event event="0x7C" title="Instruction" name="ISB" description="Barrier speculatively executed - ISB"/>
+ <event event="0x7D" title="Instruction" name="DSB" description="Barrier speculatively executed - DSB"/>
+ <event event="0x7E" title="Instruction" name="DMB" description="Barrier speculatively executed - DMB"/>
+ <event event="0x81" title="Exception" name="Undefined" description="Exception taken, other synchronous"/>
+ <event event="0x82" title="Exception" name="Supervisor" description="Exception taken, Supervisor Call"/>
+ <event event="0x83" title="Exception" name="Instruction abort" description="Exception taken, Instruction Abort"/>
+ <event event="0x84" title="Exception" name="Data abort" description="Exception taken, Data Abort or SError"/>
+ <event event="0x86" title="Interrupts" name="IRQ" description="Exception taken, IRQ"/>
+ <event event="0x87" title="Interrupts" name="FIQ" description="Exception taken, FIQ"/>
+ <event event="0x88" title="Exception" name="Secure monitor call" description="Exception taken, Secure Monitor Call"/>
+ <event event="0x8A" title="Exception" name="Hypervisor call" description="Exception taken, Hypervisor Call"/>
+ <event event="0x8B" title="Exception" name="Instruction abort non-local" description="Exception taken, Instruction Abort not taken locally"/>
+ <event event="0x8C" title="Exception" name="Data abort non-local" description="Exception taken, Data Abort or SError not taken locally"/>
+ <event event="0x8D" title="Exception" name="Other non-local" description="Exception taken - Other traps not taken locally"/>
+ <event event="0x8E" title="Exception" name="IRQ non-local" description="Exception taken, IRQ not taken locally"/>
+ <event event="0x8F" title="Exception" name="FIQ non-local" description="Exception taken, FIQ not taken locally"/>
+ <event event="0x90" title="Release Consistency" name="Load" description="Release consistency instruction speculatively executed - Load Acquire"/>
+ <event event="0x91" title="Release Consistency" name="Store" description="Release consistency instruction speculatively executed - Store Release"/>
+ </category>
diff --git a/tools/gator/daemon/events-Cortex-A7.xml b/tools/gator/daemon/events-Cortex-A7.xml
new file mode 100644
index 000000000000..6e078b3cffa3
--- /dev/null
+++ b/tools/gator/daemon/events-Cortex-A7.xml
@@ -0,0 +1,43 @@
+ <counter_set name="ARMv7_Cortex_A7_cnt" count="4"/>
+ <category name="Cortex-A7" counter_set="ARMv7_Cortex_A7_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv7_Cortex_A7_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Software increment architecturally executed"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x06" title="Memory" name="Data Read" description="Data read architecturally executed"/>
+ <event event="0x07" title="Memory" name="Data Write" description="Data write architecturally executed"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x0c" title="Branch" name="PC change" description="Software change of the Program Counter, except by an exception, architecturally executed"/>
+ <event event="0x0d" title="Branch" name="Immediate" description="Immediate branch architecturally executed"/>
+ <event event="0x0f" title="Memory" name="Unaligned access" description="Unaligned access architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
+ <event event="0x14" title="Cache" name="L1 inst access" description="Instruction cache access"/>
+ <event event="0x15" title="Cache" name="L1 data eviction" description="Level 1 data cache eviction"/>
+ <event event="0x16" title="Cache" name="L2 data access" description="Level 2 data cache access"/>
+ <event event="0x17" title="Cache" name="L2 data refill" description="Level 2 data cache refill"/>
+ <event event="0x18" title="Cache" name="L2 data write" description="Level 2 data cache Write-Back"/>
+ <event event="0x19" title="Bus" name="Access" description="Bus - Access"/>
+ <event event="0x1d" title="Bus" name="Cycle" description="Bus - Cycle"/>
+ <event event="0x60" title="Bus" name="Read" description="Bus access - Read"/>
+ <event event="0x61" title="Bus" name="Write" description="Bus access - Write"/>
+ <event event="0x86" title="Exception" name="IRQ" description="IRQ exception taken"/>
+ <event event="0x87" title="Exception" name="FIQ" description="FIQ exception taken"/>
+ <event event="0xC0" title="Memory" name="External request" description="External memory request"/>
+ <event event="0xC1" title="Memory" name="Non-cacheable ext req" description="Non-cacheable external memory request"/>
+ <event event="0xC2" title="Cache" name="Linefill" description="Linefill because of prefetch"/>
+ <event event="0xC3" title="Cache" name="Linefill dropped" description="Prefetch linefill dropped"/>
+ <event event="0xC4" title="Cache" name="Allocate mode enter" description="Entering read allocate mode"/>
+ <event event="0xC5" title="Cache" name="Allocate mode" description="Read allocate mode"/>
+ <event event="0xC7" title="ETM" name="ETM Ext Out[0]" description="ETM - ETM Ext Out[0]"/>
+ <event event="0xC8" title="ETM" name="ETM Ext Out[1]" description="ETM - ETM Ext Out[1]"/>
+ <event event="0xC9" title="Instruction" name="Pipeline stall" description="Data Write operation that stalls the pipeline because the store buffer is full"/>
+ <event event="0xCA" title="Memory" name="Snoop" description="Data snooped from other processor. This event counts memory-read operations that read data from another processor within the local cluster, rather than accessing the L2 cache or issuing an external read."/>
+ </category>
diff --git a/tools/gator/daemon/events-Cortex-A8.xml b/tools/gator/daemon/events-Cortex-A8.xml
new file mode 100644
index 000000000000..a69e25ab2c34
--- /dev/null
+++ b/tools/gator/daemon/events-Cortex-A8.xml
@@ -0,0 +1,52 @@
+ <counter_set name="ARMv7_Cortex_A8_cnt" count="4"/>
+ <category name="Cortex-A8" counter_set="ARMv7_Cortex_A8_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv7_Cortex_A8_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x06" title="Instruction" name="Memory read" description="Memory-reading instruction architecturally executed"/>
+ <event event="0x07" title="Instruction" name="Memory write" description="Memory-writing instruction architecturally executed"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x0c" title="Branch" name="PC change" description="Software change of the Program Counter, except by an exception, architecturally executed"/>
+ <event event="0x0d" title="Branch" name="Immediate" description="Immediate branch architecturally executed"/>
+ <event event="0x0e" title="Procedure" name="Return" description="Procedure return, other than exception return, architecturally executed"/>
+ <event event="0x0f" title="Memory" name="Unaligned access" description="Unaligned access architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x40" title="Cache" name="Write buffer full" description="Any write buffer full cycle"/>
+ <event event="0x41" title="Cache" name="L2 store" description="Any store that is merged in the L2 memory system"/>
+ <event event="0x42" title="Cache" name="Bufferable transaction" description="Any bufferable store transaction from load/store to L2 cache, excluding eviction or cast out data"/>
+ <event event="0x43" title="Cache" name="L2 access" description="Any accesses to the L2 cache"/>
+ <event event="0x44" title="Cache" name="L2 miss" description="Any cacheable miss in the L2 cache"/>
+ <event event="0x45" title="AXI" name="Read" description="The number of AXI read data transfers"/>
+ <event event="0x46" title="AXI" name="Write" description="The number of AXI write data transfers"/>
+ <event event="0x47" title="Memory" name="Replay event" description="Any replay event in the memory system"/>
+ <event event="0x48" title="Memory" name="Unaligned access replay" description="Any unaligned memory access that results in a replay"/>
+ <event event="0x49" title="Cache" name="L1 data hash miss" description="Any L1 data memory access that misses in the cache as a result of the hashing algorithm"/>
+ <event event="0x4a" title="Cache" name="L1 inst hash miss" description="Any L1 instruction memory access that misses in the cache as a result of the hashing algorithm"/>
+ <event event="0x4b" title="Cache" name="L1 page coloring" description="Any L1 data memory access in which a page coloring alias occurs"/>
+ <event event="0x4c" title="NEON" name="L1 cache hit" description="Any NEON access that hits in the L1 data cache"/>
+ <event event="0x4d" title="NEON" name="L1 cache access" description="Any NEON cacheable data accesses for L1 data cache"/>
+ <event event="0x4e" title="NEON" name="L2 cache access" description="Any L2 cache accesses as a result of a NEON memory access"/>
+ <event event="0x4f" title="NEON" name="L2 cache hit" description="Any NEON hit in the L2 cache"/>
+ <event event="0x50" title="Cache" name="L1 inst access" description="Any L1 instruction cache access, excluding CP15 cache accesses"/>
+ <event event="0x51" title="Branch" name="Return stack misprediction" description="Any return stack misprediction because of incorrect target address for a taken return stack pop"/>
+ <event event="0x52" title="Branch" name="Direction misprediction" description="Branch direction misprediction"/>
+ <event event="0x53" title="Branch" name="Taken prediction" description="Any predictable branch that is predicted to be taken"/>
+ <event event="0x54" title="Branch" name="Executed and taken prediction" description="Any predictable branch that is executed and taken"/>
+ <event event="0x55" title="Core" name="Operations issued" description="Number of operations issued, where an operation is either: an instruction or one operation in a sequence of operations that make up a multi-cycle instruction"/>
+ <event event="0x56" title="Core" name="No issue cycles" description="Increment for every cycle that no instructions are available for issue"/>
+ <event event="0x57" title="Core" name="Issue cycles" description="For every cycle, this event counts the number of instructions issued in that cycle. Multi-cycle instructions are only counted once"/>
+ <event event="0x58" title="NEON" name="MRC data wait" description="Number of cycles the processor stalls waiting on MRC data from NEON"/>
+ <event event="0x59" title="NEON" name="Full queue" description="Number of cycles that the processor stalls as a result of a full NEON instruction queue or NEON load queue"/>
+ <event event="0x5a" title="NEON" name="Idle" description="Number of cycles that NEON and integer processors are both not idle"/>
+ <event event="0x70" title="External" name="PMUEXTIN[0]" description="Counts any event from external input source PMUEXTIN[0]"/>
+ <event event="0x71" title="External" name="PMUEXTIN[1]" description="Counts any event from external input source PMUEXTIN[1]"/>
+ <event event="0x72" title="External" name="PMUEXTIN[0,1]" description="Counts any event from both external input sources PMUEXTIN[0] and PMUEXTIN[1]"/>
+ </category>
diff --git a/tools/gator/daemon/events-Cortex-A9.xml b/tools/gator/daemon/events-Cortex-A9.xml
new file mode 100644
index 000000000000..3e7f8289062e
--- /dev/null
+++ b/tools/gator/daemon/events-Cortex-A9.xml
@@ -0,0 +1,65 @@
+ <counter_set name="ARMv7_Cortex_A9_cnt" count="6"/>
+ <category name="Cortex-A9" counter_set="ARMv7_Cortex_A9_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv7_Cortex_A9_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x06" title="Instruction" name="Memory read" description="Memory-reading instruction architecturally executed"/>
+ <event event="0x07" title="Instruction" name="Memory write" description="Memory-writing instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x0c" title="Branch" name="PC change" description="Software change of the Program Counter, except by an exception, architecturally executed"/>
+ <event event="0x0d" title="Branch" name="Immediate" description="Immediate branch architecturally executed"/>
+ <event event="0x0f" title="Memory" name="Unaligned access" description="Unaligned access architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x40" title="Java" name="Bytecode execute" description="Counts the number of Java bytecodes being decoded, including speculative ones"/>
+ <event event="0x41" title="Java" name="SW bytecode execute" description="Counts the number of software java bytecodes being decoded, including speculative ones"/>
+ <event event="0x42" title="Jazelle" name="Backward branch execute" description="Counts the number of Jazelle taken branches being executed"/>
+ <event event="0x50" title="Cache" name="Coherency miss" description="Counts the number of coherent linefill requests performed by the Cortex-A9 processor which also miss in all the other Cortex-A9 processors, meaning that the request is sent to the external memory"/>
+ <event event="0x51" title="Cache" name="Coherency hit" description="Counts the number of coherent linefill requests performed by the Cortex-A9 processor which hit in another Cortex-A9 processor, meaning that the linefill data is fetched directly from the relevant Cortex-A9 cache"/>
+ <event event="0x60" title="Cache" name="Inst dependent stall" description="Counts the number of cycles where the processor is ready to accept new instructions, but does not receive any because of the instruction side not being able to provide any and the instruction cache is currently performing at least one linefill"/>
+ <event event="0x61" title="Cache" name="Data dependent stall" description="Counts the number of cycles where the core has some instructions that it cannot issue to any pipeline, and the Load Store unit has at least one pending linefill request, and no pending TLB requests"/>
+ <event event="0x62" title="Cache" name="TLB stall" description="Counts the number of cycles where the processor is stalled waiting for the completion of translation table walks from the main TLB"/>
+ <event event="0x63" title="Intrinsic" name="STREX pass" description="Counts the number of STREX instructions architecturally executed and passed"/>
+ <event event="0x64" title="Intrinsic" name="STREX fail" description="Counts the number of STREX instructions architecturally executed and failed"/>
+ <event event="0x65" title="Cache" name="Data eviction" description="Counts the number of eviction requests because of a linefill in the data cache"/>
+ <event event="0x66" title="Pipeline" name="Issue stage no dispatch" description="Counts the number of cycles where the issue stage does not dispatch any instruction because it is empty or cannot dispatch any instructions"/>
+ <event event="0x67" title="Pipeline" name="Issue stage empty" description="Counts the number of cycles where the issue stage is empty"/>
+ <event event="0x68" title="Instruction" name="Executed" description="Counts the number of instructions going through the Register Renaming stage. This number is an approximate number of the total number of instructions speculatively executed, and even more approximate of the total number of instructions architecturally executed"/>
+ <event event="0x69" title="Cache" name="Data linefills" description="Counts the number of linefills performed on the external AXI bus"/>
+ <event event="0x6A" title="Cache" name="Prefetch linefills" description="Counts the number of data linefills caused by prefetcher requests"/>
+ <event event="0x6B" title="Cache" name="Prefetch hits" description="Counts the number of cache hits in a line that belongs to a stream followed by the prefetcher"/>
+ <event event="0x6E" title="Core" name="Functions" description="Counts the number of procedure returns whose condition codes do not fail, excluding all returns from exception"/>
+ <event event="0x70" title="Instruction" name="Main execution unit" description="Counts the number of instructions being executed in the main execution pipeline of the processor, the multiply pipeline and arithmetic logic unit pipeline"/>
+ <event event="0x71" title="Instruction" name="Second execution unit" description="Counts the number of instructions being executed in the processor second execution pipeline (ALU)"/>
+ <event event="0x72" title="Instruction" name="Load/Store" description="Counts the number of instructions being executed in the Load/Store unit"/>
+ <event event="0x73" title="Instruction" name="Floating point" description="Counts the number of Floating-point instructions going through the Register Rename stage"/>
+ <event event="0x74" title="Instruction" name="NEON" description="Counts the number of NEON instructions going through the Register Rename stage"/>
+ <event event="0x80" title="Stalls" name="PLD" description="Counts the number of cycles where the processor is stalled because PLD slots are all full"/>
+ <event event="0x81" title="Stalls" name="Memory write" description="Counts the number of cycles when the processor is stalled and the data side is stalled too because it is full and executing writes to the external memory"/>
+ <event event="0x82" title="Stalls" name="Inst main TLB miss" description="Counts the number of stall cycles because of main TLB misses on requests issued by the instruction side"/>
+ <event event="0x83" title="Stalls" name="Data main TLB miss" description="Counts the number of stall cycles because of main TLB misses on requests issued by the data side"/>
+ <event event="0x84" title="Stalls" name="Inst micro TLB miss" description="Counts the number of stall cycles because of micro TLB misses on the instruction side"/>
+ <event event="0x85" title="Stalls" name="Data micro TLB miss" description="Counts the number of stall cycles because of micro TLB misses on the data side"/>
+ <event event="0x86" title="Stalls" name="DMB" description="Counts the number of stall cycles because of the execution of a DMB memory barrier"/>
+ <event event="0x8A" title="Clock" name="Integer core" description="Counts the number of cycles during which the integer core clock is enabled"/>
+ <event event="0x8B" title="Clock" name="Data engine" description="Counts the number of cycles during which the Data Engine clock is enabled"/>
+ <event event="0x8C" title="Clock" name="NEON" description="Counts the number of cycles when the NEON SIMD clock is enabled"/>
+ <event event="0x8D" title="Memory" name="TLB inst allocations" description="Counts the number of TLB allocations because of Instruction requests"/>
+ <event event="0x8E" title="Memory" name="TLB data allocations" description="Counts the number of TLB allocations because of Data requests"/>
+ <event event="0x90" title="Instruction" name="ISB" description="Counts the number of ISB instructions architecturally executed"/>
+ <event event="0x91" title="Instruction" name="DSB" description="Counts the number of DSB instructions architecturally executed"/>
+ <event event="0x92" title="Instruction" name="DMB" description="Counts the number of DMB instructions speculatively executed"/>
+ <event event="0x93" title="External" name="Interrupts" description="Counts the number of external interrupts executed by the processor"/>
+ <event event="0xA0" title="PLE" name="Cache line rq completed" description="Counts the number of PLE cache line requests completed"/>
+ <event event="0xA1" title="PLE" name="Cache line rq skipped" description="Counts the number of PLE cache line requests skipped"/>
+ <event event="0xA2" title="PLE" name="FIFO flush" description="Counts the number of PLE FIFO flush requests"/>
+ <event event="0xA3" title="PLE" name="Request completed" description="Counts the number of PLE FIFO flush completed"/>
+ <event event="0xA4" title="PLE" name="FIFO overflow" description="Counts the number of PLE FIFO flush overflowed"/>
+ <event event="0xA5" title="PLE" name="Request programmed" description="Counts the number of PLE FIFO flush program requests"/>
+ </category>
diff --git a/tools/gator/daemon/events-Filesystem.xml b/tools/gator/daemon/events-Filesystem.xml
new file mode 100644
index 000000000000..9ef61ddac811
--- /dev/null
+++ b/tools/gator/daemon/events-Filesystem.xml
@@ -0,0 +1,13 @@
+ <category name="Filesystem">
+ <!-- counter attribute must start with filesystem_ and be unique -->
+  <!-- the regex capture group in () is the value shown -->
+ <!--
+ <event counter="filesystem_cpu1_online" path="/sys/devices/system/cpu/cpu1/online" title="online" name="cpu 1" class="absolute" description="If cpu 1 is online"/>
+ <event counter="filesystem_loginuid" path="/proc/self/loginuid" title="loginuid" name="loginuid" class="absolute" description="loginuid"/>
+ <event counter="filesystem_gatord_rss" path="/proc/self/stat" title="stat" name="rss" class="absolute" regex="-?[0-9]+ \(.*\) . -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ (-?[0-9]+)" units="pages" description="resident set size"/>
+ <event counter="filesystem_processes" path="/proc/stat" title="proc-stat" name="processes" class="absolute" regex="processes ([0-9]+)" description="Number of processes and threads created"/>
+ <event counter="filesystem_context_switches" path="/proc/stat" title="proc-stat" name="context switches" class="absolute" regex="ctxt ([0-9]+)" description="Number of processes and threads created"/>
+ -->
+ </category>
diff --git a/tools/gator/daemon/events-Krait-architected.xml b/tools/gator/daemon/events-Krait-architected.xml
new file mode 100644
index 000000000000..b8d3bcb48de7
--- /dev/null
+++ b/tools/gator/daemon/events-Krait-architected.xml
@@ -0,0 +1,22 @@
+ <counter_set name="Krait_cnt" count="4"/>
+ <category name="Krait" counter_set="Krait_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="Krait_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x06" title="Instruction" name="Memory read" description="Memory-reading instruction architecturally executed"/>
+ <event event="0x07" title="Instruction" name="Memory write" description="Memory-writing instruction architecturally executed"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x0c" title="Program Counter" name="SW change" description="Software change of PC, except by an exception, architecturally executed"/>
+ <event event="0x0d" title="Branch" name="Immediate" description="Immediate branch architecturally executed"/>
+ <event event="0x0e" title="Branch" name="Procedure Return" description="Procedure return architecturally executed (not by exceptions)"/>
+ <event event="0x0f" title="Memory" name="Unaligned access" description="Unaligned access architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ </category>
diff --git a/tools/gator/daemon/events-L2C-310.xml b/tools/gator/daemon/events-L2C-310.xml
new file mode 100644
index 000000000000..923fb90334d0
--- /dev/null
+++ b/tools/gator/daemon/events-L2C-310.xml
@@ -0,0 +1,18 @@
+ <counter_set name="L2C-310_cnt" count="2"/>
+ <category name="L2C-310" counter_set="L2C-310_cnt" per_cpu="no">
+ <event event="0x1" title="L2 Cache" name="CastOUT" description="Eviction, CastOUT, of a line from the L2 cache"/>
+ <event event="0x2" title="L2 Cache" name="Data Read Hit" description="Data read hit in the L2 cache"/>
+ <event event="0x3" title="L2 Cache" name="Data Read Request" description="Data read lookup to the L2 cache. Subsequently results in a hit or miss"/>
+ <event event="0x4" title="L2 Cache" name="Data Write Hit" description="Data write hit in the L2 cache"/>
+ <event event="0x5" title="L2 Cache" name="Data Write Request" description="Data write lookup to the L2 cache. Subsequently results in a hit or miss"/>
+ <event event="0x6" title="L2 Cache" name="Data Write-Through Request" description="Data write lookup to the L2 cache with Write-Through attribute. Subsequently results in a hit or miss"/>
+ <event event="0x7" title="L2 Cache" name="Instruction Read Hit" description="Instruction read hit in the L2 cache"/>
+ <event event="0x8" title="L2 Cache" name="Instruction Read Request" description="Instruction read lookup to the L2 cache. Subsequently results in a hit or miss"/>
+ <event event="0x9" title="L2 Cache" name="Write Allocate Miss" description="Allocation into the L2 cache caused by a write, with Write-Allocate attribute, miss"/>
+ <event event="0xa" title="L2 Cache" name="Internal Prefetch Allocate" description="Allocation of a prefetch generated by L2C-310 into the L2 cache"/>
+ <event event="0xb" title="L2 Cache" name="Prefitch Hit" description="Prefetch hint hits in the L2 cache"/>
+ <event event="0xc" title="L2 Cache" name="Prefitch Allocate" description="Prefetch hint allocated into the L2 cache"/>
+ <event event="0xd" title="L2 Cache" name="Speculative Read Received" description="Speculative read received"/>
+ <event event="0xe" title="L2 Cache" name="Speculative Read Confirmed" description="Speculative read confirmed"/>
+ <event event="0xf" title="L2 Cache" name="Prefetch Hint Received" description="Prefetch hint received"/>
+ </category>
diff --git a/tools/gator/daemon/events-Linux.xml b/tools/gator/daemon/events-Linux.xml
new file mode 100644
index 000000000000..62a7018d038f
--- /dev/null
+++ b/tools/gator/daemon/events-Linux.xml
@@ -0,0 +1,17 @@
+ <category name="Linux">
+ <event counter="Linux_irq_softirq" title="Interrupts" name="SoftIRQ" per_cpu="yes" description="Linux SoftIRQ taken"/>
+ <event counter="Linux_irq_irq" title="Interrupts" name="IRQ" per_cpu="yes" description="Linux IRQ taken"/>
+ <event counter="Linux_block_rq_wr" title="Disk I/O" name="Write" units="B" description="Disk I/O Bytes Written"/>
+ <event counter="Linux_block_rq_rd" title="Disk I/O" name="Read" units="B" description="Disk I/O Bytes Read"/>
+ <event counter="Linux_net_rx" title="Network" name="Receive" units="B" description="Receive network traffic, including effect from Streamline"/>
+ <event counter="Linux_net_tx" title="Network" name="Transmit" units="B" description="Transmit network traffic, including effect from Streamline"/>
+ <event counter="Linux_sched_switch" title="Scheduler" name="Switch" per_cpu="yes" description="Context switch events"/>
+ <event counter="Linux_meminfo_memused" title="Memory" name="Used" class="absolute" units="B" proc="yes" description="Total used memory size. Note: a process' used memory includes shared memory that may be counted more than once (equivalent to RES from top). Kernel threads are not filterable."/>
+ <event counter="Linux_meminfo_memused2" title="Memory" name="Used" class="absolute" units="B" description="Total used memory size"/>
+ <event counter="Linux_meminfo_memfree" title="Memory" name="Free" class="absolute" display="minimum" units="B" description="Available memory size"/>
+ <event counter="Linux_meminfo_bufferram" title="Memory" name="Buffer" class="absolute" units="B" description="Memory used by OS disk buffers"/>
+ <event counter="Linux_power_cpu_freq" title="Clock" name="Frequency" per_cpu="yes" class="absolute" units="Hz" series_composition="overlay" average_cores="yes" description="Frequency setting of the CPU"/>
+ <event counter="Linux_cpu_wait_contention" title="CPU Contention" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" color="0x003c96fb" description="One or more threads are runnable but waiting due to CPU contention"/>
+ <event counter="Linux_cpu_wait_io" title="CPU I/O" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" color="0x00b30000" description="One or more threads are blocked on an I/O resource"/>
+ <event counter="Linux_power_cpu" title="CPU Status" name="Activity" class="activity" activity1="Off" activity_color1="0x0000ff00" activity2="WFI" activity_color2="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" description="CPU Status"/>
+ </category>
diff --git a/tools/gator/daemon/events-Mali-4xx.xml b/tools/gator/daemon/events-Mali-4xx.xml
new file mode 100644
index 000000000000..0a95dfeb6485
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-4xx.xml
@@ -0,0 +1,245 @@
+ <counter_set name="ARM_Mali-4xx_VP_0_cnt" count="2"/>
+ <counter_set name="ARM_Mali-4xx_SW_cnt" count="0"/>
+ <category name="Mali Vertex Processor" counter_set="ARM_Mali-4xx_VP_0_cnt" per_cpu="no">
+ <event event="0x01" title="Mali-4xx VP" name="Active cycles" description="Number of cycles per frame the MaliGP2 was active."/>
+ <event event="0x02" title="Mali-4xx VP" name="Active cycles, vertex shader" description="Number of cycles per frame the vertex shader unit was active."/>
+ <event event="0x03" title="Mali-4xx VP" name="Active cycles, vertex storer" description="Number of cycles per frame the vertex storer unit was active."/>
+ <event event="0x04" title="Mali-4xx VP" name="Active cycles, vertex loader" description="Number of cycles per frame the vertex loader unit was active."/>
+ <event event="0x05" title="Mali-4xx VP" name="Cycles vertex loader waiting for vertex shader" description="Number of cycles per frame the vertex loader was idle while waiting on the vertex shader."/>
+ <event event="0x06" title="Mali-4xx VP" name="Words read, system bus" description="Total number of 64 bit words read by the GP2 from the system bus per frame."/>
+ <event event="0x07" title="Mali-4xx VP" name="Words written, system bus" description="Total number of 64 bit words written by the GP2 to the system bus per frame."/>
+ <event event="0x08" title="Mali-4xx VP" name="Read bursts, system bus" description="Number of read bursts by the GP2 from the system bus per frame."/>
+ <event event="0x09" title="Mali-4xx VP" name="Write bursts, system bus" description="Number of write bursts from the MaliGP2 to the system bus per frame."/>
+ <event event="0x0a" title="Mali-4xx VP" name="Vertices processed" description="Number of vertices processed by the MaliGP2 per frame."/>
+ <event event="0x0b" title="Mali-4xx VP" name="Vertices fetched" description="Number of vertices fetched by the MaliGP2 per frame."/>
+ <event event="0x0c" title="Mali-4xx VP" name="Primitives fetched" description="Number of graphics primitives fetched by the MaliGP2 per frame."/>
+ <event event="0x0e" title="Mali-4xx VP" name="Primitives culled" description="Number of graphics primitives discarded per frame, because they were seen from the back or were offscreen."/>
+ <event event="0x0f" title="Mali-4xx VP" name="Commands written to tiles" description="Number of commands (8 Bytes, mainly primitives) written by GP2 to the PP input data structure per frame."/>
+ <event event="0x10" title="Mali-4xx VP" name="Memory blocks allocated" description="Number of overflow data blocks needed for outputting the PP input data structure per frame ."/>
+ <event event="0x13" title="Mali-4xx VP" name="Vertex loader cache misses" description="Number of cache misses for the vertex shader's vertex input unit per frame."/>
+ <event event="0x16" title="Mali-4xx VP" name="Active cycles, vertex shader command processor" description="Number of cycles per frame the GP2 vertex shader command processor was active. This includes time waiting for semaphores."/>
+ <event event="0x17" title="Mali-4xx VP" name="Active cycles, PLBU command processor" description="Number of cycles per frame the MaliGP2 PLBU command processor was active. This includes time waiting for semaphores."/>
+ <event event="0x18" title="Mali-4xx VP" name="Active Cycles, PLBU list writer" description="Number of cycles per frame the MaliGP2 PLBU output unit was active. This includes time spent waiting on the bus."/>
+ <event event="0x19" title="Mali-4xx VP" name="Active cycles, PLBU geometry processing" description="Number of cycles per frame the MaliGP2 PLBU was active, excepting final data output. In other words: active cycles through the prepare list commands. This includes time spent waiting on the bus."/>
+ <event event="0x1b" title="Mali-4xx VP" name="Active cycles, PLBU primitive assembly" description="Number of active cycles per frame spent by the MaliGP2 PLBU doing primitive assembly. This does not include scissoring or final output. This includes time spent waiting on the bus."/>
+ <event event="0x1c" title="Mali-4xx VP" name="Active cycles, PLBU vertex fetcher" description="Number of active cycles per frame spent by the MaliGP2 PLBU fetching vertex data. This includes time spent waiting on the bus."/>
+ <event event="0x1e" title="Mali-4xx VP" name="Active cycles, Bounding-box and command generator" description="Number of active cycles per frame spent by the MaliGP2 PLBU setting up bounding boxes and commands (mainly graphics primitives). This includes time spent waiting on the bus."/>
+ <event event="0x20" title="Mali-4xx VP" name="Active cycles, Scissor tile iterator" description="Number of active cycles per frame spent by the MaliGP2 PLBU iterating over tiles to perform scissoring. This includes time spent waiting on the bus."/>
+ <event event="0x21" title="Mali-4xx VP" name="Active cycles, PLBU tile iterator" description="Number of active cycles per frame spent by the MaliGP2 PLBU iterating over the tiles in the bounding box generating commands (mainly graphics primitives). This includes time spent waiting on the bus."/>
+ </category>
+ <category name="Mali Fragment Processor" per_cpu="no">
+ <counter_set name="ARM_Mali-4xx_FP_0_cnt" title="Mali-4xx FP0" description="Mali GPU Fragment Processor 0" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_1_cnt" title="Mali-4xx FP1" description="Mali GPU Fragment Processor 1" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_2_cnt" title="Mali-4xx FP2" description="Mali GPU Fragment Processor 2" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_3_cnt" title="Mali-4xx FP3" description="Mali GPU Fragment Processor 3" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_4_cnt" title="Mali-4xx FP4" description="Mali GPU Fragment Processor 4" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_5_cnt" title="Mali-4xx FP5" description="Mali GPU Fragment Processor 5" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_6_cnt" title="Mali-4xx FP6" description="Mali GPU Fragment Processor 6" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_7_cnt" title="Mali-4xx FP7" description="Mali GPU Fragment Processor 7" count="2"/>
+ <event event="0x00" title="Mali-4xx FP" name="Active clock cycles" description="Active clock cycles, between polygon start and IRQ."/>
+ <event event="0x02" title="Mali-4xx FP" name="Total bus reads" description="Total number of 64-bit words read from the bus."/>
+ <event event="0x03" title="Mali-4xx FP" name="Total bus writes" description="Total number of 64-bit words written to the bus."/>
+ <event event="0x04" title="Mali-4xx FP" name="Bus read request cycles" description="Number of cycles during which the bus read request signal was HIGH."/>
+ <event event="0x05" title="Mali-4xx FP" name="Bus write request cycles" description="Number of cycles during which the bus write request signal was HIGH."/>
+ <event event="0x06" title="Mali-4xx FP" name="Bus read transactions count" description="Number of read requests accepted by the bus."/>
+ <event event="0x07" title="Mali-4xx FP" name="Bus write transactions" description="Number of write requests accepted by the bus."/>
+ <event event="0x09" title="Mali-4xx FP" name="Tile writeback writes" description="64-bit words written to the bus by the writeback unit."/>
+ <event event="0x0a" title="Mali-4xx FP" name="Store unit writes" description="64-bit words written to the bus by the store unit."/>
+ <event event="0x0d" title="Mali-4xx FP" name="Texture cache uncompressed reads" description="Number of 64-bit words read from the bus into the uncompressed textures cache."/>
+ <event event="0x0e" title="Mali-4xx FP" name="Polygon list reads" description="Number of 64-bit words read from the bus by the polygon list reader."/>
+ <event event="0x0f" title="Mali-4xx FP" name="RSW reads" description="Number of 64-bit words read from the bus into the Render State Word register."/>
+ <event event="0x10" title="Mali-4xx FP" name="Vertex cache reads" description="Number of 64-bit words read from the bus into the vertex cache."/>
+ <event event="0x11" title="Mali-4xx FP" name="Uniform remapping reads" description="Number of 64-bit words read from the bus when reading from the uniform remapping table."/>
+ <event event="0x12" title="Mali-4xx FP" name="Program cache reads" description="Number of 64-bit words read from the bus into the fragment shader program cache."/>
+ <event event="0x13" title="Mali-4xx FP" name="Varying reads" description="Number of 64-bit words containing varyings generated by the vertex processing read from the bus."/>
+ <event event="0x14" title="Mali-4xx FP" name="Texture descriptors reads" description="Number of 64-bit words containing texture descriptors read from the bus."/>
+ <event event="0x15" title="Mali-4xx FP" name="Texture descriptor remapping reads" description="Number of 64-bit words read from the bus when reading from the texture descriptor remapping table."/>
+ <event event="0x17" title="Mali-4xx FP" name="Load unit reads" description="Number of 64-bit words read from the bus by the LOAD sub-instruction."/>
+ <event event="0x18" title="Mali-4xx FP" name="Polygon count" description="Number of triangles read from the polygon list."/>
+ <event event="0x19" title="Mali-4xx FP" name="Pixel rectangle count" description="Number of pixel rectangles read from the polygon list."/>
+ <event event="0x1a" title="Mali-4xx FP" name="Lines count" description="Number of lines read from the polygon list."/>
+ <event event="0x1b" title="Mali-4xx FP" name="Points count" description="Number of points read from the polygon list."/>
+ <event event="0x1c" title="Mali-4xx FP" name="Stall cycles PolygonListReader" description="Number of clock cycles the Polygon List Reader waited for output being collected."/>
+ <event event="0x1d" title="Mali-4xx FP" name="Stall cycles triangle setup" description="Number of clock cycles the TSC waited for input."/>
+ <event event="0x1e" title="Mali-4xx FP" name="Quad rasterized count" description="Number of 2x?2 quads output from rasterizer."/>
+ <event event="0x1f" title="Mali-4xx FP" name="Fragment rasterized count" description="Number of fragment rasterized. Fragments/(Quads*4) gives average actual fragments per quad."/>
+ <event event="0x20" title="Mali-4xx FP" name="Fragment rejected fragment-kill count" description="Number of fragments exiting the fragment shader as killed."/>
+ <event event="0x21" title="Mali-4xx FP" name="Fragment rejected fwd-fragment-kill count" description="Number of fragments killed by forward fragment kill."/>
+ <event event="0x22" title="Mali-4xx FP" name="Fragment passed z/stencil count" description="Number of fragments passing Z and stencil test."/>
+ <event event="0x23" title="Mali-4xx FP" name="Patches rejected early z/stencil count" description="Number of patches rejected by EarlyZ. A patch can be 8x8, 4x4 or 2x2 pixels."/>
+ <event event="0x24" title="Mali-4xx FP" name="Patches evaluated" description="Number of patches evaluated for EarlyZ rejection."/>
+ <event event="0x25" title="Mali-4xx FP" name="Instruction completed count" description="Number of fragment shader instruction words completed. It is a function of pixels processed and the length of the shader programs."/>
+ <event event="0x26" title="Mali-4xx FP" name="Instruction failed rendezvous count" description="Number of fragment shader instructions not completed because of failed Rendezvous."/>
+ <event event="0x27" title="Mali-4xx FP" name="Instruction failed varying-miss count" description="Number of fragment shader instructions not completed because of failed varying operation."/>
+ <event event="0x28" title="Mali-4xx FP" name="Instruction failed texture-miss count" description="Number of fragment shader instructions not completed because of failed texture operation."/>
+ <event event="0x29" title="Mali-4xx FP" name="Instruction failed load-miss count" description="Number of fragment shader instructions not completed because of failed load operation."/>
+ <event event="0x2a" title="Mali-4xx FP" name="Instruction failed tile read-miss count" description="Number of fragment shader instructions not completed because of failed read from the tilebuffer."/>
+ <event event="0x2b" title="Mali-4xx FP" name="Instruction failed store-miss count" description="Number of fragment shader instructions not completed because of failed store operation."/>
+ <event event="0x2c" title="Mali-4xx FP" name="Rendezvous breakage count" description="Number of Rendezvous breakages reported."/>
+ <event event="0x2d" title="Mali-4xx FP" name="Pipeline bubbles cycle count" description="Number of unused cycles in the fragment shader while rendering is active."/>
+ <event event="0x2e" title="Mali-4xx FP" name="Texture mapper multipass count" description="Number of texture operations looped because of more texture passes needed."/>
+ <event event="0x2f" title="Mali-4xx FP" name="Texture mapper cycle count" description="Number of texture operation cycles."/>
+ <event event="0x30" title="Mali-4xx FP" name="Vertex cache hit count" description="Number of times a requested vertex was found in the cache (Number of vertex cache hits)."/>
+ <event event="0x31" title="Mali-4xx FP" name="Vertex cache miss count" description="Number of times a requested vertex was not found in the cache (Number of vertex cache misses)."/>
+ <event event="0x32" title="Mali-4xx FP" name="Varying cache hit count" description="Number of times a requested varying was found in the cache (Number of varying cache hits)."/>
+ <event event="0x33" title="Mali-4xx FP" name="Varying cache miss count" description="Number of times a requested varying was not found in the cache (Number of varying cache misses)."/>
+ <event event="0x34" title="Mali-4xx FP" name="Varying cache conflict miss count" description="Number of times a requested varying was not in the cache and its value, retrieved from memory, must overwrite an older cache entry. This happens when an access pattern cannot be serviced by the cache."/>
+ <event event="0x35" title="Mali-4xx FP" name="Texture cache hit count" description="Number of times a requested texel was found in the texture cache (Number of texture cache hits)."/>
+ <event event="0x36" title="Mali-4xx FP" name="Texture cache miss count" description="Number of times a requested texel was not found in the texture cache (Number of texture cache misses)."/>
+ <event event="0x37" title="Mali-4xx FP" name="Texture cache conflict miss count" description="Number of times a requested texel was not in the cache and its value, retrieved from memory, must overwrite an older cache entry. This happens when an access pattern cannot be serviced by the cache."/>
+ <event event="0x38" title="Mali-4xx FP" name="Compressed texture cache hit count" description="Number of times a requested item was found in the cache."/>
+ <event event="0x39" title="Mali-4xx FP" name="Compressed texture cache miss count" description="Number of times a requested item was not found in the cache."/>
+ <event event="0x3a" title="Mali-4xx FP" name="Load/Store cache hit count" description="Number of hits in the load/store cache."/>
+ <event event="0x3b" title="Mali-4xx FP" name="Load/Store cache miss count" description="Number of misses in the load/store cache."/>
+ <event event="0x3c" title="Mali-4xx FP" name="Program cache hit count" description="Number of hits in the program cache."/>
+ <event event="0x3d" title="Mali-4xx FP" name="Program cache miss count" description="Number of misses in the program cache."/>
+ </category>
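The "Fragment rasterized count" description above gives the derived metric Fragments/(Quads*4). A minimal worked sketch of that calculation in Python, with made-up sample counter values (nothing below comes from the diff itself):

    # Average fragments per 2x2 quad, per the "Fragment rasterized count"
    # description. Counter values are hypothetical samples.
    quads_rasterized = 120_000      # event 0x1e: Quad rasterized count
    fragments_rasterized = 390_000  # event 0x1f: Fragment rasterized count

    # Each quad covers 4 pixel positions, so fully covered quads give 1.0.
    avg = fragments_rasterized / (quads_rasterized * 4)
    print(f"average fragments per quad: {avg:.2f}")  # 0.81

A ratio well below 1.0 suggests many partially covered quads, for example from small or thin triangles.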
+ <counter_set name="ARM_Mali-4xx_L2_0_cnt" title="Mali-4xx L2" description="Mali GPU L2 Cache Core 0" count="2"/>
+ <category name="Mali-4xx L2" counter_set="ARM_Mali-4xx_L2_0_cnt" per_cpu="no">
+ <event event="0x01" title="Mali L2 Cache" name="Total clock cycles" description="Total clock cycles"/>
+ <event event="0x02" title="Mali L2 Cache" name="Active clock cycles" description="Active clock cycles"/>
+ <option_set name="All">
+ <option event_delta="0x08" name="Master" description="Master"/>
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+ <option_set name="Slaves">
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+ <event event="0x00" option_set="All" title="Mali L2 Cache" name="Read transactions" description="Read transactions"/>
+ <event event="0x01" option_set="All" title="Mali L2 Cache" name="Write transactions" description="Write transactions"/>
+ <event event="0x02" option_set="All" title="Mali L2 Cache" name="Words read" description="Words read"/>
+ <event event="0x03" option_set="All" title="Mali L2 Cache" name="Words written" description="Words written"/>
+ <event event="0x04" option_set="Slaves" title="Mali L2 Cache" name="Read hits" description="Read hits"/>
+ <event event="0x05" option_set="Slaves" title="Mali L2 Cache" name="Read misses" description="Read misses"/>
+ <event event="0x06" option_set="Slaves" title="Mali L2 Cache" name="Write invalidates" description="Write invalidates"/>
+ <event event="0x07" option_set="Slaves" title="Mali L2 Cache" name="Read invalidates" description="Read invalidates"/>
+ <event event="0x08" option_set="Slaves" title="Mali L2 Cache" name="Cacheable read transactions" description="Cacheable read transactions"/>
+ </category>
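The option_set blocks above appear to encode port selection as an offset: each option's event_delta is presumably added to the base event number, so "Read transactions" (base 0x00) measured on Slave 1 would become event 0x30. That base-plus-delta rule is an assumption read off the value layout, not something the diff states. A sketch:

    # Hypothetical reconstruction of event_delta composition; the
    # base-plus-delta rule is assumed, not confirmed by the XML.
    OPTIONS_ALL = {
        "Master": 0x08, "All slaves": 0x10, "Slave 0": 0x20,
        "Slave 1": 0x30, "Slave 2": 0x40, "Slave 3": 0x50, "Slave 4": 0x60,
    }
    # The "Slaves" option_set is the same table minus the Master entry.

    def effective_event(base_event: int, option: str) -> int:
        # Select the L2 port variant of a base counter event.
        return base_event + OPTIONS_ALL[option]

    print(hex(effective_event(0x00, "Slave 1")))  # 0x30: Read transactions, Slave 1
    print(hex(effective_event(0x04, "Slave 0")))  # 0x24: Read hits, Slave 0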
+ <counter_set name="ARM_Mali-4xx_L2_1_cnt" title="Mali-4xx L2 1" description="Mali GPU L2 Cache Core 1" count="2"/>
+ <category name="Mali-4xx L2_1" counter_set="ARM_Mali-4xx_L2_1_cnt" per_cpu="no">
+ <event event="0x01" title="Mali L2 Cache 1" name="Total clock cycles" description="Total clock cycles"/>
+ <event event="0x02" title="Mali L2 Cache 1" name="Active clock cycles" description="Active clock cycles"/>
+ <option_set name="All">
+ <option event_delta="0x08" name="Master" description="Master"/>
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+ <option_set name="Slaves">
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+ <event event="0x00" option_set="All" title="Mali L2 Cache 1" name="Read transactions" description="Read transactions"/>
+ <event event="0x01" option_set="All" title="Mali L2 Cache 1" name="Write transactions" description="Write transactions"/>
+ <event event="0x02" option_set="All" title="Mali L2 Cache 1" name="Words read" description="Words read"/>
+ <event event="0x03" option_set="All" title="Mali L2 Cache 1" name="Words written" description="Words written"/>
+ <event event="0x04" option_set="Slaves" title="Mali L2 Cache 1" name="Read hits" description="Read hits"/>
+ <event event="0x05" option_set="Slaves" title="Mali L2 Cache 1" name="Read misses" description="Read misses"/>
+ <event event="0x06" option_set="Slaves" title="Mali L2 Cache 1" name="Write invalidates" description="Write invalidates"/>
+ <event event="0x07" option_set="Slaves" title="Mali L2 Cache 1" name="Read invalidates" description="Read invalidates"/>
+ <event event="0x08" option_set="Slaves" title="Mali L2 Cache 1" name="Cacheable read transactions" description="Cacheable read transactions"/>
+ </category>
+ <counter_set name="ARM_Mali-4xx_L2_2_cnt" title="Mali-4xx L2 2" description="Mali GPU L2 Cache Core 2" count="2"/>
+ <category name="Mali-4xx L2_2" counter_set="ARM_Mali-4xx_L2_2_cnt" per_cpu="no">
+ <event event="0x01" title="Mali L2 Cache 2" name="Total clock cycles" description="Total clock cycles"/>
+ <event event="0x02" title="Mali L2 Cache 2" name="Active clock cycles" description="Active clock cycles"/>
+ <option_set name="All">
+ <option event_delta="0x08" name="Master" description="Master"/>
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+ <option_set name="Slaves">
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+ <event event="0x00" option_set="All" title="Mali L2 Cache 2" name="Read transactions" description="Read transactions"/>
+ <event event="0x01" option_set="All" title="Mali L2 Cache 2" name="Write transactions" description="Write transactions"/>
+ <event event="0x02" option_set="All" title="Mali L2 Cache 2" name="Words read" description="Words read"/>
+ <event event="0x03" option_set="All" title="Mali L2 Cache 2" name="Words written" description="Words written"/>
+ <event event="0x04" option_set="Slaves" title="Mali L2 Cache 2" name="Read hits" description="Read hits"/>
+ <event event="0x05" option_set="Slaves" title="Mali L2 Cache 2" name="Read misses" description="Read misses"/>
+ <event event="0x06" option_set="Slaves" title="Mali L2 Cache 2" name="Write invalidates" description="Write invalidates"/>
+ <event event="0x07" option_set="Slaves" title="Mali L2 Cache 2" name="Read invalidates" description="Read invalidates"/>
+ <event event="0x08" option_set="Slaves" title="Mali L2 Cache 2" name="Cacheable read transactions" description="Cacheable read transactions"/>
+ </category>
+ <counter_set name="ARM_Mali-4xx_Filmstrip_cnt" count="1"/>
+ <category name="Mali-4xx Filmstrip" counter_set="ARM_Mali-4xx_Filmstrip_cnt" per_cpu="no">
+ <option_set name="fs">
+ <option event_delta="0x3c" name="1:60" description="captures every 60th frame"/>
+ <option event_delta="0x1e" name="1:30" description="captures every 30th frame"/>
+ <option event_delta="0xa" name="1:10" description="captures every 10th frame"/>
+ </option_set>
+ <event event="0x0400" option_set="fs" title="ARM Mali-4xx" name="Filmstrip" description="Scaled framebuffer"/>
+ </category>
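The filmstrip option deltas are simply the capture interval written in hex (0x3c = 60, 0x1e = 30, 0xa = 10); how the daemon merges them with the 0x0400 base event is assumed here, not stated in the XML:

    # Filmstrip capture ratios; base-plus-delta combination is an assumption.
    for delta, label in [(0x3c, "1:60"), (0x1e, "1:30"), (0x0a, "1:10")]:
        print(f"{label} captures every {delta}th frame -> event {hex(0x0400 + delta)}")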
+ <category name="ARM_Mali-4xx_Voltage" per_cpu="no">
+ <event counter="ARM_Mali-4xx_Voltage" title="Mali GPU Voltage" name="Voltage" class="absolute" display="average" average_selection="yes" units="mV" description="GPU core voltage."/>
+ </category>
+ <category name="ARM_Mali-4xx_Frequency" per_cpu="no">
+ <event counter="ARM_Mali-4xx_Frequency" title="Mali GPU Frequency" name="Frequency" display="average" average_selection="yes" units="MHz" description="GPU core frequency."/>
+ </category>
+ <category name="Mali-4xx Activity" counter_set="ARM_Mali-4xx_Activity_cnt">
+ <event counter="ARM_Mali-4xx_fragment" title="GPU Fragment" name="Activity" class="activity" activity1="Activity" activity_color1="0x00006fcc" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" description="GPU Fragment Activity"/>
+ <event counter="ARM_Mali-4xx_vertex" title="GPU Vertex" name="Activity" class="activity" activity1="Activity" activity_color1="0x00eda000" rendering_type="bar" average_selection="yes" percentage="yes" description="GPU Vertex Activity"/>
+ </category>
+ <category name="Mali-4xx Software Counters" counter_set="ARM_Mali-4xx_SW_cnt" per_cpu="no">
+ <!-- EGL Counters -->
+ <event counter="ARM_Mali-4xx_SW_0" title="Mali EGL Software Counters" name="Blit Time" description="Time spent blitting the framebuffer from video memory to framebuffer."/>
+ <!-- glDrawElements Counters -->
+ <event counter="ARM_Mali-4xx_SW_1" title="glDrawElements Statistics" name="Calls to glDrawElements" description="Number of calls to glDrawElements."/>
+ <event counter="ARM_Mali-4xx_SW_2" title="glDrawElements Statistics" name="Indices to glDrawElements" description="Number of indices to glDrawElements."/>
+ <event counter="ARM_Mali-4xx_SW_3" title="glDrawElements Statistics" name="Transformed by glDrawElements" description="Number of vertices transformed by glDrawElements."/>
+ <!-- glDrawArrays Counters -->
+ <event counter="ARM_Mali-4xx_SW_4" title="glDrawArrays Statistics" name="Calls to glDrawArrays" description="Number of calls to glDrawArrays."/>
+ <event counter="ARM_Mali-4xx_SW_5" title="glDrawArrays Statistics" name="Transformed by glDrawArrays" description="Number of vertices transformed by glDrawArrays."/>
+ <!-- Draw Call Counters -->
+ <event counter="ARM_Mali-4xx_SW_6" title="Drawcall Statistics" name="Points" description="Number of calls to glDraw* with parameter GL_POINTS."/>
+ <event counter="ARM_Mali-4xx_SW_7" title="Drawcall Statistics" name="Lines" description="Number of calls to glDraw* with parameter GL_LINES."/>
+ <event counter="ARM_Mali-4xx_SW_8" title="Drawcall Statistics" name="Lineloop" description="Number of calls to glDraw* with parameter GL_LINE_LOOP."/>
+ <event counter="ARM_Mali-4xx_SW_9" title="Drawcall Statistics" name="Linestrip" description="Number of calls to glDraw* with parameter GL_LINE_STRIP."/>
+ <event counter="ARM_Mali-4xx_SW_10" title="Drawcall Statistics" name="Triangles" description="Number of calls to glDraw* with parameter GL_TRIANGLES."/>
+ <event counter="ARM_Mali-4xx_SW_11" title="Drawcall Statistics" name="Trianglestrip" description="Number of calls to glDraw* with parameter GL_TRIANGLE_STRIP."/>
+ <event counter="ARM_Mali-4xx_SW_12" title="Drawcall Statistics" name="Trianglefan" description="Number of calls to glDraw* with parameter GL_TRIANGLE_FAN."/>
+ <event counter="ARM_Mali-4xx_SW_13" title="Drawcall Statistics" name="Vertex Upload Time (us)" description="Time spent uploading vertex attributes and faceindex data not present in a VBO."/>
+ <event counter="ARM_Mali-4xx_SW_14" title="Drawcall Statistics" name="Uniform Bytes Copied (bytes)" description="Number of bytes copied to Mali memory as a result of uniforms update."/>
+ <!-- Buffer Profiling Counters -->
+ <event counter="ARM_Mali-4xx_SW_15" title="Buffer Profiling" name="Texture Upload Time (ms)" description="Time spent uploading textures."/>
+ <event counter="ARM_Mali-4xx_SW_16" title="Buffer Profiling" name="VBO Upload Time (ms)" description="Time spent uploading vertex buffer objects."/>
+ <event counter="ARM_Mali-4xx_SW_17" title="Buffer Profiling" name="FBO Flushes" description="Number of flushed on framebuffer attachment."/>
+ <!-- OpenGL ES 1.1 Emulation -->
+ <event counter="ARM_Mali-4xx_SW_18" title="Fixed-function Emulation" name="# Vertex Shaders Generated" description="Number of vertex shaders generated."/>
+ <event counter="ARM_Mali-4xx_SW_19" title="Fixed-function Emulation" name="# Fragment Shaders Generated" description="Number of fragment shaders generated."/>
+ <!-- Geometry Statistics -->
+ <event counter="ARM_Mali-4xx_SW_33" title="Geometry Statistics" name="Triangles" description="The total number of triangles passed to GLES per-frame."/>
+ <event counter="ARM_Mali-4xx_SW_34" title="Geometry Statistics" name="Independent Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLES."/>
+ <event counter="ARM_Mali-4xx_SW_35" title="Geometry Statistics" name="Strip Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLE_STRIP."/>
+ <event counter="ARM_Mali-4xx_SW_36" title="Geometry Statistics" name="Fan Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLE_FAN."/>
+ <event counter="ARM_Mali-4xx_SW_37" title="Geometry Statistics" name="Lines" description="Number of lines passed to GLES per-frame."/>
+ <event counter="ARM_Mali-4xx_SW_38" title="Geometry Statistics" name="Independent Lines" description="Number of lines passed to GLES using the mode GL_LINES."/>
+ <event counter="ARM_Mali-4xx_SW_39" title="Geometry Statistics" name="Strip Lines" description="Number of lines passed to GLES using the mode GL_LINE_STRIP."/>
+ <event counter="ARM_Mali-4xx_SW_40" title="Geometry Statistics" name="Loop Lines" description="Number of lines passed to GLES using the mode GL_LINE_LOOP."/>
+ </category>
diff --git a/tools/gator/daemon/events-Mali-Midgard.xml b/tools/gator/daemon/events-Mali-Midgard.xml
new file mode 100644
index 000000000000..b6ab4b88cd2e
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-Midgard.xml
@@ -0,0 +1,46 @@
+ <category name="Mali-Midgard Software Counters" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_TOTAL_ALLOC_PAGES" title="Mali Total Alloc Pages" name="Total number of allocated pages" description="Mali total number of allocated pages."/>
+ </category>
+ <category name="Mali-Midgard PM Shader" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_PM_SHADER_0" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 0" description="Mali PM Shader: PM Shader Core 0."/>
+ <event counter="ARM_Mali-Midgard_PM_SHADER_1" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 1" description="Mali PM Shader: PM Shader Core 1."/>
+ <event counter="ARM_Mali-Midgard_PM_SHADER_2" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 2" description="Mali PM Shader: PM Shader Core 2."/>
+ <event counter="ARM_Mali-Midgard_PM_SHADER_3" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 3" description="Mali PM Shader: PM Shader Core 3."/>
+ <event counter="ARM_Mali-Midgard_PM_SHADER_4" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 4" description="Mali PM Shader: PM Shader Core 4."/>
+ <event counter="ARM_Mali-Midgard_PM_SHADER_5" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 5" description="Mali PM Shader: PM Shader Core 5."/>
+ <event counter="ARM_Mali-Midgard_PM_SHADER_6" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 6" description="Mali PM Shader: PM Shader Core 6."/>
+ <event counter="ARM_Mali-Midgard_PM_SHADER_7" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 7" description="Mali PM Shader: PM Shader Core 7."/>
+ </category>
+ <category name="Mali-Midgard PM Tiler" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_PM_TILER_0" display="average" average_selection="yes" percentage="yes" title="Mali PM Tiler" name="PM Tiler Core 0" description="Mali PM Tiler: PM Tiler Core 0."/>
+ </category>
+ <category name="Mali-Midgard PM L2" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_PM_L2_0" display="average" average_selection="yes" percentage="yes" title="Mali PM L2" name="PM L2 Core 0" description="Mali PM L2: PM L2 Core 0."/>
+ <event counter="ARM_Mali-Midgard_PM_L2_1" display="average" average_selection="yes" percentage="yes" title="Mali PM L2" name="PM L2 Core 1" description="Mali PM L2: PM L2 Core 1."/>
+ </category>
+ <category name="Mali-Midgard MMU Address Space" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_MMU_AS_0" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 0" description="Mali MMU Address Space 0 usage."/>
+ <event counter="ARM_Mali-Midgard_MMU_AS_1" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 1" description="Mali MMU Address Space 1 usage."/>
+ <event counter="ARM_Mali-Midgard_MMU_AS_2" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 2" description="Mali MMU Address Space 2 usage."/>
+ <event counter="ARM_Mali-Midgard_MMU_AS_3" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 3" description="Mali MMU Address Space 3 usage."/>
+ </category>
+ <category name="Mali-Midgard MMU Page Fault" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_MMU_PAGE_FAULT_0" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 0" description="Reports the number of newly allocated pages after a MMU page fault in address space 0."/>
+ <event counter="ARM_Mali-Midgard_MMU_PAGE_FAULT_1" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 1" description="Reports the number of newly allocated pages after a MMU page fault in address space 1."/>
+ <event counter="ARM_Mali-Midgard_MMU_PAGE_FAULT_2" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 2" description="Reports the number of newly allocated pages after a MMU page fault in address space 2."/>
+ <event counter="ARM_Mali-Midgard_MMU_PAGE_FAULT_3" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 3" description="Reports the number of newly allocated pages after a MMU page fault in address space 3."/>
+ </category>
+ <counter_set name="ARM_Mali-Midgard_Filmstrip_cnt" count="1"/>
+ <category name="Mali-Midgard Filmstrip" counter_set="ARM_Mali-Midgard_Filmstrip_cnt" per_cpu="no">
+ <option_set name="fs">
+ <option event_delta="0x3c" name="1:60" description="captures every 60th frame"/>
+ <option event_delta="0x1e" name="1:30" description="captures every 30th frame"/>
+ <option event_delta="0xa" name="1:10" description="captures every 10th frame"/>
+ </option_set>
+ <event event="0x0400" option_set="fs" title="ARM Mali-Midgard" name="Filmstrip" description="Scaled framebuffer"/>
+ </category>
+ <category name="Mali-Midgard Activity" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_fragment" title="GPU Fragment" name="Activity" class="activity" activity1="Activity" activity_color1="0x00006fcc" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 0 Activity"/>
+ <event counter="ARM_Mali-Midgard_vertex" title="GPU Vertex-Tiling-Compute" name="Activity" class="activity" activity1="Activity" activity_color1="0x00eda000" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 1 Activity"/>
+ <event counter="ARM_Mali-Midgard_opencl" title="GPU Vertex-Compute" name="Activity" class="activity" activity1="Activity" activity_color1="0x00ef022f" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 2 Activity"/>
+ </category>
diff --git a/tools/gator/daemon/events-Mali-Midgard_hw.xml b/tools/gator/daemon/events-Mali-Midgard_hw.xml
new file mode 100644
index 000000000000..4f3323f197d7
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-Midgard_hw.xml
@@ -0,0 +1,91 @@
+ <category name="Mali-Midgard Job Manager" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles the GPU was active"/>
+ <event counter="ARM_Mali-Midgard_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles the GPU had a pending interrupt"/>
+ <event counter="ARM_Mali-Midgard_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) was active"/>
+ <event counter="ARM_Mali-Midgard_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) was active"/>
+ <event counter="ARM_Mali-Midgard_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) was active"/>
+ <event counter="ARM_Mali-Midgard_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+ <event counter="ARM_Mali-Midgard_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+ <event counter="ARM_Mali-Midgard_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+ <event counter="ARM_Mali-Midgard_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+ <event counter="ARM_Mali-Midgard_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+ <event counter="ARM_Mali-Midgard_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+ </category>
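The Job Manager cycle counters lend themselves to simple derived utilization figures: dividing each JSn active count by GPU_ACTIVE over the same capture window gives per-slot load. A minimal sketch with hypothetical sample values:

    # Per-job-slot utilization from the Job Manager cycle counters above.
    # All counter values are made-up samples for one capture window.
    gpu_active = 1_000_000                        # ..._GPU_ACTIVE
    js_active = {
        "JS0 (fragment)":             850_000,    # ..._JS0_ACTIVE
        "JS1 (vertex/tiler/compute)": 400_000,    # ..._JS1_ACTIVE
        "JS2 (vertex/compute)":        50_000,    # ..._JS2_ACTIVE
    }

    for slot, cycles in js_active.items():
        print(f"{slot}: {100.0 * cycles / gpu_active:.1f}% of active GPU cycles")

Slots can run concurrently (fragment work overlaps vertex/tiler work), so the percentages need not sum to 100.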
+ <category name="Mali-Midgard Tiler" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+ <event counter="ARM_Mali-Midgard_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+ <event counter="ARM_Mali-Midgard_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+ <event counter="ARM_Mali-Midgard_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+ <event counter="ARM_Mali-Midgard_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+ <event counter="ARM_Mali-Midgard_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+ <event counter="ARM_Mali-Midgard_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+ <event counter="ARM_Mali-Midgard_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+ <event counter="ARM_Mali-Midgard_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+ <event counter="ARM_Mali-Midgard_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+ <event counter="ARM_Mali-Midgard_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
+ <event counter="ARM_Mali-Midgard_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
+ <event counter="ARM_Mali-Midgard_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
+ <event counter="ARM_Mali-Midgard_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
+ <event counter="ARM_Mali-Midgard_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
+ <event counter="ARM_Mali-Midgard_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
+ <event counter="ARM_Mali-Midgard_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
+ <event counter="ARM_Mali-Midgard_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_1" title="Mali Tiler Commands" name="Prims in 1 command" description="Number of primitives producing 1 command"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_2" title="Mali Tiler Commands" name="Prims in 2 command" description="Number of primitives producing 2 commands"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_3" title="Mali Tiler Commands" name="Prims in 3 command" description="Number of primitives producing 3 commands"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_4" title="Mali Tiler Commands" name="Prims in 4 command" description="Number of primitives producing 4 commands"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_4_7" title="Mali Tiler Commands" name="Prims in 4-7 commands" description="Number of primitives producing 4-7 commands"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_5_7" title="Mali Tiler Commands" name="Prims in 5-7 commands" description="Number of primitives producing 5-7 commands"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_8_15" title="Mali Tiler Commands" name="Prims in 8-15 commands" description="Number of primitives producing 8-15 commands"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_16_63" title="Mali Tiler Commands" name="Prims in 16-63 commands" description="Number of primitives producing 16-63 commands"/>
+ <event counter="ARM_Mali-Midgard_COMMAND_64" title="Mali Tiler Commands" name="Prims in &gt;= 64 commands" description="Number of primitives producing &gt;= 64 commands"/>
+ </category>
+ <category name="Mali-Midgard Shader Core" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles the Tripipe was active"/>
+ <event counter="ARM_Mali-Midgard_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+ <event counter="ARM_Mali-Midgard_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex\compute processing was active"/>
+ <event counter="ARM_Mali-Midgard_FRAG_CYCLE_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+ <event counter="ARM_Mali-Midgard_FRAG_THREADS" title="Mali Core Threads" name="Fragment threads" description="Number of fragment threads started"/>
+ <event counter="ARM_Mali-Midgard_FRAG_DUMMY_THREADS" title="Mali Core Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+ <event counter="ARM_Mali-Midgard_FRAG_QUADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
+ <event counter="ARM_Mali-Midgard_FRAG_QUADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
+ <event counter="ARM_Mali-Midgard_FRAG_THREADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
+ <event counter="ARM_Mali-Midgard_FRAG_THREADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
+ <event counter="ARM_Mali-Midgard_COMPUTE_TASKS" title="Mali Compute Threads" name="Compute tasks" description="Number of compute tasks"/>
+ <event counter="ARM_Mali-Midgard_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads started" description="Number of compute threads started"/>
+ <event counter="ARM_Mali-Midgard_COMPUTE_CYCLES_DESC" title="Mali Compute Threads" name="Compute cycles awaiting descriptors" description="Number of compute cycles spent waiting for descriptors"/>
+ <event counter="ARM_Mali-Midgard_FRAG_PRIMATIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+ <event counter="ARM_Mali-Midgard_FRAG_PRIMATIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+ <event counter="ARM_Mali-Midgard_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+ <event counter="ARM_Mali-Midgard_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+ <event counter="ARM_Mali-Midgard_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+ <event counter="ARM_Mali-Midgard_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+ <event counter="ARM_Mali-Midgard_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+ <event counter="ARM_Mali-Midgard_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+ <event counter="ARM_Mali-Midgard_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+ <event counter="ARM_Mali-Midgard_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the the A-pipe (normalized per pipeline)"/>
+ <event counter="ARM_Mali-Midgard_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+ <event counter="ARM_Mali-Midgard_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+ <event counter="ARM_Mali-Midgard_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+ <event counter="ARM_Mali-Midgard_TEX_THREADS" title="Mali Texture Pipe" name="T instruction issues" description="Number of instructions issused to the T-pipe, including restarts"/>
+ <event counter="ARM_Mali-Midgard_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
+ <event counter="ARM_Mali-Midgard_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-Midgard_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-Midgard_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-Midgard_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-Midgard_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-Midgard_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-Midgard_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+ <event counter="ARM_Mali-Midgard_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+ <event counter="ARM_Mali-Midgard_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
+ </category>
+ <category name="Mali-Midgard L2 and MMU" per_cpu="no">
+ <event counter="ARM_Mali-Midgard_L2_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+ <event counter="ARM_Mali-Midgard_L2_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+ <event counter="ARM_Mali-Midgard_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+ <event counter="ARM_Mali-Midgard_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+ <event counter="ARM_Mali-Midgard_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+ <event counter="ARM_Mali-Midgard_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+ <event counter="ARM_Mali-Midgard_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-Midgard_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
+ </category>
diff --git a/tools/gator/daemon/events-Mali-T60x_hw.xml b/tools/gator/daemon/events-Mali-T60x_hw.xml
new file mode 100644
index 000000000000..50797e6492ad
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-T60x_hw.xml
@@ -0,0 +1,108 @@
+
+ <category name="Mali Job Manager" per_cpu="no">
+
+ <event counter="ARM_Mali-T60x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+ <event counter="ARM_Mali-T60x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+ <event counter="ARM_Mali-T60x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+ <event counter="ARM_Mali-T60x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+ <event counter="ARM_Mali-T60x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+ <event counter="ARM_Mali-T60x_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+ <event counter="ARM_Mali-T60x_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+ <event counter="ARM_Mali-T60x_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+ <event counter="ARM_Mali-T60x_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+ <event counter="ARM_Mali-T60x_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+ <event counter="ARM_Mali-T60x_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+ </category>
+
+ <category name="Mali Tiler" per_cpu="no">
+
+ <event counter="ARM_Mali-T60x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+ <event counter="ARM_Mali-T60x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+ <event counter="ARM_Mali-T60x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+ <event counter="ARM_Mali-T60x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+ <event counter="ARM_Mali-T60x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+ <event counter="ARM_Mali-T60x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+ <event counter="ARM_Mali-T60x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+ <event counter="ARM_Mali-T60x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+ <event counter="ARM_Mali-T60x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+ <event counter="ARM_Mali-T60x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+ <event counter="ARM_Mali-T60x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+ <event counter="ARM_Mali-T60x_TI_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
+ <event counter="ARM_Mali-T60x_TI_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
+ <event counter="ARM_Mali-T60x_TI_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
+ <event counter="ARM_Mali-T60x_TI_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
+ <event counter="ARM_Mali-T60x_TI_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
+ <event counter="ARM_Mali-T60x_TI_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
+ <event counter="ARM_Mali-T60x_TI_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
+ <event counter="ARM_Mali-T60x_TI_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
+
+ </category>
+
+ <category name="Mali Shader Core" per_cpu="no">
+
+ <event counter="ARM_Mali-T60x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+ <event counter="ARM_Mali-T60x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+ <event counter="ARM_Mali-T60x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex\compute processing was active"/>
+ <event counter="ARM_Mali-T60x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+
+ <event counter="ARM_Mali-T60x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+ <event counter="ARM_Mali-T60x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+ <event counter="ARM_Mali-T60x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+ <event counter="ARM_Mali-T60x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+ <event counter="ARM_Mali-T60x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+ <event counter="ARM_Mali-T60x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+ <event counter="ARM_Mali-T60x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+ <event counter="ARM_Mali-T60x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+ <event counter="ARM_Mali-T60x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+ <event counter="ARM_Mali-T60x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+ <event counter="ARM_Mali-T60x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+ <event counter="ARM_Mali-T60x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+ <event counter="ARM_Mali-T60x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+ <event counter="ARM_Mali-T60x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the the A-pipe (normalized per pipeline)"/>
+
+ <event counter="ARM_Mali-T60x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+ <event counter="ARM_Mali-T60x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+ <event counter="ARM_Mali-T60x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+ <event counter="ARM_Mali-T60x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+ <event counter="ARM_Mali-T60x_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
+
+ <event counter="ARM_Mali-T60x_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T60x_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T60x_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T60x_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T60x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T60x_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T60x_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+ <event counter="ARM_Mali-T60x_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+ <event counter="ARM_Mali-T60x_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
+
+ </category>
+
+ <category name="Mali L2 Cache" per_cpu="no">
+
+ <event counter="ARM_Mali-T60x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+ <event counter="ARM_Mali-T60x_L2_EXT_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+ <event counter="ARM_Mali-T60x_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+ <event counter="ARM_Mali-T60x_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T60x_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+ <event counter="ARM_Mali-T60x_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T60x_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T60x_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T60x_L2_EXT_R_BUF_FULL" title="Mali L2 Cache" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer"/>
+ <event counter="ARM_Mali-T60x_L2_EXT_RD_BUF_FULL" title="Mali L2 Cache" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer"/>
+ <event counter="ARM_Mali-T60x_L2_EXT_W_BUF_FULL" title="Mali L2 Cache" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer"/>
+ <event counter="ARM_Mali-T60x_L2_READ_LOOKUP" title="Mali L2 Cache" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+ <event counter="ARM_Mali-T60x_L2_WRITE_LOOKUP" title="Mali L2 Cache" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+
+ </category>
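Because the T60x set exposes both L2_READ_LOOKUP and L2_READ_HIT, an L2 read hit rate falls out directly; the same pattern works for writes with L2_WRITE_LOOKUP and L2_WRITE_HIT. Sample values below are hypothetical:

    # L2 read hit rate from the T60x counters above (made-up samples).
    l2_read_lookups = 500_000  # ARM_Mali-T60x_L2_READ_LOOKUP
    l2_read_hits = 420_000     # ARM_Mali-T60x_L2_READ_HIT

    hit_rate = l2_read_hits / l2_read_lookups if l2_read_lookups else 0.0
    print(f"L2 read hit rate: {100.0 * hit_rate:.1f}%")  # 84.0%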
diff --git a/tools/gator/daemon/events-Mali-T62x_hw.xml b/tools/gator/daemon/events-Mali-T62x_hw.xml
new file mode 100644
index 000000000000..6ecc53c2ada1
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-T62x_hw.xml
@@ -0,0 +1,109 @@
+
+ <category name="Mali Job Manager" per_cpu="no">
+
+ <event counter="ARM_Mali-T62x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+ <event counter="ARM_Mali-T62x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+ <event counter="ARM_Mali-T62x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+ <event counter="ARM_Mali-T62x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+ <event counter="ARM_Mali-T62x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+ <event counter="ARM_Mali-T62x_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+ <event counter="ARM_Mali-T62x_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+ <event counter="ARM_Mali-T62x_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+ <event counter="ARM_Mali-T62x_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+ <event counter="ARM_Mali-T62x_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+ <event counter="ARM_Mali-T62x_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+ </category>
+
+ <category name="Mali Tiler" per_cpu="no">
+
+ <event counter="ARM_Mali-T62x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+ <event counter="ARM_Mali-T62x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+ <event counter="ARM_Mali-T62x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+ <event counter="ARM_Mali-T62x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+ <event counter="ARM_Mali-T62x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+ <event counter="ARM_Mali-T62x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+ <event counter="ARM_Mali-T62x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+ <event counter="ARM_Mali-T62x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+ <event counter="ARM_Mali-T62x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+ <event counter="ARM_Mali-T62x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+ <event counter="ARM_Mali-T62x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+ <event counter="ARM_Mali-T62x_TI_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
+ <event counter="ARM_Mali-T62x_TI_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
+ <event counter="ARM_Mali-T62x_TI_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
+ <event counter="ARM_Mali-T62x_TI_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
+ <event counter="ARM_Mali-T62x_TI_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
+ <event counter="ARM_Mali-T62x_TI_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
+ <event counter="ARM_Mali-T62x_TI_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
+ <event counter="ARM_Mali-T62x_TI_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
+
+ </category>
+
+ <category name="Mali Shader Core" per_cpu="no">
+
+ <event counter="ARM_Mali-T62x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+ <event counter="ARM_Mali-T62x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+ <event counter="ARM_Mali-T62x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex\compute processing was active"/>
+ <event counter="ARM_Mali-T62x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+ <event counter="ARM_Mali-T62x_FRAG_CYCLES_FPKQ_ACTIVE" title="Mali Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads"/>
+
+ <event counter="ARM_Mali-T62x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+ <event counter="ARM_Mali-T62x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+ <event counter="ARM_Mali-T62x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+ <event counter="ARM_Mali-T62x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+ <event counter="ARM_Mali-T62x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+ <event counter="ARM_Mali-T62x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+ <event counter="ARM_Mali-T62x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+ <event counter="ARM_Mali-T62x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+ <event counter="ARM_Mali-T62x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+ <event counter="ARM_Mali-T62x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+ <event counter="ARM_Mali-T62x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+ <event counter="ARM_Mali-T62x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+ <event counter="ARM_Mali-T62x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+ <event counter="ARM_Mali-T62x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the the A-pipe (normalized per pipeline)"/>
+
+ <event counter="ARM_Mali-T62x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+ <event counter="ARM_Mali-T62x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+ <event counter="ARM_Mali-T62x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+ <event counter="ARM_Mali-T62x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+ <event counter="ARM_Mali-T62x_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe recirculated due to a cache miss"/>
+
+ <event counter="ARM_Mali-T62x_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T62x_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T62x_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T62x_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T62x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T62x_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T62x_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+ <event counter="ARM_Mali-T62x_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+ <event counter="ARM_Mali-T62x_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops into LSC" description="Number of coherent memory snoops into the Load/Store cache"/>
+
+ </category>
+
+ <category name="Mali L2 Cache" per_cpu="no">
+
+ <event counter="ARM_Mali-T62x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+ <event counter="ARM_Mali-T62x_L2_EXT_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+ <event counter="ARM_Mali-T62x_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+ <event counter="ARM_Mali-T62x_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T62x_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+ <event counter="ARM_Mali-T62x_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T62x_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T62x_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles valid write data (W channel) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T62x_L2_EXT_R_BUF_FULL" title="Mali L2 Cache" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer"/>
+ <event counter="ARM_Mali-T62x_L2_EXT_RD_BUF_FULL" title="Mali L2 Cache" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer"/>
+ <event counter="ARM_Mali-T62x_L2_EXT_W_BUF_FULL" title="Mali L2 Cache" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer"/>
+ <event counter="ARM_Mali-T62x_L2_READ_LOOKUP" title="Mali L2 Cache" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+ <event counter="ARM_Mali-T62x_L2_WRITE_LOOKUP" title="Mali L2 Cache" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+
+ </category>
diff --git a/tools/gator/daemon/events-Mali-T72x_hw.xml b/tools/gator/daemon/events-Mali-T72x_hw.xml
new file mode 100644
index 000000000000..5587534770c8
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-T72x_hw.xml
@@ -0,0 +1,95 @@
+
+ <category name="Mali Job Manager" per_cpu="no">
+
+ <event counter="ARM_Mali-T72x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+ <event counter="ARM_Mali-T72x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+ <event counter="ARM_Mali-T72x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+ <event counter="ARM_Mali-T72x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+ <event counter="ARM_Mali-T72x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+ <event counter="ARM_Mali-T72x_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+ <event counter="ARM_Mali-T72x_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+ <event counter="ARM_Mali-T72x_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+ <event counter="ARM_Mali-T72x_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+ <event counter="ARM_Mali-T72x_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+ <event counter="ARM_Mali-T72x_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+ </category>
+
+ <category name="Mali Tiler" per_cpu="no">
+
+ <event counter="ARM_Mali-T72x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+ <event counter="ARM_Mali-T72x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+ <event counter="ARM_Mali-T72x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+ <event counter="ARM_Mali-T72x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+ <event counter="ARM_Mali-T72x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+ <event counter="ARM_Mali-T72x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+ <event counter="ARM_Mali-T72x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+ <event counter="ARM_Mali-T72x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+ <event counter="ARM_Mali-T72x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+ <event counter="ARM_Mali-T72x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+ <event counter="ARM_Mali-T72x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+ </category>
+
+ <category name="Mali Shader Core" per_cpu="no">
+
+ <event counter="ARM_Mali-T72x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+ <event counter="ARM_Mali-T72x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+ <event counter="ARM_Mali-T72x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active"/>
+ <event counter="ARM_Mali-T72x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+
+ <event counter="ARM_Mali-T72x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+ <event counter="ARM_Mali-T72x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+ <event counter="ARM_Mali-T72x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+ <event counter="ARM_Mali-T72x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+ <event counter="ARM_Mali-T72x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+ <event counter="ARM_Mali-T72x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+ <event counter="ARM_Mali-T72x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+ <event counter="ARM_Mali-T72x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+ <event counter="ARM_Mali-T72x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+ <event counter="ARM_Mali-T72x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+ <event counter="ARM_Mali-T72x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+ <event counter="ARM_Mali-T72x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+ <event counter="ARM_Mali-T72x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+ <event counter="ARM_Mali-T72x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of batched instructions executed by the A-pipe"/>
+
+ <event counter="ARM_Mali-T72x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+ <event counter="ARM_Mali-T72x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+ <event counter="ARM_Mali-T72x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+ <event counter="ARM_Mali-T72x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+
+ <event counter="ARM_Mali-T72x_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T72x_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T72x_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T72x_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T72x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T72x_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T72x_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+ <event counter="ARM_Mali-T72x_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+ <event counter="ARM_Mali-T72x_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops into LSC" description="Number of coherent memory snoops into the Load/Store cache"/>
+
+ </category>
+
+ <category name="Mali L2 Cache" per_cpu="no">
+
+ <event counter="ARM_Mali-T72x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+ <event counter="ARM_Mali-T72x_L2_EXT_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+ <event counter="ARM_Mali-T72x_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+ <event counter="ARM_Mali-T72x_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T72x_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+ <event counter="ARM_Mali-T72x_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T72x_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T72x_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles valid write data (W channel) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T72x_L2_READ_LOOKUP" title="Mali L2 Cache" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+ <event counter="ARM_Mali-T72x_L2_WRITE_LOOKUP" title="Mali L2 Cache" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+
+ </category>
diff --git a/tools/gator/daemon/events-Mali-T76x_hw.xml b/tools/gator/daemon/events-Mali-T76x_hw.xml
new file mode 100644
index 000000000000..be74c5a42624
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-T76x_hw.xml
@@ -0,0 +1,108 @@
+
+ <category name="Mali Job Manager" per_cpu="no">
+
+ <event counter="ARM_Mali-T76x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+ <event counter="ARM_Mali-T76x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+ <event counter="ARM_Mali-T76x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+ <event counter="ARM_Mali-T76x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+ <event counter="ARM_Mali-T76x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+ <event counter="ARM_Mali-T76x_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+ <event counter="ARM_Mali-T76x_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+ <event counter="ARM_Mali-T76x_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+ <event counter="ARM_Mali-T76x_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+ <event counter="ARM_Mali-T76x_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+ <event counter="ARM_Mali-T76x_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+ </category>
+
+ <category name="Mali Tiler" per_cpu="no">
+
+ <event counter="ARM_Mali-T76x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+ <event counter="ARM_Mali-T76x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+ <event counter="ARM_Mali-T76x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+ <event counter="ARM_Mali-T76x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+ <event counter="ARM_Mali-T76x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+ <event counter="ARM_Mali-T76x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+ <event counter="ARM_Mali-T76x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+ <event counter="ARM_Mali-T76x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+ <event counter="ARM_Mali-T76x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+ <event counter="ARM_Mali-T76x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+ <event counter="ARM_Mali-T76x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+ <event counter="ARM_Mali-T76x_TI_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
+ <event counter="ARM_Mali-T76x_TI_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
+ <event counter="ARM_Mali-T76x_TI_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
+ <event counter="ARM_Mali-T76x_TI_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
+ <event counter="ARM_Mali-T76x_TI_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
+ <event counter="ARM_Mali-T76x_TI_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
+ <event counter="ARM_Mali-T76x_TI_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
+ <event counter="ARM_Mali-T76x_TI_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
+
+ </category>
+
+ <category name="Mali Shader Core" per_cpu="no">
+
+ <event counter="ARM_Mali-T76x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+ <event counter="ARM_Mali-T76x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+ <event counter="ARM_Mali-T76x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active"/>
+ <event counter="ARM_Mali-T76x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+ <event counter="ARM_Mali-T76x_FRAG_CYCLES_FPKQ_ACTIVE" title="Mali Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads"/>
+
+ <event counter="ARM_Mali-T76x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+ <event counter="ARM_Mali-T76x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+ <event counter="ARM_Mali-T76x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+ <event counter="ARM_Mali-T76x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+ <event counter="ARM_Mali-T76x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+ <event counter="ARM_Mali-T76x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+ <event counter="ARM_Mali-T76x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+ <event counter="ARM_Mali-T76x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+ <event counter="ARM_Mali-T76x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+ <event counter="ARM_Mali-T76x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+ <event counter="ARM_Mali-T76x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+ <event counter="ARM_Mali-T76x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+ <event counter="ARM_Mali-T76x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+ <event counter="ARM_Mali-T76x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the A-pipe (normalized per pipeline)"/>
+
+ <event counter="ARM_Mali-T76x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+ <event counter="ARM_Mali-T76x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+ <event counter="ARM_Mali-T76x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+ <event counter="ARM_Mali-T76x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+ <event counter="ARM_Mali-T76x_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe recirculated due to a cache miss"/>
+
+ <event counter="ARM_Mali-T76x_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T76x_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T76x_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T76x_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T76x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+ <event counter="ARM_Mali-T76x_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+ <event counter="ARM_Mali-T76x_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+ <event counter="ARM_Mali-T76x_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+ <event counter="ARM_Mali-T76x_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops into LSC" description="Number of coherent memory snoops into the Load/Store cache"/>
+
+ </category>
+
+ <category name="Mali L2 Cache" per_cpu="no">
+
+ <event counter="ARM_Mali-T76x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+ <event counter="ARM_Mali-T76x_L2_EXT_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+ <event counter="ARM_Mali-T76x_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+ <event counter="ARM_Mali-T76x_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T76x_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+ <event counter="ARM_Mali-T76x_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T76x_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T76x_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles valid write data (W channel) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T76x_L2_EXT_R_BUF_FULL" title="Mali L2 Cache" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer"/>
+ <event counter="ARM_Mali-T76x_L2_EXT_RD_BUF_FULL" title="Mali L2 Cache" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer"/>
+ <event counter="ARM_Mali-T76x_L2_EXT_W_BUF_FULL" title="Mali L2 Cache" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer"/>
+ <event counter="ARM_Mali-T76x_L2_READ_LOOKUP" title="Mali L2 Cache" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+ <event counter="ARM_Mali-T76x_L2_WRITE_LOOKUP" title="Mali L2 Cache" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+ </category>
diff --git a/tools/gator/daemon/events-Mali-V500.xml b/tools/gator/daemon/events-Mali-V500.xml
new file mode 100644
index 000000000000..89bc7f4734df
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-V500.xml
@@ -0,0 +1,30 @@
+ <category name="Mali-V500">
+ <event counter="ARM_Mali-V500_cnt0" title="MVE-V500 Stats" name="Samples" class="absolute" description="The number of times we have taken a sample"/>
+ <event counter="ARM_Mali-V500_cnt1" title="MVE-V500 Input Totals" name="Queued input-buffers" class="absolute" description="The number of input-buffers that have been queued for consumption by the MVE"/>
+ <event counter="ARM_Mali-V500_cnt2" title="MVE-V500 Input Totals" name="Consumed input-buffers" class="absolute" description="The number of input-buffers that have been consumed by the MVE and returned to the application"/>
+ <event counter="ARM_Mali-V500_cnt3" title="MVE-V500 Output Totals" name="Queued output-buffers" class="absolute" description="The number of output-buffers that have been queued for usage by the MVE"/>
+ <event counter="ARM_Mali-V500_cnt4" title="MVE-V500 Output Totals" name="Consumed output-buffers" class="absolute" description="The number of output-buffers that have been consumed by the MVE and returned to the application"/>
+ <event counter="ARM_Mali-V500_cnt5" title="MVE-V500 Stats" name="Created Sessions" class="absolute" description="The number of created sessions throughout the lifetime of the process"/>
+ <event counter="ARM_Mali-V500_cnt6" title="MVE-V500 Sessions" name="Active Sessions" description="The number of currently existing sessions"/>
+ <event counter="ARM_Mali-V500_cnt7" title="MVE-V500 Stats" name="Processed Frames" class="absolute" description="The number of processed frames. A processed frame is one where the encode or decode is complete for that particular frame. Frames can be processed out of order so this is not the same as the number of output-buffers returned"/>
+ <event counter="ARM_Mali-V500_cnt8" title="MVE-V500 Input Totals" name="Input Flushes Requested" class="absolute" description="The number of requested flushes of the input queue"/>
+ <event counter="ARM_Mali-V500_cnt9" title="MVE-V500 Input Totals" name="Input Flushes Complete" class="absolute" description="The number of completed flushes of the input queue"/>
+ <event counter="ARM_Mali-V500_cnt10" title="MVE-V500 Output Totals" name="Output Flushes Requested" class="absolute" description="The number of requested flushes of the output queue"/>
+ <event counter="ARM_Mali-V500_cnt11" title="MVE-V500 Output Totals" name="Output Flushes Complete" class="absolute" description="The number of completed flushes of the output queue"/>
+ <event counter="ARM_Mali-V500_cnt12" title="MVE-V500 Output" name="Queued Output Buffers (current)" description="The number of output-buffers that are currently queued for usage by the MVE"/>
+ <event counter="ARM_Mali-V500_cnt13" title="MVE-V500 Input" name="Queued Input Buffers (current)" description="The number of input-buffers that are currently queued for consumption by the MVE"/>
+ <event counter="ARM_Mali-V500_cnt14" title="MVE-V500 Output" name="Output Queue Flushes" description="The number of pending flushes for the MVE output-queue"/>
+ <event counter="ARM_Mali-V500_cnt15" title="MVE-V500 Input" name="Input Queue Flushes" description="The number of pending flushes for the MVE input-queue"/>
+ <event counter="ARM_Mali-V500_cnt16" title="MVE-V500 Stats" name="Errors encountered" class="absolute" description="The number of errors encountered"/>
+ <event counter="ARM_Mali-V500_cnt17" title="MVE-V500 Bandwidth" name="Bits consumed" class="absolute" description="The number of bits consumed during decode"/>
+ <event counter="ARM_Mali-V500_cnt18" title="MVE-V500 Bandwidth" name="AFBC bandwidth" class="absolute" description="The number of AFBC-encoded bytes read or written"/>
+ <event counter="ARM_Mali-V500_cnt19" title="MVE-V500 Bandwidth" name="Bandwidth (read)" class="absolute" description="The number of bytes read over the AXI bus"/>
+ <event counter="ARM_Mali-V500_cnt20" title="MVE-V500 Bandwidth" name="Bandwidth (write)" class="absolute" description="The number of bytes written over the AXI bus"/>
+ <event counter="ARM_Mali-V500_evn0" title="MVE-V500 Sessions" name="Session created" description="Generated when a session has been created"/>
+ <event counter="ARM_Mali-V500_evn1" title="MVE-V500 Sessions" name="Session destroyed" description="Generated when a session has been destroyed"/>
+ <event counter="ARM_Mali-V500_evn2" title="MVE-V500 Frames" name="Frame Processed" description="Generated when the MVE has finished processing a frame"/>
+ <event counter="ARM_Mali-V500_evn3" title="MVE-V500 Output" name="Output buffer received" description="Generated when an output buffer is returned to us from the MVE"/>
+ <event counter="ARM_Mali-V500_evn4" title="MVE-V500 Input" name="Input buffer received" description="Generated when an input buffer is returned to us from the MVE"/>
+ <event counter="ARM_Mali-V500_act0" title="MVE-V500 Parsed" name="Activity" class="activity" activity1="activity" activity_color1="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" cores="8" description="Mali-V500 Activity"/>
+ <event counter="ARM_Mali-V500_act1" title="MVE-V500 Piped" name="Activity" class="activity" activity1="activity" activity_color1="0x0000ff00" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" cores="8" description="Mali-V500 Activity"/>
+ </category>
diff --git a/tools/gator/daemon/events-Perf-Hardware.xml b/tools/gator/daemon/events-Perf-Hardware.xml
new file mode 100644
index 000000000000..423696f82420
--- /dev/null
+++ b/tools/gator/daemon/events-Perf-Hardware.xml
@@ -0,0 +1,12 @@
+ <counter_set name="Perf_Hardware_cnt" count="6"/>
+ <category name="Perf Hardware" counter_set="Perf_Hardware_cnt" per_cpu="yes" supports_event_based_sampling="yes">
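+ <!-- The event numbers below correspond to the generalized enum perf_hw_id values
+      (PERF_COUNT_HW_CPU_CYCLES = 0 through PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8) defined in k/perf_event.3.12.h later in this patch. -->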
+ <event counter="Perf_Hardware_ccnt" event="0" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="1" title="Instruction" name="Executed" description="Instruction executed"/>
+ <event event="2" title="Cache" name="References" description="Cache References"/>
+ <event event="3" title="Cache" name="Misses" description="Cache Misses"/>
+ <event event="4" title="Branch" name="Instructions" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="5" title="Branch" name="Misses" description="Branch mispredicted or not predicted"/>
+ <event event="6" title="Bus" name="Cycles" description="Bus Cycles"/>
+ <event event="7" title="Instruction" name="Stalled Frontend" description="Stalled Frontend Cycles"/>
+ <event event="8" title="Instruction" name="Stalled Backend" description="Stalled Backend Cycles"/>
+ </category>
diff --git a/tools/gator/daemon/events-Scorpion.xml b/tools/gator/daemon/events-Scorpion.xml
new file mode 100644
index 000000000000..fa716fdc4840
--- /dev/null
+++ b/tools/gator/daemon/events-Scorpion.xml
@@ -0,0 +1,107 @@
+ <counter_set name="Scorpion_cnt" count="4"/>
+ <category name="Scorpion" counter_set="Scorpion_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="Scorpion_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x06" title="Instruction" name="Memory read" description="Memory-reading instruction architecturally executed"/>
+ <event event="0x07" title="Instruction" name="Memory write" description="Memory-writing instruction architecturally executed"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x0c" title="Program Counter" name="SW change" description="Software change of PC, except by an exception, architecturally executed"/>
+ <event event="0x0d" title="Branch" name="Immediate" description="Immediate branch architecturally executed"/>
+ <event event="0x0e" title="Branch" name="Procedure Return" description="Procedure return architecturally executed (not by exceptions)"/>
+ <event event="0x0f" title="Memory" name="Unaligned access" description="Unaligned access architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x4c" title="Scorpion" name="ICACHE_EXPL_INV" description="I-cache explicit invalidates"/>
+ <event event="0x4d" title="Scorpion" name="ICACHE_MISS" description="I-cache misses"/>
+ <event event="0x4e" title="Scorpion" name="ICACHE_ACCESS" description="I-cache accesses"/>
+ <event event="0x4f" title="Scorpion" name="ICACHE_CACHEREQ_L2" description="I-cache cacheable requests to L2"/>
+ <event event="0x50" title="Scorpion" name="ICACHE_NOCACHE_L2" description="I-cache non-cacheable requests to L2"/>
+ <event event="0x51" title="Scorpion" name="HIQUP_NOPED" description="Conditional instructions HIQUPs NOPed"/>
+ <event event="0x52" title="Scorpion" name="DATA_ABORT" description="Interrupts and Exceptions Data Abort"/>
+ <event event="0x53" title="Scorpion" name="IRQ" description="Interrupts and Exceptions IRQ"/>
+ <event event="0x54" title="Scorpion" name="FIQ" description="Interrupts and Exceptions FIQ"/>
+ <event event="0x55" title="Scorpion" name="ALL_EXCPT" description="Interrupts and Exceptions All interrupts"/>
+ <event event="0x56" title="Scorpion" name="UNDEF" description="Interrupts and Exceptions Undefined"/>
+ <event event="0x57" title="Scorpion" name="SVC" description="Interrupts and Exceptions SVC"/>
+ <event event="0x58" title="Scorpion" name="SMC" description="Interrupts and Exceptions SMC"/>
+ <event event="0x59" title="Scorpion" name="PREFETCH_ABORT" description="Interrupts and Exceptions Prefetch Abort"/>
+ <event event="0x5a" title="Scorpion" name="INDEX_CHECK" description="Interrupts and Exceptions Index Check"/>
+ <event event="0x5b" title="Scorpion" name="NULL_CHECK" description="Interrupts and Exceptions Null Check"/>
+ <event event="0x5c" title="Scorpion" name="EXPL_ICIALLU" description="I-cache and BTAC Invalidates Explicit ICIALLU"/>
+ <event event="0x5d" title="Scorpion" name="IMPL_ICIALLU" description="I-cache and BTAC Invalidates Implicit ICIALLU"/>
+ <event event="0x5e" title="Scorpion" name="NONICIALLU_BTAC_INV" description="I-cache and BTAC Invalidates Non-ICIALLU BTAC Invalidate"/>
+ <event event="0x5f" title="Scorpion" name="ICIMVAU_IMPL_ICIALLU" description="I-cache and BTAC Invalidates ICIMVAU-implied ICIALLU"/>
+ <event event="0x60" title="Scorpion" name="SPIPE_ONLY_CYCLES" description="Issue S-pipe only issue cycles"/>
+ <event event="0x61" title="Scorpion" name="XPIPE_ONLY_CYCLES" description="Issue X-pipe only issue cycles"/>
+ <event event="0x62" title="Scorpion" name="DUAL_CYCLES" description="Issue dual issue cycles"/>
+ <event event="0x63" title="Scorpion" name="DISPATCH_ANY_CYCLES" description="Dispatch any dispatch cycles"/>
+ <event event="0x64" title="Scorpion" name="FIFO_FULLBLK_CMT" description="Commits Trace FIFO full Blk CMT"/>
+ <event event="0x65" title="Scorpion" name="FAIL_COND_INST" description="Conditional instructions failing conditional instrs (excluding branches)"/>
+ <event event="0x66" title="Scorpion" name="PASS_COND_INST" description="Conditional instructions passing conditional instrs (excluding branches)"/>
+ <event event="0x67" title="Scorpion" name="ALLOW_VU_CLK" description="Unit Clock Gating Allow VU Clks"/>
+ <event event="0x68" title="Scorpion" name="VU_IDLE" description="Unit Clock Gating VU Idle"/>
+ <event event="0x69" title="Scorpion" name="ALLOW_L2_CLK" description="Unit Clock Gating Allow L2 Clks"/>
+ <event event="0x6a" title="Scorpion" name="L2_IDLE" description="Unit Clock Gating L2 Idle"/>
+ <event event="0x6b" title="Scorpion" name="DTLB_IMPL_INV_SCTLR_DACR" description="DTLB implicit invalidates writes to SCTLR and DACR"/>
+ <event event="0x6c" title="Scorpion" name="DTLB_EXPL_INV" description="DTLB explicit invalidates"/>
+ <event event="0x6d" title="Scorpion" name="DTLB_MISS" description="DTLB misses"/>
+ <event event="0x6e" title="Scorpion" name="DTLB_ACCESS" description="DTLB accesses"/>
+ <event event="0x6f" title="Scorpion" name="ITLB_MISS" description="ITLB misses"/>
+ <event event="0x70" title="Scorpion" name="ITLB_IMPL_INV" description="ITLB implicit ITLB invalidates"/>
+ <event event="0x71" title="Scorpion" name="ITLB_EXPL_INV" description="ITLB explicit ITLB invalidates"/>
+ <event event="0x72" title="Scorpion" name="UTLB_D_MISS" description="UTLB d-side misses"/>
+ <event event="0x73" title="Scorpion" name="UTLB_D_ACCESS" description="UTLB d-side accesses"/>
+ <event event="0x74" title="Scorpion" name="UTLB_I_MISS" description="UTLB i-side misses"/>
+ <event event="0x75" title="Scorpion" name="UTLB_I_ACCESS" description="UTLB i-side accesses"/>
+ <event event="0x76" title="Scorpion" name="UTLB_INV_ASID" description="UTLB invalidate by ASID"/>
+ <event event="0x77" title="Scorpion" name="UTLB_INV_MVA" description="UTLB invalidate by MVA"/>
+ <event event="0x78" title="Scorpion" name="UTLB_INV_ALL" description="UTLB invalidate all"/>
+ <event event="0x79" title="Scorpion" name="S2_HOLD_RDQ_UNAVAIL" description="S2 hold RDQ unavail"/>
+ <event event="0x7a" title="Scorpion" name="S2_HOLD" description="S2 hold"/>
+ <event event="0x7b" title="Scorpion" name="S2_HOLD_DEV_OP" description="S2 hold device op"/>
+ <event event="0x7c" title="Scorpion" name="S2_HOLD_ORDER" description="S2 hold strongly ordered op"/>
+ <event event="0x7d" title="Scorpion" name="S2_HOLD_BARRIER" description="S2 hold barrier"/>
+ <event event="0x7e" title="Scorpion" name="VIU_DUAL_CYCLE" description="Scorpion VIU dual cycle"/>
+ <event event="0x7f" title="Scorpion" name="VIU_SINGLE_CYCLE" description="Scorpion VIU single cycle"/>
+ <event event="0x80" title="Scorpion" name="VX_PIPE_WAR_STALL_CYCLES" description="Scorpion VX pipe WAR cycles"/>
+ <event event="0x81" title="Scorpion" name="VX_PIPE_WAW_STALL_CYCLES" description="Scorpion VX pipe WAW cycles"/>
+ <event event="0x82" title="Scorpion" name="VX_PIPE_RAW_STALL_CYCLES" description="Scorpion VX pipe RAW cycles"/>
+ <event event="0x83" title="Scorpion" name="VX_PIPE_LOAD_USE_STALL" description="Scorpion VX pipe load use stall"/>
+ <event event="0x84" title="Scorpion" name="VS_PIPE_WAR_STALL_CYCLES" description="Scorpion VS pipe WAR stall cycles"/>
+ <event event="0x85" title="Scorpion" name="VS_PIPE_WAW_STALL_CYCLES" description="Scorpion VS pipe WAW stall cycles"/>
+ <event event="0x86" title="Scorpion" name="VS_PIPE_RAW_STALL_CYCLES" description="Scorpion VS pipe RAW stall cycles"/>
+ <event event="0x87" title="Scorpion" name="EXCEPTIONS_INV_OPERATION" description="Scorpion invalid operation exceptions"/>
+ <event event="0x88" title="Scorpion" name="EXCEPTIONS_DIV_BY_ZERO" description="Scorpion divide by zero exceptions"/>
+ <event event="0x89" title="Scorpion" name="COND_INST_FAIL_VX_PIPE" description="Scorpion conditional instruction fail VX pipe"/>
+ <event event="0x8a" title="Scorpion" name="COND_INST_FAIL_VS_PIPE" description="Scorpion conditional instruction fail VS pipe"/>
+ <event event="0x8b" title="Scorpion" name="EXCEPTIONS_OVERFLOW" description="Scorpion overflow exceptions"/>
+ <event event="0x8c" title="Scorpion" name="EXCEPTIONS_UNDERFLOW" description="Scorpion underflow exceptions"/>
+ <event event="0x8d" title="Scorpion" name="EXCEPTIONS_DENORM" description="Scorpion denorm exceptions"/>
+ <event event="0x8e" title="Scorpion" name="BANK_AB_HIT" description="L2 hit rates bank A/B hits"/>
+ <event event="0x8f" title="Scorpion" name="BANK_AB_ACCESS" description="L2 hit rates bank A/B accesses"/>
+ <event event="0x90" title="Scorpion" name="BANK_CD_HIT" description="L2 hit rates bank C/D hits"/>
+ <event event="0x91" title="Scorpion" name="BANK_CD_ACCESS" description="L2 hit rates bank C/D accesses"/>
+ <event event="0x92" title="Scorpion" name="BANK_AB_DSIDE_HIT" description="L2 hit rates bank A/B d-side hits"/>
+ <event event="0x93" title="Scorpion" name="BANK_AB_DSIDE_ACCESS" description="L2 hit rates bank A/B d-side accesses"/>
+ <event event="0x94" title="Scorpion" name="BANK_CD_DSIDE_HIT" description="L2 hit rates bank C/D d-side hits"/>
+ <event event="0x95" title="Scorpion" name="BANK_CD_DSIDE_ACCESS" description="L2 hit rates bank C/D d-side accesses"/>
+ <event event="0x96" title="Scorpion" name="BANK_AB_ISIDE_HIT" description="L2 hit rates bank A/B i-side hits"/>
+ <event event="0x97" title="Scorpion" name="BANK_AB_ISIDE_ACCESS" description="L2 hit rates bank A/B i-side accesses"/>
+ <event event="0x98" title="Scorpion" name="BANK_CD_ISIDE_HIT" description="L2 hit rates bank C/D i-side hits"/>
+ <event event="0x99" title="Scorpion" name="BANK_CD_ISIDE_ACCESS" description="L2 hit rates bank C/D i-side accesses"/>
+ <event event="0x9a" title="Scorpion" name="ISIDE_RD_WAIT" description="Fills and castouts cycles that i-side RD requests wait on data from bus"/>
+ <event event="0x9b" title="Scorpion" name="DSIDE_RD_WAIT" description="Fills and castouts cycles that d-side RD requests wait on data from bus"/>
+ <event event="0x9c" title="Scorpion" name="BANK_BYPASS_WRITE" description="Fills and castouts bank bypass writes"/>
+ <event event="0x9d" title="Scorpion" name="BANK_AB_NON_CASTOUT" description="Fills and castouts bank A/B non-castout writes to bus"/>
+ <event event="0x9e" title="Scorpion" name="BANK_AB_L2_CASTOUT" description="Fills and castouts bank A/B L2 castouts (granules)"/>
+ <event event="0x9f" title="Scorpion" name="BANK_CD_NON_CASTOUT" description="Fills and castouts bank C/D non-castout writes to bus"/>
+ <event event="0xa0" title="Scorpion" name="BANK_CD_L2_CASTOUT" description="Fills and castouts bank C/D L2 castouts (granules)"/>
+ </category>
diff --git a/tools/gator/daemon/events-ScorpionMP.xml b/tools/gator/daemon/events-ScorpionMP.xml
new file mode 100644
index 000000000000..c648ccefb287
--- /dev/null
+++ b/tools/gator/daemon/events-ScorpionMP.xml
@@ -0,0 +1,90 @@
+ <counter_set name="ScorpionMP_cnt" count="4"/>
+ <category name="ScorpionMP" counter_set="ScorpionMP_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ScorpionMP_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x06" title="Instruction" name="Memory read" description="Memory-reading instruction architecturally executed"/>
+ <event event="0x07" title="Instruction" name="Memory write" description="Memory-writing instruction architecturally executed"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x0c" title="Program Counter" name="SW change" description="Software change of PC, except by an exception, architecturally executed"/>
+ <event event="0x0d" title="Branch" name="Immediate" description="Immediate branch architecturally executed"/>
+ <event event="0x0e" title="Branch" name="Procedure Return" description="Procedure return architecturally executed (not by exceptions)"/>
+ <event event="0x0f" title="Memory" name="Unaligned access" description="Unaligned access architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x4c" title="Scorpion" name="ICACHE_EXPL_INV" description="I-cache explicit invalidates"/>
+ <event event="0x4d" title="Scorpion" name="ICACHE_MISS" description="I-cache misses"/>
+ <event event="0x4e" title="Scorpion" name="ICACHE_ACCESS" description="I-cache accesses"/>
+ <event event="0x4f" title="Scorpion" name="ICACHE_CACHEREQ_L2" description="I-cache cacheable requests to L2"/>
+ <event event="0x50" title="Scorpion" name="ICACHE_NOCACHE_L2" description="I-cache non-cacheable requests to L2"/>
+ <event event="0x51" title="Scorpion" name="HIQUP_NOPED" description="Conditional instructions HIQUPs NOPed"/>
+ <event event="0x52" title="Scorpion" name="DATA_ABORT" description="Interrupts and Exceptions Data Abort"/>
+ <event event="0x53" title="Scorpion" name="IRQ" description="Interrupts and Exceptions IRQ"/>
+ <event event="0x54" title="Scorpion" name="FIQ" description="Interrupts and Exceptions FIQ"/>
+ <event event="0x55" title="Scorpion" name="ALL_EXCPT" description="Interrupts and Exceptions All interrupts"/>
+ <event event="0x56" title="Scorpion" name="UNDEF" description="Interrupts and Exceptions Undefined"/>
+ <event event="0x57" title="Scorpion" name="SVC" description="Interrupts and Exceptions SVC"/>
+ <event event="0x58" title="Scorpion" name="SMC" description="Interrupts and Exceptions SMC"/>
+ <event event="0x59" title="Scorpion" name="PREFETCH_ABORT" description="Interrupts and Exceptions Prefetch Abort"/>
+ <event event="0x5a" title="Scorpion" name="INDEX_CHECK" description="Interrupts and Exceptions Index Check"/>
+ <event event="0x5b" title="Scorpion" name="NULL_CHECK" description="Interrupts and Exceptions Null Check"/>
+ <event event="0x5c" title="Scorpion" name="EXPL_ICIALLU" description="I-cache and BTAC Invalidates Explicit ICIALLU"/>
+ <event event="0x5d" title="Scorpion" name="IMPL_ICIALLU" description="I-cache and BTAC Invalidates Implicit ICIALLU"/>
+ <event event="0x5e" title="Scorpion" name="NONICIALLU_BTAC_INV" description="I-cache and BTAC Invalidates Non-ICIALLU BTAC Invalidate"/>
+ <event event="0x5f" title="Scorpion" name="ICIMVAU_IMPL_ICIALLU" description="I-cache and BTAC Invalidates ICIMVAU-implied ICIALLU"/>
+ <event event="0x60" title="Scorpion" name="SPIPE_ONLY_CYCLES" description="Issue S-pipe only issue cycles"/>
+ <event event="0x61" title="Scorpion" name="XPIPE_ONLY_CYCLES" description="Issue X-pipe only issue cycles"/>
+ <event event="0x62" title="Scorpion" name="DUAL_CYCLES" description="Issue dual issue cycles"/>
+ <event event="0x63" title="Scorpion" name="DISPATCH_ANY_CYCLES" description="Dispatch any dispatch cycles"/>
+ <event event="0x64" title="Scorpion" name="FIFO_FULLBLK_CMT" description="Commits Trace FIFO full Blk CMT"/>
+ <event event="0x65" title="Scorpion" name="FAIL_COND_INST" description="Conditional instructions failing conditional instrs (excluding branches)"/>
+ <event event="0x66" title="Scorpion" name="PASS_COND_INST" description="Conditional instructions passing conditional instrs (excluding branches)"/>
+ <event event="0x67" title="Scorpion" name="ALLOW_VU_CLK" description="Unit Clock Gating Allow VU Clks"/>
+ <event event="0x68" title="Scorpion" name="VU_IDLE" description="Unit Clock Gating VU Idle"/>
+ <event event="0x69" title="Scorpion" name="ALLOW_L2_CLK" description="Unit Clock Gating Allow L2 Clks"/>
+ <event event="0x6a" title="Scorpion" name="L2_IDLE" description="Unit Clock Gating L2 Idle"/>
+ <event event="0x6b" title="Scorpion" name="DTLB_IMPL_INV_SCTLR_DACR" description="DTLB implicit invalidates writes to SCTLR and DACR"/>
+ <event event="0x6c" title="Scorpion" name="DTLB_EXPL_INV" description="DTLB explicit invalidates"/>
+ <event event="0x6d" title="Scorpion" name="DTLB_MISS" description="DTLB misses"/>
+ <event event="0x6e" title="Scorpion" name="DTLB_ACCESS" description="DTLB accesses"/>
+ <event event="0x6f" title="Scorpion" name="ITLB_MISS" description="ITLB misses"/>
+ <event event="0x70" title="Scorpion" name="ITLB_IMPL_INV" description="ITLB implicit ITLB invalidates"/>
+ <event event="0x71" title="Scorpion" name="ITLB_EXPL_INV" description="ITLB explicit ITLB invalidates"/>
+ <event event="0x72" title="Scorpion" name="UTLB_D_MISS" description="UTLB d-side misses"/>
+ <event event="0x73" title="Scorpion" name="UTLB_D_ACCESS" description="UTLB d-side accesses"/>
+ <event event="0x74" title="Scorpion" name="UTLB_I_MISS" description="UTLB i-side misses"/>
+ <event event="0x75" title="Scorpion" name="UTLB_I_ACCESS" description="UTLB i-side accesses"/>
+ <event event="0x76" title="Scorpion" name="UTLB_INV_ASID" description="UTLB invalidate by ASID"/>
+ <event event="0x77" title="Scorpion" name="UTLB_INV_MVA" description="UTLB invalidate by MVA"/>
+ <event event="0x78" title="Scorpion" name="UTLB_INV_ALL" description="UTLB invalidate all"/>
+ <event event="0x79" title="Scorpion" name="S2_HOLD_RDQ_UNAVAIL" description="S2 hold RDQ unavail"/>
+ <event event="0x7a" title="Scorpion" name="S2_HOLD" description="S2 hold"/>
+ <event event="0x7b" title="Scorpion" name="S2_HOLD_DEV_OP" description="S2 hold device op"/>
+ <event event="0x7c" title="Scorpion" name="S2_HOLD_ORDER" description="S2 hold strongly ordered op"/>
+ <event event="0x7d" title="Scorpion" name="S2_HOLD_BARRIER" description="S2 hold barrier"/>
+ <event event="0x7e" title="Scorpion" name="VIU_DUAL_CYCLE" description="Scorpion VIU dual cycle"/>
+ <event event="0x7f" title="Scorpion" name="VIU_SINGLE_CYCLE" description="Scorpion VIU single cycle"/>
+ <event event="0x80" title="Scorpion" name="VX_PIPE_WAR_STALL_CYCLES" description="Scorpion VX pipe WAR cycles"/>
+ <event event="0x81" title="Scorpion" name="VX_PIPE_WAW_STALL_CYCLES" description="Scorpion VX pipe WAW cycles"/>
+ <event event="0x82" title="Scorpion" name="VX_PIPE_RAW_STALL_CYCLES" description="Scorpion VX pipe RAW cycles"/>
+ <event event="0x83" title="Scorpion" name="VX_PIPE_LOAD_USE_STALL" description="Scorpion VX pipe load use stall"/>
+ <event event="0x84" title="Scorpion" name="VS_PIPE_WAR_STALL_CYCLES" description="Scorpion VS pipe WAR stall cycles"/>
+ <event event="0x85" title="Scorpion" name="VS_PIPE_WAW_STALL_CYCLES" description="Scorpion VS pipe WAW stall cycles"/>
+ <event event="0x86" title="Scorpion" name="VS_PIPE_RAW_STALL_CYCLES" description="Scorpion VS pipe RAW stall cycles"/>
+ <event event="0x87" title="Scorpion" name="EXCEPTIONS_INV_OPERATION" description="Scorpion invalid operation exceptions"/>
+ <event event="0x88" title="Scorpion" name="EXCEPTIONS_DIV_BY_ZERO" description="Scorpion divide by zero exceptions"/>
+ <event event="0x89" title="Scorpion" name="COND_INST_FAIL_VX_PIPE" description="Scorpion conditional instruction fail VX pipe"/>
+ <event event="0x8a" title="Scorpion" name="COND_INST_FAIL_VS_PIPE" description="Scorpion conditional instruction fail VS pipe"/>
+ <event event="0x8b" title="Scorpion" name="EXCEPTIONS_OVERFLOW" description="Scorpion overflow exceptions"/>
+ <event event="0x8c" title="Scorpion" name="EXCEPTIONS_UNDERFLOW" description="Scorpion underflow exceptions"/>
+ <event event="0x8d" title="Scorpion" name="EXCEPTIONS_DENORM" description="Scorpion denorm exceptions"/>
+ <event event="0x8e" title="ScorpionMP" name="NUM_BARRIERS" description="Barriers"/>
+ <event event="0x8f" title="ScorpionMP" name="BARRIER_CYCLES" description="Barrier cycles"/>
+ </category>
diff --git a/tools/gator/daemon/events-ftrace.xml b/tools/gator/daemon/events-ftrace.xml
new file mode 100644
index 000000000000..33ab7aab2196
--- /dev/null
+++ b/tools/gator/daemon/events-ftrace.xml
@@ -0,0 +1,7 @@
+ <category name="Ftrace">
+ <!-- the counter attribute must start with ftrace_ and be unique -->
+ <!-- the value captured by the () group in regex is the value shown -->
+ <!--
+ <event counter="ftrace_trace_marker_numbers" title="ftrace" name="trace_marker" class="absolute" regex="([0-9]+)" description="Numbers written to /sys/kernel/debug/tracing/trace_marker, ex: echo 42 > /sys/kernel/debug/tracing/trace_marker"/>
+ -->
+ </category>
diff --git a/tools/gator/daemon/events_footer.xml b/tools/gator/daemon/events_footer.xml
new file mode 100644
index 000000000000..cd2b44665bab
--- /dev/null
+++ b/tools/gator/daemon/events_footer.xml
@@ -0,0 +1 @@
+</events>
diff --git a/tools/gator/daemon/events_header.xml b/tools/gator/daemon/events_header.xml
new file mode 100644
index 000000000000..38ec4c03246e
--- /dev/null
+++ b/tools/gator/daemon/events_header.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<events>
diff --git a/tools/gator/daemon/k/perf_event.3.12.h b/tools/gator/daemon/k/perf_event.3.12.h
new file mode 100644
index 000000000000..e886c48cadf6
--- /dev/null
+++ b/tools/gator/daemon/k/perf_event.3.12.h
@@ -0,0 +1,792 @@
+/*
+ * Performance events:
+ *
+ * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
+ *
+ * Data type definitions, declarations, prototypes.
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_EVENT_H
+#define _LINUX_PERF_EVENT_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+ PERF_TYPE_HW_CACHE = 3,
+ PERF_TYPE_RAW = 4,
+ PERF_TYPE_BREAKPOINT = 5,
+
+ PERF_TYPE_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized performance event event_id types, used by the
+ * attr.config parameter of the sys_perf_event_open()
+ * syscall:
+ */
+enum perf_hw_id {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_HW_CPU_CYCLES = 0,
+ PERF_COUNT_HW_INSTRUCTIONS = 1,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_MISSES = 3,
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BUS_CYCLES = 6,
+ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
+ PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
+ PERF_COUNT_HW_REF_CPU_CYCLES = 9,
+
+ PERF_COUNT_HW_MAX, /* non-ABI */
+};
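As an aside, here is a minimal sketch of driving the ABI above from user space (not part of this patch): it counts CPU cycles for the calling process via PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES, using the syscall wrapper from the perf_event_open(2) man page and the PERF_EVENT_IOC_* ioctls defined further down in this header. Error handling is trimmed for brevity.

    #include <linux/perf_event.h>
    #include <asm/unistd.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdint.h>
    #include <stdio.h>

    /* No glibc wrapper exists for perf_event_open(); invoke it directly. */
    static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                int cpu, int group_fd, unsigned long flags)
    {
            return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    }

    int main(void)
    {
            struct perf_event_attr attr;
            uint64_t count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.type = PERF_TYPE_HARDWARE;
            attr.size = sizeof(attr);
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.disabled = 1;          /* created stopped, enabled below */
            attr.exclude_kernel = 1;    /* user-space cycles only */

            fd = perf_event_open(&attr, 0 /* self */, -1 /* any cpu */, -1, 0);
            ioctl(fd, PERF_EVENT_IOC_RESET, 0);
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            /* ... workload under measurement ... */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
            read(fd, &count, sizeof(count));  /* read_format == 0: one u64 */
            printf("cycles: %llu\n", (unsigned long long)count);
            close(fd);
            return 0;
    }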
+
+/*
+ * Generalized hardware cache events:
+ *
+ * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
+ * { read, write, prefetch } x
+ * { accesses, misses }
+ */
+enum perf_hw_cache_id {
+ PERF_COUNT_HW_CACHE_L1D = 0,
+ PERF_COUNT_HW_CACHE_L1I = 1,
+ PERF_COUNT_HW_CACHE_LL = 2,
+ PERF_COUNT_HW_CACHE_DTLB = 3,
+ PERF_COUNT_HW_CACHE_ITLB = 4,
+ PERF_COUNT_HW_CACHE_BPU = 5,
+ PERF_COUNT_HW_CACHE_NODE = 6,
+
+ PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+ PERF_COUNT_HW_CACHE_OP_READ = 0,
+ PERF_COUNT_HW_CACHE_OP_WRITE = 1,
+ PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
+
+ PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
+ PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
+
+ PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
+};
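As a worked example (an aside, not part of the patch): for PERF_TYPE_HW_CACHE events the three enums above are packed into attr.config as id | (op << 8) | (result << 16), as documented in perf_event_open(2). L1 data-cache read misses, reusing the attr from the sketch above, would be:

    attr.type = PERF_TYPE_HW_CACHE;
    attr.config = PERF_COUNT_HW_CACHE_L1D |
                  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                  (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);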
+
+/*
+ * Special "software" events provided by the kernel, even if the hardware
+ * does not support performance events. These events measure various
+ * physical and software conditions in the kernel (and allow them to be
+ * profiled as well):
+ */
+enum perf_sw_ids {
+ PERF_COUNT_SW_CPU_CLOCK = 0,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
+ PERF_COUNT_SW_DUMMY = 9,
+
+ PERF_COUNT_SW_MAX, /* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_event_sample_format {
+ PERF_SAMPLE_IP = 1U << 0,
+ PERF_SAMPLE_TID = 1U << 1,
+ PERF_SAMPLE_TIME = 1U << 2,
+ PERF_SAMPLE_ADDR = 1U << 3,
+ PERF_SAMPLE_READ = 1U << 4,
+ PERF_SAMPLE_CALLCHAIN = 1U << 5,
+ PERF_SAMPLE_ID = 1U << 6,
+ PERF_SAMPLE_CPU = 1U << 7,
+ PERF_SAMPLE_PERIOD = 1U << 8,
+ PERF_SAMPLE_STREAM_ID = 1U << 9,
+ PERF_SAMPLE_RAW = 1U << 10,
+ PERF_SAMPLE_BRANCH_STACK = 1U << 11,
+ PERF_SAMPLE_REGS_USER = 1U << 12,
+ PERF_SAMPLE_STACK_USER = 1U << 13,
+ PERF_SAMPLE_WEIGHT = 1U << 14,
+ PERF_SAMPLE_DATA_SRC = 1U << 15,
+ PERF_SAMPLE_IDENTIFIER = 1U << 16,
+
+ PERF_SAMPLE_MAX = 1U << 17, /* non-ABI */
+};
+
+/*
+ * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK
+ * is set.
+ *
+ * If the user does not pass priv level information via branch_sample_type,
+ * the kernel uses the event's priv level. Branch and event priv levels do
+ * not have to match. Branch priv level is checked for permissions.
+ *
+ * The branch types can be combined; however, BRANCH_ANY covers all
+ * types of branches and therefore supersedes all the other types.
+ */
+enum perf_branch_sample_type {
+ PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */
+
+ PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */
+};
+
+#define PERF_SAMPLE_BRANCH_PLM_ALL \
+ (PERF_SAMPLE_BRANCH_USER|\
+ PERF_SAMPLE_BRANCH_KERNEL|\
+ PERF_SAMPLE_BRANCH_HV)
+
+/*
+ * Values to determine ABI of the registers dump.
+ */
+enum perf_sample_regs_abi {
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
+};
+
+/*
+ * The format of the data returned by read() on a perf event fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
+ *
+ * { u64 nr;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * } && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_event_read_format {
+ PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
+ PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
+ PERF_FORMAT_ID = 1U << 2,
+ PERF_FORMAT_GROUP = 1U << 3,
+
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
+};
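As an aside, the pseudo-struct above can be made concrete for one common combination. A sketch (not part of the patch) for read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID, where group_fd is assumed to be a group-leader fd; the time_enabled/time_running fields are absent because those bits are not set:

    struct read_group {
            uint64_t nr;                /* number of counters in the group */
            struct {
                    uint64_t value;     /* counter value */
                    uint64_t id;        /* present because of PERF_FORMAT_ID */
            } cntr[];                   /* cntr[nr] */
    };

    char buf[4096];
    struct read_group *rg = (struct read_group *)buf;
    ssize_t n = read(group_fd, buf, sizeof(buf));
    for (uint64_t i = 0; n > 0 && i < rg->nr; i++)
            printf("counter id %llu = %llu\n",
                   (unsigned long long)rg->cntr[i].id,
                   (unsigned long long)rg->cntr[i].value);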
+
+#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
+#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
+ /* add: sample_stack_user */
+
+/*
+ * Hardware event_id to monitor via a performance monitoring event:
+ */
+struct perf_event_attr {
+
+ /*
+ * Major type: hardware/software/tracepoint/etc.
+ */
+ __u32 type;
+
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ __u32 size;
+
+ /*
+ * Type specific configuration information.
+ */
+ __u64 config;
+
+ union {
+ __u64 sample_period;
+ __u64 sample_freq;
+ };
+
+ __u64 sample_type;
+ __u64 read_format;
+
+ __u64 disabled : 1, /* off by default */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+ exclude_idle : 1, /* don't count when idle */
+ mmap : 1, /* include mmap data */
+ comm : 1, /* include comm data */
+ freq : 1, /* use freq, not period */
+ inherit_stat : 1, /* per task counts */
+ enable_on_exec : 1, /* next exec enables */
+ task : 1, /* trace fork/exit */
+ watermark : 1, /* wakeup_watermark */
+ /*
+ * precise_ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ precise_ip : 2, /* skid constraint */
+ mmap_data : 1, /* non-exec mmap data */
+ sample_id_all : 1, /* sample_type all events */
+
+ exclude_host : 1, /* don't count in host */
+ exclude_guest : 1, /* don't count in guest */
+
+ exclude_callchain_kernel : 1, /* exclude kernel callchains */
+ exclude_callchain_user : 1, /* exclude user callchains */
+ mmap2 : 1, /* include mmap with inode data */
+
+ __reserved_1 : 40;
+
+ union {
+ __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_watermark; /* bytes before wakeup */
+ };
+
+ __u32 bp_type;
+ union {
+ __u64 bp_addr;
+ __u64 config1; /* extension of config */
+ };
+ union {
+ __u64 bp_len;
+ __u64 config2; /* extension of config1 */
+ };
+ __u64 branch_sample_type; /* enum perf_branch_sample_type */
+
+ /*
+ * Defines set of user regs to dump on samples.
+ * See asm/perf_regs.h for details.
+ */
+ __u64 sample_regs_user;
+
+ /*
+ * Defines size of the user stack to dump on samples.
+ */
+ __u32 sample_stack_user;
+
+ /* Align to u64. */
+ __u32 __reserved_2;
+};
+
+#define perf_flags(attr) (*(&(attr)->read_format + 1))
+
+/*
+ * Ioctls that can be done on a perf event fd:
+ */
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
+
+enum perf_event_ioc_flags {
+ PERF_IOC_FLAG_GROUP = 1U << 0,
+};
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_event_mmap_page {
+ __u32 version; /* version number of this structure */
+ __u32 compat_version; /* lowest version this is compat with */
+
+ /*
+ * Bits needed to read the hw events in user-space.
+ *
+ * u32 seq, time_mult, time_shift, idx, width;
+ * u64 count, enabled, running;
+ * u64 cyc, time_offset;
+ * s64 pmc = 0;
+ *
+ * do {
+ * seq = pc->lock;
+ * barrier()
+ *
+ * enabled = pc->time_enabled;
+ * running = pc->time_running;
+ *
+ * if (pc->cap_user_time && enabled != running) {
+ * cyc = rdtsc();
+ * time_offset = pc->time_offset;
+ * time_mult = pc->time_mult;
+ * time_shift = pc->time_shift;
+ * }
+ *
+ * idx = pc->index;
+ * count = pc->offset;
+ * if (pc->cap_user_rdpmc && idx) {
+ * width = pc->pmc_width;
+ * pmc = rdpmc(idx - 1);
+ * }
+ *
+ * barrier();
+ * } while (pc->lock != seq);
+ *
+ * NOTE: for obvious reasons this only works on self-monitoring
+ * processes.
+ */
+ __u32 lock; /* seqlock for synchronization */
+ __u32 index; /* hardware event identifier */
+ __s64 offset; /* add to hardware event value */
+ __u64 time_enabled; /* time event active */
+ __u64 time_running; /* time event on cpu */
+ union {
+ __u64 capabilities;
+ struct {
+ __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
+ cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
+
+ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
+ cap_user_time : 1, /* The time_* fields are used */
+ cap_user_time_zero : 1, /* The time_zero field is used */
+ cap_____res : 59;
+ };
+ };
+
+ /*
+ * If cap_user_rdpmc is set, this field provides the bit-width of the value
+ * read using the rdpmc() or equivalent instruction. This can be used
+ * to sign extend the result like:
+ *
+ * pmc <<= 64 - width;
+ * pmc >>= 64 - width; // signed shift right
+ * count += pmc;
+ */
+ __u16 pmc_width;
+
+ /*
+ * If cap_user_time is set, the fields below can be used to compute the time
+ * delta since time_enabled (in ns) using rdtsc or similar.
+ *
+ * u64 quot, rem;
+ * u64 delta;
+ *
+ * quot = (cyc >> time_shift);
+ * rem = cyc & ((1 << time_shift) - 1);
+ * delta = time_offset + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ *
+ * Where time_offset, time_mult, time_shift and cyc are read in the
+ * seqcount loop described above. This delta can then be added to
+ * enabled and possibly running (if idx), improving the scaling:
+ *
+ * enabled += delta;
+ * if (idx)
+ * running += delta;
+ *
+ * quot = count / running;
+ * rem = count % running;
+ * count = quot * enabled + (rem * enabled) / running;
+ */
+ __u16 time_shift;
+ __u32 time_mult;
+ __u64 time_offset;
+ /*
+ * If cap_user_time_zero is set, the hardware clock (e.g. TSC) can be calculated
+ * from sample timestamps.
+ *
+ * time = timestamp - time_zero;
+ * quot = time / time_mult;
+ * rem = time % time_mult;
+ * cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+ *
+ * And vice versa:
+ *
+ * quot = cyc >> time_shift;
+ * rem = cyc & ((1 << time_shift) - 1);
+ * timestamp = time_zero + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ */
+ __u64 time_zero;
+ __u32 size; /* Header size up to __reserved[] fields. */
+
+ /*
+ * Hole for extension of the self monitor capabilities
+ */
+
+ __u8 __reserved[118*8+4]; /* align to 1k. */
+
+ /*
+ * Control data for the mmap() data buffer.
+ *
+ * User-space reading the @data_head value should issue an smp_rmb(),
+ * after reading this value.
+ *
+ * When the mapping is PROT_WRITE the @data_tail value should be
+ * written by userspace to reflect the last read data, after issuing
+ * an smp_mb() to separate the data read from the ->data_tail store.
+ * In this case the kernel will not overwrite unread data.
+ *
+ * See perf_output_put_handle() for the data ordering.
+ */
+ __u64 data_head; /* head in the data section */
+ __u64 data_tail; /* user-space written tail */
+};
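As an aside, a sketch of the @data_head/@data_tail protocol described in the comment above (not part of the patch). It assumes base points at the mmap()ed area (one metadata page followed by a power-of-two data area of data_size bytes, mapped PROT_WRITE) and uses GCC __atomic builtins to stand in for the smp_rmb()/smp_mb() the comment calls for; struct perf_event_header is defined just below:

    struct perf_event_mmap_page *pc = base;
    char *data = (char *)base + page_size;

    /* Acquire load pairs with the kernel's publish of data_head. */
    uint64_t head = __atomic_load_n(&pc->data_head, __ATOMIC_ACQUIRE);
    uint64_t tail = pc->data_tail;

    while (tail < head) {
            struct perf_event_header *eh = (struct perf_event_header *)
                    (data + (tail & (data_size - 1)));
            /* decode eh->type (PERF_RECORD_*), eh->size bytes; a robust
               reader must also handle records that wrap past the end of
               the data area */
            tail += eh->size;
    }
    /* Release store so the kernel sees our reads as complete before it
       reuses the space. */
    __atomic_store_n(&pc->data_tail, tail, __ATOMIC_RELEASE);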
+
+#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
+#define PERF_RECORD_MISC_KERNEL (1 << 0)
+#define PERF_RECORD_MISC_USER (2 << 0)
+#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
+#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
+#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
+
+#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
+/*
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ */
+#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+/*
+ * Reserve the last bit to indicate some extended misc field
+ */
+#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
+
+struct perf_event_header {
+ __u32 type;
+ __u16 misc;
+ __u16 size;
+};
+
+enum perf_event_type {
+
+ /*
+ * If perf_event_attr.sample_id_all is set then all event types will
+ * carry the sample_type-selected fields related to where/when
+ * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
+ * IDENTIFIER), as described for PERF_RECORD_SAMPLE below. They are
+ * stashed just after the perf_event_header, following the fields
+ * already present for the record type, i.e. at the end of the
+ * payload. That way a newer perf.data file will be supported by
+ * older perf tools, with these new optional fields being ignored.
+ *
+ * struct sample_id {
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * } && perf_event_attr::sample_id_all
+ *
+ * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The
+ * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
+ * relative to header.size.
+ */
+
+ /*
+ * The MMAP events record the PROT_EXEC mappings so that we can
+ * correlate userspace IPs to code. They have the following structure:
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP = 1,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 id;
+ * u64 lost;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_LOST = 2,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * char comm[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_COMM = 3,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_EXIT = 4,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 time;
+ * u64 id;
+ * u64 stream_id;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_THROTTLE = 5,
+ PERF_RECORD_UNTHROTTLE = 6,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_FORK = 7,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, tid;
+ *
+ * struct read_format values;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_READ = 8,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * #
+ * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
+ * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position
+ * # is fixed relative to header.
+ * #
+ *
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * { u64 ip; } && PERF_SAMPLE_IP
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 addr; } && PERF_SAMPLE_ADDR
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 period; } && PERF_SAMPLE_PERIOD
+ *
+ * { struct read_format values; } && PERF_SAMPLE_READ
+ *
+ * { u64 nr,
+ * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
+ *
+ * #
+ * # The RAW record below is opaque data wrt the ABI
+ * #
+ * # That is, the ABI doesn't make any promises with respect to
+ * # the stability of its content, it may vary depending
+ * # on event, hardware, kernel version and phase of
+ * # the moon.
+ * #
+ * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ * #
+ *
+ * { u32 size;
+ * char data[size];}&& PERF_SAMPLE_RAW
+ *
+ * { u64 nr;
+ * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ *
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ *
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ *
+ * { u64 weight; } && PERF_SAMPLE_WEIGHT
+ * { u64 data_src; } && PERF_SAMPLE_DATA_SRC
+ * };
+ */
+ PERF_RECORD_SAMPLE = 9,
+
+ /*
+ * The MMAP2 records are an augmented version of MMAP; they add
+ * maj, min and ino numbers that uniquely identify each mapping.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * u32 maj;
+ * u32 min;
+ * u64 ino;
+ * u64 ino_generation;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP2 = 10,
+
+ PERF_RECORD_MAX, /* non-ABI */
+};
+
+#define PERF_MAX_STACK_DEPTH 127
+
+enum perf_callchain_context {
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
+
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+
+ PERF_CONTEXT_MAX = (__u64)-4095,
+};
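As an aside, these sentinels appear inline in the ips[] array of a PERF_SAMPLE_CALLCHAIN sample, marking which context the following addresses belong to. A sketch (not part of the patch; nr and ips are assumed to come from an already-decoded PERF_RECORD_SAMPLE):

    for (uint64_t i = 0; i < nr; i++) {
            if (ips[i] >= PERF_CONTEXT_MAX) {
                    /* a context marker, e.g. PERF_CONTEXT_KERNEL or
                       PERF_CONTEXT_USER, not an instruction pointer */
                    continue;
            }
            /* ips[i] is an instruction pointer in the current context */
    }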
+
+#define PERF_FLAG_FD_NO_GROUP (1U << 0)
+#define PERF_FLAG_FD_OUTPUT (1U << 1)
+#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
+
+union perf_mem_data_src {
+ __u64 val;
+ struct {
+ __u64 mem_op:5, /* type of opcode */
+ mem_lvl:14, /* memory hierarchy level */
+ mem_snoop:5, /* snoop mode */
+ mem_lock:2, /* lock instr */
+ mem_dtlb:7, /* tlb access */
+ mem_rsvd:31;
+ };
+};
+
+/* type of opcode (load/store/prefetch,code) */
+#define PERF_MEM_OP_NA 0x01 /* not available */
+#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
+#define PERF_MEM_OP_STORE 0x04 /* store instruction */
+#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
+#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
+#define PERF_MEM_OP_SHIFT 0
+
+/* memory hierarchy (memory level, hit or miss) */
+#define PERF_MEM_LVL_NA 0x01 /* not available */
+#define PERF_MEM_LVL_HIT 0x02 /* hit level */
+#define PERF_MEM_LVL_MISS 0x04 /* miss level */
+#define PERF_MEM_LVL_L1 0x08 /* L1 */
+#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x20 /* L2 */
+#define PERF_MEM_LVL_L3 0x40 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+/* snoop mode */
+#define PERF_MEM_SNOOP_NA 0x01 /* not available */
+#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
+#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+/* locked instruction */
+#define PERF_MEM_LOCK_NA 0x01 /* not available */
+#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
+
+/* TLB access */
+#define PERF_MEM_TLB_NA 0x01 /* not available */
+#define PERF_MEM_TLB_HIT 0x02 /* hit level */
+#define PERF_MEM_TLB_MISS 0x04 /* miss level */
+#define PERF_MEM_TLB_L1 0x08 /* L1 */
+#define PERF_MEM_TLB_L2 0x10 /* L2 */
+#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker */
+#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
+
+#define PERF_MEM_S(a, s) \
+ (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
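For example (an aside, not part of the patch), a load that hit in L1 with no snoop would be composed as below; note the macro expands to the kernel-internal u64 type, so user space would use __u64:

    __u64 src = PERF_MEM_S(OP, LOAD) |
                PERF_MEM_S(LVL, HIT) | PERF_MEM_S(LVL, L1) |
                PERF_MEM_S(SNOOP, NONE);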
+
+/*
+ * single taken branch record layout:
+ *
+ * from: source instruction (may not always be a branch insn)
+ * to: branch target
+ * mispred: branch target was mispredicted
+ * predicted: branch target was predicted
+ *
+ * Support for mispred and predicted is optional; when it is
+ * not supported, mispred = predicted = 0.
+ *
+ * in_tx: running in a hardware transaction
+ * abort: aborting a hardware transaction
+ */
+struct perf_branch_entry {
+ __u64 from;
+ __u64 to;
+ __u64 mispred:1, /* target mispredicted */
+ predicted:1,/* target predicted */
+ in_tx:1, /* in transaction */
+ abort:1, /* transaction abort */
+ reserved:60;
+};
+
+#endif /* _LINUX_PERF_EVENT_H */
diff --git a/tools/gator/daemon/k/perf_event.h b/tools/gator/daemon/k/perf_event.h
new file mode 120000
index 000000000000..e5dff8c21ef4
--- /dev/null
+++ b/tools/gator/daemon/k/perf_event.h
@@ -0,0 +1 @@
+perf_event.3.12.h
\ No newline at end of file
diff --git a/tools/gator/daemon/libsensors/COPYING.LGPL b/tools/gator/daemon/libsensors/COPYING.LGPL
new file mode 100644
index 000000000000..4362b49151d7
--- /dev/null
+++ b/tools/gator/daemon/libsensors/COPYING.LGPL
@@ -0,0 +1,502 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/tools/gator/daemon/libsensors/access.c b/tools/gator/daemon/libsensors/access.c
new file mode 100644
index 000000000000..8e227e2550db
--- /dev/null
+++ b/tools/gator/daemon/libsensors/access.c
@@ -0,0 +1,561 @@
+/*
+ access.c - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+ Copyright (C) 2007-2009 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "access.h"
+#include "sensors.h"
+#include "data.h"
+#include "error.h"
+#include "sysfs.h"
+
+/* We watch the recursion depth for variables only, as an easy way to
+ detect cycles. */
+#define DEPTH_MAX 8
+
+static int sensors_eval_expr(const sensors_chip_features *chip_features,
+ const sensors_expr *expr,
+ double val, int depth, double *result);
+
+/* Compare two chip name descriptions to see whether they could match.
+ Return 0 if they do not match, 1 if they do. */
+static int sensors_match_chip(const sensors_chip_name *chip1,
+ const sensors_chip_name *chip2)
+{
+ if ((chip1->prefix != SENSORS_CHIP_NAME_PREFIX_ANY) &&
+ (chip2->prefix != SENSORS_CHIP_NAME_PREFIX_ANY) &&
+ strcmp(chip1->prefix, chip2->prefix))
+ return 0;
+
+ if ((chip1->bus.type != SENSORS_BUS_TYPE_ANY) &&
+ (chip2->bus.type != SENSORS_BUS_TYPE_ANY) &&
+ (chip1->bus.type != chip2->bus.type))
+ return 0;
+
+ if ((chip1->bus.nr != SENSORS_BUS_NR_ANY) &&
+ (chip2->bus.nr != SENSORS_BUS_NR_ANY) &&
+ (chip1->bus.nr != chip2->bus.nr))
+ return 0;
+
+ if ((chip1->addr != chip2->addr) &&
+ (chip1->addr != SENSORS_CHIP_NAME_ADDR_ANY) &&
+ (chip2->addr != SENSORS_CHIP_NAME_ADDR_ANY))
+ return 0;
+
+ return 1;
+}
+
+/* Returns, one by one, a pointer to all sensor_chip structs of the
+ config file which match the given chip name. Last should be
+ the value returned by the last call, or NULL if this is the first
+ call. Returns NULL if no more matches are found. Do not modify
+ the struct the return value points to!
+ Note that this visits the list of chips from last to first. Usually,
+ you want the match that was latest in the config file. */
+static sensors_chip *
+sensors_for_all_config_chips(const sensors_chip_name *name,
+ const sensors_chip *last)
+{
+ int nr, i;
+ sensors_chip_name_list chips;
+
+ for (nr = last ? last - sensors_config_chips - 1 :
+ sensors_config_chips_count - 1; nr >= 0; nr--) {
+
+ chips = sensors_config_chips[nr].chips;
+ for (i = 0; i < chips.fits_count; i++) {
+ if (sensors_match_chip(&chips.fits[i], name))
+ return sensors_config_chips + nr;
+ }
+ }
+ return NULL;
+}
+
+/* Look up a chip in the internal chip list, and return a pointer to it.
+ Do not modify the struct the return value points to! Returns NULL if
+ not found.*/
+static const sensors_chip_features *
+sensors_lookup_chip(const sensors_chip_name *name)
+{
+ int i;
+
+ for (i = 0; i < sensors_proc_chips_count; i++)
+ if (sensors_match_chip(&sensors_proc_chips[i].chip, name))
+ return &sensors_proc_chips[i];
+
+ return NULL;
+}
+
+/* Look up a subfeature of the given chip, and return a pointer to it.
+ Do not modify the struct the return value points to! Returns NULL if
+ not found.*/
+static const sensors_subfeature *
+sensors_lookup_subfeature_nr(const sensors_chip_features *chip,
+ int subfeat_nr)
+{
+ if (subfeat_nr < 0 ||
+ subfeat_nr >= chip->subfeature_count)
+ return NULL;
+ return chip->subfeature + subfeat_nr;
+}
+
+/* Look up a feature of the given chip, and return a pointer to it.
+ Do not modify the struct the return value points to! Returns NULL if
+ not found.*/
+static const sensors_feature *
+sensors_lookup_feature_nr(const sensors_chip_features *chip, int feat_nr)
+{
+ if (feat_nr < 0 ||
+ feat_nr >= chip->feature_count)
+ return NULL;
+ return chip->feature + feat_nr;
+}
+
+/* Look up a subfeature by name, and return a pointer to it.
+ Do not modify the struct the return value points to! Returns NULL if
+ not found.*/
+static const sensors_subfeature *
+sensors_lookup_subfeature_name(const sensors_chip_features *chip,
+ const char *name)
+{
+ int j;
+
+ for (j = 0; j < chip->subfeature_count; j++)
+ if (!strcmp(chip->subfeature[j].name, name))
+ return chip->subfeature + j;
+ return NULL;
+}
+
+/* Check whether the chip name is an 'absolute' name, which can only match
+ one chip, or whether it has wildcards. Returns 0 if it is absolute, 1
+ if there are wildcards. */
+int sensors_chip_name_has_wildcards(const sensors_chip_name *chip)
+{
+ if ((chip->prefix == SENSORS_CHIP_NAME_PREFIX_ANY) ||
+ (chip->bus.type == SENSORS_BUS_TYPE_ANY) ||
+ (chip->bus.nr == SENSORS_BUS_NR_ANY) ||
+ (chip->addr == SENSORS_CHIP_NAME_ADDR_ANY))
+ return 1;
+ else
+ return 0;
+}
+
+/* Look up the label for a given feature. Note that chip should not
+ contain wildcard values! The returned string is newly allocated (free it
+ yourself). On failure, NULL is returned.
+ If no label exists for this feature, the feature name itself is returned. */
+char *sensors_get_label(const sensors_chip_name *name,
+ const sensors_feature *feature)
+{
+ char *label;
+ const sensors_chip *chip;
+ char buf[PATH_MAX];
+ FILE *f;
+ int i;
+
+ if (sensors_chip_name_has_wildcards(name))
+ return NULL;
+
+ for (chip = NULL; (chip = sensors_for_all_config_chips(name, chip));)
+ for (i = 0; i < chip->labels_count; i++)
+ if (!strcmp(feature->name, chip->labels[i].name)) {
+ label = chip->labels[i].value;
+ goto sensors_get_label_exit;
+ }
+
+ /* No user specified label, check for a _label sysfs file */
+ snprintf(buf, PATH_MAX, "%s/%s_label", name->path, feature->name);
+
+ if ((f = fopen(buf, "r"))) {
+ i = fread(buf, 1, sizeof(buf), f);
+ fclose(f);
+ if (i > 0) {
+ /* i - 1 to strip the '\n' at the end */
+ buf[i - 1] = 0;
+ label = buf;
+ goto sensors_get_label_exit;
+ }
+ }
+
+ /* No label, return the feature name instead */
+ label = feature->name;
+
+sensors_get_label_exit:
+ label = strdup(label);
+ if (!label)
+ sensors_fatal_error(__func__, "Allocating label text");
+ return label;
+}
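+
+/* Usage sketch (editor's illustration, not part of the original source):
+   the label returned by sensors_get_label() is heap-allocated, so the
+   caller must free() it; `name' and `feature' are assumed to come from
+   the enumeration functions further down in this file.
+
+       char *label = sensors_get_label(name, feature);
+       if (label) {
+               printf("%s\n", label);
+               free(label);
+       }
+*/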
+
+/* Look up whether a feature should be ignored. Returns
+ 1 if it should be ignored, 0 if not. */
+static int sensors_get_ignored(const sensors_chip_name *name,
+ const sensors_feature *feature)
+{
+ const sensors_chip *chip;
+ int i;
+
+ for (chip = NULL; (chip = sensors_for_all_config_chips(name, chip));)
+ for (i = 0; i < chip->ignores_count; i++)
+ if (!strcmp(feature->name, chip->ignores[i].name))
+ return 1;
+ return 0;
+}
+
+/* Read the value of a subfeature of a certain chip. Note that chip should not
+ contain wildcard values! This function will return 0 on success, and <0
+ on failure. */
+static int __sensors_get_value(const sensors_chip_name *name, int subfeat_nr,
+ int depth, double *result)
+{
+ const sensors_chip_features *chip_features;
+ const sensors_subfeature *subfeature;
+ const sensors_expr *expr = NULL;
+ double val;
+ int res, i;
+
+ if (depth >= DEPTH_MAX)
+ return -SENSORS_ERR_RECURSION;
+ if (sensors_chip_name_has_wildcards(name))
+ return -SENSORS_ERR_WILDCARDS;
+ if (!(chip_features = sensors_lookup_chip(name)))
+ return -SENSORS_ERR_NO_ENTRY;
+ if (!(subfeature = sensors_lookup_subfeature_nr(chip_features,
+ subfeat_nr)))
+ return -SENSORS_ERR_NO_ENTRY;
+ if (!(subfeature->flags & SENSORS_MODE_R))
+ return -SENSORS_ERR_ACCESS_R;
+
+ /* Apply compute statement if it exists */
+ if (subfeature->flags & SENSORS_COMPUTE_MAPPING) {
+ const sensors_feature *feature;
+ const sensors_chip *chip;
+
+ feature = sensors_lookup_feature_nr(chip_features,
+ subfeature->mapping);
+
+ chip = NULL;
+ while (!expr &&
+ (chip = sensors_for_all_config_chips(name, chip)))
+ for (i = 0; i < chip->computes_count; i++) {
+ if (!strcmp(feature->name,
+ chip->computes[i].name)) {
+ expr = chip->computes[i].from_proc;
+ break;
+ }
+ }
+ }
+
+ res = sensors_read_sysfs_attr(name, subfeature, &val);
+ if (res)
+ return res;
+ if (!expr)
+ *result = val;
+ else if ((res = sensors_eval_expr(chip_features, expr, val, depth,
+ result)))
+ return res;
+ return 0;
+}
+
+int sensors_get_value(const sensors_chip_name *name, int subfeat_nr,
+ double *result)
+{
+ return __sensors_get_value(name, subfeat_nr, 0, result);
+}
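+
+/* Usage sketch (editor's illustration, not part of the original source):
+   reading one subfeature value. `subfeature' is assumed to come from
+   sensors_get_subfeature() or sensors_get_all_subfeatures() below, and
+   errors are the negative SENSORS_ERR_* codes used above.
+
+       double val;
+       int err = sensors_get_value(name, subfeature->number, &val);
+       if (err == 0)
+               printf("%s: %.3f\n", subfeature->name, val);
+*/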
+
+/* Set the value of a subfeature of a certain chip. Note that chip should not
+ contain wildcard values! This function will return 0 on success, and <0
+ on failure. */
+int sensors_set_value(const sensors_chip_name *name, int subfeat_nr,
+ double value)
+{
+ const sensors_chip_features *chip_features;
+ const sensors_subfeature *subfeature;
+ const sensors_expr *expr = NULL;
+ int i, res;
+ double to_write;
+
+ if (sensors_chip_name_has_wildcards(name))
+ return -SENSORS_ERR_WILDCARDS;
+ if (!(chip_features = sensors_lookup_chip(name)))
+ return -SENSORS_ERR_NO_ENTRY;
+ if (!(subfeature = sensors_lookup_subfeature_nr(chip_features,
+ subfeat_nr)))
+ return -SENSORS_ERR_NO_ENTRY;
+ if (!(subfeature->flags & SENSORS_MODE_W))
+ return -SENSORS_ERR_ACCESS_W;
+
+ /* Apply compute statement if it exists */
+ if (subfeature->flags & SENSORS_COMPUTE_MAPPING) {
+ const sensors_feature *feature;
+ const sensors_chip *chip;
+
+ feature = sensors_lookup_feature_nr(chip_features,
+ subfeature->mapping);
+
+ chip = NULL;
+ while (!expr &&
+ (chip = sensors_for_all_config_chips(name, chip)))
+ for (i = 0; i < chip->computes_count; i++) {
+ if (!strcmp(feature->name,
+ chip->computes[i].name)) {
+ expr = chip->computes[i].to_proc;
+ break;
+ }
+ }
+ }
+
+ to_write = value;
+ if (expr)
+ if ((res = sensors_eval_expr(chip_features, expr,
+ value, 0, &to_write)))
+ return res;
+ return sensors_write_sysfs_attr(name, subfeature, to_write);
+}
+
+const sensors_chip_name *sensors_get_detected_chips(const sensors_chip_name
+ *match, int *nr)
+{
+ const sensors_chip_name *res;
+
+ while (*nr < sensors_proc_chips_count) {
+ res = &sensors_proc_chips[(*nr)++].chip;
+ if (!match || sensors_match_chip(res, match))
+ return res;
+ }
+ return NULL;
+}
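+
+/* Usage sketch (editor's illustration, not part of the original source):
+   enumerating detected chips. The cursor `nr' is owned by the caller and
+   must start at 0; a NULL `match' visits every detected chip.
+
+       const sensors_chip_name *chip;
+       int nr = 0;
+
+       while ((chip = sensors_get_detected_chips(NULL, &nr)))
+               printf("found chip with prefix %s\n", chip->prefix);
+*/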
+
+const char *sensors_get_adapter_name(const sensors_bus_id *bus)
+{
+ int i;
+
+ /* bus types with a single instance */
+ switch (bus->type) {
+ case SENSORS_BUS_TYPE_ISA:
+ return "ISA adapter";
+ case SENSORS_BUS_TYPE_PCI:
+ return "PCI adapter";
+ /* SPI should not be here, but for now SPI adapters have no name,
+ so we don't have any custom string to return. */
+ case SENSORS_BUS_TYPE_SPI:
+ return "SPI adapter";
+ case SENSORS_BUS_TYPE_VIRTUAL:
+ return "Virtual device";
+ case SENSORS_BUS_TYPE_ACPI:
+ return "ACPI interface";
+ /* HID should probably not be here either, but it is unclear whether
+ HID buses have a name, or where to find it. */
+ case SENSORS_BUS_TYPE_HID:
+ return "HID adapter";
+ }
+
+ /* bus types with several instances */
+ for (i = 0; i < sensors_proc_bus_count; i++)
+ if (sensors_proc_bus[i].bus.type == bus->type &&
+ sensors_proc_bus[i].bus.nr == bus->nr)
+ return sensors_proc_bus[i].adapter;
+ return NULL;
+}
+
+const sensors_feature *
+sensors_get_features(const sensors_chip_name *name, int *nr)
+{
+ const sensors_chip_features *chip;
+
+ if (!(chip = sensors_lookup_chip(name)))
+ return NULL; /* No such chip */
+
+ while (*nr < chip->feature_count
+ && sensors_get_ignored(name, &chip->feature[*nr]))
+ (*nr)++;
+ if (*nr >= chip->feature_count)
+ return NULL;
+ return &chip->feature[(*nr)++];
+}
+
+const sensors_subfeature *
+sensors_get_all_subfeatures(const sensors_chip_name *name,
+ const sensors_feature *feature, int *nr)
+{
+ const sensors_chip_features *chip;
+ const sensors_subfeature *subfeature;
+
+ if (!(chip = sensors_lookup_chip(name)))
+ return NULL; /* No such chip */
+
+ /* Seek directly to the first subfeature */
+ if (*nr < feature->first_subfeature)
+ *nr = feature->first_subfeature;
+
+ if (*nr >= chip->subfeature_count)
+ return NULL; /* end of list */
+ subfeature = &chip->subfeature[(*nr)++];
+ if (subfeature->mapping == feature->number)
+ return subfeature;
+ return NULL; /* end of subfeature list */
+}
+
+const sensors_subfeature *
+sensors_get_subfeature(const sensors_chip_name *name,
+ const sensors_feature *feature,
+ sensors_subfeature_type type)
+{
+ const sensors_chip_features *chip;
+ int i;
+
+ if (!(chip = sensors_lookup_chip(name)))
+ return NULL; /* No such chip */
+
+ for (i = feature->first_subfeature; i < chip->subfeature_count &&
+ chip->subfeature[i].mapping == feature->number; i++) {
+ if (chip->subfeature[i].type == type)
+ return &chip->subfeature[i];
+ }
+ return NULL; /* No such subfeature */
+}
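+
+/* Usage sketch (editor's illustration, not part of the original source):
+   walking the features of one chip and reading a subfeature selected by
+   type. SENSORS_SUBFEATURE_TEMP_INPUT is assumed to be one of the
+   sensors_subfeature_type constants declared in sensors.h (not part of
+   this file).
+
+       const sensors_feature *feature;
+       const sensors_subfeature *sf;
+       double val;
+       int nr = 0;
+
+       while ((feature = sensors_get_features(name, &nr))) {
+               sf = sensors_get_subfeature(name, feature,
+                                           SENSORS_SUBFEATURE_TEMP_INPUT);
+               if (sf && sensors_get_value(name, sf->number, &val) == 0)
+                       printf("%s: %.1f\n", feature->name, val);
+       }
+*/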
+
+/* Evaluate an expression */
+int sensors_eval_expr(const sensors_chip_features *chip_features,
+ const sensors_expr *expr,
+ double val, int depth, double *result)
+{
+ double res1, res2;
+ int res;
+ const sensors_subfeature *subfeature;
+
+ if (expr->kind == sensors_kind_val) {
+ *result = expr->data.val;
+ return 0;
+ }
+ if (expr->kind == sensors_kind_source) {
+ *result = val;
+ return 0;
+ }
+ if (expr->kind == sensors_kind_var) {
+ if (!(subfeature = sensors_lookup_subfeature_name(chip_features,
+ expr->data.var)))
+ return -SENSORS_ERR_NO_ENTRY;
+ return __sensors_get_value(&chip_features->chip,
+ subfeature->number, depth + 1,
+ result);
+ }
+ if ((res = sensors_eval_expr(chip_features, expr->data.subexpr.sub1,
+ val, depth, &res1)))
+ return res;
+ if (expr->data.subexpr.sub2 &&
+ (res = sensors_eval_expr(chip_features, expr->data.subexpr.sub2,
+ val, depth, &res2)))
+ return res;
+ switch (expr->data.subexpr.op) {
+ case sensors_add:
+ *result = res1 + res2;
+ return 0;
+ case sensors_sub:
+ *result = res1 - res2;
+ return 0;
+ case sensors_multiply:
+ *result = res1 * res2;
+ return 0;
+ case sensors_divide:
+ if (res2 == 0.0)
+ return -SENSORS_ERR_DIV_ZERO;
+ *result = res1 / res2;
+ return 0;
+ case sensors_negate:
+ *result = -res1;
+ return 0;
+ case sensors_exp:
+ *result = exp(res1);
+ return 0;
+ case sensors_log:
+ if (res1 < 0.0)
+ return -SENSORS_ERR_DIV_ZERO;
+ *result = log(res1);
+ return 0;
+ }
+ return 0;
+}
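+
+/* Worked example (editor's illustration, not part of the original source):
+   a configuration compute expression such as `(@ - 32) * 5 / 9' reaches
+   this function as a tree of sensors_expr nodes. The `@' leaf has kind
+   sensors_kind_source and evaluates to the raw value passed in as `val',
+   numeric leaves have kind sensors_kind_val, and each operator becomes a
+   subexpr node whose children are evaluated first, with sensors_divide
+   guarded against a zero divisor. */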
+
+/* Execute all set statements for this particular chip. The chip must not
+ contain wildcards! This function will return 0 on success, and <0 on
+ failure. */
+static int sensors_do_this_chip_sets(const sensors_chip_name *name)
+{
+ const sensors_chip_features *chip_features;
+ sensors_chip *chip;
+ double value;
+ int i;
+ int err = 0, res;
+ const sensors_subfeature *subfeature;
+
+ chip_features = sensors_lookup_chip(name); /* Can't fail */
+
+ for (chip = NULL; (chip = sensors_for_all_config_chips(name, chip));)
+ for (i = 0; i < chip->sets_count; i++) {
+ subfeature = sensors_lookup_subfeature_name(chip_features,
+ chip->sets[i].name);
+ if (!subfeature) {
+ sensors_parse_error_wfn("Unknown feature name",
+ chip->sets[i].line.filename,
+ chip->sets[i].line.lineno);
+ err = -SENSORS_ERR_NO_ENTRY;
+ continue;
+ }
+
+ res = sensors_eval_expr(chip_features,
+ chip->sets[i].value, 0,
+ 0, &value);
+ if (res) {
+ sensors_parse_error_wfn("Error parsing expression",
+ chip->sets[i].line.filename,
+ chip->sets[i].line.lineno);
+ err = res;
+ continue;
+ }
+ if ((res = sensors_set_value(name, subfeature->number,
+ value))) {
+ sensors_parse_error_wfn("Failed to set value",
+ chip->sets[i].line.filename,
+ chip->sets[i].line.lineno);
+ err = res;
+ continue;
+ }
+ }
+ return err;
+}
+
+/* Execute all set statements for this particular chip. The chip may contain
+ wildcards! This function will return 0 on success, and <0 on failure. */
+int sensors_do_chip_sets(const sensors_chip_name *name)
+{
+ int nr, this_res;
+ const sensors_chip_name *found_name;
+ int res = 0;
+
+ for (nr = 0; (found_name = sensors_get_detected_chips(name, &nr));) {
+ this_res = sensors_do_this_chip_sets(found_name);
+ if (this_res)
+ res = this_res;
+ }
+ return res;
+}
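+
+/* Usage sketch (editor's illustration, not part of the original source):
+   applying every configuration `set' statement. A NULL name matches all
+   detected chips, because sensors_get_detected_chips() above treats a
+   NULL `match' as a wildcard.
+
+       int err = sensors_do_chip_sets(NULL);
+       if (err)
+               fprintf(stderr, "applying set statements failed: %d\n", err);
+*/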
diff --git a/tools/gator/daemon/libsensors/access.h b/tools/gator/daemon/libsensors/access.h
new file mode 100644
index 000000000000..1d3784340757
--- /dev/null
+++ b/tools/gator/daemon/libsensors/access.h
@@ -0,0 +1,33 @@
+/*
+ access.h - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+ Copyright (C) 2007 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#ifndef LIB_SENSORS_ACCESS_H
+#define LIB_SENSORS_ACCESS_H
+
+#include "sensors.h"
+#include "data.h"
+
+/* Check whether the chip name is an 'absolute' name, which can only match
+ one chip, or whether it has wildcards. Returns 0 if it is absolute, 1
+ if there are wildcards. */
+int sensors_chip_name_has_wildcards(const sensors_chip_name *chip);
+
+#endif /* def LIB_SENSORS_ACCESS_H */
diff --git a/tools/gator/daemon/libsensors/conf-lex.c b/tools/gator/daemon/libsensors/conf-lex.c
new file mode 100644
index 000000000000..a54664b3d77b
--- /dev/null
+++ b/tools/gator/daemon/libsensors/conf-lex.c
@@ -0,0 +1,2881 @@
+
+#line 3 "<stdout>"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define yy_create_buffer sensors_yy_create_buffer
+#define yy_delete_buffer sensors_yy_delete_buffer
+#define yy_flex_debug sensors_yy_flex_debug
+#define yy_init_buffer sensors_yy_init_buffer
+#define yy_flush_buffer sensors_yy_flush_buffer
+#define yy_load_buffer_state sensors_yy_load_buffer_state
+#define yy_switch_to_buffer sensors_yy_switch_to_buffer
+#define yyin sensors_yyin
+#define yyleng sensors_yyleng
+#define yylex sensors_yylex
+#define yylineno sensors_yylineno
+#define yyout sensors_yyout
+#define yyrestart sensors_yyrestart
+#define yytext sensors_yytext
+#define yywrap sensors_yywrap
+#define yyalloc sensors_yyalloc
+#define yyrealloc sensors_yyrealloc
+#define yyfree sensors_yyfree
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 35
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif /* defined (__STDC__) */
+#endif /* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an unsigned
+ * integer for use as an array index. If the signed char is negative,
+ * we want to instead treat it as an 8-bit unsigned char, hence the
+ * double cast.
+ */
+#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN (yy_start) = 1 + 2 *
+
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state. The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START (((yy_start) - 1) / 2)
+#define YYSTATE YY_START
+
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE sensors_yyrestart(sensors_yyin )
+
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+extern int sensors_yyleng;
+
+extern FILE *sensors_yyin, *sensors_yyout;
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+ #define YY_LESS_LINENO(n)
+
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up sensors_yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ *yy_cp = (yy_hold_char); \
+ YY_RESTORE_YY_MORE_OFFSET \
+ (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up sensors_yytext again */ \
+ } \
+ while ( 0 )
+
+#define unput(c) yyunput( c, (yytext_ptr) )
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count. */
+ int yy_bs_column; /**< The column count. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via sensors_yyrestart()), so that the user can continue scanning by
+ * just pointing sensors_yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* Stack of input buffers. */
+static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
+static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
+static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
+ ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
+ : NULL)
+
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
+
+/* yy_hold_char holds the character lost when sensors_yytext is formed. */
+static char yy_hold_char;
+static int yy_n_chars; /* number of characters read into yy_ch_buf */
+int sensors_yyleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = (char *) 0;
+static int yy_init = 0; /* whether we need to initialize */
+static int yy_start = 0; /* start state number */
+
+/* Flag which is used to allow sensors_yywrap()'s to do buffer switches
+ * instead of setting up a fresh sensors_yyin. A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+void sensors_yyrestart (FILE *input_file );
+void sensors_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
+YY_BUFFER_STATE sensors_yy_create_buffer (FILE *file,int size );
+void sensors_yy_delete_buffer (YY_BUFFER_STATE b );
+void sensors_yy_flush_buffer (YY_BUFFER_STATE b );
+void sensors_yypush_buffer_state (YY_BUFFER_STATE new_buffer );
+void sensors_yypop_buffer_state (void );
+
+static void sensors_yyensure_buffer_stack (void );
+static void sensors_yy_load_buffer_state (void );
+static void sensors_yy_init_buffer (YY_BUFFER_STATE b,FILE *file );
+
+#define YY_FLUSH_BUFFER sensors_yy_flush_buffer(YY_CURRENT_BUFFER )
+
+YY_BUFFER_STATE sensors_yy_scan_buffer (char *base,yy_size_t size );
+YY_BUFFER_STATE sensors_yy_scan_string (yyconst char *yy_str );
+YY_BUFFER_STATE sensors_yy_scan_bytes (yyconst char *bytes,int len );
+
+void *sensors_yyalloc (yy_size_t );
+void *sensors_yyrealloc (void *,yy_size_t );
+void sensors_yyfree (void * );
+
+#define yy_new_buffer sensors_yy_create_buffer
+
+#define yy_set_interactive(is_interactive) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){ \
+ sensors_yyensure_buffer_stack (); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ sensors_yy_create_buffer(sensors_yyin,YY_BUF_SIZE ); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+ }
+
+#define yy_set_bol(at_bol) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){\
+ sensors_yyensure_buffer_stack (); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ sensors_yy_create_buffer(sensors_yyin,YY_BUF_SIZE ); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+ }
+
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+#define sensors_yywrap(n) 1
+#define YY_SKIP_YYWRAP
+
+typedef unsigned char YY_CHAR;
+
+FILE *sensors_yyin = (FILE *) 0, *sensors_yyout = (FILE *) 0;
+
+typedef int yy_state_type;
+
+extern int sensors_yylineno;
+
+int sensors_yylineno = 1;
+
+extern char *sensors_yytext;
+#define yytext_ptr sensors_yytext
+static yyconst flex_int16_t yy_nxt[][39] =
+ {
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0
+ },
+
+ {
+ 9, 10, 11, 12, 10, 13, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 14, 15, 16, 14, 14, 14, 14, 14, 17, 18,
+ 14, 14, 14, 14, 14, 19, 14, 14, 14
+ },
+
+ {
+ 9, 10, 11, 12, 10, 13, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+
+ 14, 15, 16, 14, 14, 14, 14, 14, 17, 18,
+ 14, 14, 14, 14, 14, 19, 14, 14, 14
+ },
+
+ {
+ 9, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35
+ },
+
+ {
+ 9, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35
+
+ },
+
+ {
+ 9, 39, 39, 40, 41, 39, 39, 39, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 42, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 39, 39
+ },
+
+ {
+ 9, 39, 39, 40, 41, 39, 39, 39, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 42, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
+ 39, 39, 39, 39, 39, 39, 39, 39, 39
+ },
+
+ {
+ 9, 43, 43, 44, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43
+ },
+
+ {
+ 9, 43, 43, 44, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 43, 43
+ },
+
+ {
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9, -9, -9, -9, -9, -9, -9, -9, -9
+
+ },
+
+ {
+ 9, -10, -10, -10, -10, -10, -10, -10, -10, -10,
+ -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,
+ -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,
+ -10, -10, -10, -10, -10, -10, -10, -10, -10
+ },
+
+ {
+ 9, -11, 45, 46, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11, -11, -11, -11, -11, -11, -11, -11, -11
+ },
+
+ {
+ 9, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+
+ -12, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12, -12, -12, -12, -12, -12, -12, -12, -12
+ },
+
+ {
+ 9, 47, 47, 48, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47
+ },
+
+ {
+ 9, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+
+ },
+
+ {
+ 9, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 50, 49
+ },
+
+ {
+ 9, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ 49, 49, 49, 49, 49, 49, 49, 51, 49, 49,
+ 49, 49, 52, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+
+ 49, 49, 49, 49, 49, 49, 53, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ -18, -18, -18, -18, -18, -18, -18, -18, -18, -18,
+ 54, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ 49, 49, 49, 49, 55, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+
+ },
+
+ {
+ 9, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20, -20, -20, -20, -20, -20, -20, -20, -20
+ },
+
+ {
+ 9, -21, 56, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
+ -21, -21, -21, -21, -21, -21, -21, -21, -21
+ },
+
+ {
+ 9, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+
+ -22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
+ -22, -22, -22, -22, -22, -22, -22, -22, -22
+ },
+
+ {
+ 9, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23, -23, -23, -23, -23, -23, -23, -23, -23
+ },
+
+ {
+ 9, 57, 57, 58, 57, 57, 57, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57
+
+ },
+
+ {
+ 9, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
+ -25, -25, -25, -25, -25, -25, -25, -25, -25
+ },
+
+ {
+ 9, -26, -26, -26, -26, -26, -26, -26, -26, -26,
+ -26, -26, -26, -26, -26, -26, -26, -26, -26, -26,
+ -26, -26, -26, -26, -26, -26, -26, -26, -26, -26,
+ -26, -26, -26, -26, -26, -26, -26, -26, -26
+ },
+
+ {
+ 9, -27, -27, -27, -27, -27, -27, -27, -27, -27,
+ -27, -27, -27, -27, -27, -27, -27, -27, -27, -27,
+
+ -27, -27, -27, -27, -27, -27, -27, -27, -27, -27,
+ -27, -27, -27, -27, -27, -27, -27, -27, -27
+ },
+
+ {
+ 9, -28, -28, -28, -28, -28, -28, -28, -28, -28,
+ -28, -28, -28, -28, -28, -28, -28, -28, -28, -28,
+ -28, -28, -28, -28, -28, -28, -28, -28, -28, -28,
+ -28, -28, -28, -28, -28, -28, -28, -28, -28
+ },
+
+ {
+ 9, -29, -29, -29, -29, -29, -29, -29, -29, -29,
+ -29, -29, -29, -29, -29, -29, -29, -29, -29, -29,
+ -29, -29, -29, -29, -29, -29, -29, -29, -29, -29,
+ -29, -29, -29, -29, -29, -29, -29, -29, -29
+
+ },
+
+ {
+ 9, -30, -30, -30, -30, -30, -30, -30, -30, -30,
+ -30, -30, -30, -30, -30, -30, -30, -30, -30, -30,
+ -30, -30, -30, -30, -30, -30, -30, -30, -30, -30,
+ -30, -30, -30, -30, -30, -30, -30, -30, -30
+ },
+
+ {
+ 9, -31, -31, -31, -31, -31, -31, -31, -31, -31,
+ -31, -31, -31, -31, 59, -31, -31, -31, -31, -31,
+ -31, -31, -31, -31, -31, -31, -31, -31, -31, -31,
+ -31, -31, -31, -31, -31, -31, -31, -31, -31
+ },
+
+ {
+ 9, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32
+ },
+
+ {
+ 9, -33, -33, -33, -33, -33, -33, -33, -33, -33,
+ -33, -33, 60, -33, 61, -33, 62, -33, -33, -33,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62
+ },
+
+ {
+ 9, -34, -34, -34, -34, -34, -34, -34, -34, -34,
+ -34, -34, -34, -34, -34, -34, -34, -34, -34, -34,
+ -34, -34, -34, -34, -34, -34, -34, -34, -34, -34,
+ -34, -34, -34, -34, -34, -34, -34, -34, -34
+
+ },
+
+ {
+ 9, -35, -35, -35, -35, -35, -35, -35, -35, -35,
+ -35, -35, -35, -35, 62, -35, 62, -35, -35, -35,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62
+ },
+
+ {
+ 9, -36, 63, 64, -36, -36, -36, -36, -36, -36,
+ -36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+ -36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+ -36, -36, -36, -36, -36, -36, -36, -36, -36
+ },
+
+ {
+ 9, -37, -37, -37, -37, -37, -37, -37, -37, -37,
+ -37, -37, -37, -37, -37, -37, -37, -37, -37, -37,
+
+ -37, -37, -37, -37, -37, -37, -37, -37, -37, -37,
+ -37, -37, -37, -37, -37, -37, -37, -37, -37
+ },
+
+ {
+ 9, -38, -38, -38, -38, -38, -38, -38, -38, -38,
+ -38, -38, -38, -38, -38, -38, -38, -38, -38, -38,
+ -38, -38, -38, -38, -38, -38, -38, -38, -38, -38,
+ -38, -38, -38, -38, -38, -38, -38, -38, -38
+ },
+
+ {
+ 9, 65, 65, -39, -39, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, -39, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65
+
+ },
+
+ {
+ 9, -40, -40, -40, -40, -40, -40, -40, -40, -40,
+ -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
+ -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
+ -40, -40, -40, -40, -40, -40, -40, -40, -40
+ },
+
+ {
+ 9, -41, -41, -41, 66, -41, -41, -41, -41, -41,
+ -41, -41, -41, -41, -41, -41, -41, -41, -41, -41,
+ -41, -41, -41, -41, -41, -41, -41, -41, -41, -41,
+ -41, -41, -41, -41, -41, -41, -41, -41, -41
+ },
+
+ {
+ 9, 67, 67, 68, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 67, 67, 67, 67,
+
+ 69, 70, 67, 67, 67, 71, 67, 67, 67, 67,
+ 67, 72, 67, 67, 73, 67, 74, 67, 75
+ },
+
+ {
+ 9, 76, 76, -43, 76, 76, 76, 76, 76, 76,
+ 76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
+ 76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
+ 76, 76, 76, 76, 76, 76, 76, 76, 76
+ },
+
+ {
+ 9, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, -44
+
+ },
+
+ {
+ 9, -45, 45, 46, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45, -45,
+ -45, -45, -45, -45, -45, -45, -45, -45, -45
+ },
+
+ {
+ 9, -46, -46, -46, -46, -46, -46, -46, -46, -46,
+ -46, -46, -46, -46, -46, -46, -46, -46, -46, -46,
+ -46, -46, -46, -46, -46, -46, -46, -46, -46, -46,
+ -46, -46, -46, -46, -46, -46, -46, -46, -46
+ },
+
+ {
+ 9, 47, 47, 48, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47
+ },
+
+ {
+ 9, -48, -48, -48, -48, -48, -48, -48, -48, -48,
+ -48, -48, -48, -48, -48, -48, -48, -48, -48, -48,
+ -48, -48, -48, -48, -48, -48, -48, -48, -48, -48,
+ -48, -48, -48, -48, -48, -48, -48, -48, -48
+ },
+
+ {
+ 9, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ -49, -49, -49, -49, -49, -49, -49, -49, -49, -49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+
+ },
+
+ {
+ 9, -50, -50, -50, -50, -50, -50, -50, -50, -50,
+ -50, -50, -50, -50, -50, -50, -50, -50, -50, -50,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 77, 49, 49, 49
+ },
+
+ {
+ 9, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ -51, -51, -51, -51, -51, -51, -51, -51, -51, -51,
+ 49, 49, 49, 49, 49, 49, 49, 49, 78, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+ -52, -52, -52, -52, -52, -52, -52, -52, -52, -52,
+
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 79, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ -53, -53, -53, -53, -53, -53, -53, -53, -53, -53,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 80, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ -54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
+ 49, 81, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+
+ },
+
+ {
+ 9, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ -55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 82, 49, 49
+ },
+
+ {
+ 9, -56, 56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56, -56,
+ -56, -56, -56, -56, -56, -56, -56, -56, -56
+ },
+
+ {
+ 9, 57, 57, 58, 57, 57, 57, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57
+ },
+
+ {
+ 9, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58, -58,
+ -58, -58, -58, -58, -58, -58, -58, -58, -58
+ },
+
+ {
+ 9, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, 59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59, -59,
+ -59, -59, -59, -59, -59, -59, -59, -59, -59
+
+ },
+
+ {
+ 9, -60, -60, -60, -60, -60, -60, -60, -60, -60,
+ -60, -60, -60, -60, 59, -60, -60, -60, -60, -60,
+ -60, -60, -60, -60, -60, -60, -60, -60, -60, -60,
+ -60, -60, -60, -60, -60, -60, -60, -60, -60
+ },
+
+ {
+ 9, -61, -61, -61, -61, -61, -61, -61, -61, -61,
+ -61, -61, 60, -61, 61, -61, 62, -61, -61, -61,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62
+ },
+
+ {
+ 9, -62, -62, -62, -62, -62, -62, -62, -62, -62,
+ -62, -62, -62, -62, 62, -62, 62, -62, -62, -62,
+
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62
+ },
+
+ {
+ 9, -63, 63, 64, -63, -63, -63, -63, -63, -63,
+ -63, -63, -63, -63, -63, -63, -63, -63, -63, -63,
+ -63, -63, -63, -63, -63, -63, -63, -63, -63, -63,
+ -63, -63, -63, -63, -63, -63, -63, -63, -63
+ },
+
+ {
+ 9, -64, -64, -64, -64, -64, -64, -64, -64, -64,
+ -64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
+ -64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
+ -64, -64, -64, -64, -64, -64, -64, -64, -64
+
+ },
+
+ {
+ 9, 65, 65, -65, -65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, -65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65
+ },
+
+ {
+ 9, -66, -66, -66, -66, -66, -66, -66, -66, -66,
+ -66, -66, -66, -66, -66, -66, -66, -66, -66, -66,
+ -66, -66, -66, -66, -66, -66, -66, -66, -66, -66,
+ -66, -66, -66, -66, -66, -66, -66, -66, -66
+ },
+
+ {
+ 9, -67, -67, -67, -67, -67, -67, -67, -67, -67,
+ -67, -67, -67, -67, -67, -67, -67, -67, -67, -67,
+
+ -67, -67, -67, -67, -67, -67, -67, -67, -67, -67,
+ -67, -67, -67, -67, -67, -67, -67, -67, -67
+ },
+
+ {
+ 9, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+ -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+ -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+ -68, -68, -68, -68, -68, -68, -68, -68, -68
+ },
+
+ {
+ 9, -69, -69, -69, -69, -69, -69, -69, -69, -69,
+ -69, -69, -69, -69, -69, -69, -69, -69, -69, -69,
+ -69, -69, -69, -69, -69, -69, -69, -69, -69, -69,
+ -69, -69, -69, -69, -69, -69, -69, -69, -69
+
+ },
+
+ {
+ 9, -70, -70, -70, -70, -70, -70, -70, -70, -70,
+ -70, -70, -70, -70, -70, -70, -70, -70, -70, -70,
+ -70, -70, -70, -70, -70, -70, -70, -70, -70, -70,
+ -70, -70, -70, -70, -70, -70, -70, -70, -70
+ },
+
+ {
+ 9, -71, -71, -71, -71, -71, -71, -71, -71, -71,
+ -71, -71, -71, -71, -71, -71, -71, -71, -71, -71,
+ -71, -71, -71, -71, -71, -71, -71, -71, -71, -71,
+ -71, -71, -71, -71, -71, -71, -71, -71, -71
+ },
+
+ {
+ 9, -72, -72, -72, -72, -72, -72, -72, -72, -72,
+ -72, -72, -72, -72, -72, -72, -72, -72, -72, -72,
+
+ -72, -72, -72, -72, -72, -72, -72, -72, -72, -72,
+ -72, -72, -72, -72, -72, -72, -72, -72, -72
+ },
+
+ {
+ 9, -73, -73, -73, -73, -73, -73, -73, -73, -73,
+ -73, -73, -73, -73, -73, -73, -73, -73, -73, -73,
+ -73, -73, -73, -73, -73, -73, -73, -73, -73, -73,
+ -73, -73, -73, -73, -73, -73, -73, -73, -73
+ },
+
+ {
+ 9, -74, -74, -74, -74, -74, -74, -74, -74, -74,
+ -74, -74, -74, -74, -74, -74, -74, -74, -74, -74,
+ -74, -74, -74, -74, -74, -74, -74, -74, -74, -74,
+ -74, -74, -74, -74, -74, -74, -74, -74, -74
+
+ },
+
+ {
+ 9, -75, -75, -75, -75, -75, -75, -75, -75, -75,
+ -75, -75, -75, -75, -75, -75, -75, -75, -75, -75,
+ -75, -75, -75, -75, -75, -75, -75, -75, -75, -75,
+ -75, -75, -75, -75, -75, -75, -75, -75, -75
+ },
+
+ {
+ 9, 76, 76, -76, 76, 76, 76, 76, 76, 76,
+ 76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
+ 76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
+ 76, 76, 76, 76, 76, 76, 76, 76, 76
+ },
+
+ {
+ 9, -77, 83, -77, -77, -77, -77, -77, -77, -77,
+ -77, -77, -77, -77, -77, -77, -77, -77, -77, -77,
+
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -78, -78, -78, -78, -78, -78, -78, -78, -78,
+ -78, -78, -78, -78, -78, -78, -78, -78, -78, -78,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 84, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -79, -79, -79, -79, -79, -79, -79, -79, -79,
+ -79, -79, -79, -79, -79, -79, -79, -79, -79, -79,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 85, 49, 49, 49, 49, 49
+
+ },
+
+ {
+ 9, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ -80, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 86, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -81, -81, -81, -81, -81, -81, -81, -81, -81,
+ -81, -81, -81, -81, -81, -81, -81, -81, -81, -81,
+ 49, 49, 49, 49, 87, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -82, 88, -82, -82, -82, -82, -82, -82, -82,
+ -82, -82, -82, -82, -82, -82, -82, -82, -82, -82,
+
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -83, 83, -83, -83, -83, -83, -83, -83, -83,
+ -83, -83, -83, -83, -83, -83, -83, -83, -83, -83,
+ -83, -83, -83, -83, -83, -83, -83, -83, -83, -83,
+ -83, -83, -83, -83, -83, -83, -83, -83, -83
+ },
+
+ {
+ 9, -84, 89, -84, -84, -84, -84, -84, -84, -84,
+ -84, -84, -84, -84, -84, -84, -84, -84, -84, -84,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+
+ },
+
+ {
+ 9, -85, -85, -85, -85, -85, -85, -85, -85, -85,
+ -85, -85, -85, -85, -85, -85, -85, -85, -85, -85,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 90, 49
+ },
+
+ {
+ 9, -86, -86, -86, -86, -86, -86, -86, -86, -86,
+ -86, -86, -86, -86, -86, -86, -86, -86, -86, -86,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 91, 49, 49, 49, 49
+ },
+
+ {
+ 9, -87, -87, -87, -87, -87, -87, -87, -87, -87,
+ -87, -87, -87, -87, -87, -87, -87, -87, -87, -87,
+
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 92,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -88, 88, -88, -88, -88, -88, -88, -88, -88,
+ -88, -88, -88, -88, -88, -88, -88, -88, -88, -88,
+ -88, -88, -88, -88, -88, -88, -88, -88, -88, -88,
+ -88, -88, -88, -88, -88, -88, -88, -88, -88
+ },
+
+ {
+ 9, -89, 89, -89, -89, -89, -89, -89, -89, -89,
+ -89, -89, -89, -89, -89, -89, -89, -89, -89, -89,
+ -89, -89, -89, -89, -89, -89, -89, -89, -89, -89,
+ -89, -89, -89, -89, -89, -89, -89, -89, -89
+
+ },
+
+ {
+ 9, -90, -90, -90, -90, -90, -90, -90, -90, -90,
+ -90, -90, -90, -90, -90, -90, -90, -90, -90, -90,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 93, 49, 49
+ },
+
+ {
+ 9, -91, -91, -91, -91, -91, -91, -91, -91, -91,
+ -91, -91, -91, -91, -91, -91, -91, -91, -91, -91,
+ 49, 49, 49, 49, 94, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -92, 95, -92, -92, -92, -92, -92, -92, -92,
+ -92, -92, -92, -92, -92, -92, -92, -92, -92, -92,
+
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -93, -93, -93, -93, -93, -93, -93, -93, -93,
+ -93, -93, -93, -93, -93, -93, -93, -93, -93, -93,
+ 49, 49, 49, 49, 96, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -94, 97, -94, -94, -94, -94, -94, -94, -94,
+ -94, -94, -94, -94, -94, -94, -94, -94, -94, -94,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+
+ },
+
+ {
+ 9, -95, 95, -95, -95, -95, -95, -95, -95, -95,
+ -95, -95, -95, -95, -95, -95, -95, -95, -95, -95,
+ -95, -95, -95, -95, -95, -95, -95, -95, -95, -95,
+ -95, -95, -95, -95, -95, -95, -95, -95, -95
+ },
+
+ {
+ 9, -96, 98, -96, -96, -96, -96, -96, -96, -96,
+ -96, -96, -96, -96, -96, -96, -96, -96, -96, -96,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49
+ },
+
+ {
+ 9, -97, 97, -97, -97, -97, -97, -97, -97, -97,
+ -97, -97, -97, -97, -97, -97, -97, -97, -97, -97,
+
+ -97, -97, -97, -97, -97, -97, -97, -97, -97, -97,
+ -97, -97, -97, -97, -97, -97, -97, -97, -97
+ },
+
+ {
+ 9, -98, 98, -98, -98, -98, -98, -98, -98, -98,
+ -98, -98, -98, -98, -98, -98, -98, -98, -98, -98,
+ -98, -98, -98, -98, -98, -98, -98, -98, -98, -98,
+ -98, -98, -98, -98, -98, -98, -98, -98, -98
+ },
+
+ } ;
+
+static yy_state_type yy_get_previous_state (void );
+static yy_state_type yy_try_NUL_trans (yy_state_type current_state );
+static int yy_get_next_buffer (void );
+static void yy_fatal_error (yyconst char msg[] );
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up sensors_yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+ (yytext_ptr) = yy_bp; \
+ sensors_yyleng = (size_t) (yy_cp - yy_bp); \
+ (yy_hold_char) = *yy_cp; \
+ *yy_cp = '\0'; \
+ (yy_c_buf_p) = yy_cp;
+
+#define YY_NUM_RULES 50
+#define YY_END_OF_BUFFER 51
+/* This struct is not used in this scanner,
+ but its presence is necessary. */
+struct yy_trans_info
+ {
+ flex_int32_t yy_verify;
+ flex_int32_t yy_nxt;
+ };
+static yyconst flex_int16_t yy_accept[99] =
+ { 0,
+ 0, 0, 0, 0, 0, 0, 13, 13, 51, 12,
+ 1, 2, 3, 11, 11, 11, 11, 11, 11, 33,
+ 15, 16, 31, 18, 25, 26, 23, 21, 27, 22,
+ 33, 24, 20, 28, 32, 33, 29, 30, 49, 36,
+ 39, 48, 13, 14, 1, 2, 3, 4, 11, 11,
+ 11, 11, 11, 11, 11, 15, 18, 19, 20, 34,
+ 20, 32, 35, 17, 49, 38, 47, 37, 40, 41,
+ 42, 43, 44, 45, 46, 13, 8, 11, 11, 11,
+ 11, 6, 8, 9, 11, 11, 11, 6, 9, 11,
+ 11, 5, 11, 10, 5, 7, 10, 7
+
+ } ;
+
+static yyconst flex_int32_t yy_ec[256] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+ 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 1, 4, 5, 1, 1, 1, 1, 6,
+ 7, 8, 9, 10, 11, 12, 13, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 1, 1, 1,
+ 1, 1, 1, 15, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 1, 17, 1, 18, 16, 19, 20, 21, 22, 23,
+
+ 24, 25, 26, 27, 28, 23, 23, 29, 30, 31,
+ 32, 33, 23, 34, 35, 36, 37, 38, 23, 23,
+ 23, 23, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+ } ;
+
+extern int sensors_yy_flex_debug;
+int sensors_yy_flex_debug = 0;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+char *sensors_yytext;
+#line 1 "lib/conf-lex.l"
+#line 2 "lib/conf-lex.l"
+/*
+ conf-lex.l - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "general.h"
+#include "data.h"
+#include "conf-parse.h"
+#include "error.h"
+#include "scanner.h"
+
+static int buffer_count;
+static int buffer_max;
+static char *buffer;
+
+char sensors_lex_error[100];
+
+const char *sensors_yyfilename;
+int sensors_yylineno;
+
+#define buffer_malloc() sensors_malloc_array(&buffer,&buffer_count,\
+ &buffer_max,1)
+#define buffer_free() sensors_free_array(&buffer,&buffer_count,\
+ &buffer_max)
+#define buffer_add_char(c) sensors_add_array_el(c,&buffer,\
+ &buffer_count,\
+ &buffer_max,1)
+#define buffer_add_string(s) sensors_add_array_els(s,strlen(s),\
+ &buffer, \
+ &buffer_count,&buffer_max,1)
+
+/* Scanner for configuration files */
+/* All states are exclusive */
+
+
+
+/* Any whitespace-like character */
+/* Note: `10', `10.4' and `.4' are valid, `10.' is not */
+/* Only positive whole numbers are recognized here */
+#line 1255 "<stdout>"
+
+#define INITIAL 0
+#define MIDDLE 1
+#define STRING 2
+#define ERR 3
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+static int yy_init_globals (void );
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int sensors_yylex_destroy (void );
+
+int sensors_yyget_debug (void );
+
+void sensors_yyset_debug (int debug_flag );
+
+YY_EXTRA_TYPE sensors_yyget_extra (void );
+
+void sensors_yyset_extra (YY_EXTRA_TYPE user_defined );
+
+FILE *sensors_yyget_in (void );
+
+void sensors_yyset_in (FILE * in_str );
+
+FILE *sensors_yyget_out (void );
+
+void sensors_yyset_out (FILE * out_str );
+
+int sensors_yyget_leng (void );
+
+char *sensors_yyget_text (void );
+
+int sensors_yyget_lineno (void );
+
+void sensors_yyset_lineno (int line_number );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int sensors_yywrap (void );
+#else
+extern int sensors_yywrap (void );
+#endif
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int );
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * );
+#endif
+
+#ifndef YY_NO_INPUT
+
+#ifdef __cplusplus
+static int yyinput (void );
+#else
+static int input (void );
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO do { if (fwrite( sensors_yytext, sensors_yyleng, 1, sensors_yyout )) {} } while (0)
+#endif
+
+/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
+ * is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
+ { \
+ int c = '*'; \
+ size_t n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( sensors_yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( sensors_yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else \
+ { \
+ errno=0; \
+ while ( (result = fread(buf, 1, max_size, sensors_yyin))==0 && ferror(sensors_yyin)) \
+ { \
+ if( errno != EINTR) \
+ { \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ break; \
+ } \
+ errno=0; \
+ clearerr(sensors_yyin); \
+ } \
+ }\
+\
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int sensors_yylex (void);
+
+#define YY_DECL int sensors_yylex (void)
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after sensors_yytext and sensors_yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK break;
+#endif
+
+#define YY_RULE_SETUP \
+ YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ */
+YY_DECL
+{
+ register yy_state_type yy_current_state;
+ register char *yy_cp, *yy_bp;
+ register int yy_act;
+
+#line 80 "lib/conf-lex.l"
+
+
+ /*
+ * STATE: INITIAL
+ */
+
+#line 1450 "<stdout>"
+
+ if ( !(yy_init) )
+ {
+ (yy_init) = 1;
+
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! (yy_start) )
+ (yy_start) = 1; /* first start state */
+
+ if ( ! sensors_yyin )
+ sensors_yyin = stdin;
+
+ if ( ! sensors_yyout )
+ sensors_yyout = stdout;
+
+ if ( ! YY_CURRENT_BUFFER ) {
+ sensors_yyensure_buffer_stack ();
+ YY_CURRENT_BUFFER_LVALUE =
+ sensors_yy_create_buffer(sensors_yyin,YY_BUF_SIZE );
+ }
+
+ sensors_yy_load_buffer_state( );
+ }
+
+ while ( 1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = (yy_c_buf_p);
+
+ /* Support of sensors_yytext. */
+ *yy_cp = (yy_hold_char);
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = (yy_start);
+yy_match:
+ while ( (yy_current_state = yy_nxt[yy_current_state][ yy_ec[YY_SC_TO_UI(*yy_cp)] ]) > 0 )
+ ++yy_cp;
+
+ yy_current_state = -yy_current_state;
+
+yy_find_action:
+ yy_act = yy_accept[yy_current_state];
+
+ YY_DO_BEFORE_ACTION;
+
+do_action: /* This label is used only to access EOF actions. */
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+
+case YY_STATE_EOF(INITIAL):
+#line 88 "lib/conf-lex.l"
+{ /* EOF from this state terminates */
+ return 0;
+ }
+ YY_BREAK
+case 1:
+YY_RULE_SETUP
+#line 92 "lib/conf-lex.l"
+; /* eat as many blanks as possible at once */
+ YY_BREAK
+case 2:
+/* rule 2 can match eol */
+YY_RULE_SETUP
+#line 94 "lib/conf-lex.l"
+{ /* eat a bare newline (possibly preceded by blanks) */
+ sensors_yylineno++;
+ }
+ YY_BREAK
+/* comments */
+case 3:
+YY_RULE_SETUP
+#line 100 "lib/conf-lex.l"
+; /* eat the rest of the line after comment char */
+ YY_BREAK
+case 4:
+/* rule 4 can match eol */
+YY_RULE_SETUP
+#line 102 "lib/conf-lex.l"
+{ /* eat the rest of the line after comment char */
+ sensors_yylineno++;
+ }
+ YY_BREAK
+/*
+ * Keywords must be followed by whitespace - eat that too.
+ * If there isn't trailing whitespace, we still need to
+ * accept it as lexically correct (even though the parser
+ * will reject it anyway.)
+ */
+case 5:
+YY_RULE_SETUP
+#line 113 "lib/conf-lex.l"
+{
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return LABEL;
+ }
+ YY_BREAK
+case 6:
+YY_RULE_SETUP
+#line 120 "lib/conf-lex.l"
+{
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return SET;
+ }
+ YY_BREAK
+case 7:
+YY_RULE_SETUP
+#line 127 "lib/conf-lex.l"
+{
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return COMPUTE;
+ }
+ YY_BREAK
+case 8:
+YY_RULE_SETUP
+#line 134 "lib/conf-lex.l"
+{
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return BUS;
+ }
+ YY_BREAK
+case 9:
+YY_RULE_SETUP
+#line 141 "lib/conf-lex.l"
+{
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return CHIP;
+ }
+ YY_BREAK
+case 10:
+YY_RULE_SETUP
+#line 148 "lib/conf-lex.l"
+{
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return IGNORE;
+ }
+ YY_BREAK
+/* Anything else at the beginning of a line is an error */
+case 11:
+#line 158 "lib/conf-lex.l"
+case 12:
+YY_RULE_SETUP
+#line 158 "lib/conf-lex.l"
+{
+ BEGIN(ERR);
+ strcpy(sensors_lex_error,"Invalid keyword");
+ return ERROR;
+ }
+ YY_BREAK
+
+/*
+ * STATE: ERROR
+ */
+
+case 13:
+YY_RULE_SETUP
+#line 171 "lib/conf-lex.l"
+; /* eat whatever is left on this line */
+ YY_BREAK
+case 14:
+/* rule 14 can match eol */
+YY_RULE_SETUP
+#line 173 "lib/conf-lex.l"
+{
+ BEGIN(INITIAL);
+ sensors_yylineno++;
+ return EOL;
+ }
+ YY_BREAK
+
+/*
+ * STATE: MIDDLE
+ */
+
+case 15:
+YY_RULE_SETUP
+#line 186 "lib/conf-lex.l"
+; /* eat as many blanks as possible at once */
+ YY_BREAK
+case 16:
+/* rule 16 can match eol */
+YY_RULE_SETUP
+#line 188 "lib/conf-lex.l"
+{ /* newline here sends EOL token to parser */
+ BEGIN(INITIAL);
+ sensors_yylineno++;
+ return EOL;
+ }
+ YY_BREAK
+case YY_STATE_EOF(MIDDLE):
+#line 194 "lib/conf-lex.l"
+{ /* EOF here sends EOL token to parser also */
+ BEGIN(INITIAL);
+ return EOL;
+ }
+ YY_BREAK
+case 17:
+/* rule 17 can match eol */
+YY_RULE_SETUP
+#line 199 "lib/conf-lex.l"
+{ /* eat an escaped newline with no state change */
+ sensors_yylineno++;
+ }
+ YY_BREAK
+/* comments */
+case 18:
+YY_RULE_SETUP
+#line 205 "lib/conf-lex.l"
+; /* eat the rest of the line after comment char */
+ YY_BREAK
+case 19:
+/* rule 19 can match eol */
+YY_RULE_SETUP
+#line 207 "lib/conf-lex.l"
+{ /* eat the rest of the line after comment char */
+ BEGIN(INITIAL);
+ sensors_yylineno++;
+ return EOL;
+ }
+ YY_BREAK
+/* A number */
+case 20:
+YY_RULE_SETUP
+#line 215 "lib/conf-lex.l"
+{
+ sensors_yylval.value = atof(sensors_yytext);
+ return FLOAT;
+ }
+ YY_BREAK
+/* Some operators */
+case 21:
+YY_RULE_SETUP
+#line 222 "lib/conf-lex.l"
+return '+';
+ YY_BREAK
+case 22:
+YY_RULE_SETUP
+#line 223 "lib/conf-lex.l"
+return '-';
+ YY_BREAK
+case 23:
+YY_RULE_SETUP
+#line 224 "lib/conf-lex.l"
+return '*';
+ YY_BREAK
+case 24:
+YY_RULE_SETUP
+#line 225 "lib/conf-lex.l"
+return '/';
+ YY_BREAK
+case 25:
+YY_RULE_SETUP
+#line 226 "lib/conf-lex.l"
+return '(';
+ YY_BREAK
+case 26:
+YY_RULE_SETUP
+#line 227 "lib/conf-lex.l"
+return ')';
+ YY_BREAK
+case 27:
+YY_RULE_SETUP
+#line 228 "lib/conf-lex.l"
+return ',';
+ YY_BREAK
+case 28:
+YY_RULE_SETUP
+#line 229 "lib/conf-lex.l"
+return '@';
+ YY_BREAK
+case 29:
+YY_RULE_SETUP
+#line 230 "lib/conf-lex.l"
+return '^';
+ YY_BREAK
+case 30:
+YY_RULE_SETUP
+#line 231 "lib/conf-lex.l"
+return '`';
+ YY_BREAK
+/* Quoted string */
+case 31:
+YY_RULE_SETUP
+#line 235 "lib/conf-lex.l"
+{
+ buffer_malloc();
+ BEGIN(STRING);
+ }
+ YY_BREAK
+/* A normal, unquoted identifier */
+case 32:
+YY_RULE_SETUP
+#line 242 "lib/conf-lex.l"
+{
+ sensors_yylval.name = strdup(sensors_yytext);
+ if (! sensors_yylval.name)
+ sensors_fatal_error("conf-lex.l",
+ "Allocating a new string");
+
+ return NAME;
+ }
+ YY_BREAK
+/* anything else is bogus */
+case 33:
+#line 254 "lib/conf-lex.l"
+case 34:
+#line 255 "lib/conf-lex.l"
+case 35:
+YY_RULE_SETUP
+#line 255 "lib/conf-lex.l"
+{
+ BEGIN(ERR);
+ return ERROR;
+ }
+ YY_BREAK
+
+/*
+ * STATE: STRING
+ */
+
+/* Oops, newline or EOF while in a string is not good */
+case 36:
+/* rule 36 can match eol */
+#line 270 "lib/conf-lex.l"
+case 37:
+/* rule 37 can match eol */
+YY_RULE_SETUP
+#line 270 "lib/conf-lex.l"
+{
+ buffer_add_char("\0");
+ strcpy(sensors_lex_error,
+ "No matching double quote.");
+ buffer_free();
+ yyless(0);
+ BEGIN(ERR);
+ return ERROR;
+ }
+ YY_BREAK
+case YY_STATE_EOF(STRING):
+#line 280 "lib/conf-lex.l"
+{
+ strcpy(sensors_lex_error,
+ "Reached end-of-file without a matching double quote.");
+ buffer_free();
+ BEGIN(MIDDLE);
+ return ERROR;
+ }
+ YY_BREAK
+/* At the end */
+case 38:
+YY_RULE_SETUP
+#line 290 "lib/conf-lex.l"
+{
+ buffer_add_char("\0");
+ strcpy(sensors_lex_error,
+ "Quoted strings must be separated by whitespace.");
+ buffer_free();
+ BEGIN(ERR);
+ return ERROR;
+ }
+ YY_BREAK
+case 39:
+YY_RULE_SETUP
+#line 299 "lib/conf-lex.l"
+{
+ buffer_add_char("\0");
+ sensors_yylval.name = strdup(buffer);
+ if (! sensors_yylval.name)
+ sensors_fatal_error("conf-lex.l",
+ "Allocating a new string");
+ buffer_free();
+ BEGIN(MIDDLE);
+ return NAME;
+ }
+ YY_BREAK
+case 40:
+YY_RULE_SETUP
+#line 310 "lib/conf-lex.l"
+buffer_add_char("\a");
+ YY_BREAK
+case 41:
+YY_RULE_SETUP
+#line 311 "lib/conf-lex.l"
+buffer_add_char("\b");
+ YY_BREAK
+case 42:
+YY_RULE_SETUP
+#line 312 "lib/conf-lex.l"
+buffer_add_char("\f");
+ YY_BREAK
+case 43:
+YY_RULE_SETUP
+#line 313 "lib/conf-lex.l"
+buffer_add_char("\n");
+ YY_BREAK
+case 44:
+YY_RULE_SETUP
+#line 314 "lib/conf-lex.l"
+buffer_add_char("\r");
+ YY_BREAK
+case 45:
+YY_RULE_SETUP
+#line 315 "lib/conf-lex.l"
+buffer_add_char("\t");
+ YY_BREAK
+case 46:
+YY_RULE_SETUP
+#line 316 "lib/conf-lex.l"
+buffer_add_char("\v");
+ YY_BREAK
+/* Other escapes: just copy the character behind the slash */
+case 47:
+YY_RULE_SETUP
+#line 320 "lib/conf-lex.l"
+{
+ buffer_add_char(&sensors_yytext[1]);
+ }
+ YY_BREAK
+/* Anything else (including a bare '\' which may be followed by EOF) */
+case 48:
+#line 327 "lib/conf-lex.l"
+case 49:
+YY_RULE_SETUP
+#line 327 "lib/conf-lex.l"
+{
+ buffer_add_string(sensors_yytext);
+ }
+ YY_BREAK
+
+case 50:
+YY_RULE_SETUP
+#line 332 "lib/conf-lex.l"
+YY_FATAL_ERROR( "flex scanner jammed" );
+ YY_BREAK
+#line 1903 "<stdout>"
+ case YY_STATE_EOF(ERR):
+ yyterminate();
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = (yy_hold_char);
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed sensors_yyin at a new source and called
+ * sensors_yylex(). If so, then we have to assure
+ * consistency between YY_CURRENT_BUFFER and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ YY_CURRENT_BUFFER_LVALUE->yy_input_file = sensors_yyin;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( );
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++(yy_c_buf_p);
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = (yy_c_buf_p);
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer( ) )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ (yy_did_buffer_switch_on_eof) = 0;
+
+ if ( sensors_yywrap( ) )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * sensors_yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! (yy_did_buffer_switch_on_eof) )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ (yy_c_buf_p) =
+ (yytext_ptr) + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( );
+
+ yy_cp = (yy_c_buf_p);
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ (yy_c_buf_p) =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
+
+ yy_current_state = yy_get_previous_state( );
+
+ yy_cp = (yy_c_buf_p);
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+} /* end of sensors_yylex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH - text was matched right up to the EOB; process it before refilling
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer (void)
+{
+ register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+ register char *source = (yytext_ptr);
+ register int number_to_move, i;
+ int ret_val;
+
+ if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
+
+ else
+ {
+ int num_to_read =
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+
+ int yy_c_buf_p_offset =
+ (int) ((yy_c_buf_p) - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ int new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 )
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+ /* Include room for 2 EOB chars. */
+ sensors_yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = 0;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+ num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+ number_to_move - 1;
+
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+ (yy_n_chars), (size_t) num_to_read );
+
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ if ( (yy_n_chars) == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ sensors_yyrestart(sensors_yyin );
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+ /* Extend the array by 50%, plus the number we really need. */
+ yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) sensors_yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
+ if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+ }
+
+ (yy_n_chars) += number_to_move;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
+
+ (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+ return ret_val;
+}
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+ static yy_state_type yy_get_previous_state (void)
+{
+ register yy_state_type yy_current_state;
+ register char *yy_cp;
+
+ yy_current_state = (yy_start);
+
+ for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
+ {
+ yy_current_state = yy_nxt[yy_current_state][(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1)];
+ }
+
+ return yy_current_state;
+}
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state );
+ */
+ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
+{
+ register int yy_is_jam;
+
+ yy_current_state = yy_nxt[yy_current_state][1];
+ yy_is_jam = (yy_current_state <= 0);
+
+ return yy_is_jam ? 0 : yy_current_state;
+}
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+ static int yyinput (void)
+#else
+ static int input (void)
+#endif
+
+{
+ int c;
+
+ *(yy_c_buf_p) = (yy_hold_char);
+
+ if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+ /* This was really a NUL. */
+ *(yy_c_buf_p) = '\0';
+
+ else
+ { /* need more input */
+ int offset = (yy_c_buf_p) - (yytext_ptr);
+ ++(yy_c_buf_p);
+
+ switch ( yy_get_next_buffer( ) )
+ {
+ case EOB_ACT_LAST_MATCH:
+ /* This happens because yy_g_n_b()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ sensors_yyrestart(sensors_yyin );
+
+ /*FALLTHROUGH*/
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( sensors_yywrap( ) )
+ return EOF;
+
+ if ( ! (yy_did_buffer_switch_on_eof) )
+ YY_NEW_FILE;
+#ifdef __cplusplus
+ return yyinput();
+#else
+ return input();
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ (yy_c_buf_p) = (yytext_ptr) + offset;
+ break;
+ }
+ }
+ }
+
+ c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
+ *(yy_c_buf_p) = '\0'; /* preserve sensors_yytext */
+ (yy_hold_char) = *++(yy_c_buf_p);
+
+ return c;
+}
+#endif /* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * @param input_file A readable stream.
+ *
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+ void sensors_yyrestart (FILE * input_file )
+{
+
+ if ( ! YY_CURRENT_BUFFER ){
+ sensors_yyensure_buffer_stack ();
+ YY_CURRENT_BUFFER_LVALUE =
+ sensors_yy_create_buffer(sensors_yyin,YY_BUF_SIZE );
+ }
+
+ sensors_yy_init_buffer(YY_CURRENT_BUFFER,input_file );
+ sensors_yy_load_buffer_state( );
+}
+
+/** Switch to a different input buffer.
+ * @param new_buffer The new input buffer.
+ *
+ */
+ void sensors_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
+{
+
+ /* TODO. We should be able to replace this entire function body
+ * with
+ * sensors_yypop_buffer_state();
+ * sensors_yypush_buffer_state(new_buffer);
+ */
+ sensors_yyensure_buffer_stack ();
+ if ( YY_CURRENT_BUFFER == new_buffer )
+ return;
+
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *(yy_c_buf_p) = (yy_hold_char);
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+ sensors_yy_load_buffer_state( );
+
+ /* We don't actually know whether we did this switch during
+ * EOF (sensors_yywrap()) processing, but the only time this flag
+ * is looked at is after sensors_yywrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ (yy_did_buffer_switch_on_eof) = 1;
+}
+
+static void sensors_yy_load_buffer_state (void)
+{
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+ sensors_yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+ (yy_hold_char) = *(yy_c_buf_p);
+}
+
+/** Allocate and initialize an input buffer state.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ *
+ * @return the allocated buffer state.
+ */
+ YY_BUFFER_STATE sensors_yy_create_buffer (FILE * file, int size )
+{
+ YY_BUFFER_STATE b;
+
+ b = (YY_BUFFER_STATE) sensors_yyalloc(sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in sensors_yy_create_buffer()" );
+
+ b->yy_buf_size = size;
+
+ /* yy_ch_buf has to be 2 characters longer than the size given because
+ * we need to put in 2 end-of-buffer characters.
+ */
+ b->yy_ch_buf = (char *) sensors_yyalloc(b->yy_buf_size + 2 );
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in sensors_yy_create_buffer()" );
+
+ b->yy_is_our_buffer = 1;
+
+ sensors_yy_init_buffer(b,file );
+
+ return b;
+}
+
+/** Destroy the buffer.
+ * @param b a buffer created with sensors_yy_create_buffer()
+ *
+ */
+ void sensors_yy_delete_buffer (YY_BUFFER_STATE b )
+{
+
+ if ( ! b )
+ return;
+
+ if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+ YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+ if ( b->yy_is_our_buffer )
+ sensors_yyfree((void *) b->yy_ch_buf );
+
+ sensors_yyfree((void *) b );
+}
+
+#ifndef __cplusplus
+extern int isatty (int );
+#endif /* __cplusplus */
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a sensors_yyrestart() or at EOF.
+ */
+ static void sensors_yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
+
+{
+ int oerrno = errno;
+
+ sensors_yy_flush_buffer(b );
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
+
+ /* If b is the current buffer, then sensors_yy_init_buffer was _probably_
+ * called from sensors_yyrestart() or through yy_get_next_buffer.
+ * In that case, we don't want to reset the lineno or column.
+ */
+ if (b != YY_CURRENT_BUFFER){
+ b->yy_bs_lineno = 1;
+ b->yy_bs_column = 0;
+ }
+
+ b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
+
+ errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ *
+ */
+ void sensors_yy_flush_buffer (YY_BUFFER_STATE b )
+{
+ if ( ! b )
+ return;
+
+ b->yy_n_chars = 0;
+
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+ b->yy_buf_pos = &b->yy_ch_buf[0];
+
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ if ( b == YY_CURRENT_BUFFER )
+ sensors_yy_load_buffer_state( );
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ * the current state. This function will allocate the stack
+ * if necessary.
+ * @param new_buffer The new state.
+ *
+ */
+void sensors_yypush_buffer_state (YY_BUFFER_STATE new_buffer )
+{
+ if (new_buffer == NULL)
+ return;
+
+ sensors_yyensure_buffer_stack();
+
+ /* This block is copied from sensors_yy_switch_to_buffer. */
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *(yy_c_buf_p) = (yy_hold_char);
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ /* Only push if top exists. Otherwise, replace top. */
+ if (YY_CURRENT_BUFFER)
+ (yy_buffer_stack_top)++;
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+ /* copied from sensors_yy_switch_to_buffer. */
+ sensors_yy_load_buffer_state( );
+ (yy_did_buffer_switch_on_eof) = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ * The next element becomes the new top.
+ *
+ */
+void sensors_yypop_buffer_state (void)
+{
+ if (!YY_CURRENT_BUFFER)
+ return;
+
+ sensors_yy_delete_buffer(YY_CURRENT_BUFFER );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ if ((yy_buffer_stack_top) > 0)
+ --(yy_buffer_stack_top);
+
+ if (YY_CURRENT_BUFFER) {
+ sensors_yy_load_buffer_state( );
+ (yy_did_buffer_switch_on_eof) = 1;
+ }
+}
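+
+/* A minimal usage sketch for the two functions above: the buffer stack
+ * lets a scanner suspend one input source and resume it later, e.g. for
+ * an include-style directive. The helper names are hypothetical; only
+ * the sensors_yy* calls come from this scanner:
+ */
+#if 0
+static int enter_nested_source(FILE *fp)
+{
+    YY_BUFFER_STATE b = sensors_yy_create_buffer(fp, YY_BUF_SIZE);
+    if (!b)
+        return -1;
+    sensors_yypush_buffer_state(b);  /* scanning continues from fp */
+    return 0;
+}
+
+static void leave_nested_source(void)
+{
+    sensors_yypop_buffer_state();    /* resumes the previous buffer */
+}
+#endif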
+
+/* Allocates the stack if it does not exist.
+ * Guarantees space for at least one push.
+ */
+static void sensors_yyensure_buffer_stack (void)
+{
+ int num_to_alloc;
+
+ if (!(yy_buffer_stack)) {
+
+ /* The first allocation is for a single element, since we don't
+ * know if this scanner will even need a stack; the stack is grown
+ * on demand in the realloc branch below.
+ */
+ num_to_alloc = 1;
+ (yy_buffer_stack) = (struct yy_buffer_state**)sensors_yyalloc
+ (num_to_alloc * sizeof(struct yy_buffer_state*)
+ );
+ if ( ! (yy_buffer_stack) )
+ YY_FATAL_ERROR( "out of dynamic memory in sensors_yyensure_buffer_stack()" );
+
+ memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+
+ (yy_buffer_stack_max) = num_to_alloc;
+ (yy_buffer_stack_top) = 0;
+ return;
+ }
+
+ if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
+
+ /* Increase the buffer to prepare for a possible push. */
+ int grow_size = 8 /* arbitrary grow size */;
+
+ num_to_alloc = (yy_buffer_stack_max) + grow_size;
+ (yy_buffer_stack) = (struct yy_buffer_state**)sensors_yyrealloc
+ ((yy_buffer_stack),
+ num_to_alloc * sizeof(struct yy_buffer_state*)
+ );
+ if ( ! (yy_buffer_stack) )
+ YY_FATAL_ERROR( "out of dynamic memory in sensors_yyensure_buffer_stack()" );
+
+ /* zero only the new slots.*/
+ memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
+ (yy_buffer_stack_max) = num_to_alloc;
+ }
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * @param base the character buffer
+ * @param size the size in bytes of the character buffer
+ *
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE sensors_yy_scan_buffer (char * base, yy_size_t size )
+{
+ YY_BUFFER_STATE b;
+
+ if ( size < 2 ||
+ base[size-2] != YY_END_OF_BUFFER_CHAR ||
+ base[size-1] != YY_END_OF_BUFFER_CHAR )
+ /* They forgot to leave room for the EOB's. */
+ return 0;
+
+ b = (YY_BUFFER_STATE) sensors_yyalloc(sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in sensors_yy_scan_buffer()" );
+
+ b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
+ b->yy_buf_pos = b->yy_ch_buf = base;
+ b->yy_is_our_buffer = 0;
+ b->yy_input_file = 0;
+ b->yy_n_chars = b->yy_buf_size;
+ b->yy_is_interactive = 0;
+ b->yy_at_bol = 1;
+ b->yy_fill_buffer = 0;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ sensors_yy_switch_to_buffer(b );
+
+ return b;
+}
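+
+/* Note the contract above: the caller must already have placed the two
+ * YY_END_OF_BUFFER_CHAR sentinels at the end of the buffer, or the
+ * function returns 0. Scanning a writable region in place therefore
+ * looks like this (a sketch; buf and len are the caller's):
+ *
+ *     buf[len] = buf[len + 1] = YY_END_OF_BUFFER_CHAR;
+ *     sensors_yy_scan_buffer(buf, len + 2);
+ *
+ * sensors_yy_scan_bytes() below packages exactly this, plus the copy.
+ */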
+
+/** Setup the input buffer state to scan a string. The next call to sensors_yylex() will
+ * scan from a @e copy of @a str.
+ * @param yystr a NUL-terminated string to scan
+ *
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ * sensors_yy_scan_bytes() instead.
+ */
+YY_BUFFER_STATE sensors_yy_scan_string (yyconst char * yystr )
+{
+
+ return sensors_yy_scan_bytes(yystr,strlen(yystr) );
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to sensors_yylex() will
+ * scan from a @e copy of @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
+ *
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE sensors_yy_scan_bytes (yyconst char * yybytes, int _yybytes_len )
+{
+ YY_BUFFER_STATE b;
+ char *buf;
+ yy_size_t n;
+ int i;
+
+ /* Get memory for full buffer, including space for trailing EOB's. */
+ n = _yybytes_len + 2;
+ buf = (char *) sensors_yyalloc(n );
+ if ( ! buf )
+ YY_FATAL_ERROR( "out of dynamic memory in sensors_yy_scan_bytes()" );
+
+ for ( i = 0; i < _yybytes_len; ++i )
+ buf[i] = yybytes[i];
+
+ buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+ b = sensors_yy_scan_buffer(buf,n );
+ if ( ! b )
+ YY_FATAL_ERROR( "bad buffer in sensors_yy_scan_bytes()" );
+
+ /* It's okay to grow etc. this buffer, and we should throw it
+ * away when we're done.
+ */
+ b->yy_is_our_buffer = 1;
+
+ return b;
+}
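+
+/* A minimal sketch of tokenizing an in-memory fragment with the helpers
+ * above; the configuration line is illustrative and error handling is
+ * elided:
+ */
+#if 0
+static void scan_string_example(void)
+{
+    YY_BUFFER_STATE b = sensors_yy_scan_string("chip \"lm78-*\"\n");
+    int token;
+
+    while ((token = sensors_yylex()) != 0)
+        ;   /* feed each token to the parser, or inspect it here */
+
+    sensors_yy_delete_buffer(b);
+}
+#endif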
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+static void yy_fatal_error (yyconst char* msg )
+{
+ (void) fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up sensors_yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ sensors_yytext[sensors_yyleng] = (yy_hold_char); \
+ (yy_c_buf_p) = sensors_yytext + yyless_macro_arg; \
+ (yy_hold_char) = *(yy_c_buf_p); \
+ *(yy_c_buf_p) = '\0'; \
+ sensors_yyleng = yyless_macro_arg; \
+ } \
+ while ( 0 )
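+
+/* yyless(n) keeps the first n characters of the current match and pushes
+ * the rest back onto the input; the STRING error rule earlier in this
+ * scanner uses yyless(0) to return the whole offending match before
+ * switching to the ERR state.
+ */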
+
+/* Accessor methods (get/set functions) to struct members. */
+
+/** Get the current line number.
+ *
+ */
+int sensors_yyget_lineno (void)
+{
+
+ return sensors_yylineno;
+}
+
+/** Get the input stream.
+ *
+ */
+FILE *sensors_yyget_in (void)
+{
+ return sensors_yyin;
+}
+
+/** Get the output stream.
+ *
+ */
+FILE *sensors_yyget_out (void)
+{
+ return sensors_yyout;
+}
+
+/** Get the length of the current token.
+ *
+ */
+int sensors_yyget_leng (void)
+{
+ return sensors_yyleng;
+}
+
+/** Get the current token.
+ *
+ */
+
+char *sensors_yyget_text (void)
+{
+ return sensors_yytext;
+}
+
+/** Set the current line number.
+ * @param line_number
+ *
+ */
+void sensors_yyset_lineno (int line_number )
+{
+
+ sensors_yylineno = line_number;
+}
+
+/** Set the input stream. This does not discard the current
+ * input buffer.
+ * @param in_str A readable stream.
+ *
+ * @see sensors_yy_switch_to_buffer
+ */
+void sensors_yyset_in (FILE * in_str )
+{
+ sensors_yyin = in_str ;
+}
+
+void sensors_yyset_out (FILE * out_str )
+{
+ sensors_yyout = out_str ;
+}
+
+int sensors_yyget_debug (void)
+{
+ return sensors_yy_flex_debug;
+}
+
+void sensors_yyset_debug (int bdebug )
+{
+ sensors_yy_flex_debug = bdebug ;
+}
+
+static int yy_init_globals (void)
+{
+ /* Initialization is the same as for the non-reentrant scanner.
+ * This function is called from sensors_yylex_destroy(), so don't allocate here.
+ */
+
+ (yy_buffer_stack) = 0;
+ (yy_buffer_stack_top) = 0;
+ (yy_buffer_stack_max) = 0;
+ (yy_c_buf_p) = (char *) 0;
+ (yy_init) = 0;
+ (yy_start) = 0;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+ sensors_yyin = stdin;
+ sensors_yyout = stdout;
+#else
+ sensors_yyin = (FILE *) 0;
+ sensors_yyout = (FILE *) 0;
+#endif
+
+ /* For future reference: Set errno on error, since we are called by
+ * sensors_yylex_init()
+ */
+ return 0;
+}
+
+/* sensors_yylex_destroy is for both reentrant and non-reentrant scanners. */
+int sensors_yylex_destroy (void)
+{
+
+ /* Pop the buffer stack, destroying each element. */
+ while(YY_CURRENT_BUFFER){
+ sensors_yy_delete_buffer(YY_CURRENT_BUFFER );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ sensors_yypop_buffer_state();
+ }
+
+ /* Destroy the stack itself. */
+ sensors_yyfree((yy_buffer_stack) );
+ (yy_buffer_stack) = NULL;
+
+ /* Reset the globals. This is important in a non-reentrant scanner so the next time
+ * sensors_yylex() is called, initialization will occur. */
+ yy_init_globals( );
+
+ return 0;
+}
+
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
+{
+ register int i;
+ for ( i = 0; i < n; ++i )
+ s1[i] = s2[i];
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * s )
+{
+ register int n;
+ for ( n = 0; s[n]; ++n )
+ ;
+
+ return n;
+}
+#endif
+
+void *sensors_yyalloc (yy_size_t size )
+{
+ return (void *) malloc( size );
+}
+
+void *sensors_yyrealloc (void * ptr, yy_size_t size )
+{
+ /* The cast to (char *) in the following accommodates both
+ * implementations that use char* generic pointers, and those
+ * that use void* generic pointers. It works with the latter
+ * because both ANSI C and C++ allow castless assignment from
+ * any pointer type to void*, and deal with argument conversions
+ * as though doing an assignment.
+ */
+ return (void *) realloc( (char *) ptr, size );
+}
+
+void sensors_yyfree (void * ptr )
+{
+ free( (char *) ptr ); /* see sensors_yyrealloc() for (char *) cast */
+}
+
+#define YYTABLES_NAME "yytables"
+
+#line 332 "lib/conf-lex.l"
+
+
+
+/*
+ Do the buffer handling manually. This allows us to scan as many
+ config files as we need to, while cleaning up properly after each
+ one. The "BEGIN(0)" line ensures that we start in the default state,
+ even if e.g. the previous config file was syntactically broken.
+
+ Returns 0 if successful, !0 otherwise.
+*/
+
+static YY_BUFFER_STATE scan_buf = (YY_BUFFER_STATE)0;
+
+int sensors_scanner_init(FILE *input, const char *filename)
+{
+ BEGIN(0);
+ if (!(scan_buf = sensors_yy_create_buffer(input, YY_BUF_SIZE)))
+ return -1;
+
+ sensors_yy_switch_to_buffer(scan_buf);
+ sensors_yyfilename = filename;
+ sensors_yylineno = 1;
+ return 0;
+}
+
+void sensors_scanner_exit(void)
+{
+ sensors_yy_delete_buffer(scan_buf);
+ scan_buf = (YY_BUFFER_STATE)0;
+
+/* As of flex 2.5.9, sensors_yylex_destroy() must be called when done with the
+ scanner, otherwise we'll leak memory. */
+#if defined(YY_FLEX_MAJOR_VERSION) && defined(YY_FLEX_MINOR_VERSION) && defined(YY_FLEX_SUBMINOR_VERSION)
+#if YY_FLEX_MAJOR_VERSION > 2 || \
+ (YY_FLEX_MAJOR_VERSION == 2 && (YY_FLEX_MINOR_VERSION > 5 || \
+ (YY_FLEX_MINOR_VERSION == 5 && YY_FLEX_SUBMINOR_VERSION >= 9)))
+ sensors_yylex_destroy();
+#endif
+#endif
+}
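+
+/* A minimal sketch of driving the scanner over several configuration
+ * files in sequence with the two functions above; sensors_yyparse() is
+ * the parser entry point generated from conf-parse.y, and error
+ * handling is kept trivial:
+ */
+#if 0
+static int parse_config_files(char **paths, int n)
+{
+    int i, err = 0;
+
+    for (i = 0; i < n && !err; i++) {
+        FILE *fp = fopen(paths[i], "r");
+        if (!fp)
+            return -1;
+        if (sensors_scanner_init(fp, paths[i]))
+            err = -1;
+        else {
+            err = sensors_yyparse();   /* 0 on success */
+            sensors_scanner_exit();    /* clean up after each file */
+        }
+        fclose(fp);
+    }
+    return err;
+}
+#endif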
+
+
diff --git a/tools/gator/daemon/libsensors/conf-lex.l b/tools/gator/daemon/libsensors/conf-lex.l
new file mode 100644
index 000000000000..43ddbd8f5bd5
--- /dev/null
+++ b/tools/gator/daemon/libsensors/conf-lex.l
@@ -0,0 +1,372 @@
+%{
+/*
+ conf-lex.l - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "general.h"
+#include "data.h"
+#include "conf-parse.h"
+#include "error.h"
+#include "scanner.h"
+
+static int buffer_count;
+static int buffer_max;
+static char *buffer;
+
+char sensors_lex_error[100];
+
+const char *sensors_yyfilename;
+int sensors_yylineno;
+
+#define buffer_malloc() sensors_malloc_array(&buffer,&buffer_count,\
+ &buffer_max,1)
+#define buffer_free() sensors_free_array(&buffer,&buffer_count,\
+ &buffer_max)
+#define buffer_add_char(c) sensors_add_array_el(c,&buffer,\
+ &buffer_count,\
+ &buffer_max,1)
+#define buffer_add_string(s) sensors_add_array_els(s,strlen(s),\
+ &buffer, \
+ &buffer_count,&buffer_max,1)
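+
+/* The buffer_* macros above wrap the sensors_*_array helpers to manage
+   one growable character array, which the STRING rules below use to
+   assemble the contents of a quoted string piece by piece. */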
+
+%}
+
+ /* Scanner for configuration files */
+
+%option nodefault
+%option noyywrap
+%option nounput
+
+ /* All states are exclusive */
+
+%x MIDDLE
+%x STRING
+%x ERR
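+
+ /* Because these states are exclusive (%x), the INITIAL rules do not
+    apply while the scanner is in MIDDLE, STRING or ERR; each block in
+    the rules section below handles its own blanks, newlines and EOF. */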
+
+ /* Any whitespace-like character */
+
+BLANK [ \f\r\t\v]
+
+IDCHAR [[:alnum:]_]
+
+ /* Note: `10', `10.4' and `.4' are valid, `10.' is not */
+
+FLOAT [[:digit:]]*\.?[[:digit:]]+
+
+ /* Only positive whole numbers are recognized here */
+
+NUM 0|([1-9][[:digit:]]*)
+
+
+%%
+
+ /*
+ * STATE: INITIAL
+ */
+
+<INITIAL>{
+
+<<EOF>> { /* EOF from this state terminates */
+ return 0;
+ }
+
+{BLANK}+ ; /* eat as many blanks as possible at once */
+
+{BLANK}*\n { /* eat a bare newline (possibly preceded by blanks) */
+ sensors_yylineno++;
+ }
+
+ /* comments */
+
+#.* ; /* eat the rest of the line after comment char */
+
+#.*\n { /* eat the rest of the line after comment char */
+ sensors_yylineno++;
+ }
+
+ /*
+ * Keywords must be followed by whitespace - eat that too.
+ * If there isn't trailing whitespace, we still need to
+ * accept it as lexically correct (even though the parser
+ * will reject it anyway).
+ */
+
+label{BLANK}* {
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return LABEL;
+ }
+
+set{BLANK}* {
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return SET;
+ }
+
+compute{BLANK}* {
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return COMPUTE;
+ }
+
+bus{BLANK}* {
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return BUS;
+ }
+
+chip{BLANK}* {
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return CHIP;
+ }
+
+ignore{BLANK}* {
+ sensors_yylval.line.filename = sensors_yyfilename;
+ sensors_yylval.line.lineno = sensors_yylineno;
+ BEGIN(MIDDLE);
+ return IGNORE;
+ }
+
+ /* Anything else at the beginning of a line is an error */
+
+[a-z]+ |
+. {
+ BEGIN(ERR);
+ strcpy(sensors_lex_error,"Invalid keyword");
+ return ERROR;
+ }
+}
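+
+ /* For reference, a sketch of a configuration fragment that these
+    keyword rules accept (illustrative sensors.conf syntax, not taken
+    from this patch):
+
+        chip "lm78-*"
+            label temp1 "M/B Temp"
+            set temp1_max 40
+
+    Each keyword switches to MIDDLE, where the arguments and the
+    terminating newline are tokenized. */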
+
+ /*
+ * STATE: ERROR
+ */
+
+<ERR>{
+
+.* ; /* eat whatever is left on this line */
+
+\n {
+ BEGIN(INITIAL);
+ sensors_yylineno++;
+ return EOL;
+ }
+}
+
+ /*
+ * STATE: MIDDLE
+ */
+
+<MIDDLE>{
+
+{BLANK}+ ; /* eat as many blanks as possible at once */
+
+\n { /* newline here sends EOL token to parser */
+ BEGIN(INITIAL);
+ sensors_yylineno++;
+ return EOL;
+ }
+
+<<EOF>> { /* EOF here sends EOL token to parser also */
+ BEGIN(INITIAL);
+ return EOL;
+ }
+
+\\{BLANK}*\n { /* eat an escaped newline with no state change */
+ sensors_yylineno++;
+ }
+
+ /* comments */
+
+#.* ; /* eat the rest of the line after comment char */
+
+#.*\n { /* eat the rest of the line after comment char */
+ BEGIN(INITIAL);
+ sensors_yylineno++;
+ return EOL;
+ }
+
+ /* A number */
+
+{FLOAT} {
+ sensors_yylval.value = atof(sensors_yytext);
+ return FLOAT;
+ }
+
+ /* Some operators */
+
+"+" return '+';
+"-" return '-';
+"*" return '*';
+"/" return '/';
+"(" return '(';
+")" return ')';
+"," return ',';
+"@" return '@';
+"^" return '^';
+"`" return '`';
+
+ /* Quoted string */
+
+\" {
+ buffer_malloc();
+ BEGIN(STRING);
+ }
+
+ /* A normal, unquoted identifier */
+
+{IDCHAR}+ {
+ sensors_yylval.name = strdup(sensors_yytext);
+ if (! sensors_yylval.name)
+ sensors_fatal_error("conf-lex.l",
+ "Allocating a new string");
+
+ return NAME;
+ }
+
+ /* anything else is bogus */
+
+. |
+[[:digit:]]*\. |
+\\{BLANK}* {
+ BEGIN(ERR);
+ return ERROR;
+ }
+}
+
+ /*
+ * STATE: STRING
+ */
+
+<STRING>{
+
+ /* Oops, newline or EOF while in a string is not good */
+
+\n |
+\\\n {
+ buffer_add_char("\0");
+ strcpy(sensors_lex_error,
+ "No matching double quote.");
+ buffer_free();
+ yyless(0);
+ BEGIN(ERR);
+ return ERROR;
+ }
+
+<<EOF>> {
+ strcpy(sensors_lex_error,
+ "Reached end-of-file without a matching double quote.");
+ buffer_free();
+ BEGIN(MIDDLE);
+ return ERROR;
+ }
+
+ /* At the end */
+
+\"\" {
+ buffer_add_char("\0");
+ strcpy(sensors_lex_error,
+ "Quoted strings must be separated by whitespace.");
+ buffer_free();
+ BEGIN(ERR);
+ return ERROR;
+ }
+
+\" {
+ buffer_add_char("\0");
+ sensors_yylval.name = strdup(buffer);
+ if (! sensors_yylval.name)
+ sensors_fatal_error("conf-lex.l",
+ "Allocating a new string");
+ buffer_free();
+ BEGIN(MIDDLE);
+ return NAME;
+ }
+
+\\a buffer_add_char("\a");
+\\b buffer_add_char("\b");
+\\f buffer_add_char("\f");
+\\n buffer_add_char("\n");
+\\r buffer_add_char("\r");
+\\t buffer_add_char("\t");
+\\v buffer_add_char("\v");
+
+ /* Other escapes: just copy the character behind the slash */
+
+\\. {
+ buffer_add_char(&sensors_yytext[1]);
+ }
+
+ /* Anything else (including a bare '\' which may be followed by EOF) */
+
+\\ |
+[^\\\n\"]+ {
+ buffer_add_string(sensors_yytext);
+ }
+}
+
+%%
+
+/*
+ Do the buffer handling manually. This allows us to scan as many
+ config files as we need to, while cleaning up properly after each
+ one. The "BEGIN(0)" line ensures that we start in the default state,
+ even if e.g. the previous config file was syntactically broken.
+
+ Returns 0 if successful, !0 otherwise.
+*/
+
+static YY_BUFFER_STATE scan_buf = (YY_BUFFER_STATE)0;
+
+int sensors_scanner_init(FILE *input, const char *filename)
+{
+ BEGIN(0);
+ if (!(scan_buf = sensors_yy_create_buffer(input, YY_BUF_SIZE)))
+ return -1;
+
+ sensors_yy_switch_to_buffer(scan_buf);
+ sensors_yyfilename = filename;
+ sensors_yylineno = 1;
+ return 0;
+}
+
+void sensors_scanner_exit(void)
+{
+ sensors_yy_delete_buffer(scan_buf);
+ scan_buf = (YY_BUFFER_STATE)0;
+
+/* As of flex 2.5.9, yylex_destroy() must be called when done with the
+ scanner, otherwise we'll leak memory. */
+#if defined(YY_FLEX_MAJOR_VERSION) && defined(YY_FLEX_MINOR_VERSION) && defined(YY_FLEX_SUBMINOR_VERSION)
+#if YY_FLEX_MAJOR_VERSION > 2 || \
+ (YY_FLEX_MAJOR_VERSION == 2 && (YY_FLEX_MINOR_VERSION > 5 || \
+ (YY_FLEX_MINOR_VERSION == 5 && YY_FLEX_SUBMINOR_VERSION >= 9)))
+ sensors_yylex_destroy();
+#endif
+#endif
+}
+
diff --git a/tools/gator/daemon/libsensors/conf-parse.c b/tools/gator/daemon/libsensors/conf-parse.c
new file mode 100644
index 000000000000..fb775460a1c3
--- /dev/null
+++ b/tools/gator/daemon/libsensors/conf-parse.c
@@ -0,0 +1,2042 @@
+/* A Bison parser, made by GNU Bison 2.5. */
+
+/* Bison implementation for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+ simplifying the original so-called "semantic" parser. */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+ infringing on user name space. This should be done even for local
+ variables, as they might otherwise be expanded by user macros.
+ There are some unavoidable exceptions within include files to
+ define necessary library symbols; they are noted "INFRINGES ON
+ USER NAME SPACE" below. */
+
+/* Identify Bison output. */
+#define YYBISON 1
+
+/* Bison version. */
+#define YYBISON_VERSION "2.5"
+
+/* Skeleton name. */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers. */
+#define YYPURE 0
+
+/* Push parsers. */
+#define YYPUSH 0
+
+/* Pull parsers. */
+#define YYPULL 1
+
+/* Using locations. */
+#define YYLSP_NEEDED 0
+
+/* Substitute the variable and function names. */
+#define yyparse sensors_yyparse
+#define yylex sensors_yylex
+#define yyerror sensors_yyerror
+#define yylval sensors_yylval
+#define yychar sensors_yychar
+#define yydebug sensors_yydebug
+#define yynerrs sensors_yynerrs
+
+
+/* Copy the first part of user declarations. */
+
+/* Line 268 of yacc.c */
+#line 1 "lib/conf-parse.y"
+
+/*
+ conf-parse.y - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#define YYERROR_VERBOSE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "data.h"
+#include "general.h"
+#include "error.h"
+#include "conf.h"
+#include "access.h"
+#include "init.h"
+
+static void sensors_yyerror(const char *err);
+static sensors_expr *malloc_expr(void);
+
+static sensors_chip *current_chip = NULL;
+
+#define bus_add_el(el) sensors_add_array_el(el,\
+ &sensors_config_busses,\
+ &sensors_config_busses_count,\
+ &sensors_config_busses_max,\
+ sizeof(sensors_bus))
+#define label_add_el(el) sensors_add_array_el(el,\
+ &current_chip->labels,\
+ &current_chip->labels_count,\
+ &current_chip->labels_max,\
+ sizeof(sensors_label));
+#define set_add_el(el) sensors_add_array_el(el,\
+ &current_chip->sets,\
+ &current_chip->sets_count,\
+ &current_chip->sets_max,\
+ sizeof(sensors_set));
+#define compute_add_el(el) sensors_add_array_el(el,\
+ &current_chip->computes,\
+ &current_chip->computes_count,\
+ &current_chip->computes_max,\
+ sizeof(sensors_compute));
+#define ignore_add_el(el) sensors_add_array_el(el,\
+ &current_chip->ignores,\
+ &current_chip->ignores_count,\
+ &current_chip->ignores_max,\
+ sizeof(sensors_ignore));
+#define chip_add_el(el) sensors_add_array_el(el,\
+ &sensors_config_chips,\
+ &sensors_config_chips_count,\
+ &sensors_config_chips_max,\
+ sizeof(sensors_chip));
+
+#define fits_add_el(el,list) sensors_add_array_el(el,\
+ &(list).fits,\
+ &(list).fits_count,\
+ &(list).fits_max, \
+ sizeof(sensors_chip_name));
+
+
+
+/* Line 268 of yacc.c */
+#line 158 "lib/conf-parse.c"
+
+/* Enabling traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+
+/* Enabling verbose error messages. */
+#ifdef YYERROR_VERBOSE
+# undef YYERROR_VERBOSE
+# define YYERROR_VERBOSE 1
+#else
+# define YYERROR_VERBOSE 0
+#endif
+
+/* Enabling the token table. */
+#ifndef YYTOKEN_TABLE
+# define YYTOKEN_TABLE 0
+#endif
+
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ NEG = 258,
+ EOL = 259,
+ BUS = 260,
+ LABEL = 261,
+ SET = 262,
+ CHIP = 263,
+ COMPUTE = 264,
+ IGNORE = 265,
+ FLOAT = 266,
+ NAME = 267,
+ ERROR = 268
+ };
+#endif
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+/* Line 293 of yacc.c */
+#line 79 "lib/conf-parse.y"
+
+ double value;
+ char *name;
+ void *nothing;
+ sensors_chip_name_list chips;
+ sensors_expr *expr;
+ sensors_bus_id bus;
+ sensors_chip_name chip;
+ sensors_config_line line;
+
+
+
+/* Line 293 of yacc.c */
+#line 220 "lib/conf-parse.c"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+/* Copy the second part of user declarations. */
+
+
+/* Line 343 of yacc.c */
+#line 232 "lib/conf-parse.c"
+
+#ifdef short
+# undef short
+#endif
+
+#ifdef YYTYPE_UINT8
+typedef YYTYPE_UINT8 yytype_uint8;
+#else
+typedef unsigned char yytype_uint8;
+#endif
+
+#ifdef YYTYPE_INT8
+typedef YYTYPE_INT8 yytype_int8;
+#elif (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+typedef signed char yytype_int8;
+#else
+typedef short int yytype_int8;
+#endif
+
+#ifdef YYTYPE_UINT16
+typedef YYTYPE_UINT16 yytype_uint16;
+#else
+typedef unsigned short int yytype_uint16;
+#endif
+
+#ifdef YYTYPE_INT16
+typedef YYTYPE_INT16 yytype_int16;
+#else
+typedef short int yytype_int16;
+#endif
+
+#ifndef YYSIZE_T
+# ifdef __SIZE_TYPE__
+# define YYSIZE_T __SIZE_TYPE__
+# elif defined size_t
+# define YYSIZE_T size_t
+# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+# define YYSIZE_T size_t
+# else
+# define YYSIZE_T unsigned int
+# endif
+#endif
+
+#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
+
+#ifndef YY_
+# if defined YYENABLE_NLS && YYENABLE_NLS
+# if ENABLE_NLS
+# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+# define YY_(msgid) dgettext ("bison-runtime", msgid)
+# endif
+# endif
+# ifndef YY_
+# define YY_(msgid) msgid
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E. */
+#if ! defined lint || defined __GNUC__
+# define YYUSE(e) ((void) (e))
+#else
+# define YYUSE(e) /* empty */
+#endif
+
+/* Identity function, used to suppress warnings about constant conditions. */
+#ifndef lint
+# define YYID(n) (n)
+#else
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static int
+YYID (int yyi)
+#else
+static int
+YYID (yyi)
+ int yyi;
+#endif
+{
+ return yyi;
+}
+#endif
+
+#if ! defined yyoverflow || YYERROR_VERBOSE
+
+/* The parser invokes alloca or malloc; define the necessary symbols. */
+
+# ifdef YYSTACK_USE_ALLOCA
+# if YYSTACK_USE_ALLOCA
+# ifdef __GNUC__
+# define YYSTACK_ALLOC __builtin_alloca
+# elif defined __BUILTIN_VA_ARG_INCR
+# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+# elif defined _AIX
+# define YYSTACK_ALLOC __alloca
+# elif defined _MSC_VER
+# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+# define alloca _alloca
+# else
+# define YYSTACK_ALLOC alloca
+# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+# endif
+# endif
+# endif
+# endif
+# endif
+
+# ifdef YYSTACK_ALLOC
+ /* Pacify GCC's `empty if-body' warning. */
+# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
+# ifndef YYSTACK_ALLOC_MAXIMUM
+ /* The OS might guarantee only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ invoke alloca (N) if N exceeds 4096. Use a slightly smaller number
+ to allow for a few compiler-allocated temporary stack slots. */
+# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+# endif
+# else
+# define YYSTACK_ALLOC YYMALLOC
+# define YYSTACK_FREE YYFREE
+# ifndef YYSTACK_ALLOC_MAXIMUM
+# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+# endif
+# if (defined __cplusplus && ! defined EXIT_SUCCESS \
+ && ! ((defined YYMALLOC || defined malloc) \
+ && (defined YYFREE || defined free)))
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+# endif
+# endif
+# ifndef YYMALLOC
+# define YYMALLOC malloc
+# if ! defined malloc && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# ifndef YYFREE
+# define YYFREE free
+# if ! defined free && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+void free (void *); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# endif
+#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
+
+
+#if (! defined yyoverflow \
+ && (! defined __cplusplus \
+ || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member. */
+union yyalloc
+{
+ yytype_int16 yyss_alloc;
+ YYSTYPE yyvs_alloc;
+};
+
+/* The size of the maximum gap between one aligned stack and the next. */
+# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
+
+/* The size of an array large enough to hold all stacks, each with
+ N elements. */
+# define YYSTACK_BYTES(N) \
+ ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
+ + YYSTACK_GAP_MAXIMUM)
+
+# define YYCOPY_NEEDED 1
+
+/* Relocate STACK from its old location to the new one. The
+ local variables YYSIZE and YYSTACKSIZE give the old and new number of
+ elements in the stack, and YYPTR gives the new location of the
+ stack. Advance YYPTR to a properly aligned location for the next
+ stack. */
+# define YYSTACK_RELOCATE(Stack_alloc, Stack) \
+ do \
+ { \
+ YYSIZE_T yynewbytes; \
+ YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \
+ Stack = &yyptr->Stack_alloc; \
+ yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
+ yyptr += yynewbytes / sizeof (*yyptr); \
+ } \
+ while (YYID (0))
+
+#endif
+
+#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
+/* Copy COUNT objects from FROM to TO. The source and destination do
+ not overlap. */
+# ifndef YYCOPY
+# if defined __GNUC__ && 1 < __GNUC__
+# define YYCOPY(To, From, Count) \
+ __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+# else
+# define YYCOPY(To, From, Count) \
+ do \
+ { \
+ YYSIZE_T yyi; \
+ for (yyi = 0; yyi < (Count); yyi++) \
+ (To)[yyi] = (From)[yyi]; \
+ } \
+ while (YYID (0))
+# endif
+# endif
+#endif /* !YYCOPY_NEEDED */
+
+/* YYFINAL -- State number of the termination state. */
+#define YYFINAL 2
+/* YYLAST -- Last index in YYTABLE. */
+#define YYLAST 58
+
+/* YYNTOKENS -- Number of terminals. */
+#define YYNTOKENS 24
+/* YYNNTS -- Number of nonterminals. */
+#define YYNNTS 16
+/* YYNRULES -- Number of rules. */
+#define YYNRULES 34
+/* YYNSTATES -- Number of states. */
+#define YYNSTATES 63
+
+/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
+#define YYUNDEFTOK 2
+#define YYMAXUTOK 268
+
+#define YYTRANSLATE(YYX) \
+ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+
+/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
+static const yytype_uint8 yytranslate[] =
+{
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 22, 23, 5, 4, 10, 3, 2, 6, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 21, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 8, 2, 9, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 1, 2, 7, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20
+};
+
+#if YYDEBUG
+/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
+ YYRHS. */
+static const yytype_uint8 yyprhs[] =
+{
+ 0, 0, 3, 4, 7, 10, 13, 16, 19, 22,
+ 25, 28, 32, 36, 40, 46, 49, 52, 54, 57,
+ 59, 61, 63, 67, 71, 75, 79, 82, 86, 89,
+ 92, 94, 96, 98, 100
+};
+
+/* YYRHS -- A `-1'-separated list of the rules' RHS. */
+static const yytype_int8 yyrhs[] =
+{
+ 25, 0, -1, -1, 25, 26, -1, 27, 11, -1,
+ 28, 11, -1, 29, 11, -1, 32, 11, -1, 30,
+ 11, -1, 31, 11, -1, 1, 11, -1, 12, 35,
+ 36, -1, 13, 37, 38, -1, 14, 37, 34, -1,
+ 16, 37, 34, 10, 34, -1, 17, 37, -1, 15,
+ 33, -1, 39, -1, 33, 39, -1, 18, -1, 19,
+ -1, 21, -1, 34, 4, 34, -1, 34, 3, 34,
+ -1, 34, 5, 34, -1, 34, 6, 34, -1, 3,
+ 34, -1, 22, 34, 23, -1, 8, 34, -1, 9,
+ 34, -1, 19, -1, 19, -1, 19, -1, 19, -1,
+ 19, -1
+};
+
+/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
+static const yytype_uint16 yyrline[] =
+{
+ 0, 119, 119, 120, 123, 124, 125, 126, 127, 128,
+ 129, 132, 141, 156, 171, 188, 201, 219, 225, 231,
+ 236, 241, 245, 252, 259, 266, 273, 280, 282, 289,
+ 298, 308, 312, 316, 320
+};
+#endif
+
+#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+ First, the terminals, then, starting at YYNTOKENS, nonterminals. */
+static const char *const yytname[] =
+{
+ "$end", "error", "$undefined", "'-'", "'+'", "'*'", "'/'", "NEG", "'^'",
+ "'`'", "','", "EOL", "BUS", "LABEL", "SET", "CHIP", "COMPUTE", "IGNORE",
+ "FLOAT", "NAME", "ERROR", "'@'", "'('", "')'", "$accept", "input",
+ "line", "bus_statement", "label_statement", "set_statement",
+ "compute_statement", "ignore_statement", "chip_statement",
+ "chip_name_list", "expression", "bus_id", "adapter_name",
+ "function_name", "string", "chip_name", 0
+};
+#endif
+
+# ifdef YYPRINT
+/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
+ token YYLEX-NUM. */
+static const yytype_uint16 yytoknum[] =
+{
+ 0, 256, 257, 45, 43, 42, 47, 258, 94, 96,
+ 44, 259, 260, 261, 262, 263, 264, 265, 266, 267,
+ 268, 64, 40, 41
+};
+# endif
+
+/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
+static const yytype_uint8 yyr1[] =
+{
+ 0, 24, 25, 25, 26, 26, 26, 26, 26, 26,
+ 26, 27, 28, 29, 30, 31, 32, 33, 33, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 35, 36, 37, 38, 39
+};
+
+/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
+static const yytype_uint8 yyr2[] =
+{
+ 0, 2, 0, 2, 2, 2, 2, 2, 2, 2,
+ 2, 3, 3, 3, 5, 2, 2, 1, 2, 1,
+ 1, 1, 3, 3, 3, 3, 2, 3, 2, 2,
+ 1, 1, 1, 1, 1
+};
+
+/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
+ Performed when YYTABLE doesn't specify something else to do. Zero
+ means the default is an error. */
+static const yytype_uint8 yydefact[] =
+{
+ 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+ 3, 0, 0, 0, 0, 0, 0, 10, 30, 0,
+ 32, 0, 0, 34, 16, 17, 0, 15, 4, 5,
+ 6, 8, 9, 7, 31, 11, 33, 12, 0, 0,
+ 0, 19, 20, 21, 0, 13, 18, 0, 26, 28,
+ 29, 0, 0, 0, 0, 0, 0, 27, 23, 22,
+ 24, 25, 14
+};
+
+/* YYDEFGOTO[NTERM-NUM]. */
+static const yytype_int8 yydefgoto[] =
+{
+ -1, 1, 10, 11, 12, 13, 14, 15, 16, 24,
+ 45, 19, 35, 21, 37, 25
+};
+
+/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+ STATE-NUM. */
+#define YYPACT_NINF -27
+static const yytype_int8 yypact[] =
+{
+ -27, 37, -27, -4, -3, 1, 1, 6, 1, 1,
+ -27, 8, 13, 20, 23, 32, 34, -27, -27, 29,
+ -27, 39, 14, -27, 6, -27, 14, -27, -27, -27,
+ -27, -27, -27, -27, -27, -27, -27, -27, 14, 14,
+ 14, -27, -27, -27, 14, 36, -27, 5, -27, -27,
+ -27, -2, 14, 14, 14, 14, 14, -27, 0, 0,
+ -27, -27, 36
+};
+
+/* YYPGOTO[NTERM-NUM]. */
+static const yytype_int8 yypgoto[] =
+{
+ -27, -27, -27, -27, -27, -27, -27, -27, -27, -27,
+ -26, -27, -27, 38, -27, 31
+};
+
+/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
+ positive, shift that token. If negative, reduce the rule whose
+ number is the opposite. If YYTABLE_NINF, syntax error. */
+#define YYTABLE_NINF -1
+static const yytype_uint8 yytable[] =
+{
+ 47, 52, 53, 54, 55, 54, 55, 17, 52, 53,
+ 54, 55, 48, 49, 50, 56, 18, 38, 51, 28,
+ 20, 57, 39, 40, 29, 23, 58, 59, 60, 61,
+ 62, 30, 41, 42, 31, 43, 44, 2, 3, 52,
+ 53, 54, 55, 32, 22, 33, 26, 27, 34, 4,
+ 5, 6, 7, 8, 9, 46, 0, 0, 36
+};
+
+#define yypact_value_is_default(yystate) \
+ ((yystate) == (-27))
+
+#define yytable_value_is_error(yytable_value) \
+ YYID (0)
+
+static const yytype_int8 yycheck[] =
+{
+ 26, 3, 4, 5, 6, 5, 6, 11, 3, 4,
+ 5, 6, 38, 39, 40, 10, 19, 3, 44, 11,
+ 19, 23, 8, 9, 11, 19, 52, 53, 54, 55,
+ 56, 11, 18, 19, 11, 21, 22, 0, 1, 3,
+ 4, 5, 6, 11, 6, 11, 8, 9, 19, 12,
+ 13, 14, 15, 16, 17, 24, -1, -1, 19
+};
+
+/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+ symbol of state STATE-NUM. */
+static const yytype_uint8 yystos[] =
+{
+ 0, 25, 0, 1, 12, 13, 14, 15, 16, 17,
+ 26, 27, 28, 29, 30, 31, 32, 11, 19, 35,
+ 19, 37, 37, 19, 33, 39, 37, 37, 11, 11,
+ 11, 11, 11, 11, 19, 36, 19, 38, 3, 8,
+ 9, 18, 19, 21, 22, 34, 39, 34, 34, 34,
+ 34, 34, 3, 4, 5, 6, 10, 23, 34, 34,
+ 34, 34, 34
+};
+
+#define yyerrok (yyerrstatus = 0)
+#define yyclearin (yychar = YYEMPTY)
+#define YYEMPTY (-2)
+#define YYEOF 0
+
+#define YYACCEPT goto yyacceptlab
+#define YYABORT goto yyabortlab
+#define YYERROR goto yyerrorlab
+
+
+/* Like YYERROR except do call yyerror. This remains here temporarily
+ to ease the transition to the new meaning of YYERROR, for GCC.
+ Once GCC version 2 has supplanted version 1, this can go. However,
+ YYFAIL appears to be in use. Nevertheless, it is formally deprecated
+ in Bison 2.4.2's NEWS entry, where a plan to phase it out is
+ discussed. */
+
+#define YYFAIL goto yyerrlab
+#if defined YYFAIL
+ /* This is here to suppress warnings from the GCC cpp's
+ -Wunused-macros. Normally we don't worry about that warning, but
+ some users do, and we want to make it easy for users to remove
+ YYFAIL uses, which will produce warnings from Bison 2.5. */
+#endif
+
+#define YYRECOVERING() (!!yyerrstatus)
+
+#define YYBACKUP(Token, Value) \
+do \
+ if (yychar == YYEMPTY && yylen == 1) \
+ { \
+ yychar = (Token); \
+ yylval = (Value); \
+ YYPOPSTACK (1); \
+ goto yybackup; \
+ } \
+ else \
+ { \
+ yyerror (YY_("syntax error: cannot back up")); \
+ YYERROR; \
+ } \
+while (YYID (0))
+
+
+#define YYTERROR 1
+#define YYERRCODE 256
+
+
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+ If N is 0, then set CURRENT to the empty location which ends
+ the previous symbol: RHS[0] (always defined). */
+
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
+#ifndef YYLLOC_DEFAULT
+# define YYLLOC_DEFAULT(Current, Rhs, N) \
+ do \
+ if (YYID (N)) \
+ { \
+ (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
+ (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
+ (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
+ (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
+ } \
+ else \
+ { \
+ (Current).first_line = (Current).last_line = \
+ YYRHSLOC (Rhs, 0).last_line; \
+ (Current).first_column = (Current).last_column = \
+ YYRHSLOC (Rhs, 0).last_column; \
+ } \
+ while (YYID (0))
+#endif
+
+
+/* This macro is provided for backward compatibility. */
+
+#ifndef YY_LOCATION_PRINT
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+#endif
+
+
+/* YYLEX -- calling `yylex' with the right arguments. */
+
+#ifdef YYLEX_PARAM
+# define YYLEX yylex (YYLEX_PARAM)
+#else
+# define YYLEX yylex ()
+#endif
+
+/* Enable debugging if requested. */
+#if YYDEBUG
+
+# ifndef YYFPRINTF
+# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+# define YYFPRINTF fprintf
+# endif
+
+# define YYDPRINTF(Args) \
+do { \
+ if (yydebug) \
+ YYFPRINTF Args; \
+} while (YYID (0))
+
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
+do { \
+ if (yydebug) \
+ { \
+ YYFPRINTF (stderr, "%s ", Title); \
+ yy_symbol_print (stderr, \
+ Type, Value); \
+ YYFPRINTF (stderr, "\n"); \
+ } \
+} while (YYID (0))
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT. |
+`--------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_value_print (yyoutput, yytype, yyvaluep)
+ FILE *yyoutput;
+ int yytype;
+ YYSTYPE const * const yyvaluep;
+#endif
+{
+ if (!yyvaluep)
+ return;
+# ifdef YYPRINT
+ if (yytype < YYNTOKENS)
+ YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
+# else
+ YYUSE (yyoutput);
+# endif
+ switch (yytype)
+ {
+ default:
+ break;
+ }
+}
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT. |
+`--------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_print (yyoutput, yytype, yyvaluep)
+ FILE *yyoutput;
+ int yytype;
+ YYSTYPE const * const yyvaluep;
+#endif
+{
+ if (yytype < YYNTOKENS)
+ YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
+ else
+ YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
+
+ yy_symbol_value_print (yyoutput, yytype, yyvaluep);
+ YYFPRINTF (yyoutput, ")");
+}
+
+/*------------------------------------------------------------------.
+| yy_stack_print -- Print the state stack from its BOTTOM up to its |
+| TOP (included). |
+`------------------------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop)
+#else
+static void
+yy_stack_print (yybottom, yytop)
+ yytype_int16 *yybottom;
+ yytype_int16 *yytop;
+#endif
+{
+ YYFPRINTF (stderr, "Stack now");
+ for (; yybottom <= yytop; yybottom++)
+ {
+ int yybot = *yybottom;
+ YYFPRINTF (stderr, " %d", yybot);
+ }
+ YYFPRINTF (stderr, "\n");
+}
+
+# define YY_STACK_PRINT(Bottom, Top) \
+do { \
+ if (yydebug) \
+ yy_stack_print ((Bottom), (Top)); \
+} while (YYID (0))
+
+
+/*------------------------------------------------.
+| Report that the YYRULE is going to be reduced. |
+`------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
+#else
+static void
+yy_reduce_print (yyvsp, yyrule)
+ YYSTYPE *yyvsp;
+ int yyrule;
+#endif
+{
+ int yynrhs = yyr2[yyrule];
+ int yyi;
+ unsigned long int yylno = yyrline[yyrule];
+ YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
+ yyrule - 1, yylno);
+ /* The symbols being reduced. */
+ for (yyi = 0; yyi < yynrhs; yyi++)
+ {
+ YYFPRINTF (stderr, " $%d = ", yyi + 1);
+ yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
+ &(yyvsp[(yyi + 1) - (yynrhs)])
+ );
+ YYFPRINTF (stderr, "\n");
+ }
+}
+
+# define YY_REDUCE_PRINT(Rule) \
+do { \
+ if (yydebug) \
+ yy_reduce_print (yyvsp, Rule); \
+} while (YYID (0))
+
+/* Nonzero means print parse trace. It is left uninitialized so that
+ multiple parsers can coexist. */
+int yydebug;
+#else /* !YYDEBUG */
+# define YYDPRINTF(Args)
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
+# define YY_STACK_PRINT(Bottom, Top)
+# define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+
+/* YYINITDEPTH -- initial size of the parser's stacks. */
+#ifndef YYINITDEPTH
+# define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+ if the built-in stack extension method is used).
+
+ Do not make this value too large; the results are undefined if
+ YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+ evaluated with infinite-precision integer arithmetic. */
+
+#ifndef YYMAXDEPTH
+# define YYMAXDEPTH 10000
+#endif
+
+
+#if YYERROR_VERBOSE
+
+# ifndef yystrlen
+# if defined __GLIBC__ && defined _STRING_H
+# define yystrlen strlen
+# else
+/* Return the length of YYSTR. */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static YYSIZE_T
+yystrlen (const char *yystr)
+#else
+static YYSIZE_T
+yystrlen (yystr)
+ const char *yystr;
+#endif
+{
+ YYSIZE_T yylen;
+ for (yylen = 0; yystr[yylen]; yylen++)
+ continue;
+ return yylen;
+}
+# endif
+# endif
+
+# ifndef yystpcpy
+# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
+# define yystpcpy stpcpy
+# else
+/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
+ YYDEST. */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static char *
+yystpcpy (char *yydest, const char *yysrc)
+#else
+static char *
+yystpcpy (yydest, yysrc)
+ char *yydest;
+ const char *yysrc;
+#endif
+{
+ char *yyd = yydest;
+ const char *yys = yysrc;
+
+ while ((*yyd++ = *yys++) != '\0')
+ continue;
+
+ return yyd - 1;
+}
+# endif
+# endif
+
+# ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+ quotes and backslashes, so that it's suitable for yyerror. The
+ heuristic is that double-quoting is unnecessary unless the string
+ contains an apostrophe, a comma, or backslash (other than
+ backslash-backslash). YYSTR is taken from yytname. If YYRES is
+ null, do not copy; instead, return the length of what the result
+ would have been. */
+static YYSIZE_T
+yytnamerr (char *yyres, const char *yystr)
+{
+ if (*yystr == '"')
+ {
+ YYSIZE_T yyn = 0;
+ char const *yyp = yystr;
+
+ for (;;)
+ switch (*++yyp)
+ {
+ case '\'':
+ case ',':
+ goto do_not_strip_quotes;
+
+ case '\\':
+ if (*++yyp != '\\')
+ goto do_not_strip_quotes;
+ /* Fall through. */
+ default:
+ if (yyres)
+ yyres[yyn] = *yyp;
+ yyn++;
+ break;
+
+ case '"':
+ if (yyres)
+ yyres[yyn] = '\0';
+ return yyn;
+ }
+ do_not_strip_quotes: ;
+ }
+
+ if (! yyres)
+ return yystrlen (yystr);
+
+ return yystpcpy (yyres, yystr) - yyres;
+}
+# endif
+
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+ about the unexpected token YYTOKEN for the state stack whose top is
+ YYSSP.
+
+ Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is
+ not large enough to hold the message. In that case, also set
+ *YYMSG_ALLOC to the required number of bytes. Return 2 if the
+ required number of bytes is too large to store. */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
+ yytype_int16 *yyssp, int yytoken)
+{
+ YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
+ YYSIZE_T yysize = yysize0;
+ YYSIZE_T yysize1;
+ enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
+ /* Internationalized format string. */
+ const char *yyformat = 0;
+ /* Arguments of yyformat. */
+ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+ /* Number of reported tokens (one for the "unexpected", one per
+ "expected"). */
+ int yycount = 0;
+
+ /* There are many possibilities here to consider:
+ - Assume YYFAIL is not used. It's too flawed to consider. See
+ <http://lists.gnu.org/archive/html/bison-patches/2009-12/msg00024.html>
+ for details. YYERROR is fine as it does not invoke this
+ function.
+ - If this state is a consistent state with a default action, then
+ the only way this function was invoked is if the default action
+ is an error action. In that case, don't check for expected
+ tokens because there are none.
+ - The only way there can be no lookahead present (in yychar) is if
+ this state is a consistent state with a default action. Thus,
+ detecting the absence of a lookahead is sufficient to determine
+ that there is no unexpected or expected token to report. In that
+ case, just report a simple "syntax error".
+ - Don't assume there isn't a lookahead just because this state is a
+ consistent state with a default action. There might have been a
+ previous inconsistent state, consistent state with a non-default
+ action, or user semantic action that manipulated yychar.
+ - Of course, the expected token list depends on states to have
+ correct lookahead information, and it depends on the parser not
+ to perform extra reductions after fetching a lookahead from the
+ scanner and before detecting a syntax error. Thus, state merging
+ (from LALR or IELR) and default reductions corrupt the expected
+ token list. However, the list is correct for canonical LR with
+ one exception: it will still contain any token that will not be
+ accepted due to an error action in a later state.
+ */
+ if (yytoken != YYEMPTY)
+ {
+ int yyn = yypact[*yyssp];
+ yyarg[yycount++] = yytname[yytoken];
+ if (!yypact_value_is_default (yyn))
+ {
+ /* Start YYX at -YYN if negative to avoid negative indexes in
+ YYCHECK. In other words, skip the first -YYN actions for
+ this state because they are default actions. */
+ int yyxbegin = yyn < 0 ? -yyn : 0;
+ /* Stay within bounds of both yycheck and yytname. */
+ int yychecklim = YYLAST - yyn + 1;
+ int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+ int yyx;
+
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR
+ && !yytable_value_is_error (yytable[yyx + yyn]))
+ {
+ if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+ {
+ yycount = 1;
+ yysize = yysize0;
+ break;
+ }
+ yyarg[yycount++] = yytname[yyx];
+ yysize1 = yysize + yytnamerr (0, yytname[yyx]);
+ if (! (yysize <= yysize1
+ && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+ return 2;
+ yysize = yysize1;
+ }
+ }
+ }
+
+ switch (yycount)
+ {
+# define YYCASE_(N, S) \
+ case N: \
+ yyformat = S; \
+ break
+ YYCASE_(0, YY_("syntax error"));
+ YYCASE_(1, YY_("syntax error, unexpected %s"));
+ YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s"));
+ YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s"));
+ YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s"));
+ YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"));
+# undef YYCASE_
+ }
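+  /* Illustration: with yycount == 2 and this grammar's token names,
+     the filled-in message would read e.g. "syntax error, unexpected
+     NAME, expecting EOL"; the "%s"s above are substituted below from
+     yyarg[] (entries of yytname, cleaned up by yytnamerr). */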
+
+ yysize1 = yysize + yystrlen (yyformat);
+ if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+ return 2;
+ yysize = yysize1;
+
+ if (*yymsg_alloc < yysize)
+ {
+ *yymsg_alloc = 2 * yysize;
+ if (! (yysize <= *yymsg_alloc
+ && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
+ *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
+ return 1;
+ }
+
+ /* Avoid sprintf, as that infringes on the user's name space.
+ Don't have undefined behavior even if the translation
+ produced a string with the wrong number of "%s"s. */
+ {
+ char *yyp = *yymsg;
+ int yyi = 0;
+ while ((*yyp = *yyformat) != '\0')
+ if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+ {
+ yyp += yytnamerr (yyp, yyarg[yyi++]);
+ yyformat += 2;
+ }
+ else
+ {
+ yyp++;
+ yyformat++;
+ }
+ }
+ return 0;
+}
+#endif /* YYERROR_VERBOSE */
+
+/*-----------------------------------------------.
+| Release the memory associated to this symbol. |
+`-----------------------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
+#else
+static void
+yydestruct (yymsg, yytype, yyvaluep)
+ const char *yymsg;
+ int yytype;
+ YYSTYPE *yyvaluep;
+#endif
+{
+ YYUSE (yyvaluep);
+
+ if (!yymsg)
+ yymsg = "Deleting";
+ YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
+
+ switch (yytype)
+ {
+
+ default:
+ break;
+ }
+}
+
+
+/* Prevent warnings from -Wmissing-prototypes. */
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+
+/* The lookahead symbol. */
+int yychar;
+
+/* The semantic value of the lookahead symbol. */
+YYSTYPE yylval;
+
+/* Number of syntax errors so far. */
+int yynerrs;
+
+
+/*----------.
+| yyparse. |
+`----------*/
+
+#ifdef YYPARSE_PARAM
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void *YYPARSE_PARAM)
+#else
+int
+yyparse (YYPARSE_PARAM)
+ void *YYPARSE_PARAM;
+#endif
+#else /* ! YYPARSE_PARAM */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void)
+#else
+int
+yyparse ()
+
+#endif
+#endif
+{
+ int yystate;
+ /* Number of tokens to shift before error messages enabled. */
+ int yyerrstatus;
+
+ /* The stacks and their tools:
+ `yyss': related to states.
+ `yyvs': related to semantic values.
+
+ Refer to the stacks thru separate pointers, to allow yyoverflow
+ to reallocate them elsewhere. */
+
+ /* The state stack. */
+ yytype_int16 yyssa[YYINITDEPTH];
+ yytype_int16 *yyss;
+ yytype_int16 *yyssp;
+
+ /* The semantic value stack. */
+ YYSTYPE yyvsa[YYINITDEPTH];
+ YYSTYPE *yyvs;
+ YYSTYPE *yyvsp;
+
+ YYSIZE_T yystacksize;
+
+ int yyn;
+ int yyresult;
+ /* Lookahead token as an internal (translated) token number. */
+ int yytoken;
+ /* The variables used to return semantic value and location from the
+ action routines. */
+ YYSTYPE yyval;
+
+#if YYERROR_VERBOSE
+ /* Buffer for error messages, and its allocated size. */
+ char yymsgbuf[128];
+ char *yymsg = yymsgbuf;
+ YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
+#endif
+
+#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
+
+ /* The number of symbols on the RHS of the reduced rule.
+ Keep to zero when no symbol should be popped. */
+ int yylen = 0;
+
+ yytoken = 0;
+ yyss = yyssa;
+ yyvs = yyvsa;
+ yystacksize = YYINITDEPTH;
+
+ YYDPRINTF ((stderr, "Starting parse\n"));
+
+ yystate = 0;
+ yyerrstatus = 0;
+ yynerrs = 0;
+ yychar = YYEMPTY; /* Cause a token to be read. */
+
+ /* Initialize stack pointers.
+ Waste one element of value and location stack
+ so that they stay on the same level as the state stack.
+ The wasted elements are never initialized. */
+ yyssp = yyss;
+ yyvsp = yyvs;
+
+ goto yysetstate;
+
+/*------------------------------------------------------------.
+| yynewstate -- Push a new state, which is found in yystate. |
+`------------------------------------------------------------*/
+ yynewstate:
+ /* In all cases, when you get here, the value and location stacks
+ have just been pushed. So pushing a state here evens the stacks. */
+ yyssp++;
+
+ yysetstate:
+ *yyssp = yystate;
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ {
+ /* Get the current used size of the three stacks, in elements. */
+ YYSIZE_T yysize = yyssp - yyss + 1;
+
+#ifdef yyoverflow
+ {
+ /* Give user a chance to reallocate the stack. Use copies of
+ these so that the &'s don't force the real ones into
+ memory. */
+ YYSTYPE *yyvs1 = yyvs;
+ yytype_int16 *yyss1 = yyss;
+
+ /* Each stack pointer address is followed by the size of the
+ data in use in that stack, in bytes. This used to be a
+ conditional around just the two extra args, but that might
+ be undefined if yyoverflow is a macro. */
+ yyoverflow (YY_("memory exhausted"),
+ &yyss1, yysize * sizeof (*yyssp),
+ &yyvs1, yysize * sizeof (*yyvsp),
+ &yystacksize);
+
+ yyss = yyss1;
+ yyvs = yyvs1;
+ }
+#else /* no yyoverflow */
+# ifndef YYSTACK_RELOCATE
+ goto yyexhaustedlab;
+# else
+ /* Extend the stack our own way. */
+ if (YYMAXDEPTH <= yystacksize)
+ goto yyexhaustedlab;
+ yystacksize *= 2;
+ if (YYMAXDEPTH < yystacksize)
+ yystacksize = YYMAXDEPTH;
+
+ {
+ yytype_int16 *yyss1 = yyss;
+ union yyalloc *yyptr =
+ (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
+ if (! yyptr)
+ goto yyexhaustedlab;
+ YYSTACK_RELOCATE (yyss_alloc, yyss);
+ YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+# undef YYSTACK_RELOCATE
+ if (yyss1 != yyssa)
+ YYSTACK_FREE (yyss1);
+ }
+# endif
+#endif /* no yyoverflow */
+
+ yyssp = yyss + yysize - 1;
+ yyvsp = yyvs + yysize - 1;
+
+ YYDPRINTF ((stderr, "Stack size increased to %lu\n",
+ (unsigned long int) yystacksize));
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ YYABORT;
+ }
+
+ YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+
+ if (yystate == YYFINAL)
+ YYACCEPT;
+
+ goto yybackup;
+
+/*-----------.
+| yybackup. |
+`-----------*/
+yybackup:
+
+ /* Do appropriate processing given the current state. Read a
+ lookahead token if we need one and don't already have one. */
+
+ /* First try to decide what to do without reference to lookahead token. */
+ yyn = yypact[yystate];
+ if (yypact_value_is_default (yyn))
+ goto yydefault;
+
+  /* Not known => get a lookahead token if we don't already have one. */
+
+ /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
+ if (yychar == YYEMPTY)
+ {
+ YYDPRINTF ((stderr, "Reading a token: "));
+ yychar = YYLEX;
+ }
+
+ if (yychar <= YYEOF)
+ {
+ yychar = yytoken = YYEOF;
+ YYDPRINTF ((stderr, "Now at end of input.\n"));
+ }
+ else
+ {
+ yytoken = YYTRANSLATE (yychar);
+ YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
+ }
+
+ /* If the proper action on seeing token YYTOKEN is to reduce or to
+ detect an error, take that action. */
+ yyn += yytoken;
+ if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+ goto yydefault;
+ yyn = yytable[yyn];
+ if (yyn <= 0)
+ {
+ if (yytable_value_is_error (yyn))
+ goto yyerrlab;
+ yyn = -yyn;
+ goto yyreduce;
+ }
+
+ /* Count tokens shifted since error; after three, turn off error
+ status. */
+ if (yyerrstatus)
+ yyerrstatus--;
+
+ /* Shift the lookahead token. */
+ YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+
+ /* Discard the shifted token. */
+ yychar = YYEMPTY;
+
+ yystate = yyn;
+ *++yyvsp = yylval;
+
+ goto yynewstate;
+
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state. |
+`-----------------------------------------------------------*/
+yydefault:
+ yyn = yydefact[yystate];
+ if (yyn == 0)
+ goto yyerrlab;
+ goto yyreduce;
+
+
+/*-----------------------------.
+| yyreduce -- Do a reduction. |
+`-----------------------------*/
+yyreduce:
+ /* yyn is the number of a rule to reduce with. */
+ yylen = yyr2[yyn];
+
+ /* If YYLEN is nonzero, implement the default value of the action:
+ `$$ = $1'.
+
+ Otherwise, the following line sets YYVAL to garbage.
+ This behavior is undocumented and Bison
+ users should not rely upon it. Assigning to YYVAL
+ unconditionally makes the parser a bit smaller, and it avoids a
+ GCC warning that YYVAL may be used uninitialized. */
+ yyval = yyvsp[1-yylen];
+
+
+ YY_REDUCE_PRINT (yyn);
+ switch (yyn)
+ {
+ case 11:
+
+/* Line 1806 of yacc.c */
+#line 133 "lib/conf-parse.y"
+ { sensors_bus new_el;
+ new_el.line = (yyvsp[(1) - (3)].line);
+ new_el.bus = (yyvsp[(2) - (3)].bus);
+ new_el.adapter = (yyvsp[(3) - (3)].name);
+ bus_add_el(&new_el);
+ }
+ break;
+
+ case 12:
+
+/* Line 1806 of yacc.c */
+#line 142 "lib/conf-parse.y"
+ { sensors_label new_el;
+ if (!current_chip) {
+ sensors_yyerror("Label statement before first chip statement");
+ free((yyvsp[(2) - (3)].name));
+ free((yyvsp[(3) - (3)].name));
+ YYERROR;
+ }
+ new_el.line = (yyvsp[(1) - (3)].line);
+ new_el.name = (yyvsp[(2) - (3)].name);
+ new_el.value = (yyvsp[(3) - (3)].name);
+ label_add_el(&new_el);
+ }
+ break;
+
+ case 13:
+
+/* Line 1806 of yacc.c */
+#line 157 "lib/conf-parse.y"
+ { sensors_set new_el;
+ if (!current_chip) {
+ sensors_yyerror("Set statement before first chip statement");
+ free((yyvsp[(2) - (3)].name));
+ sensors_free_expr((yyvsp[(3) - (3)].expr));
+ YYERROR;
+ }
+ new_el.line = (yyvsp[(1) - (3)].line);
+ new_el.name = (yyvsp[(2) - (3)].name);
+ new_el.value = (yyvsp[(3) - (3)].expr);
+ set_add_el(&new_el);
+ }
+ break;
+
+ case 14:
+
+/* Line 1806 of yacc.c */
+#line 172 "lib/conf-parse.y"
+ { sensors_compute new_el;
+ if (!current_chip) {
+ sensors_yyerror("Compute statement before first chip statement");
+ free((yyvsp[(2) - (5)].name));
+ sensors_free_expr((yyvsp[(3) - (5)].expr));
+ sensors_free_expr((yyvsp[(5) - (5)].expr));
+ YYERROR;
+ }
+ new_el.line = (yyvsp[(1) - (5)].line);
+ new_el.name = (yyvsp[(2) - (5)].name);
+ new_el.from_proc = (yyvsp[(3) - (5)].expr);
+ new_el.to_proc = (yyvsp[(5) - (5)].expr);
+ compute_add_el(&new_el);
+ }
+ break;
+
+ case 15:
+
+/* Line 1806 of yacc.c */
+#line 189 "lib/conf-parse.y"
+ { sensors_ignore new_el;
+ if (!current_chip) {
+ sensors_yyerror("Ignore statement before first chip statement");
+ free((yyvsp[(2) - (2)].name));
+ YYERROR;
+ }
+ new_el.line = (yyvsp[(1) - (2)].line);
+ new_el.name = (yyvsp[(2) - (2)].name);
+ ignore_add_el(&new_el);
+ }
+ break;
+
+ case 16:
+
+/* Line 1806 of yacc.c */
+#line 202 "lib/conf-parse.y"
+ { sensors_chip new_el;
+ new_el.line = (yyvsp[(1) - (2)].line);
+ new_el.labels = NULL;
+ new_el.sets = NULL;
+ new_el.computes = NULL;
+ new_el.ignores = NULL;
+ new_el.labels_count = new_el.labels_max = 0;
+ new_el.sets_count = new_el.sets_max = 0;
+ new_el.computes_count = new_el.computes_max = 0;
+ new_el.ignores_count = new_el.ignores_max = 0;
+ new_el.chips = (yyvsp[(2) - (2)].chips);
+ chip_add_el(&new_el);
+ current_chip = sensors_config_chips +
+ sensors_config_chips_count - 1;
+ }
+ break;
+
+ case 17:
+
+/* Line 1806 of yacc.c */
+#line 220 "lib/conf-parse.y"
+ {
+ (yyval.chips).fits = NULL;
+ (yyval.chips).fits_count = (yyval.chips).fits_max = 0;
+ fits_add_el(&(yyvsp[(1) - (1)].chip),(yyval.chips));
+ }
+ break;
+
+ case 18:
+
+/* Line 1806 of yacc.c */
+#line 226 "lib/conf-parse.y"
+ { (yyval.chips) = (yyvsp[(1) - (2)].chips);
+ fits_add_el(&(yyvsp[(2) - (2)].chip),(yyval.chips));
+ }
+ break;
+
+ case 19:
+
+/* Line 1806 of yacc.c */
+#line 232 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->data.val = (yyvsp[(1) - (1)].value);
+ (yyval.expr)->kind = sensors_kind_val;
+ }
+ break;
+
+ case 20:
+
+/* Line 1806 of yacc.c */
+#line 237 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->data.var = (yyvsp[(1) - (1)].name);
+ (yyval.expr)->kind = sensors_kind_var;
+ }
+ break;
+
+ case 21:
+
+/* Line 1806 of yacc.c */
+#line 242 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->kind = sensors_kind_source;
+ }
+ break;
+
+ case 22:
+
+/* Line 1806 of yacc.c */
+#line 246 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->kind = sensors_kind_sub;
+ (yyval.expr)->data.subexpr.op = sensors_add;
+ (yyval.expr)->data.subexpr.sub1 = (yyvsp[(1) - (3)].expr);
+ (yyval.expr)->data.subexpr.sub2 = (yyvsp[(3) - (3)].expr);
+ }
+ break;
+
+ case 23:
+
+/* Line 1806 of yacc.c */
+#line 253 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->kind = sensors_kind_sub;
+ (yyval.expr)->data.subexpr.op = sensors_sub;
+ (yyval.expr)->data.subexpr.sub1 = (yyvsp[(1) - (3)].expr);
+ (yyval.expr)->data.subexpr.sub2 = (yyvsp[(3) - (3)].expr);
+ }
+ break;
+
+ case 24:
+
+/* Line 1806 of yacc.c */
+#line 260 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->kind = sensors_kind_sub;
+ (yyval.expr)->data.subexpr.op = sensors_multiply;
+ (yyval.expr)->data.subexpr.sub1 = (yyvsp[(1) - (3)].expr);
+ (yyval.expr)->data.subexpr.sub2 = (yyvsp[(3) - (3)].expr);
+ }
+ break;
+
+ case 25:
+
+/* Line 1806 of yacc.c */
+#line 267 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->kind = sensors_kind_sub;
+ (yyval.expr)->data.subexpr.op = sensors_divide;
+ (yyval.expr)->data.subexpr.sub1 = (yyvsp[(1) - (3)].expr);
+ (yyval.expr)->data.subexpr.sub2 = (yyvsp[(3) - (3)].expr);
+ }
+ break;
+
+ case 26:
+
+/* Line 1806 of yacc.c */
+#line 274 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->kind = sensors_kind_sub;
+ (yyval.expr)->data.subexpr.op = sensors_negate;
+ (yyval.expr)->data.subexpr.sub1 = (yyvsp[(2) - (2)].expr);
+ (yyval.expr)->data.subexpr.sub2 = NULL;
+ }
+ break;
+
+ case 27:
+
+/* Line 1806 of yacc.c */
+#line 281 "lib/conf-parse.y"
+ { (yyval.expr) = (yyvsp[(2) - (3)].expr); }
+ break;
+
+ case 28:
+
+/* Line 1806 of yacc.c */
+#line 283 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->kind = sensors_kind_sub;
+ (yyval.expr)->data.subexpr.op = sensors_exp;
+ (yyval.expr)->data.subexpr.sub1 = (yyvsp[(2) - (2)].expr);
+ (yyval.expr)->data.subexpr.sub2 = NULL;
+ }
+ break;
+
+ case 29:
+
+/* Line 1806 of yacc.c */
+#line 290 "lib/conf-parse.y"
+ { (yyval.expr) = malloc_expr();
+ (yyval.expr)->kind = sensors_kind_sub;
+ (yyval.expr)->data.subexpr.op = sensors_log;
+ (yyval.expr)->data.subexpr.sub1 = (yyvsp[(2) - (2)].expr);
+ (yyval.expr)->data.subexpr.sub2 = NULL;
+ }
+ break;
+
+ case 30:
+
+/* Line 1806 of yacc.c */
+#line 299 "lib/conf-parse.y"
+ { int res = sensors_parse_bus_id((yyvsp[(1) - (1)].name),&(yyval.bus));
+ free((yyvsp[(1) - (1)].name));
+ if (res) {
+ sensors_yyerror("Parse error in bus id");
+ YYERROR;
+ }
+ }
+ break;
+
+ case 31:
+
+/* Line 1806 of yacc.c */
+#line 309 "lib/conf-parse.y"
+ { (yyval.name) = (yyvsp[(1) - (1)].name); }
+ break;
+
+ case 32:
+
+/* Line 1806 of yacc.c */
+#line 313 "lib/conf-parse.y"
+ { (yyval.name) = (yyvsp[(1) - (1)].name); }
+ break;
+
+ case 33:
+
+/* Line 1806 of yacc.c */
+#line 317 "lib/conf-parse.y"
+ { (yyval.name) = (yyvsp[(1) - (1)].name); }
+ break;
+
+ case 34:
+
+/* Line 1806 of yacc.c */
+#line 321 "lib/conf-parse.y"
+ { int res = sensors_parse_chip_name((yyvsp[(1) - (1)].name),&(yyval.chip));
+ free((yyvsp[(1) - (1)].name));
+ if (res) {
+ sensors_yyerror("Parse error in chip name");
+ YYERROR;
+ }
+ }
+ break;
+
+
+
+/* Line 1806 of yacc.c */
+#line 1793 "lib/conf-parse.c"
+ default: break;
+ }
+ /* User semantic actions sometimes alter yychar, and that requires
+ that yytoken be updated with the new translation. We take the
+ approach of translating immediately before every use of yytoken.
+ One alternative is translating here after every semantic action,
+ but that translation would be missed if the semantic action invokes
+ YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
+ if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an
+ incorrect destructor might then be invoked immediately. In the
+ case of YYERROR or YYBACKUP, subsequent parser actions might lead
+ to an incorrect destructor call or verbose syntax error message
+ before the lookahead is translated. */
+ YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
+
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+
+ *++yyvsp = yyval;
+
+ /* Now `shift' the result of the reduction. Determine what state
+ that goes to, based on the state we popped back to and the rule
+ number reduced by. */
+
+ yyn = yyr1[yyn];
+
+ yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
+ if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
+ yystate = yytable[yystate];
+ else
+ yystate = yydefgoto[yyn - YYNTOKENS];
+
+ goto yynewstate;
+
+
+/*------------------------------------.
+| yyerrlab -- here on detecting error |
+`------------------------------------*/
+yyerrlab:
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
+
+ /* If not already recovering from an error, report this error. */
+ if (!yyerrstatus)
+ {
+ ++yynerrs;
+#if ! YYERROR_VERBOSE
+ yyerror (YY_("syntax error"));
+#else
+# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \
+ yyssp, yytoken)
+ {
+ char const *yymsgp = YY_("syntax error");
+ int yysyntax_error_status;
+ yysyntax_error_status = YYSYNTAX_ERROR;
+ if (yysyntax_error_status == 0)
+ yymsgp = yymsg;
+ else if (yysyntax_error_status == 1)
+ {
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+ yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
+ if (!yymsg)
+ {
+ yymsg = yymsgbuf;
+ yymsg_alloc = sizeof yymsgbuf;
+ yysyntax_error_status = 2;
+ }
+ else
+ {
+ yysyntax_error_status = YYSYNTAX_ERROR;
+ yymsgp = yymsg;
+ }
+ }
+ yyerror (yymsgp);
+ if (yysyntax_error_status == 2)
+ goto yyexhaustedlab;
+ }
+# undef YYSYNTAX_ERROR
+#endif
+ }
+
+
+
+ if (yyerrstatus == 3)
+ {
+ /* If just tried and failed to reuse lookahead token after an
+ error, discard it. */
+
+ if (yychar <= YYEOF)
+ {
+ /* Return failure if at end of input. */
+ if (yychar == YYEOF)
+ YYABORT;
+ }
+ else
+ {
+ yydestruct ("Error: discarding",
+ yytoken, &yylval);
+ yychar = YYEMPTY;
+ }
+ }
+
+ /* Else will try to reuse lookahead token after shifting the error
+ token. */
+ goto yyerrlab1;
+
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR. |
+`---------------------------------------------------*/
+yyerrorlab:
+
+ /* Pacify compilers like GCC when the user code never invokes
+ YYERROR and the label yyerrorlab therefore never appears in user
+ code. */
+ if (/*CONSTCOND*/ 0)
+ goto yyerrorlab;
+
+ /* Do not reclaim the symbols of the rule which action triggered
+ this YYERROR. */
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+ yystate = *yyssp;
+ goto yyerrlab1;
+
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR. |
+`-------------------------------------------------------------*/
+yyerrlab1:
+ yyerrstatus = 3; /* Each real token shifted decrements this. */
+
+ for (;;)
+ {
+ yyn = yypact[yystate];
+ if (!yypact_value_is_default (yyn))
+ {
+ yyn += YYTERROR;
+ if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
+ {
+ yyn = yytable[yyn];
+ if (0 < yyn)
+ break;
+ }
+ }
+
+ /* Pop the current state because it cannot handle the error token. */
+ if (yyssp == yyss)
+ YYABORT;
+
+
+ yydestruct ("Error: popping",
+ yystos[yystate], yyvsp);
+ YYPOPSTACK (1);
+ yystate = *yyssp;
+ YY_STACK_PRINT (yyss, yyssp);
+ }
+
+ *++yyvsp = yylval;
+
+
+ /* Shift the error token. */
+ YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+
+ yystate = yyn;
+ goto yynewstate;
+
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here. |
+`-------------------------------------*/
+yyacceptlab:
+ yyresult = 0;
+ goto yyreturn;
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here. |
+`-----------------------------------*/
+yyabortlab:
+ yyresult = 1;
+ goto yyreturn;
+
+#if !defined(yyoverflow) || YYERROR_VERBOSE
+/*-------------------------------------------------.
+| yyexhaustedlab -- memory exhaustion comes here. |
+`-------------------------------------------------*/
+yyexhaustedlab:
+ yyerror (YY_("memory exhausted"));
+ yyresult = 2;
+ /* Fall through. */
+#endif
+
+yyreturn:
+ if (yychar != YYEMPTY)
+ {
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = YYTRANSLATE (yychar);
+ yydestruct ("Cleanup: discarding lookahead",
+ yytoken, &yylval);
+ }
+ /* Do not reclaim the symbols of the rule which action triggered
+ this YYABORT or YYACCEPT. */
+ YYPOPSTACK (yylen);
+ YY_STACK_PRINT (yyss, yyssp);
+ while (yyssp != yyss)
+ {
+ yydestruct ("Cleanup: popping",
+ yystos[*yyssp], yyvsp);
+ YYPOPSTACK (1);
+ }
+#ifndef yyoverflow
+ if (yyss != yyssa)
+ YYSTACK_FREE (yyss);
+#endif
+#if YYERROR_VERBOSE
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+#endif
+ /* Make sure YYID is used. */
+ return YYID (yyresult);
+}
+
+
+
+/* Line 2067 of yacc.c */
+#line 330 "lib/conf-parse.y"
+
+
+void sensors_yyerror(const char *err)
+{
+ if (sensors_lex_error[0]) {
+ sensors_parse_error_wfn(sensors_lex_error, sensors_yyfilename, sensors_yylineno);
+ sensors_lex_error[0] = '\0';
+ } else
+ sensors_parse_error_wfn(err, sensors_yyfilename, sensors_yylineno);
+}
+
+sensors_expr *malloc_expr(void)
+{
+ sensors_expr *res = malloc(sizeof(sensors_expr));
+ if (! res)
+ sensors_fatal_error(__func__, "Allocating a new expression");
+ return res;
+}
+
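
The shift/reduce dispatch in yybackup above is pure table lookup: a
state's yypact entry gives an offset into yytable, and yycheck guards
each slot so that out-of-range or foreign entries fall back to the
state's default action. A toy restatement in C, with made-up tables
(the real yypact/yytable/yycheck contents are generated by Bison):

    #include <stdio.h>

    enum { PACT_NINF = -99, TBLSIZE = 4 };

    static const int pact[]  = { 0, PACT_NINF };  /* per-state offset */
    static const int table[] = { 1, -2, 0, 0 };   /* >0 shift, <0 reduce */
    static const int check[] = { 0, 1, -1, -1 };  /* owning token */

    static const char *action(int state, int token)
    {
        int n = pact[state];
        if (n == PACT_NINF)                /* yypact_value_is_default */
            return "default action";
        n += token;
        if (n < 0 || TBLSIZE <= n || check[n] != token)
            return "default action";
        return table[n] > 0 ? "shift" : "reduce";
    }

    int main(void)
    {
        printf("%s\n", action(0, 0));      /* shift */
        printf("%s\n", action(0, 1));      /* reduce */
        printf("%s\n", action(1, 0));      /* default action */
        return 0;
    }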
diff --git a/tools/gator/daemon/libsensors/conf-parse.h b/tools/gator/daemon/libsensors/conf-parse.h
new file mode 100644
index 000000000000..89c9c1a0d59f
--- /dev/null
+++ b/tools/gator/daemon/libsensors/conf-parse.h
@@ -0,0 +1,84 @@
+/* A Bison parser, made by GNU Bison 2.5. */
+
+/* Bison interface for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ NEG = 258,
+ EOL = 259,
+ BUS = 260,
+ LABEL = 261,
+ SET = 262,
+ CHIP = 263,
+ COMPUTE = 264,
+ IGNORE = 265,
+ FLOAT = 266,
+ NAME = 267,
+ ERROR = 268
+ };
+#endif
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+/* Line 2068 of yacc.c */
+#line 79 "lib/conf-parse.y"
+
+ double value;
+ char *name;
+ void *nothing;
+ sensors_chip_name_list chips;
+ sensors_expr *expr;
+ sensors_bus_id bus;
+ sensors_chip_name chip;
+ sensors_config_line line;
+
+
+
+/* Line 2068 of yacc.c */
+#line 76 "lib/conf-parse.h"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+extern YYSTYPE sensors_yylval;
+
+
diff --git a/tools/gator/daemon/libsensors/conf-parse.y b/tools/gator/daemon/libsensors/conf-parse.y
new file mode 100644
index 000000000000..1937f544dc66
--- /dev/null
+++ b/tools/gator/daemon/libsensors/conf-parse.y
@@ -0,0 +1,347 @@
+%{
+/*
+ conf-parse.y - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#define YYERROR_VERBOSE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "data.h"
+#include "general.h"
+#include "error.h"
+#include "conf.h"
+#include "access.h"
+#include "init.h"
+
+static void sensors_yyerror(const char *err);
+static sensors_expr *malloc_expr(void);
+
+static sensors_chip *current_chip = NULL;
+
+#define bus_add_el(el) sensors_add_array_el(el,\
+ &sensors_config_busses,\
+ &sensors_config_busses_count,\
+ &sensors_config_busses_max,\
+ sizeof(sensors_bus))
+#define label_add_el(el) sensors_add_array_el(el,\
+ &current_chip->labels,\
+ &current_chip->labels_count,\
+ &current_chip->labels_max,\
+ sizeof(sensors_label));
+#define set_add_el(el) sensors_add_array_el(el,\
+ &current_chip->sets,\
+ &current_chip->sets_count,\
+ &current_chip->sets_max,\
+ sizeof(sensors_set));
+#define compute_add_el(el) sensors_add_array_el(el,\
+ &current_chip->computes,\
+ &current_chip->computes_count,\
+ &current_chip->computes_max,\
+ sizeof(sensors_compute));
+#define ignore_add_el(el) sensors_add_array_el(el,\
+ &current_chip->ignores,\
+ &current_chip->ignores_count,\
+ &current_chip->ignores_max,\
+ sizeof(sensors_ignore));
+#define chip_add_el(el) sensors_add_array_el(el,\
+ &sensors_config_chips,\
+ &sensors_config_chips_count,\
+ &sensors_config_chips_max,\
+ sizeof(sensors_chip));
+
+#define fits_add_el(el,list) sensors_add_array_el(el,\
+ &(list).fits,\
+ &(list).fits_count,\
+ &(list).fits_max, \
+ sizeof(sensors_chip_name));
+
+%}
+
+%union {
+ double value;
+ char *name;
+ void *nothing;
+ sensors_chip_name_list chips;
+ sensors_expr *expr;
+ sensors_bus_id bus;
+ sensors_chip_name chip;
+ sensors_config_line line;
+}
+
+%left <nothing> '-' '+'
+%left <nothing> '*' '/'
+%left <nothing> NEG
+%right <nothing> '^' '`'
+
+%token <nothing> ','
+%token <nothing> EOL
+%token <line> BUS
+%token <line> LABEL
+%token <line> SET
+%token <line> CHIP
+%token <line> COMPUTE
+%token <line> IGNORE
+%token <value> FLOAT
+%token <name> NAME
+%token <nothing> ERROR
+
+%type <chips> chip_name_list
+%type <expr> expression
+%type <bus> bus_id
+%type <name> adapter_name
+%type <name> function_name
+%type <name> string
+%type <chip> chip_name
+
+%start input
+
+%%
+
+input: /* empty */
+ | input line
+;
+
+line: bus_statement EOL
+ | label_statement EOL
+ | set_statement EOL
+ | chip_statement EOL
+ | compute_statement EOL
+ | ignore_statement EOL
+ | error EOL
+;
+
+bus_statement: BUS bus_id adapter_name
+ { sensors_bus new_el;
+ new_el.line = $1;
+ new_el.bus = $2;
+ new_el.adapter = $3;
+ bus_add_el(&new_el);
+ }
+;
+
+label_statement: LABEL function_name string
+ { sensors_label new_el;
+ if (!current_chip) {
+ sensors_yyerror("Label statement before first chip statement");
+ free($2);
+ free($3);
+ YYERROR;
+ }
+ new_el.line = $1;
+ new_el.name = $2;
+ new_el.value = $3;
+ label_add_el(&new_el);
+ }
+;
+
+set_statement: SET function_name expression
+ { sensors_set new_el;
+ if (!current_chip) {
+ sensors_yyerror("Set statement before first chip statement");
+ free($2);
+ sensors_free_expr($3);
+ YYERROR;
+ }
+ new_el.line = $1;
+ new_el.name = $2;
+ new_el.value = $3;
+ set_add_el(&new_el);
+ }
+;
+
+compute_statement: COMPUTE function_name expression ',' expression
+ { sensors_compute new_el;
+ if (!current_chip) {
+ sensors_yyerror("Compute statement before first chip statement");
+ free($2);
+ sensors_free_expr($3);
+ sensors_free_expr($5);
+ YYERROR;
+ }
+ new_el.line = $1;
+ new_el.name = $2;
+ new_el.from_proc = $3;
+ new_el.to_proc = $5;
+ compute_add_el(&new_el);
+ }
+;
+
+ignore_statement: IGNORE function_name
+ { sensors_ignore new_el;
+ if (!current_chip) {
+ sensors_yyerror("Ignore statement before first chip statement");
+ free($2);
+ YYERROR;
+ }
+ new_el.line = $1;
+ new_el.name = $2;
+ ignore_add_el(&new_el);
+ }
+;
+
+chip_statement: CHIP chip_name_list
+ { sensors_chip new_el;
+ new_el.line = $1;
+ new_el.labels = NULL;
+ new_el.sets = NULL;
+ new_el.computes = NULL;
+ new_el.ignores = NULL;
+ new_el.labels_count = new_el.labels_max = 0;
+ new_el.sets_count = new_el.sets_max = 0;
+ new_el.computes_count = new_el.computes_max = 0;
+ new_el.ignores_count = new_el.ignores_max = 0;
+ new_el.chips = $2;
+ chip_add_el(&new_el);
+ current_chip = sensors_config_chips +
+ sensors_config_chips_count - 1;
+ }
+;
+
+chip_name_list: chip_name
+ {
+ $$.fits = NULL;
+ $$.fits_count = $$.fits_max = 0;
+ fits_add_el(&$1,$$);
+ }
+ | chip_name_list chip_name
+ { $$ = $1;
+ fits_add_el(&$2,$$);
+ }
+;
+
+expression: FLOAT
+ { $$ = malloc_expr();
+ $$->data.val = $1;
+ $$->kind = sensors_kind_val;
+ }
+ | NAME
+ { $$ = malloc_expr();
+ $$->data.var = $1;
+ $$->kind = sensors_kind_var;
+ }
+ | '@'
+ { $$ = malloc_expr();
+ $$->kind = sensors_kind_source;
+ }
+ | expression '+' expression
+ { $$ = malloc_expr();
+ $$->kind = sensors_kind_sub;
+ $$->data.subexpr.op = sensors_add;
+ $$->data.subexpr.sub1 = $1;
+ $$->data.subexpr.sub2 = $3;
+ }
+ | expression '-' expression
+ { $$ = malloc_expr();
+ $$->kind = sensors_kind_sub;
+ $$->data.subexpr.op = sensors_sub;
+ $$->data.subexpr.sub1 = $1;
+ $$->data.subexpr.sub2 = $3;
+ }
+ | expression '*' expression
+ { $$ = malloc_expr();
+ $$->kind = sensors_kind_sub;
+ $$->data.subexpr.op = sensors_multiply;
+ $$->data.subexpr.sub1 = $1;
+ $$->data.subexpr.sub2 = $3;
+ }
+ | expression '/' expression
+ { $$ = malloc_expr();
+ $$->kind = sensors_kind_sub;
+ $$->data.subexpr.op = sensors_divide;
+ $$->data.subexpr.sub1 = $1;
+ $$->data.subexpr.sub2 = $3;
+ }
+ | '-' expression %prec NEG
+ { $$ = malloc_expr();
+ $$->kind = sensors_kind_sub;
+ $$->data.subexpr.op = sensors_negate;
+ $$->data.subexpr.sub1 = $2;
+ $$->data.subexpr.sub2 = NULL;
+ }
+ | '(' expression ')'
+ { $$ = $2; }
+ | '^' expression
+ { $$ = malloc_expr();
+ $$->kind = sensors_kind_sub;
+ $$->data.subexpr.op = sensors_exp;
+ $$->data.subexpr.sub1 = $2;
+ $$->data.subexpr.sub2 = NULL;
+ }
+ | '`' expression
+ { $$ = malloc_expr();
+ $$->kind = sensors_kind_sub;
+ $$->data.subexpr.op = sensors_log;
+ $$->data.subexpr.sub1 = $2;
+ $$->data.subexpr.sub2 = NULL;
+ }
+;
+
+bus_id: NAME
+ { int res = sensors_parse_bus_id($1,&$$);
+ free($1);
+ if (res) {
+ sensors_yyerror("Parse error in bus id");
+ YYERROR;
+ }
+ }
+;
+
+adapter_name: NAME
+ { $$ = $1; }
+;
+
+function_name: NAME
+ { $$ = $1; }
+;
+
+string: NAME
+ { $$ = $1; }
+;
+
+chip_name: NAME
+ { int res = sensors_parse_chip_name($1,&$$);
+ free($1);
+ if (res) {
+ sensors_yyerror("Parse error in chip name");
+ YYERROR;
+ }
+ }
+;
+
+%%
+
+void sensors_yyerror(const char *err)
+{
+ if (sensors_lex_error[0]) {
+ sensors_parse_error_wfn(sensors_lex_error, sensors_yyfilename, sensors_yylineno);
+ sensors_lex_error[0] = '\0';
+ } else
+ sensors_parse_error_wfn(err, sensors_yyfilename, sensors_yylineno);
+}
+
+sensors_expr *malloc_expr(void)
+{
+ sensors_expr *res = malloc(sizeof(sensors_expr));
+ if (! res)
+ sensors_fatal_error(__func__, "Allocating a new expression");
+ return res;
+}
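
An illustrative configuration fragment that this grammar accepts, one
line per statement rule above (the chip prefix and feature names are
made up; '@' is the raw input value, as in the expression rules):

    bus "i2c-0" "SMBus adapter"
    chip "lm78-*"
        label temp1 "M/B Temp"
        set temp1_max 60
        compute temp1 @*2, @/2
        ignore fan2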
diff --git a/tools/gator/daemon/libsensors/conf.h b/tools/gator/daemon/libsensors/conf.h
new file mode 100644
index 000000000000..b7ce4f7ca833
--- /dev/null
+++ b/tools/gator/daemon/libsensors/conf.h
@@ -0,0 +1,34 @@
+/*
+ conf.h - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#ifndef LIB_SENSORS_CONF_H
+#define LIB_SENSORS_CONF_H
+
+/* This is defined in conf-lex.l */
+int sensors_yylex(void);
+extern char sensors_lex_error[];
+extern const char *sensors_yyfilename;
+extern int sensors_yylineno;
+extern FILE *sensors_yyin;
+
+/* This is defined in conf-parse.y */
+int sensors_yyparse(void);
+
+#endif /* LIB_SENSORS_CONF_H */
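
A minimal driver sketch using the declarations above, assuming the
conf-lex.l externs behave as documented: point the lexer at an open
stream, record the file name and line number for error reporting, then
run the parser.

    #include <stdio.h>
    #include "conf.h"

    static int parse_config(const char *path)
    {
        FILE *f = fopen(path, "r");
        int err;

        if (!f)
            return -1;
        sensors_yyin = f;            /* stream the lexer reads from */
        sensors_yyfilename = path;   /* reported by sensors_yyerror() */
        sensors_yylineno = 1;
        err = sensors_yyparse();     /* 0 on success, nonzero on error */
        fclose(f);
        return err;
    }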
diff --git a/tools/gator/daemon/libsensors/data.c b/tools/gator/daemon/libsensors/data.c
new file mode 100644
index 000000000000..cac9c8dc6eb1
--- /dev/null
+++ b/tools/gator/daemon/libsensors/data.c
@@ -0,0 +1,278 @@
+/*
+ data.c - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+ Copyright (C) 2007, 2009 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+/*** This file modified by ARM on Jan 23, 2013 to move version.h to the current directory. ***/
+
+/* this define needed for strndup() */
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "access.h"
+#include "error.h"
+#include "data.h"
+#include "sensors.h"
+#include "version.h"
+
+const char *libsensors_version = LM_VERSION;
+
+char **sensors_config_files = NULL;
+int sensors_config_files_count = 0;
+int sensors_config_files_max = 0;
+
+sensors_chip *sensors_config_chips = NULL;
+int sensors_config_chips_count = 0;
+int sensors_config_chips_subst = 0;
+int sensors_config_chips_max = 0;
+
+sensors_bus *sensors_config_busses = NULL;
+int sensors_config_busses_count = 0;
+int sensors_config_busses_max = 0;
+
+sensors_chip_features *sensors_proc_chips = NULL;
+int sensors_proc_chips_count = 0;
+int sensors_proc_chips_max = 0;
+
+sensors_bus *sensors_proc_bus = NULL;
+int sensors_proc_bus_count = 0;
+int sensors_proc_bus_max = 0;
+
+void sensors_free_chip_name(sensors_chip_name *chip)
+{
+ free(chip->prefix);
+}
+
+/*
+ Parse a chip name to the internal representation. These are valid names:
+
+ lm78-i2c-10-5e *-i2c-10-5e
+ lm78-i2c-10-* *-i2c-10-*
+ lm78-i2c-*-5e *-i2c-*-5e
+ lm78-i2c-*-* *-i2c-*-*
+ lm78-isa-10dd *-isa-10dd
+ lm78-isa-* *-isa-*
+ lm78-* *-*
+
+ Here 'lm78' can be any prefix. 'i2c' and 'isa' are
+ literal strings, just like all dashes '-' and wildcards '*'. '10' can
+ be any decimal i2c bus number. '5e' can be any hexadecimal i2c device
+ address, and '10dd' any hexadecimal isa address.
+
+  The 'prefix' part in the result is freshly allocated. All old contents
+  of res are overwritten. res itself is not allocated. In case of an error
+  return (i.e. != 0), res is undefined, but all allocations are undone.
+*/
+
+int sensors_parse_chip_name(const char *name, sensors_chip_name *res)
+{
+ char *dash;
+
+ /* First, the prefix. It's either "*" or a real chip name. */
+ if (!strncmp(name, "*-", 2)) {
+ res->prefix = SENSORS_CHIP_NAME_PREFIX_ANY;
+ name += 2;
+ } else {
+ if (!(dash = strchr(name, '-')))
+ return -SENSORS_ERR_CHIP_NAME;
+ res->prefix = strndup(name, dash - name);
+ if (!res->prefix)
+ sensors_fatal_error(__func__,
+ "Allocating name prefix");
+ name = dash + 1;
+ }
+
+ /* Then we have either a sole "*" (all chips with this name) or a bus
+ type and an address. */
+ if (!strcmp(name, "*")) {
+ res->bus.type = SENSORS_BUS_TYPE_ANY;
+ res->bus.nr = SENSORS_BUS_NR_ANY;
+ res->addr = SENSORS_CHIP_NAME_ADDR_ANY;
+ return 0;
+ }
+
+ if (!(dash = strchr(name, '-')))
+ goto ERROR;
+ if (!strncmp(name, "i2c", dash - name))
+ res->bus.type = SENSORS_BUS_TYPE_I2C;
+ else if (!strncmp(name, "isa", dash - name))
+ res->bus.type = SENSORS_BUS_TYPE_ISA;
+ else if (!strncmp(name, "pci", dash - name))
+ res->bus.type = SENSORS_BUS_TYPE_PCI;
+ else if (!strncmp(name, "spi", dash - name))
+ res->bus.type = SENSORS_BUS_TYPE_SPI;
+ else if (!strncmp(name, "virtual", dash - name))
+ res->bus.type = SENSORS_BUS_TYPE_VIRTUAL;
+ else if (!strncmp(name, "acpi", dash - name))
+ res->bus.type = SENSORS_BUS_TYPE_ACPI;
+ else if (!strncmp(name, "hid", dash - name))
+ res->bus.type = SENSORS_BUS_TYPE_HID;
+ else
+ goto ERROR;
+ name = dash + 1;
+
+ /* Some bus types (i2c, spi) have an additional bus number.
+ For these, the next part is either a "*" (any bus of that type)
+ or a decimal number. */
+ switch (res->bus.type) {
+ case SENSORS_BUS_TYPE_I2C:
+ case SENSORS_BUS_TYPE_SPI:
+ case SENSORS_BUS_TYPE_HID:
+ if (!strncmp(name, "*-", 2)) {
+ res->bus.nr = SENSORS_BUS_NR_ANY;
+ name += 2;
+ break;
+ }
+
+ res->bus.nr = strtoul(name, &dash, 10);
+ if (*name == '\0' || *dash != '-' || res->bus.nr < 0)
+ goto ERROR;
+ name = dash + 1;
+ break;
+ default:
+ res->bus.nr = SENSORS_BUS_NR_ANY;
+ }
+
+ /* Last part is the chip address, or "*" for any address. */
+ if (!strcmp(name, "*")) {
+ res->addr = SENSORS_CHIP_NAME_ADDR_ANY;
+ } else {
+ res->addr = strtoul(name, &dash, 16);
+ if (*name == '\0' || *dash != '\0' || res->addr < 0)
+ goto ERROR;
+ }
+
+ return 0;
+
+ERROR:
+ free(res->prefix);
+ return -SENSORS_ERR_CHIP_NAME;
+}
+
+int sensors_snprintf_chip_name(char *str, size_t size,
+ const sensors_chip_name *chip)
+{
+ if (sensors_chip_name_has_wildcards(chip))
+ return -SENSORS_ERR_WILDCARDS;
+
+ switch (chip->bus.type) {
+ case SENSORS_BUS_TYPE_ISA:
+ return snprintf(str, size, "%s-isa-%04x", chip->prefix,
+ chip->addr);
+ case SENSORS_BUS_TYPE_PCI:
+ return snprintf(str, size, "%s-pci-%04x", chip->prefix,
+ chip->addr);
+ case SENSORS_BUS_TYPE_I2C:
+ return snprintf(str, size, "%s-i2c-%hd-%02x", chip->prefix,
+ chip->bus.nr, chip->addr);
+ case SENSORS_BUS_TYPE_SPI:
+ return snprintf(str, size, "%s-spi-%hd-%x", chip->prefix,
+ chip->bus.nr, chip->addr);
+ case SENSORS_BUS_TYPE_VIRTUAL:
+ return snprintf(str, size, "%s-virtual-%x", chip->prefix,
+ chip->addr);
+ case SENSORS_BUS_TYPE_ACPI:
+ return snprintf(str, size, "%s-acpi-%x", chip->prefix,
+ chip->addr);
+ case SENSORS_BUS_TYPE_HID:
+ return snprintf(str, size, "%s-hid-%hd-%x", chip->prefix,
+ chip->bus.nr, chip->addr);
+ }
+
+ return -SENSORS_ERR_CHIP_NAME;
+}
+
+int sensors_parse_bus_id(const char *name, sensors_bus_id *bus)
+{
+ char *endptr;
+
+ if (strncmp(name, "i2c-", 4)) {
+ return -SENSORS_ERR_BUS_NAME;
+ }
+ name += 4;
+ bus->type = SENSORS_BUS_TYPE_I2C;
+ bus->nr = strtoul(name, &endptr, 10);
+ if (*name == '\0' || *endptr != '\0' || bus->nr < 0)
+ return -SENSORS_ERR_BUS_NAME;
+ return 0;
+}
+
+static int sensors_substitute_chip(sensors_chip_name *name,
+ const char *filename, int lineno)
+{
+ int i, j;
+ for (i = 0; i < sensors_config_busses_count; i++)
+ if (sensors_config_busses[i].bus.type == name->bus.type &&
+ sensors_config_busses[i].bus.nr == name->bus.nr)
+ break;
+
+ if (i == sensors_config_busses_count) {
+ sensors_parse_error_wfn("Undeclared bus id referenced",
+ filename, lineno);
+ name->bus.nr = SENSORS_BUS_NR_IGNORE;
+ return -SENSORS_ERR_BUS_NAME;
+ }
+
+ /* Compare the adapter names */
+ for (j = 0; j < sensors_proc_bus_count; j++) {
+ if (!strcmp(sensors_config_busses[i].adapter,
+ sensors_proc_bus[j].adapter)) {
+ name->bus.nr = sensors_proc_bus[j].bus.nr;
+ return 0;
+ }
+ }
+
+ /* We did not find a matching bus name, simply ignore this chip
+ config entry. */
+ name->bus.nr = SENSORS_BUS_NR_IGNORE;
+ return 0;
+}
+
+/* Bus substitution is on a per-configuration file basis, so we keep
+ memory (in sensors_config_chips_subst) of which chip entries have been
+ already substituted. */
+int sensors_substitute_busses(void)
+{
+ int err, i, j, lineno;
+ sensors_chip_name_list *chips;
+ const char *filename;
+ int res = 0;
+
+ for (i = sensors_config_chips_subst;
+ i < sensors_config_chips_count; i++) {
+ filename = sensors_config_chips[i].line.filename;
+ lineno = sensors_config_chips[i].line.lineno;
+ chips = &sensors_config_chips[i].chips;
+ for (j = 0; j < chips->fits_count; j++) {
+ /* We can only substitute if a specific bus number
+ is given. */
+ if (chips->fits[j].bus.nr == SENSORS_BUS_NR_ANY)
+ continue;
+
+ err = sensors_substitute_chip(&chips->fits[j],
+ filename, lineno);
+ if (err)
+ res = err;
+ }
+ }
+ sensors_config_chips_subst = sensors_config_chips_count;
+ return res;
+}
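
A short usage sketch for the helpers above (the main() is hypothetical,
not part of the library): parse a textual chip name, print it back, and
release the allocated prefix. Printing fails with -SENSORS_ERR_WILDCARDS
if the parsed name still contains wildcards.

    #include <stdio.h>
    #include "data.h"

    int main(void)
    {
        sensors_chip_name chip;
        char buf[64];

        /* "lm78-i2c-10-5e" follows the prefix-bustype-busnr-addr
           layout documented above sensors_parse_chip_name(). */
        if (sensors_parse_chip_name("lm78-i2c-10-5e", &chip))
            return 1;
        if (sensors_snprintf_chip_name(buf, sizeof(buf), &chip) < 0)
            return 1;
        printf("%s\n", buf);         /* prints "lm78-i2c-10-5e" */
        sensors_free_chip_name(&chip);
        return 0;
    }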
diff --git a/tools/gator/daemon/libsensors/data.h b/tools/gator/daemon/libsensors/data.h
new file mode 100644
index 000000000000..4a23eabe1414
--- /dev/null
+++ b/tools/gator/daemon/libsensors/data.h
@@ -0,0 +1,184 @@
+/*
+ data.h - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+ Copyright (C) 2007, 2009 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#ifndef LIB_SENSORS_DATA_H
+#define LIB_SENSORS_DATA_H
+
+#include "sensors.h"
+#include "general.h"
+
+/* This header file contains all kinds of data structures which are used
+ for the representation of the config file data and the sensors
+ data. */
+
+/* Kinds of expression operators recognized */
+typedef enum sensors_operation {
+ sensors_add, sensors_sub, sensors_multiply, sensors_divide,
+ sensors_negate, sensors_exp, sensors_log,
+} sensors_operation;
+
+/* An expression can have several forms */
+typedef enum sensors_expr_kind {
+ sensors_kind_val, sensors_kind_source, sensors_kind_var,
+ sensors_kind_sub
+} sensors_expr_kind;
+
+/* An expression. It is either a floating point value, a variable name,
+   an operation on subexpressions, or the raw input value '@' */
+struct sensors_expr;
+
+typedef struct sensors_subexpr {
+ sensors_operation op;
+ struct sensors_expr *sub1;
+ struct sensors_expr *sub2;
+} sensors_subexpr;
+
+typedef struct sensors_expr {
+ sensors_expr_kind kind;
+ union {
+ double val;
+ char *var;
+ sensors_subexpr subexpr;
+ } data;
+} sensors_expr;
+
+/* Config file line reference */
+typedef struct sensors_config_line {
+ const char *filename;
+ int lineno;
+} sensors_config_line;
+
+/* Config file label declaration: a feature name, combined with the label
+ value */
+typedef struct sensors_label {
+ char *name;
+ char *value;
+ sensors_config_line line;
+} sensors_label;
+
+/* Config file set declaration: a subfeature name, combined with an
+ expression */
+typedef struct sensors_set {
+ char *name;
+ sensors_expr *value;
+ sensors_config_line line;
+} sensors_set;
+
+/* Config file compute declaration: a feature name, combined with two
+ expressions */
+typedef struct sensors_compute {
+ char *name;
+ sensors_expr *from_proc;
+ sensors_expr *to_proc;
+ sensors_config_line line;
+} sensors_compute;
+
+/* Config file ignore declaration: a feature name */
+typedef struct sensors_ignore {
+ char *name;
+ sensors_config_line line;
+} sensors_ignore;
+
+/* A list of chip names, used to represent a config file chips declaration */
+typedef struct sensors_chip_name_list {
+ sensors_chip_name *fits;
+ int fits_count;
+ int fits_max;
+} sensors_chip_name_list;
+
+/* A config file chip block */
+typedef struct sensors_chip {
+ sensors_chip_name_list chips;
+ sensors_label *labels;
+ int labels_count;
+ int labels_max;
+ sensors_set *sets;
+ int sets_count;
+ int sets_max;
+ sensors_compute *computes;
+ int computes_count;
+ int computes_max;
+ sensors_ignore *ignores;
+ int ignores_count;
+ int ignores_max;
+ sensors_config_line line;
+} sensors_chip;
+
+/* Config file bus declaration: the bus type and number, combined with adapter
+ name */
+typedef struct sensors_bus {
+ char *adapter;
+ sensors_bus_id bus;
+ sensors_config_line line;
+} sensors_bus;
+
+/* Internal data about all features and subfeatures of a chip */
+typedef struct sensors_chip_features {
+ struct sensors_chip_name chip;
+ struct sensors_feature *feature;
+ struct sensors_subfeature *subfeature;
+ int feature_count;
+ int subfeature_count;
+} sensors_chip_features;
+
+extern char **sensors_config_files;
+extern int sensors_config_files_count;
+extern int sensors_config_files_max;
+
+#define sensors_add_config_files(el) sensors_add_array_el( \
+ (el), &sensors_config_files, &sensors_config_files_count, \
+ &sensors_config_files_max, sizeof(char *))
+
+extern sensors_chip *sensors_config_chips;
+extern int sensors_config_chips_count;
+extern int sensors_config_chips_subst;
+extern int sensors_config_chips_max;
+
+extern sensors_bus *sensors_config_busses;
+extern int sensors_config_busses_count;
+extern int sensors_config_busses_max;
+
+extern sensors_chip_features *sensors_proc_chips;
+extern int sensors_proc_chips_count;
+extern int sensors_proc_chips_max;
+
+#define sensors_add_proc_chips(el) sensors_add_array_el( \
+ (el), &sensors_proc_chips, &sensors_proc_chips_count,\
+ &sensors_proc_chips_max, sizeof(struct sensors_chip_features))
+
+extern sensors_bus *sensors_proc_bus;
+extern int sensors_proc_bus_count;
+extern int sensors_proc_bus_max;
+
+#define sensors_add_proc_bus(el) sensors_add_array_el( \
+ (el), &sensors_proc_bus, &sensors_proc_bus_count,\
+ &sensors_proc_bus_max, sizeof(struct sensors_bus))
+
+/* Substitute configuration bus numbers with real-world bus numbers
+ in the chips lists */
+int sensors_substitute_busses(void);
+
+/* Parse a bus id into its components. Returns 0 on success, a value from
+ error.h on failure. */
+int sensors_parse_bus_id(const char *name, sensors_bus_id *bus);
+
+#endif /* def LIB_SENSORS_DATA_H */
diff --git a/tools/gator/daemon/libsensors/error.c b/tools/gator/daemon/libsensors/error.c
new file mode 100644
index 000000000000..55bde81295fb
--- /dev/null
+++ b/tools/gator/daemon/libsensors/error.c
@@ -0,0 +1,92 @@
+/*
+ error.c - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+ Copyright (C) 2007-2009 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "error.h"
+#include "general.h"
+
+static void sensors_default_parse_error(const char *err, int lineno);
+static void sensors_default_parse_error_wfn(const char *err,
+ const char *filename, int lineno);
+static void sensors_default_fatal_error(const char *proc, const char *err);
+
+void (*sensors_parse_error) (const char *err, int lineno) =
+ sensors_default_parse_error;
+void (*sensors_parse_error_wfn) (const char *err, const char *filename,
+ int lineno) = sensors_default_parse_error_wfn;
+void (*sensors_fatal_error) (const char *proc, const char *err) =
+ sensors_default_fatal_error;
+
+static const char *errorlist[] = {
+ /* Invalid error code */ "Unknown error",
+ /* SENSORS_ERR_WILDCARDS */ "Wildcard found in chip name",
+ /* SENSORS_ERR_NO_ENTRY */ "No such subfeature known",
+ /* SENSORS_ERR_ACCESS_R */ "Can't read",
+ /* SENSORS_ERR_KERNEL */ "Kernel interface error",
+ /* SENSORS_ERR_DIV_ZERO */ "Divide by zero",
+ /* SENSORS_ERR_CHIP_NAME */ "Can't parse chip name",
+ /* SENSORS_ERR_BUS_NAME */ "Can't parse bus name",
+ /* SENSORS_ERR_PARSE */ "General parse error",
+ /* SENSORS_ERR_ACCESS_W */ "Can't write",
+ /* SENSORS_ERR_IO */ "I/O error",
+ /* SENSORS_ERR_RECURSION */ "Evaluation recurses too deep",
+};
+
+const char *sensors_strerror(int errnum)
+{
+ if (errnum < 0)
+ errnum = -errnum;
+ if (errnum >= ARRAY_SIZE(errorlist))
+ errnum = 0;
+ return errorlist[errnum];
+}
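+
+/* For example, sensors_strerror(-SENSORS_ERR_ACCESS_R) and
+   sensors_strerror(SENSORS_ERR_ACCESS_R) both return "Can't read";
+   any code outside the table above maps to "Unknown error". */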
+
+void sensors_default_parse_error(const char *err, int lineno)
+{
+ if (lineno)
+ fprintf(stderr, "Error: Line %d: %s\n", lineno, err);
+ else
+ fprintf(stderr, "Error: %s\n", err);
+}
+
+void sensors_default_parse_error_wfn(const char *err,
+ const char *filename, int lineno)
+{
+	/* If the application provided a custom parse error reporting
+	   function but not the variant with the filename, fall back to the
+	   original variant without the filename, for backwards
+	   compatibility. */
+	if (sensors_parse_error != sensors_default_parse_error ||
+	    !filename) {
+		sensors_parse_error(err, lineno);
+		return;
+	}
+
+ if (lineno)
+ fprintf(stderr, "Error: File %s, line %d: %s\n", filename,
+ lineno, err);
+ else
+ fprintf(stderr, "Error: File %s: %s\n", filename, err);
+}
+
+void sensors_default_fatal_error(const char *proc, const char *err)
+{
+ fprintf(stderr, "Fatal error in `%s': %s\n", proc, err);
+ exit(1);
+}
diff --git a/tools/gator/daemon/libsensors/error.h b/tools/gator/daemon/libsensors/error.h
new file mode 100644
index 000000000000..37cdc956151d
--- /dev/null
+++ b/tools/gator/daemon/libsensors/error.h
@@ -0,0 +1,74 @@
+/*
+ error.h - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+ Copyright (C) 2007-2009 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#ifndef LIB_SENSORS_ERROR_H
+#define LIB_SENSORS_ERROR_H
+
+#define SENSORS_ERR_WILDCARDS 1 /* Wildcard found in chip name */
+#define SENSORS_ERR_NO_ENTRY 2 /* No such subfeature known */
+#define SENSORS_ERR_ACCESS_R 3 /* Can't read */
+#define SENSORS_ERR_KERNEL 4 /* Kernel interface error */
+#define SENSORS_ERR_DIV_ZERO 5 /* Divide by zero */
+#define SENSORS_ERR_CHIP_NAME 6 /* Can't parse chip name */
+#define SENSORS_ERR_BUS_NAME 7 /* Can't parse bus name */
+#define SENSORS_ERR_PARSE 8 /* General parse error */
+#define SENSORS_ERR_ACCESS_W 9 /* Can't write */
+#define SENSORS_ERR_IO 10 /* I/O error */
+#define SENSORS_ERR_RECURSION 11 /* Evaluation recurses too deep */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+
+/* This function returns a pointer to a string which describes the error.
+   errnum may be negative (it is treated as the corresponding positive
+   error code). Do not modify the returned string! */
+const char *sensors_strerror(int errnum);
+
+/* These functions are called when a parse error is detected. Assign them
+   new values, and your own functions are called instead of the defaults
+   (which print to stderr). These functions may terminate the program,
+   but they usually output an error and return. The first function is the
+   original one; the second one was added later, when support for multiple
+   configuration files was introduced.
+   The library code now only calls the second function. However, for
+   backwards compatibility, if an application provides a custom handler
+   for the first function but not the second, then all parse errors are
+   reported using the first function (that is, the filename is never
+   reported).
+   Note that filename can be NULL (if the filename isn't known) and lineno
+   can be 0 (if the error occurs before the actual parsing starts). */
+extern void (*sensors_parse_error) (const char *err, int lineno);
+extern void (*sensors_parse_error_wfn) (const char *err,
+ const char *filename, int lineno);
+
+/* This function is called when an immediately fatal error (like no
+ memory left) is detected. Give it a new value, and your own function
+ is called instead of the default (which prints to stderr and ends
+ the program). Never let it return! */
+extern void (*sensors_fatal_error) (const char *proc, const char *err);
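+
+/* A minimal sketch of installing custom handlers; my_parse_error() and
+   my_die() are hypothetical application functions, not part of this
+   library:
+
+	static void my_parse_error(const char *err, const char *file,
+				   int lineno)
+	{
+		fprintf(stderr, "config %s:%d: %s\n",
+			file ? file : "(stdin)", lineno, err);
+	}
+
+	static void my_die(const char *proc, const char *err)
+	{
+		fprintf(stderr, "%s: %s\n", proc, err);
+		abort();	// never return, as required above
+	}
+
+	sensors_parse_error_wfn = my_parse_error;
+	sensors_fatal_error = my_die;
+*/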
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* def LIB_SENSORS_ERROR_H */
diff --git a/tools/gator/daemon/libsensors/general.c b/tools/gator/daemon/libsensors/general.c
new file mode 100644
index 000000000000..f237e3b59c22
--- /dev/null
+++ b/tools/gator/daemon/libsensors/general.c
@@ -0,0 +1,85 @@
+/*
+ general.c - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#include "error.h"
+#include "general.h"
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+
+#define A_BUNCH 16
+
+void sensors_malloc_array(void *list, int *num_el, int *max_el, int el_size)
+{
+ void **my_list = (void **)list;
+
+ *my_list = malloc(el_size*A_BUNCH);
+ if (! *my_list)
+ sensors_fatal_error(__func__, "Allocating new elements");
+ *max_el = A_BUNCH;
+ *num_el = 0;
+}
+
+void sensors_free_array(void *list, int *num_el, int *max_el)
+{
+ void **my_list = (void **)list;
+
+ free(*my_list);
+ *my_list = NULL;
+ *num_el = 0;
+ *max_el = 0;
+}
+
+void sensors_add_array_el(const void *el, void *list, int *num_el,
+ int *max_el, int el_size)
+{
+ int new_max_el;
+	void **my_list = (void **)list;
+ if (*num_el + 1 > *max_el) {
+ new_max_el = *max_el + A_BUNCH;
+ *my_list = realloc(*my_list, new_max_el * el_size);
+ if (! *my_list)
+ sensors_fatal_error(__func__,
+ "Allocating new elements");
+ *max_el = new_max_el;
+ }
+ memcpy(((char *) *my_list) + *num_el * el_size, el, el_size);
+ (*num_el) ++;
+}
+
+void sensors_add_array_els(const void *els, int nr_els, void *list,
+ int *num_el, int *max_el, int el_size)
+{
+ int new_max_el;
+	void **my_list = (void **)list;
+ if (*num_el + nr_els > *max_el) {
+ new_max_el = (*max_el + nr_els + A_BUNCH);
+ new_max_el -= new_max_el % A_BUNCH;
+ *my_list = realloc(*my_list, new_max_el * el_size);
+ if (! *my_list)
+ sensors_fatal_error(__func__,
+ "Allocating new elements");
+ *max_el = new_max_el;
+ }
+ memcpy(((char *)*my_list) + *num_el * el_size, els, el_size * nr_els);
+ *num_el += nr_els;
+}
diff --git a/tools/gator/daemon/libsensors/general.h b/tools/gator/daemon/libsensors/general.h
new file mode 100644
index 000000000000..a3971e02e1b7
--- /dev/null
+++ b/tools/gator/daemon/libsensors/general.h
@@ -0,0 +1,39 @@
+/*
+ general.h - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#ifndef LIB_SENSORS_GENERAL
+#define LIB_SENSORS_GENERAL
+
+/* These are general purpose functions. They allow you to use variable-
+ length arrays, which are extended automatically. A distinction is
+ made between the current number of elements and the maximum number.
+ You can only add elements at the end. Primitive, but very useful
+ for internal use. */
+void sensors_malloc_array(void *list, int *num_el, int *max_el,
+ int el_size);
+void sensors_free_array(void *list, int *num_el, int *max_el);
+void sensors_add_array_el(const void *el, void *list, int *num_el,
+ int *max_el, int el_size);
+void sensors_add_array_els(const void *els, int nr_els, void *list,
+ int *num_el, int *max_el, int el_size);
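+
+/* A minimal usage sketch (hypothetical caller code, not part of the
+   library): grow an int array one element at a time, then release it.
+
+	int *vals;
+	int vals_count, vals_max, v = 42;
+
+	sensors_malloc_array(&vals, &vals_count, &vals_max, sizeof(int));
+	sensors_add_array_el(&v, &vals, &vals_count, &vals_max,
+			     sizeof(int));
+	sensors_free_array(&vals, &vals_count, &vals_max);
+*/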
+
+#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof((arr)[0]))
+
+#endif /* LIB_SENSORS_GENERAL */
diff --git a/tools/gator/daemon/libsensors/init.c b/tools/gator/daemon/libsensors/init.c
new file mode 100644
index 000000000000..558046e76c24
--- /dev/null
+++ b/tools/gator/daemon/libsensors/init.c
@@ -0,0 +1,341 @@
+/*
+ init.c - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+ Copyright (C) 2007, 2009 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+/*** This file modified by ARM on Jan 23, 2013 to cast alphasort to suppress a warning, as its prototype is different on Android. ***/
+
+/* Needed for scandir() and alphasort() */
+#define _BSD_SOURCE
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <locale.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <dirent.h>
+#include <unistd.h>
+#include "sensors.h"
+#include "data.h"
+#include "error.h"
+#include "access.h"
+#include "conf.h"
+#include "sysfs.h"
+#include "scanner.h"
+#include "init.h"
+
+#define DEFAULT_CONFIG_FILE ETCDIR "/sensors3.conf"
+#define ALT_CONFIG_FILE ETCDIR "/sensors.conf"
+#define DEFAULT_CONFIG_DIR ETCDIR "/sensors.d"
+
+/* Wrapper around sensors_yyparse(), which clears the locale so that
+ the decimal numbers are always parsed properly. */
+static int sensors_parse(void)
+{
+ int res;
+ char *locale;
+
+ /* Remember the current locale and clear it */
+ locale = setlocale(LC_ALL, NULL);
+ if (locale) {
+ locale = strdup(locale);
+ if (!locale)
+ sensors_fatal_error(__func__, "Out of memory");
+
+ setlocale(LC_ALL, "C");
+ }
+
+ res = sensors_yyparse();
+
+ /* Restore the old locale */
+ if (locale) {
+ setlocale(LC_ALL, locale);
+ free(locale);
+ }
+
+ return res;
+}
+
+static void free_bus(sensors_bus *bus)
+{
+ free(bus->adapter);
+}
+
+static void free_config_busses(void)
+{
+ int i;
+
+ for (i = 0; i < sensors_config_busses_count; i++)
+ free_bus(&sensors_config_busses[i]);
+ free(sensors_config_busses);
+ sensors_config_busses = NULL;
+ sensors_config_busses_count = sensors_config_busses_max = 0;
+}
+
+static int parse_config(FILE *input, const char *name)
+{
+ int err;
+ char *name_copy;
+
+ if (name) {
+ /* Record configuration file name for error reporting */
+ name_copy = strdup(name);
+ if (!name_copy)
+ sensors_fatal_error(__func__, "Out of memory");
+ sensors_add_config_files(&name_copy);
+ } else
+ name_copy = NULL;
+
+ if (sensors_scanner_init(input, name_copy)) {
+ err = -SENSORS_ERR_PARSE;
+ goto exit_cleanup;
+ }
+ err = sensors_parse();
+ sensors_scanner_exit();
+ if (err) {
+ err = -SENSORS_ERR_PARSE;
+ goto exit_cleanup;
+ }
+
+ err = sensors_substitute_busses();
+
+exit_cleanup:
+ free_config_busses();
+ return err;
+}
+
+static int config_file_filter(const struct dirent *entry)
+{
+ return entry->d_name[0] != '.'; /* Skip hidden files */
+}
+
+static int add_config_from_dir(const char *dir)
+{
+ int count, res, i;
+ struct dirent **namelist;
+
+	count = scandir(dir, &namelist, config_file_filter,
+			(int (*)(const struct dirent **,
+				 const struct dirent **))alphasort);
+ if (count < 0) {
+ /* Do not return an error if directory does not exist */
+ if (errno == ENOENT)
+ return 0;
+
+ sensors_parse_error_wfn(strerror(errno), NULL, 0);
+ return -SENSORS_ERR_PARSE;
+ }
+
+ for (res = 0, i = 0; !res && i < count; i++) {
+ int len;
+ char path[PATH_MAX];
+ FILE *input;
+ struct stat st;
+
+ len = snprintf(path, sizeof(path), "%s/%s", dir,
+ namelist[i]->d_name);
+ if (len < 0 || len >= (int)sizeof(path)) {
+ res = -SENSORS_ERR_PARSE;
+ continue;
+ }
+
+ /* Only accept regular files */
+ if (stat(path, &st) < 0 || !S_ISREG(st.st_mode))
+ continue;
+
+ input = fopen(path, "r");
+ if (input) {
+ res = parse_config(input, path);
+ fclose(input);
+ } else {
+ res = -SENSORS_ERR_PARSE;
+ sensors_parse_error_wfn(strerror(errno), path, 0);
+ }
+ }
+
+ /* Free memory allocated by scandir() */
+ for (i = 0; i < count; i++)
+ free(namelist[i]);
+ free(namelist);
+
+ return res;
+}
+
+int sensors_init(FILE *input)
+{
+ int res;
+
+ if (!sensors_init_sysfs())
+ return -SENSORS_ERR_KERNEL;
+ if ((res = sensors_read_sysfs_bus()) ||
+ (res = sensors_read_sysfs_chips()))
+ goto exit_cleanup;
+
+ if (input) {
+ res = parse_config(input, NULL);
+ if (res)
+ goto exit_cleanup;
+ } else {
+ const char* name;
+
+ /* No configuration provided, use default */
+ input = fopen(name = DEFAULT_CONFIG_FILE, "r");
+ if (!input && errno == ENOENT)
+ input = fopen(name = ALT_CONFIG_FILE, "r");
+ if (input) {
+ res = parse_config(input, name);
+ fclose(input);
+ if (res)
+ goto exit_cleanup;
+
+ } else if (errno != ENOENT) {
+ sensors_parse_error_wfn(strerror(errno), name, 0);
+ res = -SENSORS_ERR_PARSE;
+ goto exit_cleanup;
+ }
+
+ /* Also check for files in default directory */
+ res = add_config_from_dir(DEFAULT_CONFIG_DIR);
+ if (res)
+ goto exit_cleanup;
+ }
+
+ return 0;
+
+exit_cleanup:
+ sensors_cleanup();
+ return res;
+}
+
+static void free_chip_name(sensors_chip_name *name)
+{
+ free(name->prefix);
+ free(name->path);
+}
+
+static void free_chip_features(sensors_chip_features *features)
+{
+ int i;
+
+ for (i = 0; i < features->subfeature_count; i++)
+ free(features->subfeature[i].name);
+ free(features->subfeature);
+ for (i = 0; i < features->feature_count; i++)
+ free(features->feature[i].name);
+ free(features->feature);
+}
+
+static void free_label(sensors_label *label)
+{
+ free(label->name);
+ free(label->value);
+}
+
+void sensors_free_expr(sensors_expr *expr)
+{
+ if (expr->kind == sensors_kind_var)
+ free(expr->data.var);
+ else if (expr->kind == sensors_kind_sub) {
+ if (expr->data.subexpr.sub1)
+ sensors_free_expr(expr->data.subexpr.sub1);
+ if (expr->data.subexpr.sub2)
+ sensors_free_expr(expr->data.subexpr.sub2);
+ }
+ free(expr);
+}
+
+static void free_set(sensors_set *set)
+{
+ free(set->name);
+ sensors_free_expr(set->value);
+}
+
+static void free_compute(sensors_compute *compute)
+{
+ free(compute->name);
+ sensors_free_expr(compute->from_proc);
+ sensors_free_expr(compute->to_proc);
+}
+
+static void free_ignore(sensors_ignore *ignore)
+{
+ free(ignore->name);
+}
+
+static void free_chip(sensors_chip *chip)
+{
+ int i;
+
+ for (i = 0; i < chip->chips.fits_count; i++)
+ free_chip_name(&chip->chips.fits[i]);
+ free(chip->chips.fits);
+ chip->chips.fits_count = chip->chips.fits_max = 0;
+
+ for (i = 0; i < chip->labels_count; i++)
+ free_label(&chip->labels[i]);
+ free(chip->labels);
+ chip->labels_count = chip->labels_max = 0;
+
+ for (i = 0; i < chip->sets_count; i++)
+ free_set(&chip->sets[i]);
+ free(chip->sets);
+ chip->sets_count = chip->sets_max = 0;
+
+ for (i = 0; i < chip->computes_count; i++)
+ free_compute(&chip->computes[i]);
+ free(chip->computes);
+ chip->computes_count = chip->computes_max = 0;
+
+ for (i = 0; i < chip->ignores_count; i++)
+ free_ignore(&chip->ignores[i]);
+ free(chip->ignores);
+ chip->ignores_count = chip->ignores_max = 0;
+}
+
+void sensors_cleanup(void)
+{
+ int i;
+
+ for (i = 0; i < sensors_proc_chips_count; i++) {
+ free_chip_name(&sensors_proc_chips[i].chip);
+ free_chip_features(&sensors_proc_chips[i]);
+ }
+ free(sensors_proc_chips);
+ sensors_proc_chips = NULL;
+ sensors_proc_chips_count = sensors_proc_chips_max = 0;
+
+ for (i = 0; i < sensors_config_chips_count; i++)
+ free_chip(&sensors_config_chips[i]);
+ free(sensors_config_chips);
+ sensors_config_chips = NULL;
+ sensors_config_chips_count = sensors_config_chips_max = 0;
+ sensors_config_chips_subst = 0;
+
+ for (i = 0; i < sensors_proc_bus_count; i++)
+ free_bus(&sensors_proc_bus[i]);
+ free(sensors_proc_bus);
+ sensors_proc_bus = NULL;
+ sensors_proc_bus_count = sensors_proc_bus_max = 0;
+
+ for (i = 0; i < sensors_config_files_count; i++)
+ free(sensors_config_files[i]);
+ free(sensors_config_files);
+ sensors_config_files = NULL;
+ sensors_config_files_count = sensors_config_files_max = 0;
+}
diff --git a/tools/gator/daemon/libsensors/init.h b/tools/gator/daemon/libsensors/init.h
new file mode 100644
index 000000000000..47006a62db63
--- /dev/null
+++ b/tools/gator/daemon/libsensors/init.h
@@ -0,0 +1,28 @@
+/*
+ init.h - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (C) 2007 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#ifndef LIB_SENSORS_INIT_H
+#define LIB_SENSORS_INIT_H
+
+#include "data.h"
+
+void sensors_free_expr(sensors_expr *expr);
+
+#endif /* def LIB_SENSORS_INIT_H */
diff --git a/tools/gator/daemon/libsensors/scanner.h b/tools/gator/daemon/libsensors/scanner.h
new file mode 100644
index 000000000000..4c415167de5c
--- /dev/null
+++ b/tools/gator/daemon/libsensors/scanner.h
@@ -0,0 +1,32 @@
+/*
+ scanner.h - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 2006 Mark M. Hoffman <mhoffman@lightlink.com>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+/*** This file modified by ARM on Jan 23, 2013 to fix an "input defined but not used" warning from conf-lex.c. ***/
+
+#ifndef LIB_SENSORS_SCANNER_H
+#define LIB_SENSORS_SCANNER_H
+
+int sensors_scanner_init(FILE *input, const char *filename);
+void sensors_scanner_exit(void);
+
+#define YY_NO_INPUT
+
+#endif
+
diff --git a/tools/gator/daemon/libsensors/sensors.h b/tools/gator/daemon/libsensors/sensors.h
new file mode 100644
index 000000000000..7874d028dc82
--- /dev/null
+++ b/tools/gator/daemon/libsensors/sensors.h
@@ -0,0 +1,311 @@
+/*
+ sensors.h - Part of libsensors, a Linux library for reading sensor data.
+ Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>
+ Copyright (C) 2007, 2010 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+/*** This file modified by ARM on Jan 23, 2013 to read non-scaled values. ***/
+
+#ifndef LIB_SENSORS_SENSORS_H
+#define LIB_SENSORS_SENSORS_H
+
+#include <stdio.h>
+#include <limits.h>
+
+/* Publicly accessible library functions */
+
+/* libsensors API version define: the first digit is the major version
+   (changed when the API + ABI breaks), the second digit tracks larger
+   additions like new methods, and the third digit is incremented for
+   small API additions like new flags / enum values. */
+#define SENSORS_API_VERSION 0x432
+
+#define SENSORS_CHIP_NAME_PREFIX_ANY NULL
+#define SENSORS_CHIP_NAME_ADDR_ANY (-1)
+
+#define SENSORS_BUS_TYPE_ANY (-1)
+#define SENSORS_BUS_TYPE_I2C 0
+#define SENSORS_BUS_TYPE_ISA 1
+#define SENSORS_BUS_TYPE_PCI 2
+#define SENSORS_BUS_TYPE_SPI 3
+#define SENSORS_BUS_TYPE_VIRTUAL 4
+#define SENSORS_BUS_TYPE_ACPI 5
+#define SENSORS_BUS_TYPE_HID 6
+#define SENSORS_BUS_NR_ANY (-1)
+#define SENSORS_BUS_NR_IGNORE (-2)
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+extern const char *libsensors_version;
+
+extern int sensors_sysfs_no_scaling;
+
+typedef struct sensors_bus_id {
+ short type;
+ short nr;
+} sensors_bus_id;
+
+/* A chip name is encoded in this structure */
+typedef struct sensors_chip_name {
+ char *prefix;
+ sensors_bus_id bus;
+ int addr;
+ char *path;
+} sensors_chip_name;
+
+/* Load the configuration file and the detected chips list. If this
+   returns a nonzero value, you are in trouble; you cannot assume
+   anything will be initialized properly. If you want to reload the
+   configuration file, call sensors_cleanup() below before calling
+   sensors_init() again. */
+int sensors_init(FILE *input);
+
+/* Clean-up function: You can't access anything after
+ this, until the next sensors_init() call! */
+void sensors_cleanup(void);
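+
+/* A minimal usage sketch (hypothetical application code): initialise
+   from the default configuration, query, then tear down.
+
+	if (sensors_init(NULL) != 0)
+		exit(1);
+	// ... iterate over chips and read values here ...
+	sensors_cleanup();
+*/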
+
+/* Parse a chip name to the internal representation. Return 0 on success, <0
+ on error. */
+int sensors_parse_chip_name(const char *orig_name, sensors_chip_name *res);
+
+/* Free memory allocated for the internal representation of a chip name. */
+void sensors_free_chip_name(sensors_chip_name *chip);
+
+/* Print a chip name from its internal representation. Note that chip should
+ not contain wildcard values! Return the number of characters printed on
+ success (same as snprintf), <0 on error. */
+int sensors_snprintf_chip_name(char *str, size_t size,
+ const sensors_chip_name *chip);
+
+/* This function returns the adapter name of a bus, as used within the
+   sensors_chip_name structure. If it cannot be found, NULL is
+   returned. */
+const char *sensors_get_adapter_name(const sensors_bus_id *bus);
+
+typedef struct sensors_feature sensors_feature;
+
+/* Look up the label for a given feature. Note that chip should not
+   contain wildcard values! The returned string is newly allocated (free
+   it yourself). On failure, NULL is returned.
+   If no label exists for this feature, the feature name itself is
+   returned. */
+char *sensors_get_label(const sensors_chip_name *name,
+ const sensors_feature *feature);
+
+/* Read the value of a subfeature of a certain chip. Note that chip should not
+ contain wildcard values! This function will return 0 on success, and <0
+ on failure. */
+int sensors_get_value(const sensors_chip_name *name, int subfeat_nr,
+ double *value);
+
+/* Set the value of a subfeature of a certain chip. Note that chip should not
+ contain wildcard values! This function will return 0 on success, and <0
+ on failure. */
+int sensors_set_value(const sensors_chip_name *name, int subfeat_nr,
+ double value);
+
+/* Execute all set statements for this particular chip. The chip may contain
+ wildcards! This function will return 0 on success, and <0 on failure. */
+int sensors_do_chip_sets(const sensors_chip_name *name);
+
+/* This function returns all detected chips that match a given chip name,
+ one by one. If no chip name is provided, all detected chips are returned.
+ To start at the beginning of the list, use 0 for nr; NULL is returned if
+ we are at the end of the list. Do not try to change these chip names, as
+ they point to internal structures! */
+const sensors_chip_name *sensors_get_detected_chips(const sensors_chip_name
+ *match, int *nr);
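+
+/* A minimal iteration sketch (hypothetical application code): walk all
+   detected chips and print their names.
+
+	const sensors_chip_name *chip;
+	char buf[256];
+	int nr = 0;
+
+	while ((chip = sensors_get_detected_chips(NULL, &nr)))
+		if (sensors_snprintf_chip_name(buf, sizeof(buf), chip) >= 0)
+			printf("%s\n", buf);
+*/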
+
+/* These defines are used in the flags field of sensors_subfeature */
+#define SENSORS_MODE_R 1
+#define SENSORS_MODE_W 2
+#define SENSORS_COMPUTE_MAPPING 4
+
+typedef enum sensors_feature_type {
+ SENSORS_FEATURE_IN = 0x00,
+ SENSORS_FEATURE_FAN = 0x01,
+ SENSORS_FEATURE_TEMP = 0x02,
+ SENSORS_FEATURE_POWER = 0x03,
+ SENSORS_FEATURE_ENERGY = 0x04,
+ SENSORS_FEATURE_CURR = 0x05,
+ SENSORS_FEATURE_HUMIDITY = 0x06,
+ SENSORS_FEATURE_MAX_MAIN,
+ SENSORS_FEATURE_VID = 0x10,
+ SENSORS_FEATURE_INTRUSION = 0x11,
+ SENSORS_FEATURE_MAX_OTHER,
+ SENSORS_FEATURE_BEEP_ENABLE = 0x18,
+ SENSORS_FEATURE_UNKNOWN = INT_MAX,
+} sensors_feature_type;
+
+/* All the sensor types (in, fan, temp, vid) are a multiple of 0x100 apart,
+ and sensor subfeatures which have no compute mapping have bit 7 set. */
+typedef enum sensors_subfeature_type {
+ SENSORS_SUBFEATURE_IN_INPUT = SENSORS_FEATURE_IN << 8,
+ SENSORS_SUBFEATURE_IN_MIN,
+ SENSORS_SUBFEATURE_IN_MAX,
+ SENSORS_SUBFEATURE_IN_LCRIT,
+ SENSORS_SUBFEATURE_IN_CRIT,
+ SENSORS_SUBFEATURE_IN_AVERAGE,
+ SENSORS_SUBFEATURE_IN_LOWEST,
+ SENSORS_SUBFEATURE_IN_HIGHEST,
+ SENSORS_SUBFEATURE_IN_ALARM = (SENSORS_FEATURE_IN << 8) | 0x80,
+ SENSORS_SUBFEATURE_IN_MIN_ALARM,
+ SENSORS_SUBFEATURE_IN_MAX_ALARM,
+ SENSORS_SUBFEATURE_IN_BEEP,
+ SENSORS_SUBFEATURE_IN_LCRIT_ALARM,
+ SENSORS_SUBFEATURE_IN_CRIT_ALARM,
+
+ SENSORS_SUBFEATURE_FAN_INPUT = SENSORS_FEATURE_FAN << 8,
+ SENSORS_SUBFEATURE_FAN_MIN,
+ SENSORS_SUBFEATURE_FAN_MAX,
+ SENSORS_SUBFEATURE_FAN_ALARM = (SENSORS_FEATURE_FAN << 8) | 0x80,
+ SENSORS_SUBFEATURE_FAN_FAULT,
+ SENSORS_SUBFEATURE_FAN_DIV,
+ SENSORS_SUBFEATURE_FAN_BEEP,
+ SENSORS_SUBFEATURE_FAN_PULSES,
+ SENSORS_SUBFEATURE_FAN_MIN_ALARM,
+ SENSORS_SUBFEATURE_FAN_MAX_ALARM,
+
+ SENSORS_SUBFEATURE_TEMP_INPUT = SENSORS_FEATURE_TEMP << 8,
+ SENSORS_SUBFEATURE_TEMP_MAX,
+ SENSORS_SUBFEATURE_TEMP_MAX_HYST,
+ SENSORS_SUBFEATURE_TEMP_MIN,
+ SENSORS_SUBFEATURE_TEMP_CRIT,
+ SENSORS_SUBFEATURE_TEMP_CRIT_HYST,
+ SENSORS_SUBFEATURE_TEMP_LCRIT,
+ SENSORS_SUBFEATURE_TEMP_EMERGENCY,
+ SENSORS_SUBFEATURE_TEMP_EMERGENCY_HYST,
+ SENSORS_SUBFEATURE_TEMP_LOWEST,
+ SENSORS_SUBFEATURE_TEMP_HIGHEST,
+ SENSORS_SUBFEATURE_TEMP_ALARM = (SENSORS_FEATURE_TEMP << 8) | 0x80,
+ SENSORS_SUBFEATURE_TEMP_MAX_ALARM,
+ SENSORS_SUBFEATURE_TEMP_MIN_ALARM,
+ SENSORS_SUBFEATURE_TEMP_CRIT_ALARM,
+ SENSORS_SUBFEATURE_TEMP_FAULT,
+ SENSORS_SUBFEATURE_TEMP_TYPE,
+ SENSORS_SUBFEATURE_TEMP_OFFSET,
+ SENSORS_SUBFEATURE_TEMP_BEEP,
+ SENSORS_SUBFEATURE_TEMP_EMERGENCY_ALARM,
+ SENSORS_SUBFEATURE_TEMP_LCRIT_ALARM,
+
+ SENSORS_SUBFEATURE_POWER_AVERAGE = SENSORS_FEATURE_POWER << 8,
+ SENSORS_SUBFEATURE_POWER_AVERAGE_HIGHEST,
+ SENSORS_SUBFEATURE_POWER_AVERAGE_LOWEST,
+ SENSORS_SUBFEATURE_POWER_INPUT,
+ SENSORS_SUBFEATURE_POWER_INPUT_HIGHEST,
+ SENSORS_SUBFEATURE_POWER_INPUT_LOWEST,
+ SENSORS_SUBFEATURE_POWER_CAP,
+ SENSORS_SUBFEATURE_POWER_CAP_HYST,
+ SENSORS_SUBFEATURE_POWER_MAX,
+ SENSORS_SUBFEATURE_POWER_CRIT,
+ SENSORS_SUBFEATURE_POWER_AVERAGE_INTERVAL = (SENSORS_FEATURE_POWER << 8) | 0x80,
+ SENSORS_SUBFEATURE_POWER_ALARM,
+ SENSORS_SUBFEATURE_POWER_CAP_ALARM,
+ SENSORS_SUBFEATURE_POWER_MAX_ALARM,
+ SENSORS_SUBFEATURE_POWER_CRIT_ALARM,
+
+ SENSORS_SUBFEATURE_ENERGY_INPUT = SENSORS_FEATURE_ENERGY << 8,
+
+ SENSORS_SUBFEATURE_CURR_INPUT = SENSORS_FEATURE_CURR << 8,
+ SENSORS_SUBFEATURE_CURR_MIN,
+ SENSORS_SUBFEATURE_CURR_MAX,
+ SENSORS_SUBFEATURE_CURR_LCRIT,
+ SENSORS_SUBFEATURE_CURR_CRIT,
+ SENSORS_SUBFEATURE_CURR_AVERAGE,
+ SENSORS_SUBFEATURE_CURR_LOWEST,
+ SENSORS_SUBFEATURE_CURR_HIGHEST,
+ SENSORS_SUBFEATURE_CURR_ALARM = (SENSORS_FEATURE_CURR << 8) | 0x80,
+ SENSORS_SUBFEATURE_CURR_MIN_ALARM,
+ SENSORS_SUBFEATURE_CURR_MAX_ALARM,
+ SENSORS_SUBFEATURE_CURR_BEEP,
+ SENSORS_SUBFEATURE_CURR_LCRIT_ALARM,
+ SENSORS_SUBFEATURE_CURR_CRIT_ALARM,
+
+ SENSORS_SUBFEATURE_HUMIDITY_INPUT = SENSORS_FEATURE_HUMIDITY << 8,
+
+ SENSORS_SUBFEATURE_VID = SENSORS_FEATURE_VID << 8,
+
+ SENSORS_SUBFEATURE_INTRUSION_ALARM = SENSORS_FEATURE_INTRUSION << 8,
+ SENSORS_SUBFEATURE_INTRUSION_BEEP,
+
+ SENSORS_SUBFEATURE_BEEP_ENABLE = SENSORS_FEATURE_BEEP_ENABLE << 8,
+
+ SENSORS_SUBFEATURE_UNKNOWN = INT_MAX,
+} sensors_subfeature_type;
+
+/* Data about a single chip feature (or category leader) */
+struct sensors_feature {
+ char *name;
+ int number;
+ sensors_feature_type type;
+ /* Members below are for libsensors internal use only */
+ int first_subfeature;
+ int padding1;
+};
+
+/* Data about a single chip subfeature:
+ name is the string name used to refer to this subfeature (in config files)
+ number is the internal subfeature number, used in many functions to refer
+ to this subfeature
+ type is the subfeature type
+ mapping is the number of a main feature this subfeature belongs to
+ (for example subfeatures fan1_input, fan1_min, fan1_div and fan1_alarm
+ are mapped to main feature fan1)
+ flags is a bitfield, its value is a combination of SENSORS_MODE_R (readable),
+ SENSORS_MODE_W (writable) and SENSORS_COMPUTE_MAPPING (affected by the
+ computation rules of the main feature) */
+typedef struct sensors_subfeature {
+ char *name;
+ int number;
+ sensors_subfeature_type type;
+ int mapping;
+ unsigned int flags;
+} sensors_subfeature;
+
+/* This returns all main features of a specific chip. nr is an internally
+   used variable. Set it to zero to start at the beginning of the list.
+   If no more features are found, NULL is returned.
+   Do not try to change the returned structure; you will corrupt internal
+   data structures. */
+const sensors_feature *
+sensors_get_features(const sensors_chip_name *name, int *nr);
+
+/* This returns all subfeatures of a given main feature. nr is an
+   internally used variable. Set it to zero to start at the beginning of
+   the list. If no more subfeatures are found, NULL is returned.
+   Do not try to change the returned structure; you will corrupt internal
+   data structures. */
+const sensors_subfeature *
+sensors_get_all_subfeatures(const sensors_chip_name *name,
+ const sensors_feature *feature, int *nr);
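+
+/* A minimal sketch (hypothetical application code): given a chip
+   obtained from sensors_get_detected_chips(), list each feature with
+   its subfeatures.
+
+	const sensors_feature *f;
+	const sensors_subfeature *sf;
+	int fnr = 0, sfnr;
+
+	while ((f = sensors_get_features(chip, &fnr))) {
+		sfnr = 0;
+		while ((sf = sensors_get_all_subfeatures(chip, f, &sfnr)))
+			printf("%s/%s\n", f->name, sf->name);
+	}
+*/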
+
+/* This returns the subfeature of the given type for a given main feature,
+ if it exists, NULL otherwise.
+ Do not try to change the returned structure; you will corrupt internal
+ data structures. */
+const sensors_subfeature *
+sensors_get_subfeature(const sensors_chip_name *name,
+ const sensors_feature *feature,
+ sensors_subfeature_type type);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* def LIB_SENSORS_SENSORS_H */
diff --git a/tools/gator/daemon/libsensors/sysfs.c b/tools/gator/daemon/libsensors/sysfs.c
new file mode 100644
index 000000000000..2b494c9975b4
--- /dev/null
+++ b/tools/gator/daemon/libsensors/sysfs.c
@@ -0,0 +1,926 @@
+/*
+ sysfs.c - Part of libsensors, a library for reading Linux sensor data
+ Copyright (c) 2005 Mark M. Hoffman <mhoffman@lightlink.com>
+ Copyright (C) 2007-2010 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+/*** This file modified by ARM on Jan 23, 2013 to improve performance by replacing calls to fread() with calls to read(), and to read non-scaled values. ***/
+
+/* this define needed for strndup() */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include "data.h"
+#include "error.h"
+#include "access.h"
+#include "general.h"
+#include "sysfs.h"
+
+
+/****************************************************************************/
+
+#define ATTR_MAX 128
+#define SYSFS_MAGIC 0x62656572
+
+int sensors_sysfs_no_scaling;
+
+/*
+ * Read an attribute from sysfs
+ * Returns a pointer to a freshly allocated string; free it yourself.
+ * If the file doesn't exist or can't be read, NULL is returned.
+ */
+static char *sysfs_read_attr(const char *device, const char *attr)
+{
+ char path[NAME_MAX];
+ char buf[ATTR_MAX], *p;
+ FILE *f;
+
+ snprintf(path, NAME_MAX, "%s/%s", device, attr);
+
+ if (!(f = fopen(path, "r")))
+ return NULL;
+ p = fgets(buf, ATTR_MAX, f);
+ fclose(f);
+ if (!p)
+ return NULL;
+
+ /* Last byte is a '\n'; chop that off */
+ p = strndup(buf, strlen(buf) - 1);
+ if (!p)
+ sensors_fatal_error(__func__, "Out of memory");
+ return p;
+}
+
+/*
+ * Call an arbitrary function for each class device of the given class
+ * Returns 0 on success (all calls returned 0), a positive errno for
+ * local errors, or a negative error value if any call fails.
+ */
+static int sysfs_foreach_classdev(const char *class_name,
+ int (*func)(const char *, const char *))
+{
+ char path[NAME_MAX];
+ int path_off, ret;
+ DIR *dir;
+ struct dirent *ent;
+
+ path_off = snprintf(path, NAME_MAX, "%s/class/%s",
+ sensors_sysfs_mount, class_name);
+ if (!(dir = opendir(path)))
+ return errno;
+
+ ret = 0;
+ while (!ret && (ent = readdir(dir))) {
+ if (ent->d_name[0] == '.') /* skip hidden entries */
+ continue;
+
+ snprintf(path + path_off, NAME_MAX - path_off, "/%s",
+ ent->d_name);
+ ret = func(path, ent->d_name);
+ }
+
+ closedir(dir);
+ return ret;
+}
+
+/*
+ * Call an arbitrary function for each device of the given bus type
+ * Returns 0 on success (all calls returned 0), a positive errno for
+ * local errors, or a negative error value if any call fails.
+ */
+static int sysfs_foreach_busdev(const char *bus_type,
+ int (*func)(const char *, const char *))
+{
+ char path[NAME_MAX];
+ int path_off, ret;
+ DIR *dir;
+ struct dirent *ent;
+
+ path_off = snprintf(path, NAME_MAX, "%s/bus/%s/devices",
+ sensors_sysfs_mount, bus_type);
+ if (!(dir = opendir(path)))
+ return errno;
+
+ ret = 0;
+ while (!ret && (ent = readdir(dir))) {
+ if (ent->d_name[0] == '.') /* skip hidden entries */
+ continue;
+
+ snprintf(path + path_off, NAME_MAX - path_off, "/%s",
+ ent->d_name);
+ ret = func(path, ent->d_name);
+ }
+
+ closedir(dir);
+ return ret;
+}
+
+/****************************************************************************/
+
+char sensors_sysfs_mount[NAME_MAX];
+
+#define MAX_MAIN_SENSOR_TYPES (SENSORS_FEATURE_MAX_MAIN - SENSORS_FEATURE_IN)
+#define MAX_OTHER_SENSOR_TYPES (SENSORS_FEATURE_MAX_OTHER - SENSORS_FEATURE_VID)
+#define MAX_SENSORS_PER_TYPE 24
+/* max_subfeatures is now computed dynamically */
+#define FEATURE_SIZE (max_subfeatures * 2)
+#define FEATURE_TYPE_SIZE (MAX_SENSORS_PER_TYPE * FEATURE_SIZE)
+
+/*
+ * Room for all 7 main types (in, fan, temp, power, energy, current, humidity)
+ * and 2 other types (VID, intrusion) with all their subfeatures + misc features
+ */
+#define SUB_OFFSET_OTHER (MAX_MAIN_SENSOR_TYPES * FEATURE_TYPE_SIZE)
+#define SUB_OFFSET_MISC (SUB_OFFSET_OTHER + \
+ MAX_OTHER_SENSOR_TYPES * FEATURE_TYPE_SIZE)
+#define ALL_POSSIBLE_SUBFEATURES (SUB_OFFSET_MISC + 1)
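+
+/* Worked example of the sparse layout: with max_subfeatures = m (so
+   FEATURE_SIZE = 2 * m), the attribute temp2_max (ftype
+   SENSORS_FEATURE_TEMP, channel nr 1 after adjustment) lands at index
+   2 * FEATURE_TYPE_SIZE + 1 * FEATURE_SIZE + offset, while alarm
+   subfeatures (bit 7 set) are shifted by m into the second half of the
+   feature's slot; see sensors_read_dynamic_chip() below. */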
+
+static
+int get_type_scaling(sensors_subfeature_type type)
+{
+ /* Multipliers for subfeatures */
+ switch (type & 0xFF80) {
+ case SENSORS_SUBFEATURE_IN_INPUT:
+ case SENSORS_SUBFEATURE_TEMP_INPUT:
+ case SENSORS_SUBFEATURE_CURR_INPUT:
+ case SENSORS_SUBFEATURE_HUMIDITY_INPUT:
+ return 1000;
+ case SENSORS_SUBFEATURE_FAN_INPUT:
+ return 1;
+ case SENSORS_SUBFEATURE_POWER_AVERAGE:
+ case SENSORS_SUBFEATURE_ENERGY_INPUT:
+ return 1000000;
+ }
+
+ /* Multipliers for second class subfeatures
+ that need their own multiplier */
+ switch (type) {
+ case SENSORS_SUBFEATURE_POWER_AVERAGE_INTERVAL:
+ case SENSORS_SUBFEATURE_VID:
+ case SENSORS_SUBFEATURE_TEMP_OFFSET:
+ return 1000;
+ default:
+ return 1;
+ }
+}
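+
+/* For example, a temp*_input attribute exposing "42500" (millidegrees)
+   has a factor of 1000 above and is thus reported as 42.5 degrees,
+   unless sensors_sysfs_no_scaling is set. */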
+
+static
+char *get_feature_name(sensors_feature_type ftype, char *sfname)
+{
+ char *name, *underscore;
+
+ switch (ftype) {
+ case SENSORS_FEATURE_IN:
+ case SENSORS_FEATURE_FAN:
+ case SENSORS_FEATURE_TEMP:
+ case SENSORS_FEATURE_POWER:
+ case SENSORS_FEATURE_ENERGY:
+ case SENSORS_FEATURE_CURR:
+ case SENSORS_FEATURE_HUMIDITY:
+ case SENSORS_FEATURE_INTRUSION:
+ underscore = strchr(sfname, '_');
+ name = strndup(sfname, underscore - sfname);
+ if (!name)
+ sensors_fatal_error(__func__, "Out of memory");
+
+ break;
+ default:
+ name = strdup(sfname);
+ if (!name)
+ sensors_fatal_error(__func__, "Out of memory");
+ }
+
+ return name;
+}
+
+/* Static mappings for use by sensors_subfeature_get_type() */
+struct subfeature_type_match
+{
+ const char *name;
+ sensors_subfeature_type type;
+};
+
+struct feature_type_match
+{
+ const char *name;
+ const struct subfeature_type_match *submatches;
+};
+
+static const struct subfeature_type_match temp_matches[] = {
+ { "input", SENSORS_SUBFEATURE_TEMP_INPUT },
+ { "max", SENSORS_SUBFEATURE_TEMP_MAX },
+ { "max_hyst", SENSORS_SUBFEATURE_TEMP_MAX_HYST },
+ { "min", SENSORS_SUBFEATURE_TEMP_MIN },
+ { "crit", SENSORS_SUBFEATURE_TEMP_CRIT },
+ { "crit_hyst", SENSORS_SUBFEATURE_TEMP_CRIT_HYST },
+ { "lcrit", SENSORS_SUBFEATURE_TEMP_LCRIT },
+ { "emergency", SENSORS_SUBFEATURE_TEMP_EMERGENCY },
+ { "emergency_hyst", SENSORS_SUBFEATURE_TEMP_EMERGENCY_HYST },
+ { "lowest", SENSORS_SUBFEATURE_TEMP_LOWEST },
+ { "highest", SENSORS_SUBFEATURE_TEMP_HIGHEST },
+ { "alarm", SENSORS_SUBFEATURE_TEMP_ALARM },
+ { "min_alarm", SENSORS_SUBFEATURE_TEMP_MIN_ALARM },
+ { "max_alarm", SENSORS_SUBFEATURE_TEMP_MAX_ALARM },
+ { "crit_alarm", SENSORS_SUBFEATURE_TEMP_CRIT_ALARM },
+ { "emergency_alarm", SENSORS_SUBFEATURE_TEMP_EMERGENCY_ALARM },
+ { "lcrit_alarm", SENSORS_SUBFEATURE_TEMP_LCRIT_ALARM },
+ { "fault", SENSORS_SUBFEATURE_TEMP_FAULT },
+ { "type", SENSORS_SUBFEATURE_TEMP_TYPE },
+ { "offset", SENSORS_SUBFEATURE_TEMP_OFFSET },
+ { "beep", SENSORS_SUBFEATURE_TEMP_BEEP },
+ { NULL, 0 }
+};
+
+static const struct subfeature_type_match in_matches[] = {
+ { "input", SENSORS_SUBFEATURE_IN_INPUT },
+ { "min", SENSORS_SUBFEATURE_IN_MIN },
+ { "max", SENSORS_SUBFEATURE_IN_MAX },
+ { "lcrit", SENSORS_SUBFEATURE_IN_LCRIT },
+ { "crit", SENSORS_SUBFEATURE_IN_CRIT },
+ { "average", SENSORS_SUBFEATURE_IN_AVERAGE },
+ { "lowest", SENSORS_SUBFEATURE_IN_LOWEST },
+ { "highest", SENSORS_SUBFEATURE_IN_HIGHEST },
+ { "alarm", SENSORS_SUBFEATURE_IN_ALARM },
+ { "min_alarm", SENSORS_SUBFEATURE_IN_MIN_ALARM },
+ { "max_alarm", SENSORS_SUBFEATURE_IN_MAX_ALARM },
+ { "lcrit_alarm", SENSORS_SUBFEATURE_IN_LCRIT_ALARM },
+ { "crit_alarm", SENSORS_SUBFEATURE_IN_CRIT_ALARM },
+ { "beep", SENSORS_SUBFEATURE_IN_BEEP },
+ { NULL, 0 }
+};
+
+static const struct subfeature_type_match fan_matches[] = {
+ { "input", SENSORS_SUBFEATURE_FAN_INPUT },
+ { "min", SENSORS_SUBFEATURE_FAN_MIN },
+ { "max", SENSORS_SUBFEATURE_FAN_MAX },
+ { "div", SENSORS_SUBFEATURE_FAN_DIV },
+ { "pulses", SENSORS_SUBFEATURE_FAN_PULSES },
+ { "alarm", SENSORS_SUBFEATURE_FAN_ALARM },
+ { "min_alarm", SENSORS_SUBFEATURE_FAN_MIN_ALARM },
+ { "max_alarm", SENSORS_SUBFEATURE_FAN_MAX_ALARM },
+ { "fault", SENSORS_SUBFEATURE_FAN_FAULT },
+ { "beep", SENSORS_SUBFEATURE_FAN_BEEP },
+ { NULL, 0 }
+};
+
+static const struct subfeature_type_match power_matches[] = {
+ { "average", SENSORS_SUBFEATURE_POWER_AVERAGE },
+ { "average_highest", SENSORS_SUBFEATURE_POWER_AVERAGE_HIGHEST },
+ { "average_lowest", SENSORS_SUBFEATURE_POWER_AVERAGE_LOWEST },
+ { "input", SENSORS_SUBFEATURE_POWER_INPUT },
+ { "input_highest", SENSORS_SUBFEATURE_POWER_INPUT_HIGHEST },
+ { "input_lowest", SENSORS_SUBFEATURE_POWER_INPUT_LOWEST },
+ { "cap", SENSORS_SUBFEATURE_POWER_CAP },
+ { "cap_hyst", SENSORS_SUBFEATURE_POWER_CAP_HYST },
+ { "cap_alarm", SENSORS_SUBFEATURE_POWER_CAP_ALARM },
+ { "alarm", SENSORS_SUBFEATURE_POWER_ALARM },
+ { "max", SENSORS_SUBFEATURE_POWER_MAX },
+ { "max_alarm", SENSORS_SUBFEATURE_POWER_MAX_ALARM },
+ { "crit", SENSORS_SUBFEATURE_POWER_CRIT },
+ { "crit_alarm", SENSORS_SUBFEATURE_POWER_CRIT_ALARM },
+ { "average_interval", SENSORS_SUBFEATURE_POWER_AVERAGE_INTERVAL },
+ { NULL, 0 }
+};
+
+static const struct subfeature_type_match energy_matches[] = {
+ { "input", SENSORS_SUBFEATURE_ENERGY_INPUT },
+ { NULL, 0 }
+};
+
+static const struct subfeature_type_match curr_matches[] = {
+ { "input", SENSORS_SUBFEATURE_CURR_INPUT },
+ { "min", SENSORS_SUBFEATURE_CURR_MIN },
+ { "max", SENSORS_SUBFEATURE_CURR_MAX },
+ { "lcrit", SENSORS_SUBFEATURE_CURR_LCRIT },
+ { "crit", SENSORS_SUBFEATURE_CURR_CRIT },
+ { "average", SENSORS_SUBFEATURE_CURR_AVERAGE },
+ { "lowest", SENSORS_SUBFEATURE_CURR_LOWEST },
+ { "highest", SENSORS_SUBFEATURE_CURR_HIGHEST },
+ { "alarm", SENSORS_SUBFEATURE_CURR_ALARM },
+ { "min_alarm", SENSORS_SUBFEATURE_CURR_MIN_ALARM },
+ { "max_alarm", SENSORS_SUBFEATURE_CURR_MAX_ALARM },
+ { "lcrit_alarm", SENSORS_SUBFEATURE_CURR_LCRIT_ALARM },
+ { "crit_alarm", SENSORS_SUBFEATURE_CURR_CRIT_ALARM },
+ { "beep", SENSORS_SUBFEATURE_CURR_BEEP },
+ { NULL, 0 }
+};
+
+static const struct subfeature_type_match humidity_matches[] = {
+ { "input", SENSORS_SUBFEATURE_HUMIDITY_INPUT },
+ { NULL, 0 }
+};
+
+static const struct subfeature_type_match cpu_matches[] = {
+ { "vid", SENSORS_SUBFEATURE_VID },
+ { NULL, 0 }
+};
+
+static const struct subfeature_type_match intrusion_matches[] = {
+ { "alarm", SENSORS_SUBFEATURE_INTRUSION_ALARM },
+ { "beep", SENSORS_SUBFEATURE_INTRUSION_BEEP },
+ { NULL, 0 }
+};
+static struct feature_type_match matches[] = {
+ { "temp%d%c", temp_matches },
+ { "in%d%c", in_matches },
+ { "fan%d%c", fan_matches },
+ { "cpu%d%c", cpu_matches },
+ { "power%d%c", power_matches },
+ { "curr%d%c", curr_matches },
+ { "energy%d%c", energy_matches },
+ { "intrusion%d%c", intrusion_matches },
+ { "humidity%d%c", humidity_matches },
+};
+
+/* Return the subfeature type and channel number based on the subfeature
+ name */
+static
+sensors_subfeature_type sensors_subfeature_get_type(const char *name, int *nr)
+{
+ char c;
+ int i, count;
+ const struct subfeature_type_match *submatches;
+
+ /* Special case */
+ if (!strcmp(name, "beep_enable")) {
+ *nr = 0;
+ return SENSORS_SUBFEATURE_BEEP_ENABLE;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(matches); i++)
+ if ((count = sscanf(name, matches[i].name, nr, &c)))
+ break;
+
+ if (i == ARRAY_SIZE(matches) || count != 2 || c != '_')
+ return SENSORS_SUBFEATURE_UNKNOWN; /* no match */
+
+ submatches = matches[i].submatches;
+ name = strchr(name + 3, '_') + 1;
+ for (i = 0; submatches[i].name != NULL; i++)
+ if (!strcmp(name, submatches[i].name))
+ return submatches[i].type;
+
+ return SENSORS_SUBFEATURE_UNKNOWN;
+}
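+
+/* For example, "temp2_crit_alarm" matches the "temp%d%c" pattern, sets
+   *nr to 2, and then resolves "crit_alarm" through temp_matches to
+   SENSORS_SUBFEATURE_TEMP_CRIT_ALARM. */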
+
+static int sensors_compute_max(void)
+{
+ int i, j, max, offset;
+ const struct subfeature_type_match *submatches;
+ sensors_feature_type ftype;
+
+ max = 0;
+ for (i = 0; i < ARRAY_SIZE(matches); i++) {
+ submatches = matches[i].submatches;
+ for (j = 0; submatches[j].name != NULL; j++) {
+ ftype = submatches[j].type >> 8;
+
+ if (ftype < SENSORS_FEATURE_VID) {
+ offset = submatches[j].type & 0x7F;
+ if (offset >= max)
+ max = offset + 1;
+ } else {
+ offset = submatches[j].type & 0xFF;
+ if (offset >= max * 2)
+ max = ((offset + 1) + 1) / 2;
+ }
+ }
+ }
+
+ return max;
+}
+
+static int sensors_get_attr_mode(const char *device, const char *attr)
+{
+ char path[NAME_MAX];
+ struct stat st;
+ int mode = 0;
+
+ snprintf(path, NAME_MAX, "%s/%s", device, attr);
+ if (!stat(path, &st)) {
+ if (st.st_mode & S_IRUSR)
+ mode |= SENSORS_MODE_R;
+ if (st.st_mode & S_IWUSR)
+ mode |= SENSORS_MODE_W;
+ }
+ return mode;
+}
+
+static int sensors_read_dynamic_chip(sensors_chip_features *chip,
+ const char *dev_path)
+{
+ int i, fnum = 0, sfnum = 0, prev_slot;
+ static int max_subfeatures;
+ DIR *dir;
+ struct dirent *ent;
+ sensors_subfeature *all_subfeatures;
+ sensors_subfeature *dyn_subfeatures;
+ sensors_feature *dyn_features;
+ sensors_feature_type ftype;
+ sensors_subfeature_type sftype;
+
+ if (!(dir = opendir(dev_path)))
+ return -errno;
+
+ /* Dynamically figure out the max number of subfeatures */
+ if (!max_subfeatures)
+ max_subfeatures = sensors_compute_max();
+
+	/* We use a large sparse table at first to store all found
+	   subfeatures, so that we can store them sorted by type and index
+	   and then later create a dense sorted table. */
+ all_subfeatures = calloc(ALL_POSSIBLE_SUBFEATURES,
+ sizeof(sensors_subfeature));
+ if (!all_subfeatures)
+ sensors_fatal_error(__func__, "Out of memory");
+
+ while ((ent = readdir(dir))) {
+ char *name;
+ int nr;
+
+ /* Skip directories and symlinks */
+ if (ent->d_type != DT_REG)
+ continue;
+
+ name = ent->d_name;
+
+ sftype = sensors_subfeature_get_type(name, &nr);
+ if (sftype == SENSORS_SUBFEATURE_UNKNOWN)
+ continue;
+ ftype = sftype >> 8;
+
+ /* Adjust the channel number */
+ switch (ftype) {
+ case SENSORS_FEATURE_FAN:
+ case SENSORS_FEATURE_TEMP:
+ case SENSORS_FEATURE_POWER:
+ case SENSORS_FEATURE_ENERGY:
+ case SENSORS_FEATURE_CURR:
+ case SENSORS_FEATURE_HUMIDITY:
+ nr--;
+ break;
+ default:
+ break;
+ }
+
+ if (nr < 0 || nr >= MAX_SENSORS_PER_TYPE) {
+ /* More sensors of one type than MAX_SENSORS_PER_TYPE,
+ we have to ignore it */
+#ifdef DEBUG
+ sensors_fatal_error(__func__,
+ "Increase MAX_SENSORS_PER_TYPE!");
+#endif
+ continue;
+ }
+
+ /* "calculate" a place to store the subfeature in our sparse,
+ sorted table */
+ switch (ftype) {
+ case SENSORS_FEATURE_VID:
+ case SENSORS_FEATURE_INTRUSION:
+ i = SUB_OFFSET_OTHER +
+ (ftype - SENSORS_FEATURE_VID) * FEATURE_TYPE_SIZE +
+ nr * FEATURE_SIZE + (sftype & 0xFF);
+ break;
+ case SENSORS_FEATURE_BEEP_ENABLE:
+ i = SUB_OFFSET_MISC +
+ (ftype - SENSORS_FEATURE_BEEP_ENABLE);
+ break;
+ default:
+ i = ftype * FEATURE_TYPE_SIZE +
+ nr * FEATURE_SIZE +
+ ((sftype & 0x80) >> 7) * max_subfeatures +
+ (sftype & 0x7F);
+ }
+
+ if (all_subfeatures[i].name) {
+#ifdef DEBUG
+ sensors_fatal_error(__func__, "Duplicate subfeature");
+#endif
+ continue;
+ }
+
+ /* fill in the subfeature members */
+ all_subfeatures[i].type = sftype;
+ all_subfeatures[i].name = strdup(name);
+ if (!all_subfeatures[i].name)
+ sensors_fatal_error(__func__, "Out of memory");
+
+ /* Other and misc subfeatures are never scaled */
+ if (sftype < SENSORS_SUBFEATURE_VID && !(sftype & 0x80))
+ all_subfeatures[i].flags |= SENSORS_COMPUTE_MAPPING;
+ all_subfeatures[i].flags |= sensors_get_attr_mode(dev_path, name);
+
+ sfnum++;
+ }
+ closedir(dir);
+
+ if (!sfnum) { /* No subfeature */
+ chip->subfeature = NULL;
+ goto exit_free;
+ }
+
+ /* How many main features? */
+ prev_slot = -1;
+ for (i = 0; i < ALL_POSSIBLE_SUBFEATURES; i++) {
+ if (!all_subfeatures[i].name)
+ continue;
+
+ if (i >= SUB_OFFSET_MISC || i / FEATURE_SIZE != prev_slot) {
+ fnum++;
+ prev_slot = i / FEATURE_SIZE;
+ }
+ }
+
+ dyn_subfeatures = calloc(sfnum, sizeof(sensors_subfeature));
+ dyn_features = calloc(fnum, sizeof(sensors_feature));
+ if (!dyn_subfeatures || !dyn_features)
+ sensors_fatal_error(__func__, "Out of memory");
+
+ /* Copy from the sparse array to the compact array */
+ sfnum = 0;
+ fnum = -1;
+ prev_slot = -1;
+ for (i = 0; i < ALL_POSSIBLE_SUBFEATURES; i++) {
+ if (!all_subfeatures[i].name)
+ continue;
+
+ /* New main feature? */
+ if (i >= SUB_OFFSET_MISC || i / FEATURE_SIZE != prev_slot) {
+ ftype = all_subfeatures[i].type >> 8;
+ fnum++;
+ prev_slot = i / FEATURE_SIZE;
+
+ dyn_features[fnum].name = get_feature_name(ftype,
+ all_subfeatures[i].name);
+ dyn_features[fnum].number = fnum;
+ dyn_features[fnum].first_subfeature = sfnum;
+ dyn_features[fnum].type = ftype;
+ }
+
+ dyn_subfeatures[sfnum] = all_subfeatures[i];
+ dyn_subfeatures[sfnum].number = sfnum;
+ /* Back to the feature */
+ dyn_subfeatures[sfnum].mapping = fnum;
+
+ sfnum++;
+ }
+
+ chip->subfeature = dyn_subfeatures;
+ chip->subfeature_count = sfnum;
+ chip->feature = dyn_features;
+ chip->feature_count = ++fnum;
+
+exit_free:
+ free(all_subfeatures);
+ return 0;
+}
+
+/* returns !0 if sysfs filesystem was found, 0 otherwise */
+int sensors_init_sysfs(void)
+{
+ struct statfs statfsbuf;
+
+ snprintf(sensors_sysfs_mount, NAME_MAX, "%s", "/sys");
+ if (statfs(sensors_sysfs_mount, &statfsbuf) < 0
+ || statfsbuf.f_type != SYSFS_MAGIC)
+ return 0;
+
+ return 1;
+}
+
+/* returns: number of devices added (0 or 1) if successful, <0 otherwise */
+static int sensors_read_one_sysfs_chip(const char *dev_path,
+ const char *dev_name,
+ const char *hwmon_path)
+{
+ int domain, bus, slot, fn, vendor, product, id;
+ int err = -SENSORS_ERR_KERNEL;
+ char *bus_attr;
+ char bus_path[NAME_MAX];
+ char linkpath[NAME_MAX];
+ char subsys_path[NAME_MAX], *subsys;
+ int sub_len;
+ sensors_chip_features entry;
+
+ /* ignore any device without name attribute */
+ if (!(entry.chip.prefix = sysfs_read_attr(hwmon_path, "name")))
+ return 0;
+
+ entry.chip.path = strdup(hwmon_path);
+ if (!entry.chip.path)
+ sensors_fatal_error(__func__, "Out of memory");
+
+ if (dev_path == NULL) {
+ /* Virtual device */
+ entry.chip.bus.type = SENSORS_BUS_TYPE_VIRTUAL;
+ entry.chip.bus.nr = 0;
+ /* For now we assume that virtual devices are unique */
+ entry.chip.addr = 0;
+ goto done;
+ }
+
+ /* Find bus type */
+ snprintf(linkpath, NAME_MAX, "%s/subsystem", dev_path);
+ sub_len = readlink(linkpath, subsys_path, NAME_MAX - 1);
+ if (sub_len < 0 && errno == ENOENT) {
+ /* Fallback to "bus" link for kernels <= 2.6.17 */
+ snprintf(linkpath, NAME_MAX, "%s/bus", dev_path);
+ sub_len = readlink(linkpath, subsys_path, NAME_MAX - 1);
+ }
+ if (sub_len < 0) {
+ /* Older kernels (<= 2.6.11) have neither the subsystem
+ symlink nor the bus symlink */
+ if (errno == ENOENT)
+ subsys = NULL;
+ else
+ goto exit_free;
+ } else {
+ subsys_path[sub_len] = '\0';
+ subsys = strrchr(subsys_path, '/') + 1;
+ }
+
+ if ((!subsys || !strcmp(subsys, "i2c")) &&
+ sscanf(dev_name, "%hd-%x", &entry.chip.bus.nr,
+ &entry.chip.addr) == 2) {
+ /* find out if legacy ISA or not */
+ if (entry.chip.bus.nr == 9191) {
+ entry.chip.bus.type = SENSORS_BUS_TYPE_ISA;
+ entry.chip.bus.nr = 0;
+ } else {
+ entry.chip.bus.type = SENSORS_BUS_TYPE_I2C;
+ snprintf(bus_path, sizeof(bus_path),
+ "%s/class/i2c-adapter/i2c-%d/device",
+ sensors_sysfs_mount, entry.chip.bus.nr);
+
+ if ((bus_attr = sysfs_read_attr(bus_path, "name"))) {
+ if (!strncmp(bus_attr, "ISA ", 4)) {
+ entry.chip.bus.type = SENSORS_BUS_TYPE_ISA;
+ entry.chip.bus.nr = 0;
+ }
+
+ free(bus_attr);
+ }
+ }
+ } else
+ if ((!subsys || !strcmp(subsys, "spi")) &&
+ sscanf(dev_name, "spi%hd.%d", &entry.chip.bus.nr,
+ &entry.chip.addr) == 2) {
+ /* SPI */
+ entry.chip.bus.type = SENSORS_BUS_TYPE_SPI;
+ } else
+ if ((!subsys || !strcmp(subsys, "pci")) &&
+ sscanf(dev_name, "%x:%x:%x.%x", &domain, &bus, &slot, &fn) == 4) {
+ /* PCI */
+ entry.chip.addr = (domain << 16) + (bus << 8) + (slot << 3) + fn;
+ entry.chip.bus.type = SENSORS_BUS_TYPE_PCI;
+ entry.chip.bus.nr = 0;
+ } else
+ if ((!subsys || !strcmp(subsys, "platform") ||
+ !strcmp(subsys, "of_platform"))) {
+ /* must be new ISA (platform driver) */
+ if (sscanf(dev_name, "%*[a-z0-9_].%d", &entry.chip.addr) != 1)
+ entry.chip.addr = 0;
+ entry.chip.bus.type = SENSORS_BUS_TYPE_ISA;
+ entry.chip.bus.nr = 0;
+ } else if (subsys && !strcmp(subsys, "acpi")) {
+ entry.chip.bus.type = SENSORS_BUS_TYPE_ACPI;
+ /* For now we assume that acpi devices are unique */
+ entry.chip.bus.nr = 0;
+ entry.chip.addr = 0;
+ } else
+ if (subsys && !strcmp(subsys, "hid") &&
+ sscanf(dev_name, "%x:%x:%x.%x", &bus, &vendor, &product, &id) == 4) {
+ entry.chip.bus.type = SENSORS_BUS_TYPE_HID;
+		/* As of kernel 2.6.32, the hid device names are not
+		   meaningful, so use the bus number and instance id */
+ entry.chip.bus.nr = bus;
+ entry.chip.addr = id;
+ } else {
+ /* Ignore unknown device */
+ err = 0;
+ goto exit_free;
+ }
+
+done:
+ if (sensors_read_dynamic_chip(&entry, hwmon_path) < 0)
+ goto exit_free;
+ if (!entry.subfeature) { /* No subfeature, discard chip */
+ err = 0;
+ goto exit_free;
+ }
+ sensors_add_proc_chips(&entry);
+
+ return 1;
+
+exit_free:
+ free(entry.chip.prefix);
+ free(entry.chip.path);
+ return err;
+}
+
+static int sensors_add_hwmon_device_compat(const char *path,
+ const char *dev_name)
+{
+ int err;
+
+ err = sensors_read_one_sysfs_chip(path, dev_name, path);
+ if (err < 0)
+ return err;
+ return 0;
+}
+
+/* returns 0 if successful, !0 otherwise */
+static int sensors_read_sysfs_chips_compat(void)
+{
+ int ret;
+
+ ret = sysfs_foreach_busdev("i2c", sensors_add_hwmon_device_compat);
+ if (ret && ret != ENOENT)
+ return -SENSORS_ERR_KERNEL;
+
+ return 0;
+}
+
+static int sensors_add_hwmon_device(const char *path, const char *classdev)
+{
+ char linkpath[NAME_MAX];
+ char device[NAME_MAX], *device_p;
+ int dev_len, err;
+ (void)classdev; /* hide warning */
+
+ snprintf(linkpath, NAME_MAX, "%s/device", path);
+ dev_len = readlink(linkpath, device, NAME_MAX - 1);
+ if (dev_len < 0) {
+ /* No device link? Treat as virtual */
+ err = sensors_read_one_sysfs_chip(NULL, NULL, path);
+ } else {
+ device[dev_len] = '\0';
+ device_p = strrchr(device, '/') + 1;
+
+ /* The attributes we want might be those of the hwmon class
+ device, or those of the device itself. */
+ err = sensors_read_one_sysfs_chip(linkpath, device_p, path);
+ if (err == 0)
+ err = sensors_read_one_sysfs_chip(linkpath, device_p,
+ linkpath);
+ }
+ if (err < 0)
+ return err;
+ return 0;
+}
+
+/* returns 0 if successful, !0 otherwise */
+int sensors_read_sysfs_chips(void)
+{
+ int ret;
+
+ ret = sysfs_foreach_classdev("hwmon", sensors_add_hwmon_device);
+ if (ret == ENOENT) {
+ /* compatibility function for kernel 2.6.n where n <= 13 */
+ return sensors_read_sysfs_chips_compat();
+ }
+
+ if (ret > 0)
+ ret = -SENSORS_ERR_KERNEL;
+ return ret;
+}
+
+/* returns 0 if successful, !0 otherwise */
+static int sensors_add_i2c_bus(const char *path, const char *classdev)
+{
+ sensors_bus entry;
+
+ if (sscanf(classdev, "i2c-%hd", &entry.bus.nr) != 1 ||
+ entry.bus.nr == 9191) /* legacy ISA */
+ return 0;
+ entry.bus.type = SENSORS_BUS_TYPE_I2C;
+
+ /* Get the adapter name from the classdev "name" attribute
+ * (Linux 2.6.20 and later). If it fails, fall back to
+ * the device "name" attribute (for older kernels). */
+ entry.adapter = sysfs_read_attr(path, "name");
+ if (!entry.adapter)
+ entry.adapter = sysfs_read_attr(path, "device/name");
+ if (entry.adapter)
+ sensors_add_proc_bus(&entry);
+
+ return 0;
+}
+
+/* returns 0 if successful, !0 otherwise */
+int sensors_read_sysfs_bus(void)
+{
+ int ret;
+
+ ret = sysfs_foreach_classdev("i2c-adapter", sensors_add_i2c_bus);
+ if (ret == ENOENT)
+ ret = sysfs_foreach_busdev("i2c", sensors_add_i2c_bus);
+ if (ret && ret != ENOENT)
+ return -SENSORS_ERR_KERNEL;
+
+ return 0;
+}
+
+int sensors_read_sysfs_attr(const sensors_chip_name *name,
+ const sensors_subfeature *subfeature,
+ double *value)
+{
+ char n[NAME_MAX];
+ int f;
+
+ snprintf(n, NAME_MAX, "%s/%s", name->path, subfeature->name);
+ if ((f = open(n, O_RDONLY)) != -1) {
+ int res, err = 0;
+ char buf[512];
+ int count;
+
+ errno = 0;
+ if ((count = read(f, buf, sizeof(buf) - 1)) == -1) {
+ if (errno == EIO)
+ err = -SENSORS_ERR_IO;
+ else
+ err = -SENSORS_ERR_ACCESS_R;
+ } else {
+ buf[count] = '\0';
+ errno = 0;
+ res = sscanf(buf, "%lf", value);
+ if (res == EOF && errno == EIO)
+ err = -SENSORS_ERR_IO;
+ else if (res != 1)
+ err = -SENSORS_ERR_ACCESS_R;
+ }
+ res = close(f);
+ if (err)
+ return err;
+
+ if (res != 0) {
+ if (errno == EIO)
+ return -SENSORS_ERR_IO;
+ else
+ return -SENSORS_ERR_ACCESS_R;
+ }
+ if (!sensors_sysfs_no_scaling)
+ *value /= get_type_scaling(subfeature->type);
+ } else
+ return -SENSORS_ERR_KERNEL;
+
+ return 0;
+}
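+
+/*
+ * Worked example of the scaling step above (hypothetical values,
+ * assuming get_type_scaling() returns 1000 for temperature inputs):
+ * a hwmon attribute such as temp1_input reports millidegrees
+ * Celsius, e.g. "42000", which is stored as 42.0 after division.
+ */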
+
+int sensors_write_sysfs_attr(const sensors_chip_name *name,
+ const sensors_subfeature *subfeature,
+ double value)
+{
+ char n[NAME_MAX];
+ FILE *f;
+
+ snprintf(n, NAME_MAX, "%s/%s", name->path, subfeature->name);
+ if ((f = fopen(n, "w"))) {
+ int res, err = 0;
+
+ if (!sensors_sysfs_no_scaling)
+ value *= get_type_scaling(subfeature->type);
+ res = fprintf(f, "%d", (int) value);
+ if (res == -EIO)
+ err = -SENSORS_ERR_IO;
+ else if (res < 0)
+ err = -SENSORS_ERR_ACCESS_W;
+ res = fclose(f);
+ if (err)
+ return err;
+
+ if (res == EOF) {
+ if (errno == EIO)
+ return -SENSORS_ERR_IO;
+ else
+ return -SENSORS_ERR_ACCESS_W;
+ }
+ } else
+ return -SENSORS_ERR_KERNEL;
+
+ return 0;
+}
diff --git a/tools/gator/daemon/libsensors/sysfs.h b/tools/gator/daemon/libsensors/sysfs.h
new file mode 100644
index 000000000000..38584afd028e
--- /dev/null
+++ b/tools/gator/daemon/libsensors/sysfs.h
@@ -0,0 +1,43 @@
+/*
+ sysfs.h - part of libsensors, a library for reading Linux sensor data
+ Copyright (C) Mark M. Hoffman <mhoffman@lightlink.com>
+ Copyright (C) 2007 Jean Delvare <khali@linux-fr.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+#ifndef SENSORS_LIB_SYSFS_H
+#define SENSORS_LIB_SYSFS_H
+
+extern char sensors_sysfs_mount[];
+
+int sensors_init_sysfs(void);
+
+int sensors_read_sysfs_chips(void);
+
+int sensors_read_sysfs_bus(void);
+
+/* Read a value out of a sysfs attribute file */
+int sensors_read_sysfs_attr(const sensors_chip_name *name,
+ const sensors_subfeature *subfeature,
+ double *value);
+
+/* Write a value to a sysfs attribute file */
+int sensors_write_sysfs_attr(const sensors_chip_name *name,
+ const sensors_subfeature *subfeature,
+ double value);
+
+#endif /* !SENSORS_LIB_SYSFS_H */
diff --git a/tools/gator/daemon/libsensors/version.h b/tools/gator/daemon/libsensors/version.h
new file mode 100644
index 000000000000..76ceb08c29d0
--- /dev/null
+++ b/tools/gator/daemon/libsensors/version.h
@@ -0,0 +1 @@
+#define LM_VERSION "3.3.2"
diff --git a/tools/gator/daemon/main.cpp b/tools/gator/daemon/main.cpp
new file mode 100644
index 000000000000..fbce1e15d0d0
--- /dev/null
+++ b/tools/gator/daemon/main.cpp
@@ -0,0 +1,584 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "CCNDriver.h"
+#include "Child.h"
+#include "EventsXML.h"
+#include "Logging.h"
+#include "Monitor.h"
+#include "OlySocket.h"
+#include "OlyUtility.h"
+#include "SessionData.h"
+#include "Setup.h"
+
+extern Child* child;
+static int shutdownFilesystem();
+static pthread_mutex_t numSessions_mutex;
+static OlyServerSocket* sock = NULL;
+static Monitor monitor;
+static int numSessions = 0;
+static bool driverRunningAtStart = false;
+static bool driverMountedAtStart = false;
+
+struct cmdline_t {
+ char *module;
+ int port;
+ bool update;
+};
+
+#define DEFAULT_PORT 8080
+
+void cleanUp() {
+ if (shutdownFilesystem() == -1) {
+ logg->logMessage("Error shutting down gator filesystem");
+ }
+ delete sock;
+ delete util;
+ delete logg;
+}
+
+// CTRL C Signal Handler
+static void handler(int signum) {
+ logg->logMessage("Received signal %d, gator daemon exiting", signum);
+
+ // Case 1: both child and parent receive the signal
+ if (numSessions > 0) {
+ // Arbitrary sleep of 1 second to give time for the child to exit;
+ // if something bad happens, continue the shutdown process regardless
+ sleep(1);
+ }
+
+ // Case 2: only the parent received the signal
+ if (numSessions > 0) {
+ // Kill child threads - the first signal exits gracefully
+ logg->logMessage("Killing process group as %d child was running when signal was received", numSessions);
+ kill(0, SIGINT);
+
+ // Give time for the child to exit
+ sleep(1);
+
+ if (numSessions > 0) {
+ // The second signal force kills the child
+ logg->logMessage("Force kill the child");
+ kill(0, SIGINT);
+ // Again, sleep for 1 second
+ sleep(1);
+
+ if (numSessions > 0) {
+ // Something bad has really happened; the child is not exiting and therefore may hold the /dev/gator resource open
+ printf("Unable to kill the gatord child process, thus gator.ko may still be loaded.\n");
+ }
+ }
+ }
+
+ cleanUp();
+ exit(0);
+}
+
+// Child exit Signal Handler
+static void child_exit(int) {
+ int status;
+ int pid = wait(&status);
+ if (pid != -1) {
+ pthread_mutex_lock(&numSessions_mutex);
+ numSessions--;
+ pthread_mutex_unlock(&numSessions_mutex);
+ logg->logMessage("Child process %d exited with status %d", pid, status);
+ }
+}
+
+static const int UDP_REQ_PORT = 30001;
+
+typedef struct {
+ char rviHeader[8];
+ uint32_t messageID;
+ uint8_t ethernetAddress[8];
+ uint32_t ethernetType;
+ uint32_t dhcp;
+ char dhcpName[40];
+ uint32_t ipAddress;
+ uint32_t defaultGateway;
+ uint32_t subnetMask;
+ uint32_t activeConnections;
+} RVIConfigureInfo;
+
+static const char DST_REQ[] = { 'D', 'S', 'T', '_', 'R', 'E', 'Q', ' ', 0, 0, 0, 0x64 };
+
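+// Discovery handshake (as implemented below): Streamline sends a DST_REQ
+// datagram to UDP port 30001 and gatord replies with an RVIConfigureInfo
+// whose header is "STR_ANS ", carrying the host name, the protocol version
+// (in the subnetMask field) and, when non-default, the listening port (in
+// the defaultGateway field).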
+class UdpListener {
+public:
+ UdpListener() : mDstAns(), mReq(-1) {}
+
+ void setup(int port) {
+ mReq = udpPort(UDP_REQ_PORT);
+
+ // Format the answer buffer
+ memset(&mDstAns, 0, sizeof(mDstAns));
+ memcpy(mDstAns.rviHeader, "STR_ANS ", sizeof(mDstAns.rviHeader));
+ if (gethostname(mDstAns.dhcpName, sizeof(mDstAns.dhcpName) - 1) != 0) {
+ logg->logError(__FILE__, __LINE__, "gethostname failed");
+ handleException();
+ }
+ // Subvert the defaultGateway field for the port number
+ if (port != DEFAULT_PORT) {
+ mDstAns.defaultGateway = port;
+ }
+ // Subvert the subnetMask field for the protocol version
+ mDstAns.subnetMask = PROTOCOL_VERSION;
+ }
+
+ int getReq() const {
+ return mReq;
+ }
+
+ void handle() {
+ char buf[128];
+ struct sockaddr_in6 sockaddr;
+ socklen_t addrlen;
+ int read;
+ addrlen = sizeof(sockaddr);
+ read = recvfrom(mReq, &buf, sizeof(buf), 0, (struct sockaddr *)&sockaddr, &addrlen);
+ if (read < 0) {
+ logg->logError(__FILE__, __LINE__, "recvfrom failed");
+ handleException();
+ } else if ((read == 12) && (memcmp(buf, DST_REQ, sizeof(DST_REQ)) == 0)) {
+ // Don't care if sendto fails - gatord shouldn't exit because of it and Streamline will retry
+ sendto(mReq, &mDstAns, sizeof(mDstAns), 0, (struct sockaddr *)&sockaddr, addrlen);
+ }
+ }
+
+ void close() {
+ ::close(mReq);
+ }
+
+private:
+ int udpPort(int port) {
+ int s;
+ struct sockaddr_in6 sockaddr;
+ int on;
+ int family = AF_INET6;
+
+ s = socket_cloexec(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
+ if (s == -1) {
+ family = AF_INET;
+ s = socket_cloexec(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (s == -1) {
+ logg->logError(__FILE__, __LINE__, "socket failed");
+ handleException();
+ }
+ }
+
+ on = 1;
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on)) != 0) {
+ logg->logError(__FILE__, __LINE__, "setsockopt failed");
+ handleException();
+ }
+
+ memset((void*)&sockaddr, 0, sizeof(sockaddr));
+ sockaddr.sin6_family = family;
+ sockaddr.sin6_port = htons(port);
+ sockaddr.sin6_addr = in6addr_any;
+ if (bind(s, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
+			logg->logError(__FILE__, __LINE__, "bind failed");
+ handleException();
+ }
+
+ return s;
+ }
+
+ RVIConfigureInfo mDstAns;
+ int mReq;
+};
+
+static UdpListener udpListener;
+
+// retval: -1 = failure; 0 = was already mounted; 1 = successfully mounted
+static int mountGatorFS() {
+ // If already mounted,
+ if (access("/dev/gator/buffer", F_OK) == 0) {
+ return 0;
+ }
+
+ // else, mount the filesystem
+ mkdir("/dev/gator", S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+ if (mount("nodev", "/dev/gator", "gatorfs", 0, NULL) != 0) {
+ return -1;
+ } else {
+ return 1;
+ }
+}
+
+static bool init_module (const char * const location) {
+ bool ret(false);
+ const int fd = open(location, O_RDONLY | O_CLOEXEC);
+ if (fd >= 0) {
+ struct stat st;
+ if (fstat(fd, &st) == 0) {
+ void * const p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (p != MAP_FAILED) {
+ if (syscall(__NR_init_module, p, (unsigned long)st.st_size, "") == 0) {
+ ret = true;
+ }
+ munmap(p, st.st_size);
+ }
+ }
+ close(fd);
+ }
+
+ return ret;
+}
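+
+// The raw __NR_init_module syscall is used above, presumably because an
+// init_module() libc wrapper is not available on every supported C library
+// (e.g. Android's bionic); mmap() supplies the module image without an
+// intermediate copy. finit_module(2), available since Linux 3.8, would be
+// an alternative.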
+
+static bool setupFilesystem(char* module) {
+ if (module) {
+		// unmount and rmmod first if a module was specified on the commandline, to ensure that the specified module is the one that ends up running
+ shutdownFilesystem();
+
+ // if still mounted
+ if (access("/dev/gator/buffer", F_OK) == 0) {
+ logg->logError(__FILE__, __LINE__, "Unable to remove the running gator.ko. Manually remove the module or use the running module by not specifying one on the commandline");
+ handleException();
+ }
+ }
+
+ const int retval = mountGatorFS();
+ if (retval == 1) {
+ logg->logMessage("Driver already running at startup");
+ driverRunningAtStart = true;
+ } else if (retval == 0) {
+ logg->logMessage("Driver already mounted at startup");
+ driverRunningAtStart = driverMountedAtStart = true;
+ } else {
+ char command[256]; // arbitrarily large amount
+ char location[256]; // arbitrarily large amount
+
+ if (module) {
+ strncpy(location, module, sizeof(location));
+ } else {
+ // Is the driver co-located in the same directory?
+ if (util->getApplicationFullPath(location, sizeof(location)) != 0) { // allow some buffer space
+ logg->logMessage("Unable to determine the full path of gatord, the cwd will be used");
+ }
+ strncat(location, "gator.ko", sizeof(location) - strlen(location) - 1);
+ }
+
+ if (access(location, F_OK) == -1) {
+ if (module == NULL) {
+ // The gator kernel is not already loaded and unable to locate gator.ko in the default location
+ return false;
+ } else {
+ // gator location specified on the command line but it was not found
+ logg->logError(__FILE__, __LINE__, "gator module not found at %s", location);
+ handleException();
+ }
+ }
+
+ // Load driver
+ bool success = init_module(location);
+ if (!success) {
+ logg->logMessage("init_module failed, trying insmod");
+ snprintf(command, sizeof(command), "insmod %s >/dev/null 2>&1", location);
+ if (system(command) != 0) {
+ logg->logMessage("Unable to load gator.ko driver with command: %s", command);
+ logg->logError(__FILE__, __LINE__, "Unable to load (insmod) gator.ko driver:\n >>> gator.ko must be built against the current kernel version & configuration\n >>> See dmesg for more details");
+ handleException();
+ }
+ }
+
+ if (mountGatorFS() == -1) {
+ logg->logError(__FILE__, __LINE__, "Unable to mount the gator filesystem needed for profiling.");
+ handleException();
+ }
+ }
+
+ return true;
+}
+
+static int shutdownFilesystem() {
+ if (driverMountedAtStart == false) {
+ umount("/dev/gator");
+ }
+ if (driverRunningAtStart == false) {
+ if (syscall(__NR_delete_module, "gator", O_NONBLOCK) != 0) {
+ logg->logMessage("delete_module failed, trying rmmod");
+ if (system("rmmod gator >/dev/null 2>&1") != 0) {
+ return -1;
+ }
+ }
+ }
+
+ return 0; // success
+}
+
+static const char OPTSTRING[] = "hvudap:s:c:e:m:o:";
+
+static bool hasDebugFlag(int argc, char** argv) {
+ int c;
+
+ optind = 1;
+ while ((c = getopt(argc, argv, OPTSTRING)) != -1) {
+ if (c == 'd') {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static struct cmdline_t parseCommandLine(int argc, char** argv) {
+ struct cmdline_t cmdline;
+ cmdline.port = DEFAULT_PORT;
+ cmdline.module = NULL;
+ cmdline.update = false;
+ char version_string[256]; // arbitrary length to hold the version information
+ int c;
+
+ // build the version string
+ if (PROTOCOL_VERSION < PROTOCOL_DEV) {
+ snprintf(version_string, sizeof(version_string), "Streamline gatord version %d (DS-5 v5.%d)", PROTOCOL_VERSION, PROTOCOL_VERSION);
+ } else {
+ snprintf(version_string, sizeof(version_string), "Streamline gatord development version %d", PROTOCOL_VERSION);
+ }
+
+ optind = 1;
+ while ((c = getopt(argc, argv, OPTSTRING)) != -1) {
+ switch (c) {
+ case 'c':
+ gSessionData->mConfigurationXMLPath = optarg;
+ break;
+ case 'd':
+ // Already handled
+ break;
+ case 'e':
+ gSessionData->mEventsXMLPath = optarg;
+ break;
+ case 'm':
+ cmdline.module = optarg;
+ break;
+ case 'p':
+ cmdline.port = strtol(optarg, NULL, 10);
+ break;
+ case 's':
+ gSessionData->mSessionXMLPath = optarg;
+ break;
+ case 'o':
+ gSessionData->mTargetPath = optarg;
+ break;
+ case 'u':
+ cmdline.update = true;
+ break;
+ case 'a':
+ gSessionData->mAllowCommands = true;
+ break;
+ case 'h':
+ case '?':
+ logg->logError(__FILE__, __LINE__,
+ "%s. All parameters are optional:\n"
+ "-c config_xml path and filename of the configuration.xml to use\n"
+ "-e events_xml path and filename of the events.xml to use\n"
+ "-h this help page\n"
+ "-m module path and filename of gator.ko\n"
+ "-p port_number port upon which the server listens; default is 8080\n"
+ "-s session_xml path and filename of a session.xml used for local capture\n"
+ "-o apc_dir path and name of the output for a local capture\n"
+ "-v version information\n"
+ "-d enable debug messages\n"
+ "-a allow the user user to provide a command to run at the start of a capture"
+ , version_string);
+ handleException();
+ break;
+ case 'v':
+ logg->logError(__FILE__, __LINE__, version_string);
+ handleException();
+ break;
+ }
+ }
+
+ // Error checking
+ if (cmdline.port != DEFAULT_PORT && gSessionData->mSessionXMLPath != NULL) {
+ logg->logError(__FILE__, __LINE__, "Only a port or a session xml can be specified, not both");
+ handleException();
+ }
+
+ if (gSessionData->mTargetPath != NULL && gSessionData->mSessionXMLPath == NULL) {
+ logg->logError(__FILE__, __LINE__, "Missing -s command line option required for a local capture.");
+ handleException();
+ }
+
+ if (optind < argc) {
+ logg->logError(__FILE__, __LINE__, "Unknown argument: %s. Use '-h' for help.", argv[optind]);
+ handleException();
+ }
+
+ return cmdline;
+}
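+
+// Hypothetical example invocations: "gatord -p 8081" listens on an
+// alternate TCP port, while "gatord -s session.xml -o capture.apc"
+// performs a local capture without any socket connection.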
+
+static void handleClient() {
+ OlySocket client(sock->acceptConnection());
+
+ int pid = fork();
+ if (pid < 0) {
+ // Error
+ logg->logError(__FILE__, __LINE__, "Fork process failed. Please power cycle the target device if this error persists.");
+ } else if (pid == 0) {
+ // Child
+ sock->closeServerSocket();
+ udpListener.close();
+ monitor.close();
+ child = new Child(&client, numSessions + 1);
+ child->run();
+ delete child;
+ exit(0);
+ } else {
+ // Parent
+ client.closeSocket();
+
+ pthread_mutex_lock(&numSessions_mutex);
+ numSessions++;
+ pthread_mutex_unlock(&numSessions_mutex);
+
+ // Maximum number of connections is 2
+ int wait = 0;
+ while (numSessions > 1) {
+ // Throttle until one of the children exits before continuing to accept another socket connection
+ logg->logMessage("%d sessions active!", numSessions);
+ if (wait++ >= 10) { // Wait no more than 10 seconds
+ // Kill last created child
+ kill(pid, SIGALRM);
+ break;
+ }
+ sleep(1);
+ }
+ }
+}
+
+// Gator data flow: collector -> collector fifo -> sender
+int main(int argc, char** argv) {
+ // Ensure proper signal handling by making gatord the process group leader
+ // e.g. it may not be the group leader when launched as 'sudo gatord'
+ setsid();
+
+ // Set up global thread-safe logging
+ logg = new Logging(hasDebugFlag(argc, argv));
+ // Global data class
+ gSessionData = new SessionData();
+ // Set up global utility class
+ util = new OlyUtility();
+
+ // Initialize drivers
+ new CCNDriver();
+
+ prctl(PR_SET_NAME, (unsigned long)&"gatord-main", 0, 0, 0);
+ pthread_mutex_init(&numSessions_mutex, NULL);
+
+ signal(SIGINT, handler);
+ signal(SIGTERM, handler);
+ signal(SIGABRT, handler);
+
+ // Set to high priority
+ if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), -19) == -1) {
+ logg->logMessage("setpriority() failed");
+ }
+
+ // Parse the command line parameters
+ struct cmdline_t cmdline = parseCommandLine(argc, argv);
+
+ if (cmdline.update) {
+ return update(argv[0]);
+ }
+
+ // Verify root permissions
+ uid_t euid = geteuid();
+ if (euid) {
+ logg->logError(__FILE__, __LINE__, "gatord must be launched with root privileges");
+ handleException();
+ }
+
+ // Call before setting up the SIGCHLD handler, as system() spawns child processes
+ if (!setupFilesystem(cmdline.module)) {
+ logg->logMessage("Unable to setup gatorfs, trying perf");
+ if (!gSessionData->perf.setup()) {
+ logg->logError(__FILE__, __LINE__,
+ "Unable to locate gator.ko driver:\n"
+ " >>> gator.ko should be co-located with gatord in the same directory\n"
+ " >>> OR insmod gator.ko prior to launching gatord\n"
+ " >>> OR specify the location of gator.ko on the command line\n"
+ " >>> OR run Linux 3.4 or later with perf (CONFIG_PERF_EVENTS and CONFIG_HW_PERF_EVENTS) and tracing (CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER) support to collect data via userspace only");
+ handleException();
+ }
+ }
+
+ {
+ EventsXML eventsXML;
+ mxml_node_t *xml = eventsXML.getTree();
+ // Initialize all drivers
+ for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) {
+ driver->readEvents(xml);
+ }
+ mxmlDelete(xml);
+ }
+
+ // Handle child exit codes
+ signal(SIGCHLD, child_exit);
+
+ // Ignore the SIGPIPE signal so that any send to a broken socket will return an error code instead of asserting a signal
+ // Handling the error at the send function call is much easier than trying to do anything intelligent in the sig handler
+ signal(SIGPIPE, SIG_IGN);
+
+ // If the command line argument is a session xml file, no need to open a socket
+ if (gSessionData->mSessionXMLPath) {
+ child = new Child();
+ child->run();
+ delete child;
+ } else {
+ gSessionData->annotateListener.setup();
+ sock = new OlyServerSocket(cmdline.port);
+ udpListener.setup(cmdline.port);
+ if (!monitor.init() ||
+ !monitor.add(sock->getFd()) ||
+ !monitor.add(udpListener.getReq()) ||
+ !monitor.add(gSessionData->annotateListener.getFd()) ||
+ false) {
+ logg->logError(__FILE__, __LINE__, "Monitor setup failed");
+ handleException();
+ }
+ // Forever loop, can be exited via a signal or exception
+ while (1) {
+ struct epoll_event events[2];
+ logg->logMessage("Waiting on connection...");
+ int ready = monitor.wait(events, ARRAY_LENGTH(events), -1);
+ if (ready < 0) {
+ logg->logError(__FILE__, __LINE__, "Monitor::wait failed");
+ handleException();
+ }
+ for (int i = 0; i < ready; ++i) {
+ if (events[i].data.fd == sock->getFd()) {
+ handleClient();
+ } else if (events[i].data.fd == udpListener.getReq()) {
+ udpListener.handle();
+ } else if (events[i].data.fd == gSessionData->annotateListener.getFd()) {
+ gSessionData->annotateListener.handle();
+ }
+ }
+ }
+ }
+
+ cleanUp();
+ return 0;
+}
diff --git a/tools/gator/daemon/mxml/COPYING b/tools/gator/daemon/mxml/COPYING
new file mode 100644
index 000000000000..4d0aa78af224
--- /dev/null
+++ b/tools/gator/daemon/mxml/COPYING
@@ -0,0 +1,507 @@
+ Mini-XML License
+ September 18, 2010
+
+
+The Mini-XML library and included programs are provided under the
+terms of the GNU Library General Public License version 2 (LGPL2)
+with the following exceptions:
+
+ 1. Static linking of applications to the Mini-XML library
+does not constitute a derivative work and does not require
+the author to provide source code for the application, use
+the shared Mini-XML libraries, or link their applications
+against a user-supplied version of Mini-XML.
+
+If you link the application to a modified version of
+Mini-XML, then the changes to Mini-XML must be provided
+under the terms of the LGPL2 in sections 1, 2, and 4.
+
+ 2. You do not have to provide a copy of the Mini-XML license
+with programs that are linked to the Mini-XML library, nor
+do you have to identify the Mini-XML license in your
+program or documentation as required by section 6 of the
+LGPL2.
+
+
+ GNU LIBRARY GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ [This is the first released version of the library GPL. It is
+ numbered 2 because it goes with version 2 of the ordinary GPL.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Library General Public License, applies to some
+specially designated Free Software Foundation software, and to any
+other libraries whose authors decide to use it. You can use it for
+your libraries, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if
+you distribute copies of the library, or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link a program with the library, you must provide
+complete object files to the recipients so that they can relink them
+with the library, after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ Our method of protecting your rights has two steps: (1) copyright
+the library, and (2) offer you this license which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ Also, for each distributor's protection, we want to make certain
+that everyone understands that there is no warranty for this free
+library. If the library is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original
+version, so that any problems introduced by others will not reflect on
+the original authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that companies distributing free
+software will individually obtain patent licenses, thus in effect
+transforming the program into proprietary software. To prevent this,
+we have made it clear that any patent must be licensed for everyone's
+free use or not licensed at all.
+
+ Most GNU software, including some libraries, is covered by the ordinary
+GNU General Public License, which was designed for utility programs. This
+license, the GNU Library General Public License, applies to certain
+designated libraries. This license is quite different from the ordinary
+one; be sure to read it in full, and don't assume that anything in it is
+the same as in the ordinary license.
+
+ The reason we have a separate public license for some libraries is that
+they blur the distinction we usually make between modifying or adding to a
+program and simply using it. Linking a program with a library, without
+changing the library, is in some sense simply using the library, and is
+analogous to running a utility program or application program. However, in
+a textual and legal sense, the linked executable is a combined work, a
+derivative of the original library, and the ordinary General Public License
+treats it as such.
+
+ Because of this blurred distinction, using the ordinary General
+Public License for libraries did not effectively promote software
+sharing, because most developers did not use the libraries. We
+concluded that weaker conditions might promote sharing better.
+
+ However, unrestricted linking of non-free programs would deprive the
+users of those programs of all benefit from the free status of the
+libraries themselves. This Library General Public License is intended to
+permit developers of non-free programs to use free libraries, while
+preserving your freedom as a user of such programs to change the free
+libraries that are incorporated in them. (We have not seen how to achieve
+this as regards changes in header files, but we have achieved it as regards
+changes in the actual functions of the Library.) The hope is that this
+will lead to faster development of free libraries.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, while the latter only
+works together with the library.
+
+ Note that it is possible for a library to be covered by the ordinary
+General Public License rather than by this special one.
+
+ GNU LIBRARY GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library which
+contains a notice placed by the copyright holder or other authorized
+party saying it may be distributed under the terms of this Library
+General Public License (also called "this License"). Each licensee is
+addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also compile or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ c) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ d) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the source code distributed need not include anything that is normally
+distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Library General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/tools/gator/daemon/mxml/config.h b/tools/gator/daemon/mxml/config.h
new file mode 100644
index 000000000000..ad6df1d7debe
--- /dev/null
+++ b/tools/gator/daemon/mxml/config.h
@@ -0,0 +1,96 @@
+/* config.h. Generated from config.h.in by configure. */
+/*
+ * "$Id: config.h.in 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Configuration file for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+
+/*
+ * Version number...
+ */
+
+#define MXML_VERSION "Mini-XML v2.8"
+
+
+/*
+ * Inline function support...
+ */
+
+#define inline
+
+
+/*
+ * Long long support...
+ */
+
+#define HAVE_LONG_LONG 1
+
+
+/*
+ * Do we have the snprintf() and vsnprintf() functions?
+ */
+
+#define HAVE_SNPRINTF 1
+#define HAVE_VSNPRINTF 1
+
+
+/*
+ * Do we have the strXXX() functions?
+ */
+
+#define HAVE_STRDUP 1
+
+
+/*
+ * Do we have threading support?
+ */
+
+#define HAVE_PTHREAD_H 1
+
+
+/*
+ * Define prototypes for string functions as needed...
+ */
+
+# ifndef HAVE_STRDUP
+extern char *_mxml_strdup(const char *);
+# define strdup _mxml_strdup
+# endif /* !HAVE_STRDUP */
+
+extern char *_mxml_strdupf(const char *, ...);
+extern char *_mxml_vstrdupf(const char *, va_list);
+
+# ifndef HAVE_SNPRINTF
+extern int _mxml_snprintf(char *, size_t, const char *, ...);
+# define snprintf _mxml_snprintf
+# endif /* !HAVE_SNPRINTF */
+
+# ifndef HAVE_VSNPRINTF
+extern int _mxml_vsnprintf(char *, size_t, const char *, va_list);
+# define vsnprintf _mxml_vsnprintf
+# endif /* !HAVE_VSNPRINTF */
+
+/*
+ * End of "$Id: config.h.in 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-attr.c b/tools/gator/daemon/mxml/mxml-attr.c
new file mode 100644
index 000000000000..8e89cc1474f8
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-attr.c
@@ -0,0 +1,314 @@
+/*
+ * "$Id: mxml-attr.c 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Attribute support code for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "config.h"
+#include "mxml.h"
+
+
+/*
+ * Local functions...
+ */
+
+static int mxml_set_attr(mxml_node_t *node, const char *name,
+ char *value);
+
+
+/*
+ * 'mxmlElementDeleteAttr()' - Delete an attribute.
+ *
+ * @since Mini-XML 2.4@
+ */
+
+void
+mxmlElementDeleteAttr(mxml_node_t *node,/* I - Element */
+ const char *name)/* I - Attribute name */
+{
+ int i; /* Looping var */
+  mxml_attr_t	*attr;			/* Current attribute */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlElementDeleteAttr(node=%p, name=\"%s\")\n",
+ node, name ? name : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT || !name)
+ return;
+
+ /*
+ * Look for the attribute...
+ */
+
+ for (i = node->value.element.num_attrs, attr = node->value.element.attrs;
+ i > 0;
+ i --, attr ++)
+ {
+#ifdef DEBUG
+ printf(" %s=\"%s\"\n", attr->name, attr->value);
+#endif /* DEBUG */
+
+ if (!strcmp(attr->name, name))
+ {
+ /*
+ * Delete this attribute...
+ */
+
+ free(attr->name);
+ free(attr->value);
+
+ i --;
+ if (i > 0)
+ memmove(attr, attr + 1, i * sizeof(mxml_attr_t));
+
+ node->value.element.num_attrs --;
+
+ if (node->value.element.num_attrs == 0)
+ free(node->value.element.attrs);
+ return;
+ }
+ }
+}
+
+
+/*
+ * 'mxmlElementGetAttr()' - Get an attribute.
+ *
+ * This function returns NULL if the node is not an element or the
+ * named attribute does not exist.
+ */
+
+const char * /* O - Attribute value or NULL */
+mxmlElementGetAttr(mxml_node_t *node, /* I - Element node */
+ const char *name) /* I - Name of attribute */
+{
+ int i; /* Looping var */
+  mxml_attr_t	*attr;			/* Current attribute */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlElementGetAttr(node=%p, name=\"%s\")\n",
+ node, name ? name : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT || !name)
+ return (NULL);
+
+ /*
+ * Look for the attribute...
+ */
+
+ for (i = node->value.element.num_attrs, attr = node->value.element.attrs;
+ i > 0;
+ i --, attr ++)
+ {
+#ifdef DEBUG
+ printf(" %s=\"%s\"\n", attr->name, attr->value);
+#endif /* DEBUG */
+
+ if (!strcmp(attr->name, name))
+ {
+#ifdef DEBUG
+ printf(" Returning \"%s\"!\n", attr->value);
+#endif /* DEBUG */
+ return (attr->value);
+ }
+ }
+
+ /*
+ * Didn't find attribute, so return NULL...
+ */
+
+#ifdef DEBUG
+ puts(" Returning NULL!\n");
+#endif /* DEBUG */
+
+ return (NULL);
+}
+
+
+/*
+ * 'mxmlElementSetAttr()' - Set an attribute.
+ *
+ * If the named attribute already exists, the value of the attribute
+ * is replaced by the new string value. The string value is copied
+ * into the element node. This function does nothing if the node is
+ * not an element.
+ */
+
+void
+mxmlElementSetAttr(mxml_node_t *node, /* I - Element node */
+ const char *name, /* I - Name of attribute */
+ const char *value) /* I - Attribute value */
+{
+ char *valuec; /* Copy of value */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlElementSetAttr(node=%p, name=\"%s\", value=\"%s\")\n",
+ node, name ? name : "(null)", value ? value : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT || !name)
+ return;
+
+ if (value)
+ valuec = strdup(value);
+ else
+ valuec = NULL;
+
+ if (mxml_set_attr(node, name, valuec))
+ free(valuec);
+}
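+
+
+/*
+ * A set/replace sketch, assuming a freshly created element (the element
+ * and attribute names are illustrative only):
+ *
+ *   mxml_node_t *e = mxmlNewElement(MXML_NO_PARENT, "job");
+ *
+ *   mxmlElementSetAttr(e, "state", "queued");
+ *   mxmlElementSetAttr(e, "state", "running");
+ *   mxmlElementDeleteAttr(e, "state");
+ *
+ * The second call replaces the copied "queued" value, and the third
+ * removes the attribute again.
+ */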
+
+
+/*
+ * 'mxmlElementSetAttrf()' - Set an attribute with a formatted value.
+ *
+ * If the named attribute already exists, the value of the attribute
+ * is replaced by the new formatted string. The formatted string value is
+ * copied into the element node. This function does nothing if the node
+ * is not an element.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+void
+mxmlElementSetAttrf(mxml_node_t *node, /* I - Element node */
+ const char *name, /* I - Name of attribute */
+ const char *format,/* I - Printf-style attribute value */
+ ...) /* I - Additional arguments as needed */
+{
+ va_list ap; /* Argument pointer */
+ char *value; /* Value */
+
+
+#ifdef DEBUG
+ fprintf(stderr,
+ "mxmlElementSetAttrf(node=%p, name=\"%s\", format=\"%s\", ...)\n",
+ node, name ? name : "(null)", format ? format : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT || !name || !format)
+ return;
+
+ /*
+ * Format the value...
+ */
+
+ va_start(ap, format);
+ value = _mxml_vstrdupf(format, ap);
+ va_end(ap);
+
+ if (!value)
+ mxml_error("Unable to allocate memory for attribute '%s' in element %s!",
+ name, node->value.element.name);
+ else if (mxml_set_attr(node, name, value))
+ free(value);
+}
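+
+
+/*
+ * A formatted-value sketch, assuming "e" is an element node; the format
+ * string is expanded first and the result is copied into the node:
+ *
+ *   mxmlElementSetAttrf(e, "count", "%d", 42);
+ *   mxmlElementSetAttrf(e, "ratio", "%.2f", 0.5);
+ */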
+
+
+/*
+ * 'mxml_set_attr()' - Set or add an attribute name/value pair.
+ */
+
+static int /* O - 0 on success, -1 on failure */
+mxml_set_attr(mxml_node_t *node, /* I - Element node */
+ const char *name, /* I - Attribute name */
+ char *value) /* I - Attribute value */
+{
+ int i; /* Looping var */
+ mxml_attr_t *attr; /* New attribute */
+
+
+ /*
+ * Look for the attribute...
+ */
+
+ for (i = node->value.element.num_attrs, attr = node->value.element.attrs;
+ i > 0;
+ i --, attr ++)
+ if (!strcmp(attr->name, name))
+ {
+ /*
+ * Free the old value as needed...
+ */
+
+ if (attr->value)
+ free(attr->value);
+
+ attr->value = value;
+
+ return (0);
+ }
+
+ /*
+ * Add a new attribute...
+ */
+
+ if (node->value.element.num_attrs == 0)
+ attr = malloc(sizeof(mxml_attr_t));
+ else
+ attr = realloc(node->value.element.attrs,
+ (node->value.element.num_attrs + 1) * sizeof(mxml_attr_t));
+
+ if (!attr)
+ {
+ mxml_error("Unable to allocate memory for attribute '%s' in element %s!",
+ name, node->value.element.name);
+ return (-1);
+ }
+
+ node->value.element.attrs = attr;
+ attr += node->value.element.num_attrs;
+
+ if ((attr->name = strdup(name)) == NULL)
+ {
+ mxml_error("Unable to allocate memory for attribute '%s' in element %s!",
+ name, node->value.element.name);
+ return (-1);
+ }
+
+ attr->value = value;
+
+ node->value.element.num_attrs ++;
+
+ return (0);
+}
+
+
+/*
+ * End of "$Id: mxml-attr.c 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-entity.c b/tools/gator/daemon/mxml/mxml-entity.c
new file mode 100644
index 000000000000..0d11df6a70bc
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-entity.c
@@ -0,0 +1,449 @@
+/*
+ * "$Id: mxml-entity.c 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Character entity support code for Mini-XML, a small XML-like
+ * file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "mxml-private.h"
+
+
+/*
+ * 'mxmlEntityAddCallback()' - Add a callback to convert entities to Unicode.
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlEntityAddCallback(
+ mxml_entity_cb_t cb) /* I - Callback function to add */
+{
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ if (global->num_entity_cbs < (int)(sizeof(global->entity_cbs) / sizeof(global->entity_cbs[0])))
+ {
+ global->entity_cbs[global->num_entity_cbs] = cb;
+ global->num_entity_cbs ++;
+
+ return (0);
+ }
+ else
+ {
+ mxml_error("Unable to add entity callback!");
+
+ return (-1);
+ }
+}
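+
+
+/*
+ * A callback sketch, assuming an application wants one extra named entity;
+ * a callback must return the character value, or -1 so that the remaining
+ * callbacks can be tried (needs <string.h> for strcmp):
+ *
+ *   static int
+ *   my_entity_cb(const char *name)
+ *   {
+ *     if (!strcmp(name, "tab"))
+ *       return ('\t');
+ *
+ *     return (-1);
+ *   }
+ *
+ *   ...
+ *
+ *   mxmlEntityAddCallback(my_entity_cb);
+ */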
+
+
+/*
+ * 'mxmlEntityGetName()' - Get the name that corresponds to the character value.
+ *
+ * If val does not need to be represented by a named entity, NULL is returned.
+ */
+
+const char * /* O - Entity name or NULL */
+mxmlEntityGetName(int val) /* I - Character value */
+{
+ switch (val)
+ {
+ case '&' :
+ return ("amp");
+
+ case '<' :
+ return ("lt");
+
+ case '>' :
+ return ("gt");
+
+ case '\"' :
+ return ("quot");
+
+ default :
+ return (NULL);
+ }
+}
+
+
+/*
+ * 'mxmlEntityGetValue()' - Get the character corresponding to a named entity.
+ *
+ * The name is resolved through the registered entity callbacks; -1 is
+ * returned if the name is not known.  (Numeric character references such
+ * as "&#64;" are handled by the parser in mxml-file.c, not here.)
+ */
+
+int /* O - Character value or -1 on error */
+mxmlEntityGetValue(const char *name) /* I - Entity name */
+{
+ int i; /* Looping var */
+ int ch; /* Character value */
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ for (i = 0; i < global->num_entity_cbs; i ++)
+ if ((ch = (global->entity_cbs[i])(name)) >= 0)
+ return (ch);
+
+ return (-1);
+}
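+
+
+/*
+ * A round-trip sketch using the two lookups above, assuming the default
+ * entity callback is still registered:
+ *
+ *   int ch         = mxmlEntityGetValue("amp");	(yields '&')
+ *   const char *nm = mxmlEntityGetName('<');	(yields "lt")
+ */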
+
+
+/*
+ * 'mxmlEntityRemoveCallback()' - Remove a callback.
+ */
+
+void
+mxmlEntityRemoveCallback(
+ mxml_entity_cb_t cb) /* I - Callback function to remove */
+{
+ int i; /* Looping var */
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ for (i = 0; i < global->num_entity_cbs; i ++)
+ if (cb == global->entity_cbs[i])
+ {
+ /*
+ * Remove the callback...
+ */
+
+ global->num_entity_cbs --;
+
+ if (i < global->num_entity_cbs)
+ memmove(global->entity_cbs + i, global->entity_cbs + i + 1,
+ (global->num_entity_cbs - i) * sizeof(global->entity_cbs[0]));
+
+ return;
+ }
+}
+
+
+/*
+ * '_mxml_entity_cb()' - Lookup standard (X)HTML entities.
+ */
+
+int /* O - Unicode value or -1 */
+_mxml_entity_cb(const char *name) /* I - Entity name */
+{
+ int diff, /* Difference between names */
+ current, /* Current entity in search */
+ first, /* First entity in search */
+ last; /* Last entity in search */
+ static const struct
+ {
+ const char *name; /* Entity name */
+ int val; /* Character value */
+ } entities[] =
+ {
+ { "AElig", 198 },
+ { "Aacute", 193 },
+ { "Acirc", 194 },
+ { "Agrave", 192 },
+ { "Alpha", 913 },
+ { "Aring", 197 },
+ { "Atilde", 195 },
+ { "Auml", 196 },
+ { "Beta", 914 },
+ { "Ccedil", 199 },
+ { "Chi", 935 },
+ { "Dagger", 8225 },
+ { "Delta", 916 },
+ { "Dstrok", 208 },
+ { "ETH", 208 },
+ { "Eacute", 201 },
+ { "Ecirc", 202 },
+ { "Egrave", 200 },
+ { "Epsilon", 917 },
+ { "Eta", 919 },
+ { "Euml", 203 },
+ { "Gamma", 915 },
+ { "Iacute", 205 },
+ { "Icirc", 206 },
+ { "Igrave", 204 },
+ { "Iota", 921 },
+ { "Iuml", 207 },
+ { "Kappa", 922 },
+ { "Lambda", 923 },
+ { "Mu", 924 },
+ { "Ntilde", 209 },
+ { "Nu", 925 },
+ { "OElig", 338 },
+ { "Oacute", 211 },
+ { "Ocirc", 212 },
+ { "Ograve", 210 },
+ { "Omega", 937 },
+ { "Omicron", 927 },
+ { "Oslash", 216 },
+ { "Otilde", 213 },
+ { "Ouml", 214 },
+ { "Phi", 934 },
+ { "Pi", 928 },
+ { "Prime", 8243 },
+ { "Psi", 936 },
+ { "Rho", 929 },
+ { "Scaron", 352 },
+ { "Sigma", 931 },
+ { "THORN", 222 },
+ { "Tau", 932 },
+ { "Theta", 920 },
+ { "Uacute", 218 },
+ { "Ucirc", 219 },
+ { "Ugrave", 217 },
+ { "Upsilon", 933 },
+ { "Uuml", 220 },
+ { "Xi", 926 },
+ { "Yacute", 221 },
+ { "Yuml", 376 },
+ { "Zeta", 918 },
+ { "aacute", 225 },
+ { "acirc", 226 },
+ { "acute", 180 },
+ { "aelig", 230 },
+ { "agrave", 224 },
+ { "alefsym", 8501 },
+ { "alpha", 945 },
+ { "amp", '&' },
+ { "and", 8743 },
+ { "ang", 8736 },
+ { "apos", '\'' },
+ { "aring", 229 },
+ { "asymp", 8776 },
+ { "atilde", 227 },
+ { "auml", 228 },
+ { "bdquo", 8222 },
+ { "beta", 946 },
+ { "brkbar", 166 },
+ { "brvbar", 166 },
+ { "bull", 8226 },
+ { "cap", 8745 },
+ { "ccedil", 231 },
+ { "cedil", 184 },
+ { "cent", 162 },
+ { "chi", 967 },
+ { "circ", 710 },
+ { "clubs", 9827 },
+ { "cong", 8773 },
+ { "copy", 169 },
+ { "crarr", 8629 },
+ { "cup", 8746 },
+ { "curren", 164 },
+ { "dArr", 8659 },
+ { "dagger", 8224 },
+ { "darr", 8595 },
+ { "deg", 176 },
+ { "delta", 948 },
+ { "diams", 9830 },
+ { "die", 168 },
+ { "divide", 247 },
+ { "eacute", 233 },
+ { "ecirc", 234 },
+ { "egrave", 232 },
+ { "empty", 8709 },
+ { "emsp", 8195 },
+ { "ensp", 8194 },
+ { "epsilon", 949 },
+ { "equiv", 8801 },
+ { "eta", 951 },
+ { "eth", 240 },
+ { "euml", 235 },
+ { "euro", 8364 },
+ { "exist", 8707 },
+ { "fnof", 402 },
+ { "forall", 8704 },
+ { "frac12", 189 },
+ { "frac14", 188 },
+ { "frac34", 190 },
+ { "frasl", 8260 },
+ { "gamma", 947 },
+ { "ge", 8805 },
+ { "gt", '>' },
+ { "hArr", 8660 },
+ { "harr", 8596 },
+ { "hearts", 9829 },
+ { "hellip", 8230 },
+ { "hibar", 175 },
+ { "iacute", 237 },
+ { "icirc", 238 },
+ { "iexcl", 161 },
+ { "igrave", 236 },
+ { "image", 8465 },
+ { "infin", 8734 },
+ { "int", 8747 },
+ { "iota", 953 },
+ { "iquest", 191 },
+ { "isin", 8712 },
+ { "iuml", 239 },
+ { "kappa", 954 },
+ { "lArr", 8656 },
+ { "lambda", 955 },
+ { "lang", 9001 },
+ { "laquo", 171 },
+ { "larr", 8592 },
+ { "lceil", 8968 },
+ { "ldquo", 8220 },
+ { "le", 8804 },
+ { "lfloor", 8970 },
+ { "lowast", 8727 },
+ { "loz", 9674 },
+ { "lrm", 8206 },
+ { "lsaquo", 8249 },
+ { "lsquo", 8216 },
+ { "lt", '<' },
+ { "macr", 175 },
+ { "mdash", 8212 },
+ { "micro", 181 },
+ { "middot", 183 },
+ { "minus", 8722 },
+ { "mu", 956 },
+ { "nabla", 8711 },
+ { "nbsp", 160 },
+ { "ndash", 8211 },
+ { "ne", 8800 },
+ { "ni", 8715 },
+ { "not", 172 },
+ { "notin", 8713 },
+ { "nsub", 8836 },
+ { "ntilde", 241 },
+ { "nu", 957 },
+ { "oacute", 243 },
+ { "ocirc", 244 },
+ { "oelig", 339 },
+ { "ograve", 242 },
+ { "oline", 8254 },
+ { "omega", 969 },
+ { "omicron", 959 },
+ { "oplus", 8853 },
+ { "or", 8744 },
+ { "ordf", 170 },
+ { "ordm", 186 },
+ { "oslash", 248 },
+ { "otilde", 245 },
+ { "otimes", 8855 },
+ { "ouml", 246 },
+ { "para", 182 },
+ { "part", 8706 },
+ { "permil", 8240 },
+ { "perp", 8869 },
+ { "phi", 966 },
+ { "pi", 960 },
+ { "piv", 982 },
+ { "plusmn", 177 },
+ { "pound", 163 },
+ { "prime", 8242 },
+ { "prod", 8719 },
+ { "prop", 8733 },
+ { "psi", 968 },
+ { "quot", '\"' },
+ { "rArr", 8658 },
+ { "radic", 8730 },
+ { "rang", 9002 },
+ { "raquo", 187 },
+ { "rarr", 8594 },
+ { "rceil", 8969 },
+ { "rdquo", 8221 },
+ { "real", 8476 },
+ { "reg", 174 },
+ { "rfloor", 8971 },
+ { "rho", 961 },
+ { "rlm", 8207 },
+ { "rsaquo", 8250 },
+ { "rsquo", 8217 },
+ { "sbquo", 8218 },
+ { "scaron", 353 },
+ { "sdot", 8901 },
+ { "sect", 167 },
+ { "shy", 173 },
+ { "sigma", 963 },
+ { "sigmaf", 962 },
+ { "sim", 8764 },
+ { "spades", 9824 },
+ { "sub", 8834 },
+ { "sube", 8838 },
+ { "sum", 8721 },
+ { "sup", 8835 },
+ { "sup1", 185 },
+ { "sup2", 178 },
+ { "sup3", 179 },
+ { "supe", 8839 },
+ { "szlig", 223 },
+ { "tau", 964 },
+ { "there4", 8756 },
+ { "theta", 952 },
+ { "thetasym", 977 },
+ { "thinsp", 8201 },
+ { "thorn", 254 },
+ { "tilde", 732 },
+ { "times", 215 },
+ { "trade", 8482 },
+ { "uArr", 8657 },
+ { "uacute", 250 },
+ { "uarr", 8593 },
+ { "ucirc", 251 },
+ { "ugrave", 249 },
+ { "uml", 168 },
+ { "upsih", 978 },
+ { "upsilon", 965 },
+ { "uuml", 252 },
+ { "weierp", 8472 },
+ { "xi", 958 },
+ { "yacute", 253 },
+ { "yen", 165 },
+ { "yuml", 255 },
+ { "zeta", 950 },
+ { "zwj", 8205 },
+ { "zwnj", 8204 }
+ };
+
+
+ /*
+ * Do a binary search for the named entity...
+ */
+
+ first = 0;
+ last = (int)(sizeof(entities) / sizeof(entities[0]) - 1);
+
+ while ((last - first) > 1)
+ {
+ current = (first + last) / 2;
+
+ if ((diff = strcmp(name, entities[current].name)) == 0)
+ return (entities[current].val);
+ else if (diff < 0)
+ last = current;
+ else
+ first = current;
+ }
+
+ /*
+ * If we get here, there is a small chance that there is still
+ * a match; check first and last...
+ */
+
+ if (!strcmp(name, entities[first].name))
+ return (entities[first].val);
+ else if (!strcmp(name, entities[last].name))
+ return (entities[last].val);
+ else
+ return (-1);
+}
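+
+
+/*
+ * Note: the table above must remain sorted in strcmp() order (uppercase
+ * names before lowercase) or the binary search will miss entries.  For
+ * example, _mxml_entity_cb("euro") converges on the "euro" entry and
+ * returns 8364.
+ */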
+
+
+/*
+ * End of "$Id: mxml-entity.c 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-file.c b/tools/gator/daemon/mxml/mxml-file.c
new file mode 100644
index 000000000000..3812c253fc3e
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-file.c
@@ -0,0 +1,3056 @@
+/*
+ * "$Id: mxml-file.c 455 2014-01-05 03:28:03Z msweet $"
+ *
+ * File loading code for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*** This file modified by ARM on 25 Aug 2014 to avoid pointer overflow when checking if the write position is beyond the end of the buffer in mxmlSaveString and mxml_string_putc ***/
+
+/*
+ * Include necessary headers...
+ */
+
+#ifndef WIN32
+# include <unistd.h>
+#endif /* !WIN32 */
+#include "mxml-private.h"
+
+
+/*
+ * Character encoding...
+ */
+
+#define ENCODE_UTF8 0 /* UTF-8 */
+#define ENCODE_UTF16BE 1 /* UTF-16 Big-Endian */
+#define ENCODE_UTF16LE 2 /* UTF-16 Little-Endian */
+
+
+/*
+ * Macro to test for a bad XML character...
+ */
+
+#define mxml_bad_char(ch) ((ch) < ' ' && (ch) != '\n' && (ch) != '\r' && (ch) != '\t')
+
+
+/*
+ * Types and structures...
+ */
+
+typedef int (*_mxml_getc_cb_t)(void *, int *);
+typedef int (*_mxml_putc_cb_t)(int, void *);
+
+typedef struct _mxml_fdbuf_s /**** File descriptor buffer ****/
+{
+ int fd; /* File descriptor */
+ unsigned char *current, /* Current position in buffer */
+ *end, /* End of buffer */
+ buffer[8192]; /* Character buffer */
+} _mxml_fdbuf_t;
+
+
+/*
+ * Local functions...
+ */
+
+static int mxml_add_char(int ch, char **ptr, char **buffer,
+ int *bufsize);
+static int mxml_fd_getc(void *p, int *encoding);
+static int mxml_fd_putc(int ch, void *p);
+static int mxml_fd_read(_mxml_fdbuf_t *buf);
+static int mxml_fd_write(_mxml_fdbuf_t *buf);
+static int mxml_file_getc(void *p, int *encoding);
+static int mxml_file_putc(int ch, void *p);
+static int mxml_get_entity(mxml_node_t *parent, void *p,
+ int *encoding,
+ _mxml_getc_cb_t getc_cb);
+static inline int mxml_isspace(int ch)
+ {
+ return (ch == ' ' || ch == '\t' || ch == '\r' ||
+ ch == '\n');
+ }
+static mxml_node_t *mxml_load_data(mxml_node_t *top, void *p,
+ mxml_load_cb_t cb,
+ _mxml_getc_cb_t getc_cb,
+ mxml_sax_cb_t sax_cb, void *sax_data);
+static int mxml_parse_element(mxml_node_t *node, void *p,
+ int *encoding,
+ _mxml_getc_cb_t getc_cb);
+static int mxml_string_getc(void *p, int *encoding);
+static int mxml_string_putc(int ch, void *p);
+static int mxml_write_name(const char *s, void *p,
+ _mxml_putc_cb_t putc_cb);
+static int mxml_write_node(mxml_node_t *node, void *p,
+ mxml_save_cb_t cb, int col,
+ _mxml_putc_cb_t putc_cb,
+ _mxml_global_t *global);
+static int mxml_write_string(const char *s, void *p,
+ _mxml_putc_cb_t putc_cb);
+static int mxml_write_ws(mxml_node_t *node, void *p,
+ mxml_save_cb_t cb, int ws,
+ int col, _mxml_putc_cb_t putc_cb);
+
+
+/*
+ * 'mxmlLoadFd()' - Load a file descriptor into an XML node tree.
+ *
+ * The nodes in the specified file are added to the specified top node.
+ * If no top node is provided, the XML file MUST be well-formed with a
+ * single parent node like <?xml> for the entire file. The callback
+ * function returns the value type that should be used for child nodes.
+ * If MXML_NO_CALLBACK is specified then all child nodes will be either
+ * MXML_ELEMENT or MXML_TEXT nodes.
+ *
+ * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
+ * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
+ * child nodes of the specified type.
+ */
+
+mxml_node_t * /* O - First node or NULL if the file could not be read. */
+mxmlLoadFd(mxml_node_t *top, /* I - Top node */
+ int fd, /* I - File descriptor to read from */
+ mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */
+{
+ _mxml_fdbuf_t buf; /* File descriptor buffer */
+
+
+ /*
+ * Initialize the file descriptor buffer...
+ */
+
+ buf.fd = fd;
+ buf.current = buf.buffer;
+ buf.end = buf.buffer;
+
+ /*
+ * Read the XML data...
+ */
+
+ return (mxml_load_data(top, &buf, cb, mxml_fd_getc, MXML_NO_CALLBACK, NULL));
+}
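+
+
+/*
+ * A minimal sketch, assuming an XML file on disk; "config.xml" is
+ * illustrative, and open()/close() need <fcntl.h> and <unistd.h>:
+ *
+ *   int fd = open("config.xml", O_RDONLY);
+ *   mxml_node_t *tree = mxmlLoadFd(NULL, fd, MXML_NO_CALLBACK);
+ *
+ *   close(fd);
+ *   ...
+ *   mxmlDelete(tree);
+ */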
+
+
+/*
+ * 'mxmlLoadFile()' - Load a file into an XML node tree.
+ *
+ * The nodes in the specified file are added to the specified top node.
+ * If no top node is provided, the XML file MUST be well-formed with a
+ * single parent node like <?xml> for the entire file. The callback
+ * function returns the value type that should be used for child nodes.
+ * If MXML_NO_CALLBACK is specified then all child nodes will be either
+ * MXML_ELEMENT or MXML_TEXT nodes.
+ *
+ * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
+ * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
+ * child nodes of the specified type.
+ */
+
+mxml_node_t * /* O - First node or NULL if the file could not be read. */
+mxmlLoadFile(mxml_node_t *top, /* I - Top node */
+ FILE *fp, /* I - File to read from */
+ mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */
+{
+ /*
+ * Read the XML data...
+ */
+
+ return (mxml_load_data(top, fp, cb, mxml_file_getc, MXML_NO_CALLBACK, NULL));
+}
+
+
+/*
+ * 'mxmlLoadString()' - Load a string into an XML node tree.
+ *
+ * The nodes in the specified string are added to the specified top node.
+ * If no top node is provided, the XML string MUST be well-formed with a
+ * single parent node like <?xml> for the entire string. The callback
+ * function returns the value type that should be used for child nodes.
+ * If MXML_NO_CALLBACK is specified then all child nodes will be either
+ * MXML_ELEMENT or MXML_TEXT nodes.
+ *
+ * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
+ * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
+ * child nodes of the specified type.
+ */
+
+mxml_node_t * /* O - First node or NULL if the string has errors. */
+mxmlLoadString(mxml_node_t *top, /* I - Top node */
+ const char *s, /* I - String to load */
+ mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */
+{
+ /*
+ * Read the XML data...
+ */
+
+ return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, MXML_NO_CALLBACK,
+ NULL));
+}
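+
+
+/*
+ * An in-memory sketch; the XML literal is illustrative:
+ *
+ *   mxml_node_t *tree = mxmlLoadString(NULL,
+ *       "<?xml version=\"1.0\"?><data value=\"1\"/>",
+ *       MXML_NO_CALLBACK);
+ *
+ *   if (!tree)
+ *     fputs("parse failed\n", stderr);
+ */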
+
+
+/*
+ * 'mxmlSaveAllocString()' - Save an XML tree to an allocated string.
+ *
+ * This function returns a pointer to a string containing the textual
+ * representation of the XML node tree. The string should be freed
+ * using the free() function when you are done with it. NULL is returned
+ * if the node would produce an empty string or if the string cannot be
+ * allocated.
+ *
+ * The callback argument specifies a function that returns a whitespace
+ * string or NULL before and after each element. If MXML_NO_CALLBACK
+ * is specified, whitespace will only be added before MXML_TEXT nodes
+ * with leading whitespace and before attribute names inside opening
+ * element tags.
+ */
+
+char * /* O - Allocated string or NULL */
+mxmlSaveAllocString(
+ mxml_node_t *node, /* I - Node to write */
+ mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */
+{
+ int bytes; /* Required bytes */
+ char buffer[8192]; /* Temporary buffer */
+ char *s; /* Allocated string */
+
+
+ /*
+ * Write the node to the temporary buffer...
+ */
+
+ bytes = mxmlSaveString(node, buffer, sizeof(buffer), cb);
+
+ if (bytes <= 0)
+ return (NULL);
+
+ if (bytes < (int)(sizeof(buffer) - 1))
+ {
+ /*
+ * Node fit inside the buffer, so just duplicate that string and
+ * return...
+ */
+
+ return (strdup(buffer));
+ }
+
+ /*
+ * Allocate a buffer of the required size and save the node to the
+ * new buffer...
+ */
+
+ if ((s = malloc(bytes + 1)) == NULL)
+ return (NULL);
+
+ mxmlSaveString(node, s, bytes + 1, cb);
+
+ /*
+ * Return the allocated string...
+ */
+
+ return (s);
+}
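+
+
+/*
+ * A save sketch, assuming "tree" was loaded or built elsewhere; the caller
+ * owns the returned string:
+ *
+ *   char *xml = mxmlSaveAllocString(tree, MXML_NO_CALLBACK);
+ *
+ *   if (xml)
+ *   {
+ *     fputs(xml, stdout);
+ *     free(xml);
+ *   }
+ */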
+
+
+/*
+ * 'mxmlSaveFd()' - Save an XML tree to a file descriptor.
+ *
+ * The callback argument specifies a function that returns a whitespace
+ * string or NULL before and after each element. If MXML_NO_CALLBACK
+ * is specified, whitespace will only be added before MXML_TEXT nodes
+ * with leading whitespace and before attribute names inside opening
+ * element tags.
+ */
+
+int /* O - 0 on success, -1 on error. */
+mxmlSaveFd(mxml_node_t *node, /* I - Node to write */
+ int fd, /* I - File descriptor to write to */
+ mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */
+{
+ int col; /* Final column */
+ _mxml_fdbuf_t buf; /* File descriptor buffer */
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ /*
+ * Initialize the file descriptor buffer...
+ */
+
+ buf.fd = fd;
+ buf.current = buf.buffer;
+ buf.end = buf.buffer + sizeof(buf.buffer);
+
+ /*
+ * Write the node...
+ */
+
+ if ((col = mxml_write_node(node, &buf, cb, 0, mxml_fd_putc, global)) < 0)
+ return (-1);
+
+ if (col > 0)
+ if (mxml_fd_putc('\n', &buf) < 0)
+ return (-1);
+
+ /*
+ * Flush and return...
+ */
+
+ return (mxml_fd_write(&buf));
+}
+
+
+/*
+ * 'mxmlSaveFile()' - Save an XML tree to a file.
+ *
+ * The callback argument specifies a function that returns a whitespace
+ * string or NULL before and after each element. If MXML_NO_CALLBACK
+ * is specified, whitespace will only be added before MXML_TEXT nodes
+ * with leading whitespace and before attribute names inside opening
+ * element tags.
+ */
+
+int /* O - 0 on success, -1 on error. */
+mxmlSaveFile(mxml_node_t *node, /* I - Node to write */
+ FILE *fp, /* I - File to write to */
+ mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */
+{
+ int col; /* Final column */
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ /*
+ * Write the node...
+ */
+
+ if ((col = mxml_write_node(node, fp, cb, 0, mxml_file_putc, global)) < 0)
+ return (-1);
+
+ if (col > 0)
+ if (putc('\n', fp) < 0)
+ return (-1);
+
+ /*
+ * Return 0 (success)...
+ */
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSaveString()' - Save an XML node tree to a string.
+ *
+ * This function returns the total number of bytes that would be
+ * required for the string but only copies (bufsize - 1) characters
+ * into the specified buffer.
+ *
+ * The callback argument specifies a function that returns a whitespace
+ * string or NULL before and after each element. If MXML_NO_CALLBACK
+ * is specified, whitespace will only be added before MXML_TEXT nodes
+ * with leading whitespace and before attribute names inside opening
+ * element tags.
+ */
+
+int /* O - Size of string */
+mxmlSaveString(mxml_node_t *node, /* I - Node to write */
+ char *buffer, /* I - String buffer */
+ int bufsize, /* I - Size of string buffer */
+ mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */
+{
+ int col; /* Final column */
+ char *ptr[3]; /* Pointers for putc_cb */
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ /*
+ * Write the node...
+ */
+
+ ptr[0] = buffer;
+ ptr[1] = buffer + bufsize;
+ ptr[2] = 0;
+
+ if ((col = mxml_write_node(node, ptr, cb, 0, mxml_string_putc, global)) < 0)
+ return (-1);
+
+ if (col > 0)
+ mxml_string_putc('\n', ptr);
+
+ /*
+ * Nul-terminate the buffer...
+ */
+
+ if (ptr[2] != 0)
+ buffer[bufsize - 1] = '\0';
+ else
+ ptr[0][0] = '\0';
+
+ /*
+ * Return the number of characters...
+ */
+
+ return (ptr[0] - buffer);
+}
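+
+
+/*
+ * A fixed-buffer sketch, assuming "tree" exists; because the return value
+ * is the size the complete output needs, anything at or beyond
+ * sizeof(buffer) - 1 indicates the output was truncated:
+ *
+ *   char buffer[8192];
+ *   int  needed = mxmlSaveString(tree, buffer, sizeof(buffer),
+ *                                MXML_NO_CALLBACK);
+ *
+ *   if (needed >= (int)(sizeof(buffer) - 1))
+ *     ... retry with a malloc(needed + 1) buffer ...
+ */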
+
+
+/*
+ * 'mxmlSAXLoadFd()' - Load a file descriptor into an XML node tree
+ * using a SAX callback.
+ *
+ * The nodes in the specified file are added to the specified top node.
+ * If no top node is provided, the XML file MUST be well-formed with a
+ * single parent node like <?xml> for the entire file. The callback
+ * function returns the value type that should be used for child nodes.
+ * If MXML_NO_CALLBACK is specified then all child nodes will be either
+ * MXML_ELEMENT or MXML_TEXT nodes.
+ *
+ * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
+ * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
+ * child nodes of the specified type.
+ *
+ * The SAX callback must call mxmlRetain() for any nodes that need to
+ * be kept for later use. Otherwise, nodes are deleted when the parent
+ * node is closed or after each data, comment, CDATA, or directive node.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+mxml_node_t * /* O - First node or NULL if the file could not be read. */
+mxmlSAXLoadFd(mxml_node_t *top, /* I - Top node */
+ int fd, /* I - File descriptor to read from */
+ mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */
+ mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */
+ void *sax_data) /* I - SAX user data */
+{
+ _mxml_fdbuf_t buf; /* File descriptor buffer */
+
+
+ /*
+ * Initialize the file descriptor buffer...
+ */
+
+ buf.fd = fd;
+ buf.current = buf.buffer;
+ buf.end = buf.buffer;
+
+ /*
+ * Read the XML data...
+ */
+
+ return (mxml_load_data(top, &buf, cb, mxml_fd_getc, sax_cb, sax_data));
+}
+
+
+/*
+ * 'mxmlSAXLoadFile()' - Load a file into an XML node tree
+ * using a SAX callback.
+ *
+ * The nodes in the specified file are added to the specified top node.
+ * If no top node is provided, the XML file MUST be well-formed with a
+ * single parent node like <?xml> for the entire file. The callback
+ * function returns the value type that should be used for child nodes.
+ * If MXML_NO_CALLBACK is specified then all child nodes will be either
+ * MXML_ELEMENT or MXML_TEXT nodes.
+ *
+ * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
+ * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
+ * child nodes of the specified type.
+ *
+ * The SAX callback must call mxmlRetain() for any nodes that need to
+ * be kept for later use. Otherwise, nodes are deleted when the parent
+ * node is closed or after each data, comment, CDATA, or directive node.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+mxml_node_t * /* O - First node or NULL if the file could not be read. */
+mxmlSAXLoadFile(
+ mxml_node_t *top, /* I - Top node */
+ FILE *fp, /* I - File to read from */
+ mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */
+ mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */
+ void *sax_data) /* I - SAX user data */
+{
+ /*
+ * Read the XML data...
+ */
+
+ return (mxml_load_data(top, fp, cb, mxml_file_getc, sax_cb, sax_data));
+}
+
+
+/*
+ * 'mxmlSAXLoadString()' - Load a string into an XML node tree
+ * using a SAX callback.
+ *
+ * The nodes in the specified string are added to the specified top node.
+ * If no top node is provided, the XML string MUST be well-formed with a
+ * single parent node like <?xml> for the entire string. The callback
+ * function returns the value type that should be used for child nodes.
+ * If MXML_NO_CALLBACK is specified then all child nodes will be either
+ * MXML_ELEMENT or MXML_TEXT nodes.
+ *
+ * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
+ * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
+ * child nodes of the specified type.
+ *
+ * The SAX callback must call mxmlRetain() for any nodes that need to
+ * be kept for later use. Otherwise, nodes are deleted when the parent
+ * node is closed or after each data, comment, CDATA, or directive node.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+mxml_node_t * /* O - First node or NULL if the string has errors. */
+mxmlSAXLoadString(
+ mxml_node_t *top, /* I - Top node */
+ const char *s, /* I - String to load */
+ mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */
+ mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */
+ void *sax_data) /* I - SAX user data */
+{
+ /*
+ * Read the XML data...
+ */
+
+ return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, sax_cb, sax_data));
+}
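+
+
+/*
+ * A SAX sketch that prints element names without retaining any nodes; it
+ * assumes "xml_string" holds the document and that mxmlGetElement() (from
+ * Mini-XML 2.7 and later) is available:
+ *
+ *   static void
+ *   my_sax_cb(mxml_node_t *node, mxml_sax_event_t event, void *data)
+ *   {
+ *     if (event == MXML_SAX_ELEMENT_OPEN)
+ *       puts(mxmlGetElement(node));
+ *   }
+ *
+ *   ...
+ *
+ *   mxmlSAXLoadString(NULL, xml_string, MXML_NO_CALLBACK, my_sax_cb, NULL);
+ */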
+
+
+/*
+ * 'mxmlSetCustomHandlers()' - Set the handling functions for custom data.
+ *
+ * The load function accepts a node pointer and a data string and must
+ * return 0 on success and non-zero on error.
+ *
+ * The save function accepts a node pointer and must return a malloc'd
+ * string on success and NULL on error.
+ *
+ */
+
+void
+mxmlSetCustomHandlers(
+ mxml_custom_load_cb_t load, /* I - Load function */
+ mxml_custom_save_cb_t save) /* I - Save function */
+{
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ global->custom_load_cb = load;
+ global->custom_save_cb = save;
+}
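+
+
+/*
+ * A handler sketch for MXML_CUSTOM nodes storing a single int; the payload
+ * is illustrative, and <stdlib.h>, <stdio.h> and <string.h> are assumed
+ * for atoi(), malloc(), snprintf() and strdup():
+ *
+ *   static int
+ *   my_load(mxml_node_t *node, const char *data)
+ *   {
+ *     int *v = malloc(sizeof(int));
+ *
+ *     if (!v)
+ *       return (-1);
+ *
+ *     *v = atoi(data);
+ *     mxmlSetCustom(node, v, free);
+ *     return (0);
+ *   }
+ *
+ *   static char *
+ *   my_save(mxml_node_t *node)
+ *   {
+ *     char s[32];
+ *
+ *     snprintf(s, sizeof(s), "%d", *(int *)mxmlGetCustom(node));
+ *     return (strdup(s));
+ *   }
+ *
+ *   ...
+ *
+ *   mxmlSetCustomHandlers(my_load, my_save);
+ */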
+
+
+/*
+ * 'mxmlSetErrorCallback()' - Set the error message callback.
+ */
+
+void
+mxmlSetErrorCallback(mxml_error_cb_t cb)/* I - Error callback function */
+{
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ global->error_cb = cb;
+}
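+
+
+/*
+ * A callback sketch routing library errors through the application's own
+ * logging; the prefix is illustrative (needs <stdio.h>):
+ *
+ *   static void
+ *   my_error_cb(const char *message)
+ *   {
+ *     fprintf(stderr, "mxml: %s\n", message);
+ *   }
+ *
+ *   ...
+ *
+ *   mxmlSetErrorCallback(my_error_cb);
+ */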
+
+
+/*
+ * 'mxmlSetWrapMargin()' - Set the wrap margin when saving XML data.
+ *
+ * Wrapping is disabled when "column" is 0.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+void
+mxmlSetWrapMargin(int column) /* I - Column for wrapping, 0 to disable wrapping */
+{
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ global->wrap = column;
+}
+
+
+/*
+ * 'mxml_add_char()' - Add a character to a buffer, expanding as needed.
+ */
+
+static int /* O - 0 on success, -1 on error */
+mxml_add_char(int ch, /* I - Character to add */
+ char **bufptr, /* IO - Current position in buffer */
+ char **buffer, /* IO - Current buffer */
+ int *bufsize) /* IO - Current buffer size */
+{
+ char *newbuffer; /* New buffer value */
+
+
+ if (*bufptr >= (*buffer + *bufsize - 4))
+ {
+ /*
+ * Increase the size of the buffer...
+ */
+
+ if (*bufsize < 1024)
+ (*bufsize) *= 2;
+ else
+ (*bufsize) += 1024;
+
+ if ((newbuffer = realloc(*buffer, *bufsize)) == NULL)
+ {
+ free(*buffer);
+
+ mxml_error("Unable to expand string buffer to %d bytes!", *bufsize);
+
+ return (-1);
+ }
+
+ *bufptr = newbuffer + (*bufptr - *buffer);
+ *buffer = newbuffer;
+ }
+
+ if (ch < 0x80)
+ {
+ /*
+ * Single byte ASCII...
+ */
+
+ *(*bufptr)++ = ch;
+ }
+ else if (ch < 0x800)
+ {
+ /*
+ * Two-byte UTF-8...
+ */
+
+ *(*bufptr)++ = 0xc0 | (ch >> 6);
+ *(*bufptr)++ = 0x80 | (ch & 0x3f);
+ }
+ else if (ch < 0x10000)
+ {
+ /*
+ * Three-byte UTF-8...
+ */
+
+ *(*bufptr)++ = 0xe0 | (ch >> 12);
+ *(*bufptr)++ = 0x80 | ((ch >> 6) & 0x3f);
+ *(*bufptr)++ = 0x80 | (ch & 0x3f);
+ }
+ else
+ {
+ /*
+ * Four-byte UTF-8...
+ */
+
+ *(*bufptr)++ = 0xf0 | (ch >> 18);
+ *(*bufptr)++ = 0x80 | ((ch >> 12) & 0x3f);
+ *(*bufptr)++ = 0x80 | ((ch >> 6) & 0x3f);
+ *(*bufptr)++ = 0x80 | (ch & 0x3f);
+ }
+
+ return (0);
+}
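+
+
+/*
+ * Worked example for the three-byte case above: the euro sign (0x20ac)
+ * is stored as 0xe2 0x82 0xac:
+ *
+ *   0xe0 | (0x20ac >> 12)         = 0xe2
+ *   0x80 | ((0x20ac >> 6) & 0x3f) = 0x82
+ *   0x80 | (0x20ac & 0x3f)        = 0xac
+ */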
+
+
+/*
+ * 'mxml_fd_getc()' - Read a character from a file descriptor.
+ */
+
+static int /* O - Character or EOF */
+mxml_fd_getc(void *p, /* I - File descriptor buffer */
+ int *encoding) /* IO - Encoding */
+{
+ _mxml_fdbuf_t *buf; /* File descriptor buffer */
+ int ch, /* Current character */
+ temp; /* Temporary character */
+
+
+ /*
+ * Grab the next character in the buffer...
+ */
+
+ buf = (_mxml_fdbuf_t *)p;
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ ch = *(buf->current)++;
+
+ switch (*encoding)
+ {
+ case ENCODE_UTF8 :
+ /*
+ * Got a UTF-8 character; convert UTF-8 to Unicode and return...
+ */
+
+ if (!(ch & 0x80))
+ {
+#if DEBUG > 1
+ printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+
+ return (ch);
+ }
+ else if (ch == 0xfe)
+ {
+ /*
+ * UTF-16 big-endian BOM?
+ */
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ ch = *(buf->current)++;
+
+ if (ch != 0xff)
+ return (EOF);
+
+ *encoding = ENCODE_UTF16BE;
+
+ return (mxml_fd_getc(p, encoding));
+ }
+ else if (ch == 0xff)
+ {
+ /*
+ * UTF-16 little-endian BOM?
+ */
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ ch = *(buf->current)++;
+
+ if (ch != 0xfe)
+ return (EOF);
+
+ *encoding = ENCODE_UTF16LE;
+
+ return (mxml_fd_getc(p, encoding));
+ }
+ else if ((ch & 0xe0) == 0xc0)
+ {
+ /*
+ * Two-byte value...
+ */
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ if ((temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
+
+ if (ch < 0x80)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+ }
+ else if ((ch & 0xf0) == 0xe0)
+ {
+ /*
+ * Three-byte value...
+ */
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ if ((temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ if ((temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+
+ if (ch < 0x800)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+
+ /*
+ * Ignore (strip) Byte Order Mark (BOM)...
+ */
+
+ if (ch == 0xfeff)
+ return (mxml_fd_getc(p, encoding));
+ }
+ else if ((ch & 0xf8) == 0xf0)
+ {
+ /*
+ * Four-byte value...
+ */
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ if ((temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x07) << 6) | (temp & 0x3f);
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ if ((temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ if ((temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+
+ if (ch < 0x10000)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+ }
+ else
+ return (EOF);
+ break;
+
+ case ENCODE_UTF16BE :
+ /*
+ * Read UTF-16 big-endian char...
+ */
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ ch = (ch << 8) | temp;
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+ else if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch;
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ lch = *(buf->current)++;
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ lch = (lch << 8) | temp;
+
+	  if (lch < 0xdc00 || lch > 0xdfff)	/* low surrogates are 0xdc00-0xdfff */
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+ break;
+
+ case ENCODE_UTF16LE :
+ /*
+ * Read UTF-16 little-endian char...
+ */
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ ch |= (temp << 8);
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+ else if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch;
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ lch = *(buf->current)++;
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_read(buf) < 0)
+ return (EOF);
+
+ temp = *(buf->current)++;
+
+ lch |= (temp << 8);
+
+	  if (lch < 0xdc00 || lch > 0xdfff)	/* low surrogates are 0xdc00-0xdfff */
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+ break;
+ }
+
+#if DEBUG > 1
+ printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ return (ch);
+}
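+
+
+/*
+ * Worked example for the UTF-16 surrogate paths above: the pair
+ * 0xd83d 0xde00 decodes as
+ *
+ *   (((0xd83d & 0x3ff) << 10) | (0xde00 & 0x3ff)) + 0x10000 = 0x1f600
+ *
+ * which is U+1F600.
+ */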
+
+
+/*
+ * 'mxml_fd_putc()' - Write a character to a file descriptor.
+ */
+
+static int /* O - 0 on success, -1 on error */
+mxml_fd_putc(int ch, /* I - Character */
+ void *p) /* I - File descriptor buffer */
+{
+ _mxml_fdbuf_t *buf; /* File descriptor buffer */
+
+
+ /*
+ * Flush the write buffer as needed...
+ */
+
+ buf = (_mxml_fdbuf_t *)p;
+
+ if (buf->current >= buf->end)
+ if (mxml_fd_write(buf) < 0)
+ return (-1);
+
+ *(buf->current)++ = ch;
+
+ /*
+ * Return successfully...
+ */
+
+ return (0);
+}
+
+
+/*
+ * 'mxml_fd_read()' - Read a buffer of data from a file descriptor.
+ */
+
+static int /* O - 0 on success, -1 on error */
+mxml_fd_read(_mxml_fdbuf_t *buf) /* I - File descriptor buffer */
+{
+ int bytes; /* Bytes read... */
+
+
+ /*
+ * Range check input...
+ */
+
+ if (!buf)
+ return (-1);
+
+ /*
+ * Read from the file descriptor...
+ */
+
+ while ((bytes = read(buf->fd, buf->buffer, sizeof(buf->buffer))) < 0)
+#ifdef EINTR
+ if (errno != EAGAIN && errno != EINTR)
+#else
+ if (errno != EAGAIN)
+#endif /* EINTR */
+ return (-1);
+
+ if (bytes == 0)
+ return (-1);
+
+ /*
+ * Update the pointers and return success...
+ */
+
+ buf->current = buf->buffer;
+ buf->end = buf->buffer + bytes;
+
+ return (0);
+}
+
+
+/*
+ * 'mxml_fd_write()' - Write a buffer of data to a file descriptor.
+ */
+
+static int /* O - 0 on success, -1 on error */
+mxml_fd_write(_mxml_fdbuf_t *buf) /* I - File descriptor buffer */
+{
+ int bytes; /* Bytes written */
+ unsigned char *ptr; /* Pointer into buffer */
+
+
+ /*
+ * Range check...
+ */
+
+ if (!buf)
+ return (-1);
+
+ /*
+ * Return 0 if there is nothing to write...
+ */
+
+ if (buf->current == buf->buffer)
+ return (0);
+
+ /*
+ * Loop until we have written everything...
+ */
+
+ for (ptr = buf->buffer; ptr < buf->current; ptr += bytes)
+ if ((bytes = write(buf->fd, ptr, buf->current - ptr)) < 0)
+ return (-1);
+
+ /*
+ * All done, reset pointers and return success...
+ */
+
+ buf->current = buf->buffer;
+
+ return (0);
+}
+
+
+/*
+ * 'mxml_file_getc()' - Get a character from a file.
+ */
+
+static int /* O - Character or EOF */
+mxml_file_getc(void *p, /* I - Pointer to file */
+ int *encoding) /* IO - Encoding */
+{
+ int ch, /* Character from file */
+ temp; /* Temporary character */
+ FILE *fp; /* Pointer to file */
+
+
+ /*
+ * Read a character from the file and see if it is EOF or ASCII...
+ */
+
+ fp = (FILE *)p;
+ ch = getc(fp);
+
+ if (ch == EOF)
+ return (EOF);
+
+ switch (*encoding)
+ {
+ case ENCODE_UTF8 :
+ /*
+ * Got a UTF-8 character; convert UTF-8 to Unicode and return...
+ */
+
+ if (!(ch & 0x80))
+ {
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+
+#if DEBUG > 1
+ printf("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ return (ch);
+ }
+ else if (ch == 0xfe)
+ {
+ /*
+ * UTF-16 big-endian BOM?
+ */
+
+ ch = getc(fp);
+ if (ch != 0xff)
+ return (EOF);
+
+ *encoding = ENCODE_UTF16BE;
+
+ return (mxml_file_getc(p, encoding));
+ }
+ else if (ch == 0xff)
+ {
+ /*
+ * UTF-16 little-endian BOM?
+ */
+
+ ch = getc(fp);
+ if (ch != 0xfe)
+ return (EOF);
+
+ *encoding = ENCODE_UTF16LE;
+
+ return (mxml_file_getc(p, encoding));
+ }
+ else if ((ch & 0xe0) == 0xc0)
+ {
+ /*
+ * Two-byte value...
+ */
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
+
+ if (ch < 0x80)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+ }
+ else if ((ch & 0xf0) == 0xe0)
+ {
+ /*
+ * Three-byte value...
+ */
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+
+ if (ch < 0x800)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+
+ /*
+ * Ignore (strip) Byte Order Mark (BOM)...
+ */
+
+ if (ch == 0xfeff)
+ return (mxml_file_getc(p, encoding));
+ }
+ else if ((ch & 0xf8) == 0xf0)
+ {
+ /*
+ * Four-byte value...
+ */
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x07) << 6) | (temp & 0x3f);
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+
+ if (ch < 0x10000)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+ }
+ else
+ return (EOF);
+ break;
+
+ case ENCODE_UTF16BE :
+ /*
+ * Read UTF-16 big-endian char...
+ */
+
+ ch = (ch << 8) | getc(fp);
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+ else if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch = getc(fp);
+ lch = (lch << 8) | getc(fp);
+
+	if (lch < 0xdc00 || lch > 0xdfff)	/* low surrogates are 0xdc00-0xdfff */
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+ break;
+
+ case ENCODE_UTF16LE :
+ /*
+ * Read UTF-16 little-endian char...
+ */
+
+ ch |= (getc(fp) << 8);
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+ else if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch = getc(fp);
+ lch |= (getc(fp) << 8);
+
+	if (lch < 0xdc00 || lch > 0xdfff)	/* low surrogates are 0xdc00-0xdfff */
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+ break;
+ }
+
+#if DEBUG > 1
+ printf("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ return (ch);
+}
+
+
+/*
+ * 'mxml_file_putc()' - Write a character to a file.
+ */
+
+static int /* O - 0 on success, -1 on failure */
+mxml_file_putc(int ch, /* I - Character to write */
+ void *p) /* I - Pointer to file */
+{
+ return (putc(ch, (FILE *)p) == EOF ? -1 : 0);
+}
+
+
+/*
+ * 'mxml_get_entity()' - Get the character corresponding to an entity...
+ */
+
+static int /* O - Character value or EOF on error */
+mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
+ void *p, /* I - Pointer to source */
+ int *encoding, /* IO - Character encoding */
+ int (*getc_cb)(void *, int *))
+ /* I - Get character function */
+{
+ int ch; /* Current character */
+ char entity[64], /* Entity string */
+ *entptr; /* Pointer into entity */
+
+
+ entptr = entity;
+
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
+ if (ch > 126 || (!isalnum(ch) && ch != '#'))
+ break;
+ else if (entptr < (entity + sizeof(entity) - 1))
+ *entptr++ = ch;
+ else
+ {
+ mxml_error("Entity name too long under parent <%s>!",
+ parent ? parent->value.element.name : "null");
+ break;
+ }
+
+ *entptr = '\0';
+
+ if (ch != ';')
+ {
+ mxml_error("Character entity \"%s\" not terminated under parent <%s>!",
+ entity, parent ? parent->value.element.name : "null");
+ return (EOF);
+ }
+
+ if (entity[0] == '#')
+ {
+ if (entity[1] == 'x')
+ ch = strtol(entity + 2, NULL, 16);
+ else
+ ch = strtol(entity + 1, NULL, 10);
+ }
+ else if ((ch = mxmlEntityGetValue(entity)) < 0)
+ mxml_error("Entity name \"%s;\" not supported under parent <%s>!",
+ entity, parent ? parent->value.element.name : "null");
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x under parent <%s> not allowed by XML standard!",
+ ch, parent ? parent->value.element.name : "null");
+ return (EOF);
+ }
+
+ return (ch);
+}
+
+
+/*
+ * 'mxml_load_data()' - Load data into an XML node tree.
+ */
+
+static mxml_node_t * /* O - First node or NULL if the file could not be read. */
+mxml_load_data(
+ mxml_node_t *top, /* I - Top node */
+ void *p, /* I - Pointer to data */
+ mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */
+ _mxml_getc_cb_t getc_cb, /* I - Read function */
+ mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */
+ void *sax_data) /* I - SAX user data */
+{
+ mxml_node_t *node, /* Current node */
+ *first, /* First node added */
+ *parent; /* Current parent node */
+ int ch, /* Character from file */
+ whitespace; /* Non-zero if whitespace seen */
+ char *buffer, /* String buffer */
+ *bufptr; /* Pointer into buffer */
+ int bufsize; /* Size of buffer */
+ mxml_type_t type; /* Current node type */
+ int encoding; /* Character encoding */
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+ static const char * const types[] = /* Type strings... */
+ {
+ "MXML_ELEMENT", /* XML element with attributes */
+ "MXML_INTEGER", /* Integer value */
+ "MXML_OPAQUE", /* Opaque string */
+ "MXML_REAL", /* Real value */
+ "MXML_TEXT", /* Text fragment */
+ "MXML_CUSTOM" /* Custom data */
+ };
+
+
+ /*
+ * Read elements and other nodes from the file...
+ */
+
+ if ((buffer = malloc(64)) == NULL)
+ {
+ mxml_error("Unable to allocate string buffer!");
+ return (NULL);
+ }
+
+ bufsize = 64;
+ bufptr = buffer;
+ parent = top;
+ first = NULL;
+ whitespace = 0;
+ encoding = ENCODE_UTF8;
+
+ if (cb && parent)
+ type = (*cb)(parent);
+ else if (parent)
+ type = MXML_TEXT;
+ else
+ type = MXML_IGNORE;
+
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF)
+ {
+ if ((ch == '<' ||
+ (mxml_isspace(ch) && type != MXML_OPAQUE && type != MXML_CUSTOM)) &&
+ bufptr > buffer)
+ {
+ /*
+ * Add a new value node...
+ */
+
+ *bufptr = '\0';
+
+ switch (type)
+ {
+ case MXML_INTEGER :
+ node = mxmlNewInteger(parent, strtol(buffer, &bufptr, 0));
+ break;
+
+ case MXML_OPAQUE :
+ node = mxmlNewOpaque(parent, buffer);
+ break;
+
+ case MXML_REAL :
+ node = mxmlNewReal(parent, strtod(buffer, &bufptr));
+ break;
+
+ case MXML_TEXT :
+ node = mxmlNewText(parent, whitespace, buffer);
+ break;
+
+ case MXML_CUSTOM :
+ if (global->custom_load_cb)
+ {
+ /*
+ * Use the callback to fill in the custom data...
+ */
+
+ node = mxmlNewCustom(parent, NULL, NULL);
+
+ if ((*global->custom_load_cb)(node, buffer))
+ {
+ mxml_error("Bad custom value '%s' in parent <%s>!",
+ buffer, parent ? parent->value.element.name : "null");
+ mxmlDelete(node);
+ node = NULL;
+ }
+ break;
+ }
+
+ default : /* Ignore... */
+ node = NULL;
+ break;
+ }
+
+ if (*bufptr)
+ {
+ /*
+ * Bad integer/real number value...
+ */
+
+ mxml_error("Bad %s value '%s' in parent <%s>!",
+ type == MXML_INTEGER ? "integer" : "real", buffer,
+ parent ? parent->value.element.name : "null");
+ break;
+ }
+
+ bufptr = buffer;
+ whitespace = mxml_isspace(ch) && type == MXML_TEXT;
+
+ if (!node && type != MXML_IGNORE)
+ {
+ /*
+ * Print error and return...
+ */
+
+ mxml_error("Unable to add value node of type %s to parent <%s>!",
+ types[type], parent ? parent->value.element.name : "null");
+ goto error;
+ }
+
+ if (sax_cb)
+ {
+ (*sax_cb)(node, MXML_SAX_DATA, sax_data);
+
+ if (!mxmlRelease(node))
+ node = NULL;
+ }
+
+ if (!first && node)
+ first = node;
+ }
+ else if (mxml_isspace(ch) && type == MXML_TEXT)
+ whitespace = 1;
+
+ /*
+ * Add lone whitespace node if we have an element and existing
+ * whitespace...
+ */
+
+ if (ch == '<' && whitespace && type == MXML_TEXT)
+ {
+ if (parent)
+ {
+ node = mxmlNewText(parent, whitespace, "");
+
+ if (sax_cb)
+ {
+ (*sax_cb)(node, MXML_SAX_DATA, sax_data);
+
+ if (!mxmlRelease(node))
+ node = NULL;
+ }
+
+ if (!first && node)
+ first = node;
+ }
+
+ whitespace = 0;
+ }
+
+ if (ch == '<')
+ {
+ /*
+ * Start of open/close tag...
+ */
+
+ bufptr = buffer;
+
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF)
+ if (mxml_isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer))
+ break;
+ else if (ch == '<')
+ {
+ mxml_error("Bare < in element!");
+ goto error;
+ }
+ else if (ch == '&')
+ {
+ if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
+ goto error;
+
+ if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+ goto error;
+ }
+ else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+ goto error;
+ else if (((bufptr - buffer) == 1 && buffer[0] == '?') ||
+ ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3)) ||
+ ((bufptr - buffer) == 8 && !strncmp(buffer, "![CDATA[", 8)))
+ break;
+
+ *bufptr = '\0';
+
+ if (!strcmp(buffer, "!--"))
+ {
+ /*
+ * Gather rest of comment...
+ */
+
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF)
+ {
+ if (ch == '>' && bufptr > (buffer + 4) &&
+ bufptr[-3] != '-' && bufptr[-2] == '-' && bufptr[-1] == '-')
+ break;
+ else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+ goto error;
+ }
+
+ /*
+ * Error out if we didn't get the whole comment...
+ */
+
+ if (ch != '>')
+ {
+ /*
+ * Print error and return...
+ */
+
+ mxml_error("Early EOF in comment node!");
+ goto error;
+ }
+
+
+ /*
+ * Otherwise add this as an element under the current parent...
+ */
+
+ *bufptr = '\0';
+
+ if (!parent && first)
+ {
+ /*
+ * There can only be one root element!
+ */
+
+ mxml_error("<%s> cannot be a second root node after <%s>",
+ buffer, first->value.element.name);
+ goto error;
+ }
+
+ if ((node = mxmlNewElement(parent, buffer)) == NULL)
+ {
+ /*
+ * Just print error for now...
+ */
+
+ mxml_error("Unable to add comment node to parent <%s>!",
+ parent ? parent->value.element.name : "null");
+ break;
+ }
+
+ if (sax_cb)
+ {
+ (*sax_cb)(node, MXML_SAX_COMMENT, sax_data);
+
+ if (!mxmlRelease(node))
+ node = NULL;
+ }
+
+ if (node && !first)
+ first = node;
+ }
+ else if (!strcmp(buffer, "![CDATA["))
+ {
+ /*
+ * Gather CDATA section...
+ */
+
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF)
+ {
+ if (ch == '>' && !strncmp(bufptr - 2, "]]", 2))
+ break;
+ else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+ goto error;
+ }
+
+ /*
+ * Error out if we didn't get the whole comment...
+ */
+
+ if (ch != '>')
+ {
+ /*
+ * Print error and return...
+ */
+
+ mxml_error("Early EOF in CDATA node!");
+ goto error;
+ }
+
+
+ /*
+ * Otherwise add this as an element under the current parent...
+ */
+
+ *bufptr = '\0';
+
+ if (!parent && first)
+ {
+ /*
+ * There can only be one root element!
+ */
+
+ mxml_error("<%s> cannot be a second root node after <%s>",
+ buffer, first->value.element.name);
+ goto error;
+ }
+
+ if ((node = mxmlNewElement(parent, buffer)) == NULL)
+ {
+ /*
+ * Print error and return...
+ */
+
+ mxml_error("Unable to add CDATA node to parent <%s>!",
+ parent ? parent->value.element.name : "null");
+ goto error;
+ }
+
+ if (sax_cb)
+ {
+ (*sax_cb)(node, MXML_SAX_CDATA, sax_data);
+
+ if (!mxmlRelease(node))
+ node = NULL;
+ }
+
+ if (node && !first)
+ first = node;
+ }
+ else if (buffer[0] == '?')
+ {
+ /*
+ * Gather rest of processing instruction...
+ */
+
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF)
+ {
+ if (ch == '>' && bufptr > buffer && bufptr[-1] == '?')
+ break;
+ else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+ goto error;
+ }
+
+ /*
+ * Error out if we didn't get the whole processing instruction...
+ */
+
+ if (ch != '>')
+ {
+ /*
+ * Print error and return...
+ */
+
+ mxml_error("Early EOF in processing instruction node!");
+ goto error;
+ }
+
+ /*
+ * Otherwise add this as an element under the current parent...
+ */
+
+ *bufptr = '\0';
+
+ if (!parent && first)
+ {
+ /*
+ * There can only be one root element!
+ */
+
+ mxml_error("<%s> cannot be a second root node after <%s>",
+ buffer, first->value.element.name);
+ goto error;
+ }
+
+ if ((node = mxmlNewElement(parent, buffer)) == NULL)
+ {
+ /*
+ * Print error and return...
+ */
+
+ mxml_error("Unable to add processing instruction node to parent <%s>!",
+ parent ? parent->value.element.name : "null");
+ goto error;
+ }
+
+ if (sax_cb)
+ {
+ (*sax_cb)(node, MXML_SAX_DIRECTIVE, sax_data);
+
+ if (!mxmlRelease(node))
+ node = NULL;
+ }
+
+ if (node)
+ {
+ if (!first)
+ first = node;
+
+ if (!parent)
+ {
+ parent = node;
+
+ if (cb)
+ type = (*cb)(parent);
+ }
+ }
+ }
+ else if (buffer[0] == '!')
+ {
+ /*
+ * Gather rest of declaration...
+ */
+
+ do
+ {
+ if (ch == '>')
+ break;
+ else
+ {
+ if (ch == '&')
+ if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
+ goto error;
+
+ if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+ goto error;
+ }
+ }
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF);
+
+ /*
+ * Error out if we didn't get the whole declaration...
+ */
+
+ if (ch != '>')
+ {
+ /*
+ * Print error and return...
+ */
+
+ mxml_error("Early EOF in declaration node!");
+ goto error;
+ }
+
+ /*
+ * Otherwise add this as an element under the current parent...
+ */
+
+ *bufptr = '\0';
+
+ if (!parent && first)
+ {
+ /*
+ * There can only be one root element!
+ */
+
+ mxml_error("<%s> cannot be a second root node after <%s>",
+ buffer, first->value.element.name);
+ goto error;
+ }
+
+ if ((node = mxmlNewElement(parent, buffer)) == NULL)
+ {
+ /*
+ * Print error and return...
+ */
+
+ mxml_error("Unable to add declaration node to parent <%s>!",
+ parent ? parent->value.element.name : "null");
+ goto error;
+ }
+
+ if (sax_cb)
+ {
+ (*sax_cb)(node, MXML_SAX_DIRECTIVE, sax_data);
+
+ if (!mxmlRelease(node))
+ node = NULL;
+ }
+
+ if (node)
+ {
+ if (!first)
+ first = node;
+
+ if (!parent)
+ {
+ parent = node;
+
+ if (cb)
+ type = (*cb)(parent);
+ }
+ }
+ }
+ else if (buffer[0] == '/')
+ {
+ /*
+ * Handle close tag...
+ */
+
+ if (!parent || strcmp(buffer + 1, parent->value.element.name))
+ {
+ /*
+ * Close tag doesn't match tree; print an error for now...
+ */
+
+ mxml_error("Mismatched close tag <%s> under parent <%s>!",
+ buffer, parent ? parent->value.element.name : "(null)");
+ goto error;
+ }
+
+ /*
+ * Keep reading until we see >...
+ */
+
+ while (ch != '>' && ch != EOF)
+ ch = (*getc_cb)(p, &encoding);
+
+ node = parent;
+ parent = parent->parent;
+
+ if (sax_cb)
+ {
+ (*sax_cb)(node, MXML_SAX_ELEMENT_CLOSE, sax_data);
+
+ if (!mxmlRelease(node) && first == node)
+ first = NULL;
+ }
+
+ /*
+ * Ascend into the parent and set the value type as needed...
+ */
+
+ if (cb && parent)
+ type = (*cb)(parent);
+ }
+ else
+ {
+ /*
+ * Handle open tag...
+ */
+
+ if (!parent && first)
+ {
+ /*
+ * There can only be one root element!
+ */
+
+ mxml_error("<%s> cannot be a second root node after <%s>",
+ buffer, first->value.element.name);
+ goto error;
+ }
+
+ if ((node = mxmlNewElement(parent, buffer)) == NULL)
+ {
+ /*
+ * Just print error for now...
+ */
+
+ mxml_error("Unable to add element node to parent <%s>!",
+ parent ? parent->value.element.name : "null");
+ goto error;
+ }
+
+ if (mxml_isspace(ch))
+ {
+ if ((ch = mxml_parse_element(node, p, &encoding, getc_cb)) == EOF)
+ goto error;
+ }
+ else if (ch == '/')
+ {
+ if ((ch = (*getc_cb)(p, &encoding)) != '>')
+ {
+ mxml_error("Expected > but got '%c' instead for element <%s/>!",
+ ch, buffer);
+ mxmlDelete(node);
+ goto error;
+ }
+
+ ch = '/';
+ }
+
+ if (sax_cb)
+ (*sax_cb)(node, MXML_SAX_ELEMENT_OPEN, sax_data);
+
+ if (!first)
+ first = node;
+
+ if (ch == EOF)
+ break;
+
+ if (ch != '/')
+ {
+ /*
+ * Descend into this node, setting the value type as needed...
+ */
+
+ parent = node;
+
+ if (cb && parent)
+ type = (*cb)(parent);
+ }
+ else if (sax_cb)
+ {
+ (*sax_cb)(node, MXML_SAX_ELEMENT_CLOSE, sax_data);
+
+ if (!mxmlRelease(node) && first == node)
+ first = NULL;
+ }
+ }
+
+ bufptr = buffer;
+ }
+ else if (ch == '&')
+ {
+ /*
+ * Add character entity to current buffer...
+ */
+
+ if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
+ goto error;
+
+ if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+ goto error;
+ }
+ else if (type == MXML_OPAQUE || type == MXML_CUSTOM || !mxml_isspace(ch))
+ {
+ /*
+ * Add character to current buffer...
+ */
+
+ if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+ goto error;
+ }
+ }
+
+ /*
+ * Free the string buffer - we don't need it anymore...
+ */
+
+ free(buffer);
+
+ /*
+ * Find the top element and return it...
+ */
+
+ if (parent)
+ {
+ node = parent;
+
+ while (parent != top && parent->parent)
+ parent = parent->parent;
+
+ if (node != parent)
+ {
+ mxml_error("Missing close tag </%s> under parent <%s>!",
+ node->value.element.name,
+ node->parent ? node->parent->value.element.name : "(null)");
+
+ mxmlDelete(first);
+
+ return (NULL);
+ }
+ }
+
+ if (parent)
+ return (parent);
+ else
+ return (first);
+
+ /*
+ * Common error return...
+ */
+
+error:
+
+ mxmlDelete(first);
+
+ free(buffer);
+
+ return (NULL);
+}
+
+
+/*
+ * 'mxml_parse_element()' - Parse an element for any attributes...
+ */
+
+static int /* O - Terminating character */
+mxml_parse_element(
+ mxml_node_t *node, /* I - Element node */
+ void *p, /* I - Data to read from */
+ int *encoding, /* IO - Encoding */
+ _mxml_getc_cb_t getc_cb) /* I - Data callback */
+{
+ int ch, /* Current character in file */
+ quote; /* Quoting character */
+ char *name, /* Attribute name */
+ *value, /* Attribute value */
+ *ptr; /* Pointer into name/value */
+ int namesize, /* Size of name string */
+ valsize; /* Size of value string */
+
+
+ /*
+ * Initialize the name and value buffers...
+ */
+
+ if ((name = malloc(64)) == NULL)
+ {
+ mxml_error("Unable to allocate memory for name!");
+ return (EOF);
+ }
+
+ namesize = 64;
+
+ if ((value = malloc(64)) == NULL)
+ {
+ free(name);
+ mxml_error("Unable to allocate memory for value!");
+ return (EOF);
+ }
+
+ valsize = 64;
+
+ /*
+ * Loop until we hit a >, /, ?, or EOF...
+ */
+
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
+ {
+#if DEBUG > 1
+ fprintf(stderr, "parse_element: ch='%c'\n", ch);
+#endif /* DEBUG > 1 */
+
+ /*
+ * Skip leading whitespace...
+ */
+
+ if (mxml_isspace(ch))
+ continue;
+
+ /*
+ * Stop at /, ?, or >...
+ */
+
+ if (ch == '/' || ch == '?')
+ {
+ /*
+ * Grab the > character and print an error if it isn't there...
+ */
+
+ quote = (*getc_cb)(p, encoding);
+
+ if (quote != '>')
+ {
+ mxml_error("Expected '>' after '%c' for element %s, but got '%c'!",
+ ch, node->value.element.name, quote);
+ goto error;
+ }
+
+ break;
+ }
+ else if (ch == '<')
+ {
+ mxml_error("Bare < in element %s!", node->value.element.name);
+ goto error;
+ }
+ else if (ch == '>')
+ break;
+
+ /*
+ * Read the attribute name...
+ */
+
+ name[0] = ch;
+ ptr = name + 1;
+
+ if (ch == '\"' || ch == '\'')
+ {
+ /*
+ * Name is in quotes, so get a quoted string...
+ */
+
+ quote = ch;
+
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
+ {
+ if (ch == '&')
+ if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
+ goto error;
+
+ if (mxml_add_char(ch, &ptr, &name, &namesize))
+ goto error;
+
+ if (ch == quote)
+ break;
+ }
+ }
+ else
+ {
+ /*
+     * Grab a normal, non-quoted name...
+ */
+
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
+ if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>' ||
+ ch == '?')
+ break;
+ else
+ {
+ if (ch == '&')
+ if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
+ goto error;
+
+ if (mxml_add_char(ch, &ptr, &name, &namesize))
+ goto error;
+ }
+ }
+
+ *ptr = '\0';
+
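+    /*
+     * Stop if this is a duplicate attribute...
+     */
+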
+ if (mxmlElementGetAttr(node, name))
+ goto error;
+
+ while (ch != EOF && mxml_isspace(ch))
+ ch = (*getc_cb)(p, encoding);
+
+ if (ch == '=')
+ {
+ /*
+ * Read the attribute value...
+ */
+
+ while ((ch = (*getc_cb)(p, encoding)) != EOF && mxml_isspace(ch));
+
+ if (ch == EOF)
+ {
+ mxml_error("Missing value for attribute '%s' in element %s!",
+ name, node->value.element.name);
+ goto error;
+ }
+
+ if (ch == '\'' || ch == '\"')
+ {
+ /*
+ * Read quoted value...
+ */
+
+ quote = ch;
+ ptr = value;
+
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
+ if (ch == quote)
+ break;
+ else
+ {
+ if (ch == '&')
+ if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
+ goto error;
+
+ if (mxml_add_char(ch, &ptr, &value, &valsize))
+ goto error;
+ }
+
+ *ptr = '\0';
+ }
+ else
+ {
+ /*
+ * Read unquoted value...
+ */
+
+ value[0] = ch;
+ ptr = value + 1;
+
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
+ if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>')
+ break;
+ else
+ {
+ if (ch == '&')
+ if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
+ goto error;
+
+ if (mxml_add_char(ch, &ptr, &value, &valsize))
+ goto error;
+ }
+
+ *ptr = '\0';
+ }
+
+ /*
+ * Set the attribute with the given string value...
+ */
+
+ mxmlElementSetAttr(node, name, value);
+ }
+ else
+ {
+ mxml_error("Missing value for attribute '%s' in element %s!",
+ name, node->value.element.name);
+ goto error;
+ }
+
+ /*
+ * Check the end character...
+ */
+
+ if (ch == '/' || ch == '?')
+ {
+ /*
+ * Grab the > character and print an error if it isn't there...
+ */
+
+ quote = (*getc_cb)(p, encoding);
+
+ if (quote != '>')
+ {
+ mxml_error("Expected '>' after '%c' for element %s, but got '%c'!",
+ ch, node->value.element.name, quote);
+ ch = EOF;
+ }
+
+ break;
+ }
+ else if (ch == '>')
+ break;
+ }
+
+ /*
+ * Free the name and value buffers and return...
+ */
+
+ free(name);
+ free(value);
+
+ return (ch);
+
+ /*
+ * Common error return point...
+ */
+
+error:
+
+ free(name);
+ free(value);
+
+ return (EOF);
+}
+
+
+/*
+ * 'mxml_string_getc()' - Get a character from a string.
+ */
+
+static int /* O - Character or EOF */
+mxml_string_getc(void *p, /* I - Pointer to file */
+ int *encoding) /* IO - Encoding */
+{
+ int ch; /* Character */
+ const char **s; /* Pointer to string pointer */
+
+
+ s = (const char **)p;
+
+ if ((ch = (*s)[0] & 255) != 0 || *encoding == ENCODE_UTF16LE)
+ {
+ /*
+ * Got character; convert UTF-8 to integer and return...
+ */
+
+ (*s)++;
+
+ switch (*encoding)
+ {
+ case ENCODE_UTF8 :
+ if (!(ch & 0x80))
+ {
+#if DEBUG > 1
+ printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+
+ return (ch);
+ }
+ else if (ch == 0xfe)
+ {
+ /*
+ * UTF-16 big-endian BOM?
+ */
+
+ if (((*s)[0] & 255) != 0xff)
+ return (EOF);
+
+ *encoding = ENCODE_UTF16BE;
+ (*s)++;
+
+ return (mxml_string_getc(p, encoding));
+ }
+ else if (ch == 0xff)
+ {
+ /*
+ * UTF-16 little-endian BOM?
+ */
+
+ if (((*s)[0] & 255) != 0xfe)
+ return (EOF);
+
+ *encoding = ENCODE_UTF16LE;
+ (*s)++;
+
+ return (mxml_string_getc(p, encoding));
+ }
+ else if ((ch & 0xe0) == 0xc0)
+ {
+ /*
+ * Two-byte value...
+ */
+
+ if (((*s)[0] & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x1f) << 6) | ((*s)[0] & 0x3f);
+
+ (*s)++;
+
+ if (ch < 0x80)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+
+#if DEBUG > 1
+ printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ return (ch);
+ }
+ else if ((ch & 0xf0) == 0xe0)
+ {
+ /*
+ * Three-byte value...
+ */
+
+ if (((*s)[0] & 0xc0) != 0x80 ||
+ ((*s)[1] & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((((ch & 0x0f) << 6) | ((*s)[0] & 0x3f)) << 6) | ((*s)[1] & 0x3f);
+
+ (*s) += 2;
+
+ if (ch < 0x800)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+
+ /*
+ * Ignore (strip) Byte Order Mark (BOM)...
+ */
+
+ if (ch == 0xfeff)
+ return (mxml_string_getc(p, encoding));
+
+#if DEBUG > 1
+ printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ return (ch);
+ }
+ else if ((ch & 0xf8) == 0xf0)
+ {
+ /*
+ * Four-byte value...
+ */
+
+ if (((*s)[0] & 0xc0) != 0x80 ||
+ ((*s)[1] & 0xc0) != 0x80 ||
+ ((*s)[2] & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((((((ch & 0x07) << 6) | ((*s)[0] & 0x3f)) << 6) |
+ ((*s)[1] & 0x3f)) << 6) | ((*s)[2] & 0x3f);
+
+ (*s) += 3;
+
+ if (ch < 0x10000)
+ {
+ mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
+ return (EOF);
+ }
+
+#if DEBUG > 1
+ printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ return (ch);
+ }
+ else
+ return (EOF);
+
+ case ENCODE_UTF16BE :
+ /*
+ * Read UTF-16 big-endian char...
+ */
+
+ ch = (ch << 8) | ((*s)[0] & 255);
+ (*s) ++;
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+ else if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch; /* Lower word */
+
+
+ if (!(*s)[0])
+ return (EOF);
+
+ lch = (((*s)[0] & 255) << 8) | ((*s)[1] & 255);
+ (*s) += 2;
+
+          if (lch < 0xdc00 || lch > 0xdfff)
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+
+#if DEBUG > 1
+ printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ return (ch);
+
+ case ENCODE_UTF16LE :
+ /*
+ * Read UTF-16 little-endian char...
+ */
+
+ ch = ch | (((*s)[0] & 255) << 8);
+
+ if (!ch)
+ {
+ (*s) --;
+ return (EOF);
+ }
+
+ (*s) ++;
+
+ if (mxml_bad_char(ch))
+ {
+ mxml_error("Bad control character 0x%02x not allowed by XML standard!",
+ ch);
+ return (EOF);
+ }
+ else if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch; /* Lower word */
+
+
+ if (!(*s)[1])
+ return (EOF);
+
+ lch = (((*s)[1] & 255) << 8) | ((*s)[0] & 255);
+ (*s) += 2;
+
+          if (lch < 0xdc00 || lch > 0xdfff)
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+
+#if DEBUG > 1
+ printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
+#endif /* DEBUG > 1 */
+
+ return (ch);
+ }
+ }
+
+ return (EOF);
+}
+
+
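+/*
+ * Worked example for the UTF-8 path above: the two-byte sequence
+ * 0xC3 0xA9 matches (ch & 0xe0) == 0xc0, so the decoder computes
+ * ((0xC3 & 0x1f) << 6) | (0xA9 & 0x3f) = 0xE9, i.e. U+00E9.
+ */
+
+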
+/*
+ * 'mxml_string_putc()' - Write a character to a string.
+ */
+
+static int /* O - 0 on success, -1 on failure */
+mxml_string_putc(int ch, /* I - Character to write */
+ void *p) /* I - Pointer to string pointers */
+{
+ char **pp; /* Pointer to string pointers */
+
+
+ pp = (char **)p;
+
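+  /*
+   * pp[0] is the current write position, pp[1] is the end of the buffer,
+   * and pp[2] is a flag set once the buffer has overflowed; characters
+   * past the end are counted but not stored.
+   */
+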
+  if (pp[2] == 0)
+  {
+    if (pp[0] < pp[1])
+      pp[0][0] = ch;
+    else
+      pp[2] = (char *)1;
+  }
+
+ pp[0] ++;
+
+ return (0);
+}
+
+
+/*
+ * 'mxml_write_name()' - Write a name string.
+ */
+
+static int /* O - 0 on success, -1 on failure */
+mxml_write_name(const char *s, /* I - Name to write */
+ void *p, /* I - Write pointer */
+ int (*putc_cb)(int, void *))
+ /* I - Write callback */
+{
+ char quote; /* Quote character */
+ const char *name; /* Entity name */
+
+
+ if (*s == '\"' || *s == '\'')
+ {
+ /*
+ * Write a quoted name string...
+ */
+
+ if ((*putc_cb)(*s, p) < 0)
+ return (-1);
+
+ quote = *s++;
+
+ while (*s && *s != quote)
+ {
+ if ((name = mxmlEntityGetName(*s)) != NULL)
+ {
+ if ((*putc_cb)('&', p) < 0)
+ return (-1);
+
+ while (*name)
+ {
+ if ((*putc_cb)(*name, p) < 0)
+ return (-1);
+
+ name ++;
+ }
+
+ if ((*putc_cb)(';', p) < 0)
+ return (-1);
+ }
+ else if ((*putc_cb)(*s, p) < 0)
+ return (-1);
+
+ s ++;
+ }
+
+ /*
+ * Write the end quote...
+ */
+
+ if ((*putc_cb)(quote, p) < 0)
+ return (-1);
+ }
+ else
+ {
+ /*
+ * Write a non-quoted name string...
+ */
+
+ while (*s)
+ {
+ if ((*putc_cb)(*s, p) < 0)
+ return (-1);
+
+ s ++;
+ }
+ }
+
+ return (0);
+}
+
+
+/*
+ * 'mxml_write_node()' - Save an XML node to a file.
+ */
+
+static int /* O - Column or -1 on error */
+mxml_write_node(mxml_node_t *node, /* I - Node to write */
+ void *p, /* I - File to write to */
+ mxml_save_cb_t cb, /* I - Whitespace callback */
+ int col, /* I - Current column */
+ _mxml_putc_cb_t putc_cb,/* I - Output callback */
+ _mxml_global_t *global)/* I - Global data */
+{
+ int i, /* Looping var */
+ width; /* Width of attr + value */
+ mxml_attr_t *attr; /* Current attribute */
+ char s[255]; /* Temporary string */
+
+
+ /*
+ * Print the node value...
+ */
+
+ switch (node->type)
+ {
+ case MXML_ELEMENT :
+ col = mxml_write_ws(node, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb);
+
+ if ((*putc_cb)('<', p) < 0)
+ return (-1);
+ if (node->value.element.name[0] == '?' ||
+ !strncmp(node->value.element.name, "!--", 3) ||
+ !strncmp(node->value.element.name, "![CDATA[", 8))
+ {
+ /*
+ * Comments, CDATA, and processing instructions do not
+ * use character entities.
+ */
+
+ const char *ptr; /* Pointer into name */
+
+
+ for (ptr = node->value.element.name; *ptr; ptr ++)
+ if ((*putc_cb)(*ptr, p) < 0)
+ return (-1);
+ }
+ else if (mxml_write_name(node->value.element.name, p, putc_cb) < 0)
+ return (-1);
+
+ col += strlen(node->value.element.name) + 1;
+
+ for (i = node->value.element.num_attrs, attr = node->value.element.attrs;
+ i > 0;
+ i --, attr ++)
+ {
+ width = strlen(attr->name);
+
+ if (attr->value)
+ width += strlen(attr->value) + 3;
+
+ if (global->wrap > 0 && (col + width) > global->wrap)
+ {
+ if ((*putc_cb)('\n', p) < 0)
+ return (-1);
+
+ col = 0;
+ }
+ else
+ {
+ if ((*putc_cb)(' ', p) < 0)
+ return (-1);
+
+ col ++;
+ }
+
+ if (mxml_write_name(attr->name, p, putc_cb) < 0)
+ return (-1);
+
+ if (attr->value)
+ {
+ if ((*putc_cb)('=', p) < 0)
+ return (-1);
+ if ((*putc_cb)('\"', p) < 0)
+ return (-1);
+ if (mxml_write_string(attr->value, p, putc_cb) < 0)
+ return (-1);
+ if ((*putc_cb)('\"', p) < 0)
+ return (-1);
+ }
+
+ col += width;
+ }
+
+ if (node->child)
+ {
+ /*
+ * Write children...
+ */
+
+ mxml_node_t *child; /* Current child */
+
+
+ if ((*putc_cb)('>', p) < 0)
+ return (-1);
+ else
+ col ++;
+
+ col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
+
+ for (child = node->child; child; child = child->next)
+ {
+ if ((col = mxml_write_node(child, p, cb, col, putc_cb, global)) < 0)
+ return (-1);
+ }
+
+ /*
+ * The ? and ! elements are special-cases and have no end tags...
+ */
+
+ if (node->value.element.name[0] != '!' &&
+ node->value.element.name[0] != '?')
+ {
+ col = mxml_write_ws(node, p, cb, MXML_WS_BEFORE_CLOSE, col, putc_cb);
+
+ if ((*putc_cb)('<', p) < 0)
+ return (-1);
+ if ((*putc_cb)('/', p) < 0)
+ return (-1);
+ if (mxml_write_string(node->value.element.name, p, putc_cb) < 0)
+ return (-1);
+ if ((*putc_cb)('>', p) < 0)
+ return (-1);
+
+ col += strlen(node->value.element.name) + 3;
+
+ col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_CLOSE, col, putc_cb);
+ }
+ }
+ else if (node->value.element.name[0] == '!' ||
+ node->value.element.name[0] == '?')
+ {
+ /*
+ * The ? and ! elements are special-cases...
+ */
+
+ if ((*putc_cb)('>', p) < 0)
+ return (-1);
+ else
+ col ++;
+
+ col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
+ }
+ else
+ {
+ if ((*putc_cb)(' ', p) < 0)
+ return (-1);
+ if ((*putc_cb)('/', p) < 0)
+ return (-1);
+ if ((*putc_cb)('>', p) < 0)
+ return (-1);
+
+ col += 3;
+
+ col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
+ }
+ break;
+
+ case MXML_INTEGER :
+ if (node->prev)
+ {
+ if (global->wrap > 0 && col > global->wrap)
+ {
+ if ((*putc_cb)('\n', p) < 0)
+ return (-1);
+
+ col = 0;
+ }
+ else if ((*putc_cb)(' ', p) < 0)
+ return (-1);
+ else
+ col ++;
+ }
+
+ sprintf(s, "%d", node->value.integer);
+ if (mxml_write_string(s, p, putc_cb) < 0)
+ return (-1);
+
+ col += strlen(s);
+ break;
+
+ case MXML_OPAQUE :
+ if (mxml_write_string(node->value.opaque, p, putc_cb) < 0)
+ return (-1);
+
+ col += strlen(node->value.opaque);
+ break;
+
+ case MXML_REAL :
+ if (node->prev)
+ {
+ if (global->wrap > 0 && col > global->wrap)
+ {
+ if ((*putc_cb)('\n', p) < 0)
+ return (-1);
+
+ col = 0;
+ }
+ else if ((*putc_cb)(' ', p) < 0)
+ return (-1);
+ else
+ col ++;
+ }
+
+ sprintf(s, "%f", node->value.real);
+ if (mxml_write_string(s, p, putc_cb) < 0)
+ return (-1);
+
+ col += strlen(s);
+ break;
+
+ case MXML_TEXT :
+ if (node->value.text.whitespace && col > 0)
+ {
+ if (global->wrap > 0 && col > global->wrap)
+ {
+ if ((*putc_cb)('\n', p) < 0)
+ return (-1);
+
+ col = 0;
+ }
+ else if ((*putc_cb)(' ', p) < 0)
+ return (-1);
+ else
+ col ++;
+ }
+
+ if (mxml_write_string(node->value.text.string, p, putc_cb) < 0)
+ return (-1);
+
+ col += strlen(node->value.text.string);
+ break;
+
+ case MXML_CUSTOM :
+ if (global->custom_save_cb)
+ {
+ char *data; /* Custom data string */
+ const char *newline; /* Last newline in string */
+
+
+ if ((data = (*global->custom_save_cb)(node)) == NULL)
+ return (-1);
+
+ if (mxml_write_string(data, p, putc_cb) < 0)
+ return (-1);
+
+ if ((newline = strrchr(data, '\n')) == NULL)
+ col += strlen(data);
+ else
+ col = strlen(newline);
+
+ free(data);
+ break;
+ }
+
+ default : /* Should never happen */
+ return (-1);
+ }
+
+ return (col);
+}
+
+
+/*
+ * 'mxml_write_string()' - Write a string, escaping & and < as needed.
+ */
+
+static int /* O - 0 on success, -1 on failure */
+mxml_write_string(
+ const char *s, /* I - String to write */
+ void *p, /* I - Write pointer */
+ _mxml_putc_cb_t putc_cb) /* I - Write callback */
+{
+ const char *name; /* Entity name, if any */
+
+
+ while (*s)
+ {
+ if ((name = mxmlEntityGetName(*s)) != NULL)
+ {
+ if ((*putc_cb)('&', p) < 0)
+ return (-1);
+
+ while (*name)
+ {
+ if ((*putc_cb)(*name, p) < 0)
+ return (-1);
+ name ++;
+ }
+
+ if ((*putc_cb)(';', p) < 0)
+ return (-1);
+ }
+ else if ((*putc_cb)(*s, p) < 0)
+ return (-1);
+
+ s ++;
+ }
+
+ return (0);
+}
+
+
+/*
+ * 'mxml_write_ws()' - Do whitespace callback...
+ */
+
+static int /* O - New column */
+mxml_write_ws(mxml_node_t *node, /* I - Current node */
+ void *p, /* I - Write pointer */
+ mxml_save_cb_t cb, /* I - Callback function */
+ int ws, /* I - Where value */
+ int col, /* I - Current column */
+ _mxml_putc_cb_t putc_cb) /* I - Write callback */
+{
+ const char *s; /* Whitespace string */
+
+
+ if (cb && (s = (*cb)(node, ws)) != NULL)
+ {
+ while (*s)
+ {
+ if ((*putc_cb)(*s, p) < 0)
+ return (-1);
+ else if (*s == '\n')
+ col = 0;
+ else if (*s == '\t')
+ {
+ col += MXML_TAB;
+ col = col - (col % MXML_TAB);
+ }
+ else
+ col ++;
+
+ s ++;
+ }
+ }
+
+ return (col);
+}
+
+
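+/*
+ * Usage sketch (not from the original source): a whitespace callback of
+ * type mxml_save_cb_t that mxmlSaveFile() would invoke through
+ * mxml_write_ws() above, putting each element's open and close tags on
+ * their own lines:
+ *
+ *   const char *
+ *   whitespace_cb(mxml_node_t *node, int where)
+ *   {
+ *     if (where == MXML_WS_BEFORE_OPEN || where == MXML_WS_BEFORE_CLOSE)
+ *       return ("\n");
+ *
+ *     return (NULL);
+ *   }
+ */
+
+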
+/*
+ * End of "$Id: mxml-file.c 455 2014-01-05 03:28:03Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-get.c b/tools/gator/daemon/mxml/mxml-get.c
new file mode 100644
index 000000000000..40ed3d0839b4
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-get.c
@@ -0,0 +1,452 @@
+/*
+ * "$Id: mxml-get.c 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Node get functions for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "config.h"
+#include "mxml.h"
+
+
+/*
+ * 'mxmlGetCDATA()' - Get the value for a CDATA node.
+ *
+ * @code NULL@ is returned if the node is not a CDATA element.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+const char * /* O - CDATA value or NULL */
+mxmlGetCDATA(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT ||
+ strncmp(node->value.element.name, "![CDATA[", 8))
+ return (NULL);
+
+ /*
+ * Return the text following the CDATA declaration...
+ */
+
+ return (node->value.element.name + 8);
+}
+
+
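+/*
+ * Usage sketch (the "data" element name is hypothetical): for the
+ * fragment <data><![CDATA[raw text]]></data> the CDATA child of <data>
+ * can be read with:
+ *
+ *   mxml_node_t *data = mxmlFindElement(tree, tree, "data", NULL, NULL,
+ *                                       MXML_DESCEND);
+ *   const char *value = mxmlGetCDATA(mxmlGetFirstChild(data));
+ */
+
+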
+/*
+ * 'mxmlGetCustom()' - Get the value for a custom node.
+ *
+ * @code NULL@ is returned if the node (or its first child) is not a custom
+ * value node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+const void * /* O - Custom value or NULL */
+mxmlGetCustom(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (NULL);
+
+ /*
+   * Return the custom value...
+ */
+
+ if (node->type == MXML_CUSTOM)
+ return (node->value.custom.data);
+ else if (node->type == MXML_ELEMENT &&
+ node->child &&
+ node->child->type == MXML_CUSTOM)
+ return (node->child->value.custom.data);
+ else
+ return (NULL);
+}
+
+
+/*
+ * 'mxmlGetElement()' - Get the name for an element node.
+ *
+ * @code NULL@ is returned if the node is not an element node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+const char * /* O - Element name or NULL */
+mxmlGetElement(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT)
+ return (NULL);
+
+ /*
+ * Return the element name...
+ */
+
+ return (node->value.element.name);
+}
+
+
+/*
+ * 'mxmlGetFirstChild()' - Get the first child of an element node.
+ *
+ * @code NULL@ is returned if the node is not an element node or if the node
+ * has no children.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+mxml_node_t * /* O - First child or NULL */
+mxmlGetFirstChild(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT)
+ return (NULL);
+
+ /*
+ * Return the first child node...
+ */
+
+ return (node->child);
+}
+
+
+/*
+ * 'mxmlGetInteger()' - Get the integer value from the specified node or its
+ * first child.
+ *
+ * 0 is returned if the node (or its first child) is not an integer value node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+int /* O - Integer value or 0 */
+mxmlGetInteger(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (0);
+
+ /*
+ * Return the integer value...
+ */
+
+ if (node->type == MXML_INTEGER)
+ return (node->value.integer);
+ else if (node->type == MXML_ELEMENT &&
+ node->child &&
+ node->child->type == MXML_INTEGER)
+ return (node->child->value.integer);
+ else
+ return (0);
+}
+
+
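+/*
+ * Usage sketch (the "count" element name is hypothetical): for the
+ * fragment <count>42</count> this returns 42 even when passed the
+ * element node itself, because the first child is consulted too:
+ *
+ *   mxml_node_t *count = mxmlFindElement(tree, tree, "count", NULL, NULL,
+ *                                        MXML_DESCEND);
+ *   int value = mxmlGetInteger(count);
+ */
+
+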
+/*
+ * 'mxmlGetLastChild()' - Get the last child of an element node.
+ *
+ * @code NULL@ is returned if the node is not an element node or if the node
+ * has no children.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+mxml_node_t * /* O - Last child or NULL */
+mxmlGetLastChild(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT)
+ return (NULL);
+
+ /*
+   * Return the last child node...
+ */
+
+ return (node->last_child);
+}
+
+
+/*
+ * 'mxmlGetNextSibling()' - Get the next node for the current parent.
+ *
+ * @code NULL@ is returned if this is the last child for the current parent.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+mxml_node_t *                         /* O - Next node or NULL */
+mxmlGetNextSibling(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (NULL);
+
+ /*
+   * Return the next sibling node...
+ */
+
+ return (node->next);
+}
+
+
+/*
+ * 'mxmlGetOpaque()' - Get an opaque string value for a node or its first child.
+ *
+ * @code NULL@ is returned if the node (or its first child) is not an opaque
+ * value node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+const char * /* O - Opaque string or NULL */
+mxmlGetOpaque(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (NULL);
+
+ /*
+   * Return the opaque string value...
+ */
+
+ if (node->type == MXML_OPAQUE)
+ return (node->value.opaque);
+ else if (node->type == MXML_ELEMENT &&
+ node->child &&
+ node->child->type == MXML_OPAQUE)
+ return (node->child->value.opaque);
+ else
+ return (NULL);
+}
+
+
+/*
+ * 'mxmlGetParent()' - Get the parent node.
+ *
+ * @code NULL@ is returned for a root node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+mxml_node_t * /* O - Parent node or NULL */
+mxmlGetParent(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (NULL);
+
+ /*
+   * Return the parent node...
+ */
+
+ return (node->parent);
+}
+
+
+/*
+ * 'mxmlGetPrevSibling()' - Get the previous node for the current parent.
+ *
+ * @code NULL@ is returned if this is the first child for the current parent.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+mxml_node_t * /* O - Previous node or NULL */
+mxmlGetPrevSibling(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (NULL);
+
+ /*
+   * Return the previous sibling node...
+ */
+
+ return (node->prev);
+}
+
+
+/*
+ * 'mxmlGetReal()' - Get the real value for a node or its first child.
+ *
+ * 0.0 is returned if the node (or its first child) is not a real value node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+double /* O - Real value or 0.0 */
+mxmlGetReal(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (0.0);
+
+ /*
+   * Return the real value...
+ */
+
+ if (node->type == MXML_REAL)
+ return (node->value.real);
+ else if (node->type == MXML_ELEMENT &&
+ node->child &&
+ node->child->type == MXML_REAL)
+ return (node->child->value.real);
+ else
+ return (0.0);
+}
+
+
+/*
+ * 'mxmlGetText()' - Get the text value for a node or its first child.
+ *
+ * @code NULL@ is returned if the node (or its first child) is not a text node.
+ * The "whitespace" argument can be NULL.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+const char * /* O - Text string or NULL */
+mxmlGetText(mxml_node_t *node, /* I - Node to get */
+ int *whitespace) /* O - 1 if string is preceded by whitespace, 0 otherwise */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ {
+ if (whitespace)
+ *whitespace = 0;
+
+ return (NULL);
+ }
+
+ /*
+   * Return the text value...
+ */
+
+ if (node->type == MXML_TEXT)
+ {
+ if (whitespace)
+ *whitespace = node->value.text.whitespace;
+
+ return (node->value.text.string);
+ }
+ else if (node->type == MXML_ELEMENT &&
+ node->child &&
+ node->child->type == MXML_TEXT)
+ {
+ if (whitespace)
+ *whitespace = node->child->value.text.whitespace;
+
+ return (node->child->value.text.string);
+ }
+ else
+ {
+ if (whitespace)
+ *whitespace = 0;
+
+ return (NULL);
+ }
+}
+
+
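+/*
+ * Usage sketch: the whitespace argument may be NULL when the caller does
+ * not care about leading whitespace:
+ *
+ *   int ws;
+ *   const char *text = mxmlGetText(node, &ws);
+ *   const char *same = mxmlGetText(node, NULL);
+ */
+
+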
+/*
+ * 'mxmlGetType()' - Get the node type.
+ *
+ * @code MXML_IGNORE@ is returned if "node" is @code NULL@.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+mxml_type_t /* O - Type of node */
+mxmlGetType(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (MXML_IGNORE);
+
+ /*
+ * Return the node type...
+ */
+
+ return (node->type);
+}
+
+
+/*
+ * 'mxmlGetUserData()' - Get the user data pointer for a node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+void * /* O - User data pointer */
+mxmlGetUserData(mxml_node_t *node) /* I - Node to get */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (NULL);
+
+ /*
+ * Return the user data pointer...
+ */
+
+ return (node->user_data);
+}
+
+
+/*
+ * End of "$Id: mxml-get.c 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-index.c b/tools/gator/daemon/mxml/mxml-index.c
new file mode 100644
index 000000000000..10814390d3a0
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-index.c
@@ -0,0 +1,659 @@
+/*
+ * "$Id: mxml-index.c 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Index support code for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "config.h"
+#include "mxml.h"
+
+
+/*
+ * Sort functions...
+ */
+
+static int index_compare(mxml_index_t *ind, mxml_node_t *first,
+ mxml_node_t *second);
+static int index_find(mxml_index_t *ind, const char *element,
+ const char *value, mxml_node_t *node);
+static void index_sort(mxml_index_t *ind, int left, int right);
+
+
+/*
+ * 'mxmlIndexDelete()' - Delete an index.
+ */
+
+void
+mxmlIndexDelete(mxml_index_t *ind) /* I - Index to delete */
+{
+ /*
+   * Range check input...
+ */
+
+ if (!ind)
+ return;
+
+ /*
+ * Free memory...
+ */
+
+ if (ind->attr)
+ free(ind->attr);
+
+ if (ind->alloc_nodes)
+ free(ind->nodes);
+
+ free(ind);
+}
+
+
+/*
+ * 'mxmlIndexEnum()' - Return the next node in the index.
+ *
+ * Nodes are returned in the sorted order of the index.
+ */
+
+mxml_node_t * /* O - Next node or NULL if there is none */
+mxmlIndexEnum(mxml_index_t *ind) /* I - Index to enumerate */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!ind)
+ return (NULL);
+
+ /*
+ * Return the next node...
+ */
+
+ if (ind->cur_node < ind->num_nodes)
+ return (ind->nodes[ind->cur_node ++]);
+ else
+ return (NULL);
+}
+
+
+/*
+ * 'mxmlIndexFind()' - Find the next matching node.
+ *
+ * You should call mxmlIndexReset() prior to using this function for
+ * the first time with a particular set of "element" and "value"
+ * strings. Passing NULL for both "element" and "value" is equivalent
+ * to calling mxmlIndexEnum().
+ */
+
+mxml_node_t * /* O - Node or NULL if none found */
+mxmlIndexFind(mxml_index_t *ind, /* I - Index to search */
+ const char *element, /* I - Element name to find, if any */
+ const char *value) /* I - Attribute value, if any */
+{
+ int diff, /* Difference between names */
+ current, /* Current entity in search */
+ first, /* First entity in search */
+ last; /* Last entity in search */
+
+
+#ifdef DEBUG
+ printf("mxmlIndexFind(ind=%p, element=\"%s\", value=\"%s\")\n",
+ ind, element ? element : "(null)", value ? value : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!ind || (!ind->attr && value))
+ {
+#ifdef DEBUG
+ puts(" returning NULL...");
+ printf(" ind->attr=\"%s\"\n", ind->attr ? ind->attr : "(null)");
+#endif /* DEBUG */
+
+ return (NULL);
+ }
+
+ /*
+ * If both element and value are NULL, just enumerate the nodes in the
+ * index...
+ */
+
+ if (!element && !value)
+ return (mxmlIndexEnum(ind));
+
+ /*
+ * If there are no nodes in the index, return NULL...
+ */
+
+ if (!ind->num_nodes)
+ {
+#ifdef DEBUG
+ puts(" returning NULL...");
+ puts(" no nodes!");
+#endif /* DEBUG */
+
+ return (NULL);
+ }
+
+ /*
+ * If cur_node == 0, then find the first matching node...
+ */
+
+ if (ind->cur_node == 0)
+ {
+ /*
+ * Find the first node using a modified binary search algorithm...
+ */
+
+ first = 0;
+ last = ind->num_nodes - 1;
+
+#ifdef DEBUG
+ printf(" find first time, num_nodes=%d...\n", ind->num_nodes);
+#endif /* DEBUG */
+
+ while ((last - first) > 1)
+ {
+ current = (first + last) / 2;
+
+#ifdef DEBUG
+ printf(" first=%d, last=%d, current=%d\n", first, last, current);
+#endif /* DEBUG */
+
+ if ((diff = index_find(ind, element, value, ind->nodes[current])) == 0)
+ {
+ /*
+ * Found a match, move back to find the first...
+ */
+
+#ifdef DEBUG
+ puts(" match!");
+#endif /* DEBUG */
+
+ while (current > 0 &&
+ !index_find(ind, element, value, ind->nodes[current - 1]))
+ current --;
+
+#ifdef DEBUG
+ printf(" returning first match=%d\n", current);
+#endif /* DEBUG */
+
+ /*
+ * Return the first match and save the index to the next...
+ */
+
+ ind->cur_node = current + 1;
+
+ return (ind->nodes[current]);
+ }
+ else if (diff < 0)
+ last = current;
+ else
+ first = current;
+
+#ifdef DEBUG
+ printf(" diff=%d\n", diff);
+#endif /* DEBUG */
+ }
+
+ /*
+ * If we get this far, then we found exactly 0 or 1 matches...
+ */
+
+ for (current = first; current <= last; current ++)
+ if (!index_find(ind, element, value, ind->nodes[current]))
+ {
+ /*
+ * Found exactly one (or possibly two) match...
+ */
+
+#ifdef DEBUG
+ printf(" returning only match %d...\n", current);
+#endif /* DEBUG */
+
+ ind->cur_node = current + 1;
+
+ return (ind->nodes[current]);
+ }
+
+ /*
+ * No matches...
+ */
+
+ ind->cur_node = ind->num_nodes;
+
+#ifdef DEBUG
+ puts(" returning NULL...");
+#endif /* DEBUG */
+
+ return (NULL);
+ }
+ else if (ind->cur_node < ind->num_nodes &&
+ !index_find(ind, element, value, ind->nodes[ind->cur_node]))
+ {
+ /*
+ * Return the next matching node...
+ */
+
+#ifdef DEBUG
+ printf(" returning next match %d...\n", ind->cur_node);
+#endif /* DEBUG */
+
+ return (ind->nodes[ind->cur_node ++]);
+ }
+
+ /*
+ * If we get this far, then we have no matches...
+ */
+
+ ind->cur_node = ind->num_nodes;
+
+#ifdef DEBUG
+ puts(" returning NULL...");
+#endif /* DEBUG */
+
+ return (NULL);
+}
+
+
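+/*
+ * Usage sketch (the "event"/"name"/"cycles" strings and process() are
+ * hypothetical): find every <event> node whose "name" attribute equals
+ * "cycles":
+ *
+ *   mxml_index_t *ind = mxmlIndexNew(tree, "event", "name");
+ *   mxml_node_t *node;
+ *
+ *   mxmlIndexReset(ind);
+ *   while ((node = mxmlIndexFind(ind, "event", "cycles")) != NULL)
+ *     process(node);
+ *
+ *   mxmlIndexDelete(ind);
+ */
+
+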
+/*
+ * 'mxmlIndexGetCount()' - Get the number of nodes in an index.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+int                                   /* O - Number of nodes in index */
+mxmlIndexGetCount(mxml_index_t *ind) /* I - Index of nodes */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!ind)
+ return (0);
+
+ /*
+ * Return the number of nodes in the index...
+ */
+
+ return (ind->num_nodes);
+}
+
+
+/*
+ * 'mxmlIndexNew()' - Create a new index.
+ *
+ * The index will contain all nodes that contain the named element and/or
+ * attribute. If both "element" and "attr" are NULL, then the index will
+ * contain a sorted list of the elements in the node tree. Nodes are
+ * sorted by element name and optionally by attribute value if the "attr"
+ * argument is not NULL.
+ */
+
+mxml_index_t * /* O - New index */
+mxmlIndexNew(mxml_node_t *node, /* I - XML node tree */
+ const char *element, /* I - Element to index or NULL for all */
+ const char *attr) /* I - Attribute to index or NULL for none */
+{
+ mxml_index_t *ind; /* New index */
+ mxml_node_t *current, /* Current node in index */
+ **temp; /* Temporary node pointer array */
+
+
+ /*
+ * Range check input...
+ */
+
+#ifdef DEBUG
+ printf("mxmlIndexNew(node=%p, element=\"%s\", attr=\"%s\")\n",
+ node, element ? element : "(null)", attr ? attr : "(null)");
+#endif /* DEBUG */
+
+ if (!node)
+ return (NULL);
+
+ /*
+ * Create a new index...
+ */
+
+ if ((ind = calloc(1, sizeof(mxml_index_t))) == NULL)
+ {
+ mxml_error("Unable to allocate %d bytes for index - %s",
+ sizeof(mxml_index_t), strerror(errno));
+ return (NULL);
+ }
+
+ if (attr)
+ ind->attr = strdup(attr);
+
+ if (!element && !attr)
+ current = node;
+ else
+ current = mxmlFindElement(node, node, element, attr, NULL, MXML_DESCEND);
+
+ while (current)
+ {
+ if (ind->num_nodes >= ind->alloc_nodes)
+ {
+ if (!ind->alloc_nodes)
+ temp = malloc(64 * sizeof(mxml_node_t *));
+ else
+ temp = realloc(ind->nodes, (ind->alloc_nodes + 64) * sizeof(mxml_node_t *));
+
+ if (!temp)
+ {
+ /*
+ * Unable to allocate memory for the index, so abort...
+ */
+
+ mxml_error("Unable to allocate %d bytes for index: %s",
+ (ind->alloc_nodes + 64) * sizeof(mxml_node_t *),
+ strerror(errno));
+
+ mxmlIndexDelete(ind);
+ return (NULL);
+ }
+
+ ind->nodes = temp;
+ ind->alloc_nodes += 64;
+ }
+
+ ind->nodes[ind->num_nodes ++] = current;
+
+ current = mxmlFindElement(current, node, element, attr, NULL, MXML_DESCEND);
+ }
+
+ /*
+ * Sort nodes based upon the search criteria...
+ */
+
+#ifdef DEBUG
+ {
+ int i; /* Looping var */
+
+
+ printf("%d node(s) in index.\n\n", ind->num_nodes);
+
+ if (attr)
+ {
+ printf("Node Address Element %s\n", attr);
+ puts("-------- -------- -------------- ------------------------------");
+
+ for (i = 0; i < ind->num_nodes; i ++)
+ printf("%8d %-8p %-14.14s %s\n", i, ind->nodes[i],
+ ind->nodes[i]->value.element.name,
+ mxmlElementGetAttr(ind->nodes[i], attr));
+ }
+ else
+ {
+ puts("Node Address Element");
+ puts("-------- -------- --------------");
+
+ for (i = 0; i < ind->num_nodes; i ++)
+ printf("%8d %-8p %s\n", i, ind->nodes[i],
+ ind->nodes[i]->value.element.name);
+ }
+
+ putchar('\n');
+ }
+#endif /* DEBUG */
+
+ if (ind->num_nodes > 1)
+ index_sort(ind, 0, ind->num_nodes - 1);
+
+#ifdef DEBUG
+ {
+ int i; /* Looping var */
+
+
+ puts("After sorting:\n");
+
+ if (attr)
+ {
+ printf("Node Address Element %s\n", attr);
+ puts("-------- -------- -------------- ------------------------------");
+
+ for (i = 0; i < ind->num_nodes; i ++)
+ printf("%8d %-8p %-14.14s %s\n", i, ind->nodes[i],
+ ind->nodes[i]->value.element.name,
+ mxmlElementGetAttr(ind->nodes[i], attr));
+ }
+ else
+ {
+ puts("Node Address Element");
+ puts("-------- -------- --------------");
+
+ for (i = 0; i < ind->num_nodes; i ++)
+ printf("%8d %-8p %s\n", i, ind->nodes[i],
+ ind->nodes[i]->value.element.name);
+ }
+
+ putchar('\n');
+ }
+#endif /* DEBUG */
+
+ /*
+ * Return the new index...
+ */
+
+ return (ind);
+}
+
+
+/*
+ * 'mxmlIndexReset()' - Reset the enumeration/find pointer in the index and
+ * return the first node in the index.
+ *
+ * This function should be called prior to using mxmlIndexEnum() or
+ * mxmlIndexFind() for the first time.
+ */
+
+mxml_node_t * /* O - First node or NULL if there is none */
+mxmlIndexReset(mxml_index_t *ind) /* I - Index to reset */
+{
+#ifdef DEBUG
+ printf("mxmlIndexReset(ind=%p)\n", ind);
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!ind)
+ return (NULL);
+
+ /*
+ * Set the index to the first element...
+ */
+
+ ind->cur_node = 0;
+
+ /*
+ * Return the first node...
+ */
+
+ if (ind->num_nodes)
+ return (ind->nodes[0]);
+ else
+ return (NULL);
+}
+
+
+/*
+ * 'index_compare()' - Compare two nodes.
+ */
+
+static int /* O - Result of comparison */
+index_compare(mxml_index_t *ind, /* I - Index */
+ mxml_node_t *first, /* I - First node */
+ mxml_node_t *second) /* I - Second node */
+{
+ int diff; /* Difference */
+
+
+ /*
+ * Check the element name...
+ */
+
+ if ((diff = strcmp(first->value.element.name,
+ second->value.element.name)) != 0)
+ return (diff);
+
+ /*
+ * Check the attribute value...
+ */
+
+ if (ind->attr)
+ {
+ if ((diff = strcmp(mxmlElementGetAttr(first, ind->attr),
+ mxmlElementGetAttr(second, ind->attr))) != 0)
+ return (diff);
+ }
+
+ /*
+ * No difference, return 0...
+ */
+
+ return (0);
+}
+
+
+/*
+ * 'index_find()' - Compare a node with index values.
+ */
+
+static int /* O - Result of comparison */
+index_find(mxml_index_t *ind, /* I - Index */
+ const char *element, /* I - Element name or NULL */
+ const char *value, /* I - Attribute value or NULL */
+ mxml_node_t *node) /* I - Node */
+{
+ int diff; /* Difference */
+
+
+ /*
+ * Check the element name...
+ */
+
+ if (element)
+ {
+ if ((diff = strcmp(element, node->value.element.name)) != 0)
+ return (diff);
+ }
+
+ /*
+ * Check the attribute value...
+ */
+
+ if (value)
+ {
+ if ((diff = strcmp(value, mxmlElementGetAttr(node, ind->attr))) != 0)
+ return (diff);
+ }
+
+ /*
+ * No difference, return 0...
+ */
+
+ return (0);
+}
+
+
+/*
+ * 'index_sort()' - Sort the nodes in the index...
+ *
+ * This function implements the classic quicksort algorithm...
+ */
+
+static void
+index_sort(mxml_index_t *ind, /* I - Index to sort */
+ int left, /* I - Left node in partition */
+ int right) /* I - Right node in partition */
+{
+ mxml_node_t *pivot, /* Pivot node */
+ *temp; /* Swap node */
+ int templ, /* Temporary left node */
+ tempr; /* Temporary right node */
+
+
+ /*
+ * Loop until we have sorted all the way to the right...
+ */
+
+ do
+ {
+ /*
+ * Sort the pivot in the current partition...
+ */
+
+ pivot = ind->nodes[left];
+
+ for (templ = left, tempr = right; templ < tempr;)
+ {
+ /*
+ * Move left while left node <= pivot node...
+ */
+
+ while ((templ < right) &&
+ index_compare(ind, ind->nodes[templ], pivot) <= 0)
+ templ ++;
+
+ /*
+ * Move right while right node > pivot node...
+ */
+
+ while ((tempr > left) &&
+ index_compare(ind, ind->nodes[tempr], pivot) > 0)
+ tempr --;
+
+ /*
+ * Swap nodes if needed...
+ */
+
+ if (templ < tempr)
+ {
+ temp = ind->nodes[templ];
+ ind->nodes[templ] = ind->nodes[tempr];
+ ind->nodes[tempr] = temp;
+ }
+ }
+
+ /*
+ * When we get here, the right (tempr) node is the new position for the
+ * pivot node...
+ */
+
+ if (index_compare(ind, pivot, ind->nodes[tempr]) > 0)
+ {
+ ind->nodes[left] = ind->nodes[tempr];
+ ind->nodes[tempr] = pivot;
+ }
+
+ /*
+ * Recursively sort the left partition as needed...
+ */
+
+ if (left < (tempr - 1))
+ index_sort(ind, left, tempr - 1);
+ }
+ while (right > (left = tempr + 1));
+}
+
+
+/*
+ * End of "$Id: mxml-index.c 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-node.c b/tools/gator/daemon/mxml/mxml-node.c
new file mode 100644
index 000000000000..128cda1a4cf2
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-node.c
@@ -0,0 +1,788 @@
+/*
+ * "$Id: mxml-node.c 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Node support code for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "config.h"
+#include "mxml.h"
+
+
+/*
+ * Local functions...
+ */
+
+static mxml_node_t *mxml_new(mxml_node_t *parent, mxml_type_t type);
+
+
+/*
+ * 'mxmlAdd()' - Add a node to a tree.
+ *
+ * Adds the specified node to the parent. If the child argument is not
+ * NULL, puts the new node before or after the specified child depending
+ * on the value of the where argument. If the child argument is NULL,
+ * puts the new node at the beginning of the child list (MXML_ADD_BEFORE)
+ * or at the end of the child list (MXML_ADD_AFTER). The constant
+ * MXML_ADD_TO_PARENT can be used to specify a NULL child pointer.
+ */
+
+void
+mxmlAdd(mxml_node_t *parent, /* I - Parent node */
+ int where, /* I - Where to add, MXML_ADD_BEFORE or MXML_ADD_AFTER */
+ mxml_node_t *child, /* I - Child node for where or MXML_ADD_TO_PARENT */
+ mxml_node_t *node) /* I - Node to add */
+{
+#ifdef DEBUG
+ fprintf(stderr, "mxmlAdd(parent=%p, where=%d, child=%p, node=%p)\n", parent,
+ where, child, node);
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!parent || !node)
+ return;
+
+#if DEBUG > 1
+ fprintf(stderr, " BEFORE: node->parent=%p\n", node->parent);
+ if (parent)
+ {
+ fprintf(stderr, " BEFORE: parent->child=%p\n", parent->child);
+ fprintf(stderr, " BEFORE: parent->last_child=%p\n", parent->last_child);
+ fprintf(stderr, " BEFORE: parent->prev=%p\n", parent->prev);
+ fprintf(stderr, " BEFORE: parent->next=%p\n", parent->next);
+ }
+#endif /* DEBUG > 1 */
+
+ /*
+ * Remove the node from any existing parent...
+ */
+
+ if (node->parent)
+ mxmlRemove(node);
+
+ /*
+ * Reset pointers...
+ */
+
+ node->parent = parent;
+
+ switch (where)
+ {
+ case MXML_ADD_BEFORE :
+ if (!child || child == parent->child || child->parent != parent)
+ {
+ /*
+ * Insert as first node under parent...
+ */
+
+ node->next = parent->child;
+
+ if (parent->child)
+ parent->child->prev = node;
+ else
+ parent->last_child = node;
+
+ parent->child = node;
+ }
+ else
+ {
+ /*
+ * Insert node before this child...
+ */
+
+ node->next = child;
+ node->prev = child->prev;
+
+ if (child->prev)
+ child->prev->next = node;
+ else
+ parent->child = node;
+
+ child->prev = node;
+ }
+ break;
+
+ case MXML_ADD_AFTER :
+ if (!child || child == parent->last_child || child->parent != parent)
+ {
+ /*
+ * Insert as last node under parent...
+ */
+
+ node->parent = parent;
+ node->prev = parent->last_child;
+
+ if (parent->last_child)
+ parent->last_child->next = node;
+ else
+ parent->child = node;
+
+ parent->last_child = node;
+ }
+ else
+ {
+ /*
+ * Insert node after this child...
+ */
+
+ node->prev = child;
+ node->next = child->next;
+
+ if (child->next)
+ child->next->prev = node;
+ else
+ parent->last_child = node;
+
+ child->next = node;
+ }
+ break;
+ }
+
+#if DEBUG > 1
+ fprintf(stderr, " AFTER: node->parent=%p\n", node->parent);
+ if (parent)
+ {
+ fprintf(stderr, " AFTER: parent->child=%p\n", parent->child);
+ fprintf(stderr, " AFTER: parent->last_child=%p\n", parent->last_child);
+ fprintf(stderr, " AFTER: parent->prev=%p\n", parent->prev);
+ fprintf(stderr, " AFTER: parent->next=%p\n", parent->next);
+ }
+#endif /* DEBUG > 1 */
+}
+
+
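+/*
+ * Usage sketch: append a node as the last child of a parent, then move
+ * it to be the first child:
+ *
+ *   mxmlAdd(parent, MXML_ADD_AFTER, MXML_ADD_TO_PARENT, node);
+ *   mxmlAdd(parent, MXML_ADD_BEFORE, MXML_ADD_TO_PARENT, node);
+ */
+
+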
+/*
+ * 'mxmlDelete()' - Delete a node and all of its children.
+ *
+ * If the specified node has a parent, this function first removes the
+ * node from its parent using the mxmlRemove() function.
+ */
+
+void
+mxmlDelete(mxml_node_t *node) /* I - Node to delete */
+{
+ int i; /* Looping var */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlDelete(node=%p)\n", node);
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return;
+
+ /*
+ * Remove the node from its parent, if any...
+ */
+
+ mxmlRemove(node);
+
+ /*
+ * Delete children...
+ */
+
+ while (node->child)
+ mxmlDelete(node->child);
+
+ /*
+ * Now delete any node data...
+ */
+
+ switch (node->type)
+ {
+ case MXML_ELEMENT :
+ if (node->value.element.name)
+ free(node->value.element.name);
+
+ if (node->value.element.num_attrs)
+ {
+ for (i = 0; i < node->value.element.num_attrs; i ++)
+ {
+ if (node->value.element.attrs[i].name)
+ free(node->value.element.attrs[i].name);
+ if (node->value.element.attrs[i].value)
+ free(node->value.element.attrs[i].value);
+ }
+
+ free(node->value.element.attrs);
+ }
+ break;
+ case MXML_INTEGER :
+ /* Nothing to do */
+ break;
+ case MXML_OPAQUE :
+ if (node->value.opaque)
+ free(node->value.opaque);
+ break;
+ case MXML_REAL :
+ /* Nothing to do */
+ break;
+ case MXML_TEXT :
+ if (node->value.text.string)
+ free(node->value.text.string);
+ break;
+ case MXML_CUSTOM :
+ if (node->value.custom.data &&
+ node->value.custom.destroy)
+ (*(node->value.custom.destroy))(node->value.custom.data);
+ break;
+ default :
+ break;
+ }
+
+ /*
+ * Free this node...
+ */
+
+ free(node);
+}
+
+
+/*
+ * 'mxmlGetRefCount()' - Get the current reference (use) count for a node.
+ *
+ * The initial reference count of new nodes is 1. Use the @link mxmlRetain@
+ * and @link mxmlRelease@ functions to increment and decrement a node's
+ * reference count.
+ *
+ * @since Mini-XML 2.7@.
+ */
+
+int /* O - Reference count */
+mxmlGetRefCount(mxml_node_t *node) /* I - Node */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (0);
+
+ /*
+ * Return the reference count...
+ */
+
+ return (node->ref_count);
+}
+
+
+/*
+ * 'mxmlNewCDATA()' - Create a new CDATA node.
+ *
+ * The new CDATA node is added to the end of the specified parent's child
+ * list. The constant MXML_NO_PARENT can be used to specify that the new
+ * CDATA node has no parent. The data string must be nul-terminated and
+ * is copied into the new node. CDATA nodes use the MXML_ELEMENT type.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+mxml_node_t * /* O - New node */
+mxmlNewCDATA(mxml_node_t *parent, /* I - Parent node or MXML_NO_PARENT */
+ const char *data) /* I - Data string */
+{
+ mxml_node_t *node; /* New node */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlNewCDATA(parent=%p, data=\"%s\")\n",
+ parent, data ? data : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!data)
+ return (NULL);
+
+ /*
+ * Create the node and set the name value...
+ */
+
+ if ((node = mxml_new(parent, MXML_ELEMENT)) != NULL)
+ node->value.element.name = _mxml_strdupf("![CDATA[%s]]", data);
+
+ return (node);
+}
+
+
+/*
+ * 'mxmlNewCustom()' - Create a new custom data node.
+ *
+ * The new custom node is added to the end of the specified parent's child
+ * list. The constant MXML_NO_PARENT can be used to specify that the new
+ * element node has no parent. NULL can be passed when the data in the
+ * node is not dynamically allocated or is separately managed.
+ *
+ * @since Mini-XML 2.1@
+ */
+
+mxml_node_t * /* O - New node */
+mxmlNewCustom(
+ mxml_node_t *parent, /* I - Parent node or MXML_NO_PARENT */
+ void *data, /* I - Pointer to data */
+ mxml_custom_destroy_cb_t destroy) /* I - Function to destroy data */
+{
+ mxml_node_t *node; /* New node */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlNewCustom(parent=%p, data=%p, destroy=%p)\n", parent,
+ data, destroy);
+#endif /* DEBUG */
+
+ /*
+ * Create the node and set the value...
+ */
+
+ if ((node = mxml_new(parent, MXML_CUSTOM)) != NULL)
+ {
+ node->value.custom.data = data;
+ node->value.custom.destroy = destroy;
+ }
+
+ return (node);
+}
+
+
+/*
+ * 'mxmlNewElement()' - Create a new element node.
+ *
+ * The new element node is added to the end of the specified parent's child
+ * list. The constant MXML_NO_PARENT can be used to specify that the new
+ * element node has no parent.
+ */
+
+mxml_node_t * /* O - New node */
+mxmlNewElement(mxml_node_t *parent, /* I - Parent node or MXML_NO_PARENT */
+ const char *name) /* I - Name of element */
+{
+ mxml_node_t *node; /* New node */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlNewElement(parent=%p, name=\"%s\")\n", parent,
+ name ? name : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!name)
+ return (NULL);
+
+ /*
+ * Create the node and set the element name...
+ */
+
+ if ((node = mxml_new(parent, MXML_ELEMENT)) != NULL)
+ node->value.element.name = strdup(name);
+
+ return (node);
+}
+
+
+/*
+ * 'mxmlNewInteger()' - Create a new integer node.
+ *
+ * The new integer node is added to the end of the specified parent's child
+ * list. The constant MXML_NO_PARENT can be used to specify that the new
+ * integer node has no parent.
+ */
+
+mxml_node_t * /* O - New node */
+mxmlNewInteger(mxml_node_t *parent, /* I - Parent node or MXML_NO_PARENT */
+ int integer) /* I - Integer value */
+{
+ mxml_node_t *node; /* New node */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlNewInteger(parent=%p, integer=%d)\n", parent, integer);
+#endif /* DEBUG */
+
+ /*
+ * Create the node and set the element name...
+ */
+
+ if ((node = mxml_new(parent, MXML_INTEGER)) != NULL)
+ node->value.integer = integer;
+
+ return (node);
+}
+
+
+/*
+ * 'mxmlNewOpaque()' - Create a new opaque string.
+ *
+ * The new opaque node is added to the end of the specified parent's child
+ * list. The constant MXML_NO_PARENT can be used to specify that the new
+ * opaque node has no parent. The opaque string must be nul-terminated and
+ * is copied into the new node.
+ */
+
+mxml_node_t * /* O - New node */
+mxmlNewOpaque(mxml_node_t *parent, /* I - Parent node or MXML_NO_PARENT */
+ const char *opaque) /* I - Opaque string */
+{
+ mxml_node_t *node; /* New node */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlNewOpaque(parent=%p, opaque=\"%s\")\n", parent,
+ opaque ? opaque : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!opaque)
+ return (NULL);
+
+ /*
+ * Create the node and set the element name...
+ */
+
+ if ((node = mxml_new(parent, MXML_OPAQUE)) != NULL)
+ node->value.opaque = strdup(opaque);
+
+ return (node);
+}
+
+
+/*
+ * 'mxmlNewReal()' - Create a new real number node.
+ *
+ * The new real number node is added to the end of the specified parent's
+ * child list. The constant MXML_NO_PARENT can be used to specify that
+ * the new real number node has no parent.
+ */
+
+mxml_node_t * /* O - New node */
+mxmlNewReal(mxml_node_t *parent, /* I - Parent node or MXML_NO_PARENT */
+ double real) /* I - Real number value */
+{
+ mxml_node_t *node; /* New node */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlNewReal(parent=%p, real=%g)\n", parent, real);
+#endif /* DEBUG */
+
+ /*
+ * Create the node and set the element name...
+ */
+
+ if ((node = mxml_new(parent, MXML_REAL)) != NULL)
+ node->value.real = real;
+
+ return (node);
+}
+
+
+/*
+ * 'mxmlNewText()' - Create a new text fragment node.
+ *
+ * The new text node is added to the end of the specified parent's child
+ * list. The constant MXML_NO_PARENT can be used to specify that the new
+ * text node has no parent. The whitespace parameter is used to specify
+ * whether leading whitespace is present before the node. The text
+ * string must be nul-terminated and is copied into the new node.
+ */
+
+mxml_node_t * /* O - New node */
+mxmlNewText(mxml_node_t *parent, /* I - Parent node or MXML_NO_PARENT */
+ int whitespace, /* I - 1 = leading whitespace, 0 = no whitespace */
+ const char *string) /* I - String */
+{
+ mxml_node_t *node; /* New node */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlNewText(parent=%p, whitespace=%d, string=\"%s\")\n",
+ parent, whitespace, string ? string : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!string)
+ return (NULL);
+
+ /*
+ * Create the node and set the text value...
+ */
+
+ if ((node = mxml_new(parent, MXML_TEXT)) != NULL)
+ {
+ node->value.text.whitespace = whitespace;
+ node->value.text.string = strdup(string);
+ }
+
+ return (node);
+}
+
+
+/*
+ * 'mxmlNewTextf()' - Create a new formatted text fragment node.
+ *
+ * The new text node is added to the end of the specified parent's child
+ * list. The constant MXML_NO_PARENT can be used to specify that the new
+ * text node has no parent. The whitespace parameter is used to specify
+ * whether leading whitespace is present before the node. The format
+ * string must be nul-terminated and is formatted into the new node.
+ */
+
+mxml_node_t * /* O - New node */
+mxmlNewTextf(mxml_node_t *parent, /* I - Parent node or MXML_NO_PARENT */
+ int whitespace, /* I - 1 = leading whitespace, 0 = no whitespace */
+             const char *format, /* I - Printf-style format string */
+ ...) /* I - Additional args as needed */
+{
+ mxml_node_t *node; /* New node */
+ va_list ap; /* Pointer to arguments */
+
+
+#ifdef DEBUG
+ fprintf(stderr, "mxmlNewTextf(parent=%p, whitespace=%d, format=\"%s\", ...)\n",
+ parent, whitespace, format ? format : "(null)");
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!format)
+ return (NULL);
+
+ /*
+ * Create the node and set the text value...
+ */
+
+ if ((node = mxml_new(parent, MXML_TEXT)) != NULL)
+ {
+ va_start(ap, format);
+
+ node->value.text.whitespace = whitespace;
+ node->value.text.string = _mxml_vstrdupf(format, ap);
+
+ va_end(ap);
+ }
+
+ return (node);
+}
+
+
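+/*
+ * Usage sketch (the passed/total variables are hypothetical):
+ *
+ *   mxmlNewTextf(parent, 1, "%d of %d tests", passed, total);
+ */
+
+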
+/*
+ * 'mxmlRemove()' - Remove a node from its parent.
+ *
+ * Does not free memory used by the node - use mxmlDelete() for that.
+ * This function does nothing if the node has no parent.
+ */
+
+void
+mxmlRemove(mxml_node_t *node) /* I - Node to remove */
+{
+#ifdef DEBUG
+ fprintf(stderr, "mxmlRemove(node=%p)\n", node);
+#endif /* DEBUG */
+
+ /*
+ * Range check input...
+ */
+
+ if (!node || !node->parent)
+ return;
+
+ /*
+ * Remove from parent...
+ */
+
+#if DEBUG > 1
+ fprintf(stderr, " BEFORE: node->parent=%p\n", node->parent);
+ if (node->parent)
+ {
+ fprintf(stderr, " BEFORE: node->parent->child=%p\n", node->parent->child);
+ fprintf(stderr, " BEFORE: node->parent->last_child=%p\n", node->parent->last_child);
+ }
+ fprintf(stderr, " BEFORE: node->child=%p\n", node->child);
+ fprintf(stderr, " BEFORE: node->last_child=%p\n", node->last_child);
+ fprintf(stderr, " BEFORE: node->prev=%p\n", node->prev);
+ fprintf(stderr, " BEFORE: node->next=%p\n", node->next);
+#endif /* DEBUG > 1 */
+
+ if (node->prev)
+ node->prev->next = node->next;
+ else
+ node->parent->child = node->next;
+
+ if (node->next)
+ node->next->prev = node->prev;
+ else
+ node->parent->last_child = node->prev;
+
+ node->parent = NULL;
+ node->prev = NULL;
+ node->next = NULL;
+
+#if DEBUG > 1
+ fprintf(stderr, " AFTER: node->parent=%p\n", node->parent);
+ if (node->parent)
+ {
+ fprintf(stderr, " AFTER: node->parent->child=%p\n", node->parent->child);
+ fprintf(stderr, " AFTER: node->parent->last_child=%p\n", node->parent->last_child);
+ }
+ fprintf(stderr, " AFTER: node->child=%p\n", node->child);
+ fprintf(stderr, " AFTER: node->last_child=%p\n", node->last_child);
+ fprintf(stderr, " AFTER: node->prev=%p\n", node->prev);
+ fprintf(stderr, " AFTER: node->next=%p\n", node->next);
+#endif /* DEBUG > 1 */
+}
+
+
+/*
+ * 'mxmlNewXML()' - Create a new XML document tree.
+ *
+ * The "version" argument specifies the version number to put in the
+ * ?xml element node. If NULL, version 1.0 is assumed.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+mxml_node_t * /* O - New ?xml node */
+mxmlNewXML(const char *version) /* I - Version number to use */
+{
+ char element[1024]; /* Element text */
+
+
+ snprintf(element, sizeof(element), "?xml version=\"%s\" encoding=\"utf-8\"?",
+ version ? version : "1.0");
+
+ return (mxmlNewElement(NULL, element));
+}
+
+
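+/*
+ * Usage sketch (the "data" element and text are hypothetical):
+ *
+ *   mxml_node_t *xml = mxmlNewXML("1.0");
+ *   mxml_node_t *data = mxmlNewElement(xml, "data");
+ *
+ *   mxmlNewText(data, 0, "hello");
+ */
+
+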
+/*
+ * 'mxmlRelease()' - Release a node.
+ *
+ * When the reference count reaches zero, the node (and any children)
+ * is deleted via mxmlDelete().
+ *
+ * @since Mini-XML 2.3@
+ */
+
+int /* O - New reference count */
+mxmlRelease(mxml_node_t *node) /* I - Node */
+{
+ if (node)
+ {
+ if ((-- node->ref_count) <= 0)
+ {
+ mxmlDelete(node);
+ return (0);
+ }
+ else
+ return (node->ref_count);
+ }
+ else
+ return (-1);
+}
+
+
+/*
+ * 'mxmlRetain()' - Retain a node.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+int /* O - New reference count */
+mxmlRetain(mxml_node_t *node) /* I - Node */
+{
+ if (node)
+ return (++ node->ref_count);
+ else
+ return (-1);
+}
+
+
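+/*
+ * Usage sketch: new nodes start with a reference count of 1, so:
+ *
+ *   mxmlRetain(node);     (count becomes 2)
+ *   mxmlRelease(node);    (count back to 1)
+ *   mxmlRelease(node);    (count reaches 0 and the node is deleted)
+ */
+
+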
+/*
+ * 'mxml_new()' - Create a new node.
+ */
+
+static mxml_node_t * /* O - New node */
+mxml_new(mxml_node_t *parent, /* I - Parent node */
+ mxml_type_t type) /* I - Node type */
+{
+ mxml_node_t *node; /* New node */
+
+
+#if DEBUG > 1
+ fprintf(stderr, "mxml_new(parent=%p, type=%d)\n", parent, type);
+#endif /* DEBUG > 1 */
+
+ /*
+ * Allocate memory for the node...
+ */
+
+ if ((node = calloc(1, sizeof(mxml_node_t))) == NULL)
+ {
+#if DEBUG > 1
+ fputs(" returning NULL\n", stderr);
+#endif /* DEBUG > 1 */
+
+ return (NULL);
+ }
+
+#if DEBUG > 1
+ fprintf(stderr, " returning %p\n", node);
+#endif /* DEBUG > 1 */
+
+ /*
+ * Set the node type...
+ */
+
+ node->type = type;
+ node->ref_count = 1;
+
+ /*
+ * Add to the parent if present...
+ */
+
+ if (parent)
+ mxmlAdd(parent, MXML_ADD_AFTER, MXML_ADD_TO_PARENT, node);
+
+ /*
+ * Return the new node...
+ */
+
+ return (node);
+}
+
+
+/*
+ * End of "$Id: mxml-node.c 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-private.c b/tools/gator/daemon/mxml/mxml-private.c
new file mode 100644
index 000000000000..bec4bbfbf378
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-private.c
@@ -0,0 +1,323 @@
+/*
+ * "$Id: mxml-private.c 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Private functions for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "mxml-private.h"
+
+
+/*
+ * Some crazy people think that unloading a shared object is a good or safe
+ * thing to do. Unfortunately, most objects are simply *not* safe to unload
+ * and bad things *will* happen.
+ *
+ * The following mess of conditional code allows us to provide a destructor
+ * function in Mini-XML for our thread-global storage so that it can possibly
+ * be unloaded safely, although since there is no standard way to do so I
+ * can't even provide any guarantees that you can do it safely on all platforms.
+ *
+ * This code currently supports AIX, HP-UX, Linux, Mac OS X, Solaris, and
+ * Windows. It might work on the BSDs and IRIX, but I haven't tested that.
+ */
+
+#if defined(__sun) || defined(_AIX)
+# pragma fini(_mxml_fini)
+# define _MXML_FINI _mxml_fini
+#elif defined(__hpux)
+# pragma FINI _mxml_fini
+# define _MXML_FINI _mxml_fini
+#elif defined(__GNUC__) /* Linux and Mac OS X */
+# define _MXML_FINI __attribute((destructor)) _mxml_fini
+#else
+# define _MXML_FINI _fini
+#endif /* __sun */
+
+
+/*
+ * 'mxml_error()' - Display an error message.
+ */
+
+void
+mxml_error(const char *format, /* I - Printf-style format string */
+ ...) /* I - Additional arguments as needed */
+{
+ va_list ap; /* Pointer to arguments */
+ char s[1024]; /* Message string */
+ _mxml_global_t *global = _mxml_global();
+ /* Global data */
+
+
+ /*
+ * Range check input...
+ */
+
+ if (!format)
+ return;
+
+ /*
+ * Format the error message string...
+ */
+
+ va_start(ap, format);
+
+ vsnprintf(s, sizeof(s), format, ap);
+
+ va_end(ap);
+
+ /*
+ * And then display the error message...
+ */
+
+ if (global->error_cb)
+ (*global->error_cb)(s);
+ else
+ fprintf(stderr, "mxml: %s\n", s);
+}
+
+
+/*
+ * 'mxml_ignore_cb()' - Default callback for ignored values.
+ */
+
+mxml_type_t /* O - Node type */
+mxml_ignore_cb(mxml_node_t *node) /* I - Current node */
+{
+ (void)node;
+
+ return (MXML_IGNORE);
+}
+
+
+/*
+ * 'mxml_integer_cb()' - Default callback for integer values.
+ */
+
+mxml_type_t /* O - Node type */
+mxml_integer_cb(mxml_node_t *node) /* I - Current node */
+{
+ (void)node;
+
+ return (MXML_INTEGER);
+}
+
+
+/*
+ * 'mxml_opaque_cb()' - Default callback for opaque values.
+ */
+
+mxml_type_t /* O - Node type */
+mxml_opaque_cb(mxml_node_t *node) /* I - Current node */
+{
+ (void)node;
+
+ return (MXML_OPAQUE);
+}
+
+
+/*
+ * 'mxml_real_cb()' - Default callback for real number values.
+ */
+
+mxml_type_t /* O - Node type */
+mxml_real_cb(mxml_node_t *node) /* I - Current node */
+{
+ (void)node;
+
+ return (MXML_REAL);
+}
+
+
+#ifdef HAVE_PTHREAD_H /**** POSIX threading ****/
+# include <pthread.h>
+
+static pthread_key_t _mxml_key = -1; /* Thread local storage key */
+static pthread_once_t _mxml_key_once = PTHREAD_ONCE_INIT;
+ /* One-time initialization object */
+static void _mxml_init(void);
+static void _mxml_destructor(void *g);
+
+
+/*
+ * '_mxml_destructor()' - Free memory used for globals...
+ */
+
+static void
+_mxml_destructor(void *g) /* I - Global data */
+{
+ free(g);
+}
+
+
+/*
+ * '_mxml_fini()' - Clean up when unloaded.
+ */
+
+static void
+_MXML_FINI(void)
+{
+ _mxml_global_t *global; /* Global data */
+
+
+ if (_mxml_key != -1)
+ {
+ if ((global = (_mxml_global_t *)pthread_getspecific(_mxml_key)) != NULL)
+ _mxml_destructor(global);
+
+ pthread_key_delete(_mxml_key);
+ _mxml_key = -1;
+ }
+}
+
+
+/*
+ * '_mxml_global()' - Get global data.
+ */
+
+_mxml_global_t * /* O - Global data */
+_mxml_global(void)
+{
+ _mxml_global_t *global; /* Global data */
+
+
+ pthread_once(&_mxml_key_once, _mxml_init);
+
+ if ((global = (_mxml_global_t *)pthread_getspecific(_mxml_key)) == NULL)
+ {
+ global = (_mxml_global_t *)calloc(1, sizeof(_mxml_global_t));
+ pthread_setspecific(_mxml_key, global);
+
+ global->num_entity_cbs = 1;
+ global->entity_cbs[0] = _mxml_entity_cb;
+ global->wrap = 72;
+ }
+
+ return (global);
+}
+
+
+/*
+ * '_mxml_init()' - Initialize global data...
+ */
+
+static void
+_mxml_init(void)
+{
+ pthread_key_create(&_mxml_key, _mxml_destructor);
+}
+
+
+#elif defined(WIN32) && defined(MXML1_EXPORTS) /**** WIN32 threading ****/
+# include <windows.h>
+
+static DWORD _mxml_tls_index; /* Index for global storage */
+
+
+/*
+ * 'DllMain()' - Main entry for library.
+ */
+
+BOOL WINAPI /* O - Success/failure */
+DllMain(HINSTANCE hinst, /* I - DLL module handle */
+ DWORD reason, /* I - Reason */
+ LPVOID reserved) /* I - Unused */
+{
+ _mxml_global_t *global; /* Global data */
+
+
+ (void)hinst;
+ (void)reserved;
+
+ switch (reason)
+ {
+ case DLL_PROCESS_ATTACH : /* Called on library initialization */
+ if ((_mxml_tls_index = TlsAlloc()) == TLS_OUT_OF_INDEXES)
+ return (FALSE);
+ break;
+
+ case DLL_THREAD_DETACH : /* Called when a thread terminates */
+ if ((global = (_mxml_global_t *)TlsGetValue(_mxml_tls_index)) != NULL)
+ free(global);
+ break;
+
+ case DLL_PROCESS_DETACH : /* Called when library is unloaded */
+ if ((global = (_mxml_global_t *)TlsGetValue(_mxml_tls_index)) != NULL)
+ free(global);
+
+ TlsFree(_mxml_tls_index);
+ break;
+
+ default:
+ break;
+ }
+
+ return (TRUE);
+}
+
+
+/*
+ * '_mxml_global()' - Get global data.
+ */
+
+_mxml_global_t * /* O - Global data */
+_mxml_global(void)
+{
+ _mxml_global_t *global; /* Global data */
+
+
+ if ((global = (_mxml_global_t *)TlsGetValue(_mxml_tls_index)) == NULL)
+ {
+ global = (_mxml_global_t *)calloc(1, sizeof(_mxml_global_t));
+
+ global->num_entity_cbs = 1;
+ global->entity_cbs[0] = _mxml_entity_cb;
+ global->wrap = 72;
+
+ TlsSetValue(_mxml_tls_index, (LPVOID)global);
+ }
+
+ return (global);
+}
+
+
+#else /**** No threading ****/
+/*
+ * '_mxml_global()' - Get global data.
+ */
+
+_mxml_global_t * /* O - Global data */
+_mxml_global(void)
+{
+ static _mxml_global_t global = /* Global data */
+ {
+ NULL, /* error_cb */
+ 1, /* num_entity_cbs */
+ { _mxml_entity_cb }, /* entity_cbs */
+ 72, /* wrap */
+ NULL, /* custom_load_cb */
+ NULL /* custom_save_cb */
+ };
+
+
+ return (&global);
+}
+#endif /* HAVE_PTHREAD_H */
+
+
+/*
+ * End of "$Id: mxml-private.c 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-private.h b/tools/gator/daemon/mxml/mxml-private.h
new file mode 100644
index 000000000000..c5e4e6b6f27a
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-private.h
@@ -0,0 +1,50 @@
+/*
+ * "$Id: mxml-private.h 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Private definitions for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "config.h"
+#include "mxml.h"
+
+
+/*
+ * Global, per-thread data...
+ */
+
+typedef struct _mxml_global_s
+{
+ void (*error_cb)(const char *);
+ int num_entity_cbs;
+ int (*entity_cbs[100])(const char *name);
+ int wrap;
+ mxml_custom_load_cb_t custom_load_cb;
+ mxml_custom_save_cb_t custom_save_cb;
+} _mxml_global_t;
+
+
+/*
+ * Functions...
+ */
+
+extern _mxml_global_t *_mxml_global(void);
+extern int _mxml_entity_cb(const char *name);
+
+
+/*
+ * End of "$Id: mxml-private.h 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-search.c b/tools/gator/daemon/mxml/mxml-search.c
new file mode 100644
index 000000000000..313a52f0ce2f
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-search.c
@@ -0,0 +1,280 @@
+/*
+ * "$Id: mxml-search.c 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Search/navigation functions for Mini-XML, a small XML-like file
+ * parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "config.h"
+#include "mxml.h"
+
+
+/*
+ * 'mxmlFindElement()' - Find the named element.
+ *
+ * The search is constrained by the name, attribute name, and value; any
+ * NULL names or values are treated as wildcards, so different kinds of
+ * searches can be implemented by looking for all elements of a given name
+ * or all elements with a specific attribute. The descend argument determines
+ * whether the search descends into child nodes; normally you will use
+ * MXML_DESCEND_FIRST for the initial search and MXML_NO_DESCEND to find
+ * additional direct descendants of the node. The top node argument
+ * constrains the search to a particular node's children.
+ */
+
+mxml_node_t * /* O - Element node or NULL */
+mxmlFindElement(mxml_node_t *node, /* I - Current node */
+ mxml_node_t *top, /* I - Top node */
+ const char *name, /* I - Element name or NULL for any */
+ const char *attr, /* I - Attribute name, or NULL for none */
+ const char *value, /* I - Attribute value, or NULL for any */
+ int descend) /* I - Descend into tree - MXML_DESCEND, MXML_NO_DESCEND, or MXML_DESCEND_FIRST */
+{
+ const char *temp; /* Current attribute value */
+
+
+ /*
+ * Range check input...
+ */
+
+ if (!node || !top || (!attr && value))
+ return (NULL);
+
+ /*
+ * Start with the next node...
+ */
+
+ node = mxmlWalkNext(node, top, descend);
+
+ /*
+ * Loop until we find a matching element...
+ */
+
+ while (node != NULL)
+ {
+ /*
+ * See if this node matches...
+ */
+
+ if (node->type == MXML_ELEMENT &&
+ node->value.element.name &&
+ (!name || !strcmp(node->value.element.name, name)))
+ {
+ /*
+ * See if we need to check for an attribute...
+ */
+
+ if (!attr)
+ return (node); /* No attribute search, return it... */
+
+ /*
+ * Check for the attribute...
+ */
+
+ if ((temp = mxmlElementGetAttr(node, attr)) != NULL)
+ {
+ /*
+ * OK, we have the attribute, does it match?
+ */
+
+ if (!value || !strcmp(value, temp))
+ return (node); /* Yes, return it... */
+ }
+ }
+
+ /*
+ * No match, move on to the next node...
+ */
+
+ if (descend == MXML_DESCEND)
+ node = mxmlWalkNext(node, top, MXML_DESCEND);
+ else
+ node = node->next;
+ }
+
+ return (NULL);
+}
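+
+/*
+ * A typical search sketch following the pattern described above
+ * (the "tree" variable plus element and attribute names are
+ * illustrative):
+ *
+ *   mxml_node_t *n;
+ *
+ *   for (n = mxmlFindElement(tree, tree, "node", "type", "integer",
+ *                            MXML_DESCEND_FIRST);
+ *        n != NULL;
+ *        n = mxmlFindElement(n, tree, "node", "type", "integer",
+ *                            MXML_NO_DESCEND))
+ *   {
+ *     ... process n ...
+ *   }
+ */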
+
+
+/*
+ * 'mxmlFindPath()' - Find a node with the given path.
+ *
+ * The "path" is a slash-separated list of element names. The name "*" is
+ * considered a wildcard for one or more levels of elements. For example,
+ * "foo/one/two", "bar/two/one", "*\/one", and so forth.
+ *
+ * The first child node of the found node is returned if the given node has
+ * children and the first child is a value node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+mxml_node_t * /* O - Found node or NULL */
+mxmlFindPath(mxml_node_t *top, /* I - Top node */
+ const char *path) /* I - Path to element */
+{
+ mxml_node_t *node; /* Current node */
+ char element[256]; /* Current element name */
+ const char *pathsep; /* Separator in path */
+ int descend; /* mxmlFindElement option */
+
+
+ /*
+ * Range check input...
+ */
+
+ if (!top || !path || !*path)
+ return (NULL);
+
+ /*
+ * Search each element in the path...
+ */
+
+ node = top;
+ while (*path)
+ {
+ /*
+ * Handle wildcards...
+ */
+
+ if (!strncmp(path, "*/", 2))
+ {
+ path += 2;
+ descend = MXML_DESCEND;
+ }
+ else
+ descend = MXML_DESCEND_FIRST;
+
+ /*
+ * Get the next element in the path...
+ */
+
+ if ((pathsep = strchr(path, '/')) == NULL)
+ pathsep = path + strlen(path);
+
+ if (pathsep == path || (pathsep - path) >= sizeof(element))
+ return (NULL);
+
+ memcpy(element, path, pathsep - path);
+ element[pathsep - path] = '\0';
+
+ if (*pathsep)
+ path = pathsep + 1;
+ else
+ path = pathsep;
+
+ /*
+ * Search for the element...
+ */
+
+ if ((node = mxmlFindElement(node, node, element, NULL, NULL,
+ descend)) == NULL)
+ return (NULL);
+ }
+
+ /*
+ * If we get this far, return the node or its first child...
+ */
+
+ if (node->child && node->child->type != MXML_ELEMENT)
+ return (node->child);
+ else
+ return (node);
+}
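+
+/*
+ * A lookup sketch (the path and tree are illustrative): return the
+ * value node under <foo><one><two>, if present.
+ *
+ *   mxml_node_t *value = mxmlFindPath(tree, "foo/one/two");
+ */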
+
+
+/*
+ * 'mxmlWalkNext()' - Walk to the next logical node in the tree.
+ *
+ * The descend argument controls whether the first child is considered
+ * to be the next node. The top node argument constrains the walk to
+ * the node's children.
+ */
+
+mxml_node_t * /* O - Next node or NULL */
+mxmlWalkNext(mxml_node_t *node, /* I - Current node */
+ mxml_node_t *top, /* I - Top node */
+ int descend) /* I - Descend into tree - MXML_DESCEND, MXML_NO_DESCEND, or MXML_DESCEND_FIRST */
+{
+ if (!node)
+ return (NULL);
+ else if (node->child && descend)
+ return (node->child);
+ else if (node == top)
+ return (NULL);
+ else if (node->next)
+ return (node->next);
+ else if (node->parent && node->parent != top)
+ {
+ node = node->parent;
+
+ while (!node->next)
+ if (node->parent == top || !node->parent)
+ return (NULL);
+ else
+ node = node->parent;
+
+ return (node->next);
+ }
+ else
+ return (NULL);
+}
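+
+/*
+ * A traversal sketch (illustrative): visit every node below "tree" in
+ * document order.
+ *
+ *   mxml_node_t *n;
+ *
+ *   for (n = mxmlWalkNext(tree, tree, MXML_DESCEND);
+ *        n != NULL;
+ *        n = mxmlWalkNext(n, tree, MXML_DESCEND))
+ *     ... visit n ...
+ */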
+
+
+/*
+ * 'mxmlWalkPrev()' - Walk to the previous logical node in the tree.
+ *
+ * The descend argument controls whether the previous node's last child
+ * is considered to be the previous node. The top node argument constrains
+ * the walk to the node's children.
+ */
+
+mxml_node_t * /* O - Previous node or NULL */
+mxmlWalkPrev(mxml_node_t *node, /* I - Current node */
+ mxml_node_t *top, /* I - Top node */
+ int descend) /* I - Descend into tree - MXML_DESCEND, MXML_NO_DESCEND, or MXML_DESCEND_FIRST */
+{
+ if (!node || node == top)
+ return (NULL);
+ else if (node->prev)
+ {
+ if (node->prev->last_child && descend)
+ {
+ /*
+ * Find the last child under the previous node...
+ */
+
+ node = node->prev->last_child;
+
+ while (node->last_child)
+ node = node->last_child;
+
+ return (node);
+ }
+ else
+ return (node->prev);
+ }
+ else if (node->parent != top)
+ return (node->parent);
+ else
+ return (NULL);
+}
+
+
+/*
+ * End of "$Id: mxml-search.c 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-set.c b/tools/gator/daemon/mxml/mxml-set.c
new file mode 100644
index 000000000000..16d4bf1050dd
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-set.c
@@ -0,0 +1,337 @@
+/*
+ * "$Id: mxml-set.c 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Node set functions for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "config.h"
+#include "mxml.h"
+
+
+/*
+ * 'mxmlSetCDATA()' - Set the element name of a CDATA node.
+ *
+ * The node is not changed if it (or its first child) is not a CDATA element node.
+ *
+ * @since Mini-XML 2.3@
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetCDATA(mxml_node_t *node, /* I - Node to set */
+ const char *data) /* I - New data string */
+{
+ /*
+ * Range check input...
+ */
+
+ if (node && node->type == MXML_ELEMENT &&
+ strncmp(node->value.element.name, "![CDATA[", 8) &&
+ node->child && node->child->type == MXML_ELEMENT &&
+ !strncmp(node->child->value.element.name, "![CDATA[", 8))
+ node = node->child;
+
+ if (!node || node->type != MXML_ELEMENT || !data ||
+ strncmp(node->value.element.name, "![CDATA[", 8))
+ return (-1);
+
+ /*
+ * Free any old element value and set the new value...
+ */
+
+ if (node->value.element.name)
+ free(node->value.element.name);
+
+ node->value.element.name = _mxml_strdupf("![CDATA[%s]]", data);
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSetCustom()' - Set the data and destructor of a custom data node.
+ *
+ * The node is not changed if it (or its first child) is not a custom node.
+ *
+ * @since Mini-XML 2.1@
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetCustom(
+ mxml_node_t *node, /* I - Node to set */
+ void *data, /* I - New data pointer */
+ mxml_custom_destroy_cb_t destroy) /* I - New destructor function */
+{
+ /*
+ * Range check input...
+ */
+
+ if (node && node->type == MXML_ELEMENT &&
+ node->child && node->child->type == MXML_CUSTOM)
+ node = node->child;
+
+ if (!node || node->type != MXML_CUSTOM)
+ return (-1);
+
+ /*
+ * Free any old element value and set the new value...
+ */
+
+ if (node->value.custom.data && node->value.custom.destroy)
+ (*(node->value.custom.destroy))(node->value.custom.data);
+
+ node->value.custom.data = data;
+ node->value.custom.destroy = destroy;
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSetElement()' - Set the name of an element node.
+ *
+ * The node is not changed if it is not an element node.
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetElement(mxml_node_t *node, /* I - Node to set */
+ const char *name) /* I - New name string */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node || node->type != MXML_ELEMENT || !name)
+ return (-1);
+
+ /*
+ * Free any old element value and set the new value...
+ */
+
+ if (node->value.element.name)
+ free(node->value.element.name);
+
+ node->value.element.name = strdup(name);
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSetInteger()' - Set the value of an integer node.
+ *
+ * The node is not changed if it (or its first child) is not an integer node.
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetInteger(mxml_node_t *node, /* I - Node to set */
+ int integer) /* I - Integer value */
+{
+ /*
+ * Range check input...
+ */
+
+ if (node && node->type == MXML_ELEMENT &&
+ node->child && node->child->type == MXML_INTEGER)
+ node = node->child;
+
+ if (!node || node->type != MXML_INTEGER)
+ return (-1);
+
+ /*
+ * Set the new value and return...
+ */
+
+ node->value.integer = integer;
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSetOpaque()' - Set the value of an opaque node.
+ *
+ * The node is not changed if it (or its first child) is not an opaque node.
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetOpaque(mxml_node_t *node, /* I - Node to set */
+ const char *opaque) /* I - Opaque string */
+{
+ /*
+ * Range check input...
+ */
+
+ if (node && node->type == MXML_ELEMENT &&
+ node->child && node->child->type == MXML_OPAQUE)
+ node = node->child;
+
+ if (!node || node->type != MXML_OPAQUE || !opaque)
+ return (-1);
+
+ /*
+ * Free any old opaque value and set the new value...
+ */
+
+ if (node->value.opaque)
+ free(node->value.opaque);
+
+ node->value.opaque = strdup(opaque);
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSetReal()' - Set the value of a real number node.
+ *
+ * The node is not changed if it (or its first child) is not a real number node.
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetReal(mxml_node_t *node, /* I - Node to set */
+ double real) /* I - Real number value */
+{
+ /*
+ * Range check input...
+ */
+
+ if (node && node->type == MXML_ELEMENT &&
+ node->child && node->child->type == MXML_REAL)
+ node = node->child;
+
+ if (!node || node->type != MXML_REAL)
+ return (-1);
+
+ /*
+ * Set the new value and return...
+ */
+
+ node->value.real = real;
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSetText()' - Set the value of a text node.
+ *
+ * The node is not changed if it (or its first child) is not a text node.
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetText(mxml_node_t *node, /* I - Node to set */
+ int whitespace, /* I - 1 = leading whitespace, 0 = no whitespace */
+ const char *string) /* I - String */
+{
+ /*
+ * Range check input...
+ */
+
+ if (node && node->type == MXML_ELEMENT &&
+ node->child && node->child->type == MXML_TEXT)
+ node = node->child;
+
+ if (!node || node->type != MXML_TEXT || !string)
+ return (-1);
+
+ /*
+ * Free any old string value and set the new value...
+ */
+
+ if (node->value.text.string)
+ free(node->value.text.string);
+
+ node->value.text.whitespace = whitespace;
+ node->value.text.string = strdup(string);
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSetTextf()' - Set the value of a text node to a formatted string.
+ *
+ * The node is not changed if it (or its first child) is not a text node.
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetTextf(mxml_node_t *node, /* I - Node to set */
+ int whitespace, /* I - 1 = leading whitespace, 0 = no whitespace */
+ const char *format, /* I - Printf-style format string */
+ ...) /* I - Additional arguments as needed */
+{
+ va_list ap; /* Pointer to arguments */
+
+
+ /*
+ * Range check input...
+ */
+
+ if (node && node->type == MXML_ELEMENT &&
+ node->child && node->child->type == MXML_TEXT)
+ node = node->child;
+
+ if (!node || node->type != MXML_TEXT || !format)
+ return (-1);
+
+ /*
+ * Free any old string value and set the new value...
+ */
+
+ if (node->value.text.string)
+ free(node->value.text.string);
+
+ va_start(ap, format);
+
+ node->value.text.whitespace = whitespace;
+ node->value.text.string = _mxml_vstrdupf(format, ap);
+
+ va_end(ap);
+
+ return (0);
+}
+
+
+/*
+ * 'mxmlSetUserData()' - Set the user data pointer for a node.
+ *
+ * @since Mini-XML 2.7@
+ */
+
+int /* O - 0 on success, -1 on failure */
+mxmlSetUserData(mxml_node_t *node, /* I - Node to set */
+ void *data) /* I - User data pointer */
+{
+ /*
+ * Range check input...
+ */
+
+ if (!node)
+ return (-1);
+
+ /*
+ * Set the user data pointer and return...
+ */
+
+ node->user_data = data;
+ return (0);
+}
+
+
+/*
+ * End of "$Id: mxml-set.c 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml-string.c b/tools/gator/daemon/mxml/mxml-string.c
new file mode 100644
index 000000000000..9d5b58e6adb7
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml-string.c
@@ -0,0 +1,469 @@
+/*
+ * "$Id: mxml-string.c 454 2014-01-05 03:25:07Z msweet $"
+ *
+ * String functions for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Include necessary headers...
+ */
+
+#include "config.h"
+
+
+/*
+ * The va_copy macro is part of C99, but many compilers don't implement it.
+ * Provide a "direct assignment" implmentation when va_copy isn't defined...
+ */
+
+#ifndef va_copy
+# ifdef __va_copy
+# define va_copy(dst,src) __va_copy(dst,src)
+# else
+# define va_copy(dst,src) memcpy(&dst, src, sizeof(va_list))
+# endif /* __va_copy */
+#endif /* va_copy */
+
+
+#ifndef HAVE_SNPRINTF
+/*
+ * '_mxml_snprintf()' - Format a string.
+ */
+
+int /* O - Number of bytes formatted */
+_mxml_snprintf(char *buffer, /* I - Output buffer */
+ size_t bufsize, /* I - Size of output buffer */
+ const char *format, /* I - Printf-style format string */
+ ...) /* I - Additional arguments as needed */
+{
+ va_list ap; /* Argument list */
+ int bytes; /* Number of bytes formatted */
+
+
+ va_start(ap, format);
+ bytes = vsnprintf(buffer, bufsize, format, ap);
+ va_end(ap);
+
+ return (bytes);
+}
+#endif /* !HAVE_SNPRINTF */
+
+
+/*
+ * '_mxml_strdup()' - Duplicate a string.
+ */
+
+#ifndef HAVE_STRDUP
+char * /* O - New string pointer */
+_mxml_strdup(const char *s) /* I - String to duplicate */
+{
+ char *t; /* New string pointer */
+
+
+ if (s == NULL)
+ return (NULL);
+
+ if ((t = malloc(strlen(s) + 1)) == NULL)
+ return (NULL);
+
+ return (strcpy(t, s));
+}
+#endif /* !HAVE_STRDUP */
+
+
+/*
+ * '_mxml_strdupf()' - Format and duplicate a string.
+ */
+
+char * /* O - New string pointer */
+_mxml_strdupf(const char *format, /* I - Printf-style format string */
+ ...) /* I - Additional arguments as needed */
+{
+ va_list ap; /* Pointer to additional arguments */
+ char *s; /* Pointer to formatted string */
+
+
+ /*
+ * Get a pointer to the additional arguments, format the string,
+ * and return it...
+ */
+
+ va_start(ap, format);
+ s = _mxml_vstrdupf(format, ap);
+ va_end(ap);
+
+ return (s);
+}
+
+
+#ifndef HAVE_VSNPRINTF
+/*
+ * '_mxml_vsnprintf()' - Format a string into a fixed size buffer.
+ */
+
+int /* O - Number of bytes formatted */
+_mxml_vsnprintf(char *buffer, /* O - Output buffer */
+ size_t bufsize, /* O - Size of output buffer */
+ const char *format, /* I - Printf-style format string */
+ va_list ap) /* I - Pointer to additional arguments */
+{
+ char *bufptr, /* Pointer to position in buffer */
+ *bufend, /* Pointer to end of buffer */
+ sign, /* Sign of format width */
+ size, /* Size character (h, l, L) */
+ type; /* Format type character */
+ int width, /* Width of field */
+ prec; /* Number of characters of precision */
+ char tformat[100], /* Temporary format string for sprintf() */
+ *tptr, /* Pointer into temporary format */
+ temp[1024]; /* Buffer for formatted numbers */
+ char *s; /* Pointer to string */
+ int slen; /* Length of string */
+ int bytes; /* Total number of bytes needed */
+
+
+ /*
+ * Loop through the format string, formatting as needed...
+ */
+
+ bufptr = buffer;
+ bufend = buffer + bufsize - 1;
+ bytes = 0;
+
+ while (*format)
+ {
+ if (*format == '%')
+ {
+ tptr = tformat;
+ *tptr++ = *format++;
+
+ if (*format == '%')
+ {
+ if (bufptr && bufptr < bufend)
+ *bufptr++ = *format;
+ bytes ++;
+ format ++;
+ continue;
+ }
+ else if (strchr(" -+#\'", *format))
+ {
+ *tptr++ = *format;
+ sign = *format++;
+ }
+ else
+ sign = 0;
+
+ if (*format == '*')
+ {
+ /*
+ * Get width from argument...
+ */
+
+ format ++;
+ width = va_arg(ap, int);
+
+ snprintf(tptr, sizeof(tformat) - (tptr - tformat), "%d", width);
+ tptr += strlen(tptr);
+ }
+ else
+ {
+ width = 0;
+
+ while (isdigit(*format & 255))
+ {
+ if (tptr < (tformat + sizeof(tformat) - 1))
+ *tptr++ = *format;
+
+ width = width * 10 + *format++ - '0';
+ }
+ }
+
+ if (*format == '.')
+ {
+ if (tptr < (tformat + sizeof(tformat) - 1))
+ *tptr++ = *format;
+
+ format ++;
+
+ if (*format == '*')
+ {
+ /*
+ * Get precision from argument...
+ */
+
+ format ++;
+ prec = va_arg(ap, int);
+
+ snprintf(tptr, sizeof(tformat) - (tptr - tformat), "%d", prec);
+ tptr += strlen(tptr);
+ }
+ else
+ {
+ prec = 0;
+
+ while (isdigit(*format & 255))
+ {
+ if (tptr < (tformat + sizeof(tformat) - 1))
+ *tptr++ = *format;
+
+ prec = prec * 10 + *format++ - '0';
+ }
+ }
+ }
+ else
+ prec = -1;
+
+ if (*format == 'l' && format[1] == 'l')
+ {
+ size = 'L';
+
+ if (tptr < (tformat + sizeof(tformat) - 2))
+ {
+ *tptr++ = 'l';
+ *tptr++ = 'l';
+ }
+
+ format += 2;
+ }
+ else if (*format == 'h' || *format == 'l' || *format == 'L')
+ {
+ if (tptr < (tformat + sizeof(tformat) - 1))
+ *tptr++ = *format;
+
+ size = *format++;
+ }
+
+ if (!*format)
+ break;
+
+ if (tptr < (tformat + sizeof(tformat) - 1))
+ *tptr++ = *format;
+
+ type = *format++;
+ *tptr = '\0';
+
+ switch (type)
+ {
+ case 'E' : /* Floating point formats */
+ case 'G' :
+ case 'e' :
+ case 'f' :
+ case 'g' :
+ if ((width + 2) > sizeof(temp))
+ break;
+
+ sprintf(temp, tformat, va_arg(ap, double));
+
+ bytes += strlen(temp);
+
+ if (bufptr)
+ {
+ if ((bufptr + strlen(temp)) > bufend)
+ {
+ strncpy(bufptr, temp, (size_t)(bufend - bufptr));
+ bufptr = bufend;
+ }
+ else
+ {
+ strcpy(bufptr, temp);
+ bufptr += strlen(temp);
+ }
+ }
+ break;
+
+ case 'B' : /* Integer formats */
+ case 'X' :
+ case 'b' :
+ case 'd' :
+ case 'i' :
+ case 'o' :
+ case 'u' :
+ case 'x' :
+ if ((width + 2) > sizeof(temp))
+ break;
+
+#ifdef HAVE_LONG_LONG
+ if (size == 'L')
+ sprintf(temp, tformat, va_arg(ap, long long));
+ else
+#endif /* HAVE_LONG_LONG */
+ sprintf(temp, tformat, va_arg(ap, int));
+
+ bytes += strlen(temp);
+
+ if (bufptr)
+ {
+ if ((bufptr + strlen(temp)) > bufend)
+ {
+ strncpy(bufptr, temp, (size_t)(bufend - bufptr));
+ bufptr = bufend;
+ }
+ else
+ {
+ strcpy(bufptr, temp);
+ bufptr += strlen(temp);
+ }
+ }
+ break;
+
+ case 'p' : /* Pointer value */
+ if ((width + 2) > sizeof(temp))
+ break;
+
+ sprintf(temp, tformat, va_arg(ap, void *));
+
+ bytes += strlen(temp);
+
+ if (bufptr)
+ {
+ if ((bufptr + strlen(temp)) > bufend)
+ {
+ strncpy(bufptr, temp, (size_t)(bufend - bufptr));
+ bufptr = bufend;
+ }
+ else
+ {
+ strcpy(bufptr, temp);
+ bufptr += strlen(temp);
+ }
+ }
+ break;
+
+ case 'c' : /* Character or character array */
+ bytes += width;
+
+ if (bufptr)
+ {
+ if (width <= 1)
+ *bufptr++ = va_arg(ap, int);
+ else
+ {
+ if ((bufptr + width) > bufend)
+ width = bufend - bufptr;
+
+ memcpy(bufptr, va_arg(ap, char *), (size_t)width);
+ bufptr += width;
+ }
+ }
+ break;
+
+ case 's' : /* String */
+ if ((s = va_arg(ap, char *)) == NULL)
+ s = "(null)";
+
+ slen = strlen(s);
+ if (slen > width && prec != width)
+ width = slen;
+
+ bytes += width;
+
+ if (bufptr)
+ {
+ if ((bufptr + width) > bufend)
+ width = bufend - bufptr;
+
+ if (slen > width)
+ slen = width;
+
+ if (sign == '-')
+ {
+ strncpy(bufptr, s, (size_t)slen);
+ memset(bufptr + slen, ' ', (size_t)(width - slen));
+ }
+ else
+ {
+ memset(bufptr, ' ', (size_t)(width - slen));
+ strncpy(bufptr + width - slen, s, (size_t)slen);
+ }
+
+ bufptr += width;
+ }
+ break;
+
+ case 'n' : /* Output number of chars so far */
+ *(va_arg(ap, int *)) = bytes;
+ break;
+ }
+ }
+ else
+ {
+ bytes ++;
+
+ if (bufptr && bufptr < bufend)
+ *bufptr++ = *format;
+
+ format ++;
+ }
+ }
+
+ /*
+ * Nul-terminate the string and return the number of characters needed.
+ */
+
+ *bufptr = '\0';
+
+ return (bytes);
+}
+#endif /* !HAVE_VSNPRINTF */
+
+
+/*
+ * '_mxml_vstrdupf()' - Format and duplicate a string.
+ */
+
+char * /* O - New string pointer */
+_mxml_vstrdupf(const char *format, /* I - Printf-style format string */
+ va_list ap) /* I - Pointer to additional arguments */
+{
+ int bytes; /* Number of bytes required */
+ char *buffer, /* String buffer */
+ temp[256]; /* Small buffer for first vsnprintf */
+ va_list apcopy; /* Copy of argument list */
+
+
+ /*
+ * First format with a tiny buffer; this will tell us how many bytes are
+ * needed...
+ */
+
+ va_copy(apcopy, ap);
+ bytes = vsnprintf(temp, sizeof(temp), format, apcopy);
+
+ if (bytes < sizeof(temp))
+ {
+ /*
+ * Hey, the formatted string fits in the tiny buffer, so just dup that...
+ */
+
+ return (strdup(temp));
+ }
+
+ /*
+ * Allocate memory for the whole thing and reformat to the new, larger
+ * buffer...
+ */
+
+ if ((buffer = calloc(1, bytes + 1)) != NULL)
+ vsnprintf(buffer, bytes + 1, format, ap);
+
+ /*
+ * Return the new string...
+ */
+
+ return (buffer);
+}
+
+
+/*
+ * End of "$Id: mxml-string.c 454 2014-01-05 03:25:07Z msweet $".
+ */
diff --git a/tools/gator/daemon/mxml/mxml.h b/tools/gator/daemon/mxml/mxml.h
new file mode 100644
index 000000000000..bba5fd23a67b
--- /dev/null
+++ b/tools/gator/daemon/mxml/mxml.h
@@ -0,0 +1,332 @@
+/*
+ * "$Id: mxml.h 451 2014-01-04 21:50:06Z msweet $"
+ *
+ * Header file for Mini-XML, a small XML-like file parsing library.
+ *
+ * Copyright 2003-2014 by Michael R Sweet.
+ *
+ * These coded instructions, statements, and computer programs are the
+ * property of Michael R Sweet and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "COPYING"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at:
+ *
+ * http://www.msweet.org/projects.php/Mini-XML
+ */
+
+/*
+ * Prevent multiple inclusion...
+ */
+
+#ifndef _mxml_h_
+# define _mxml_h_
+
+/*
+ * Include necessary headers...
+ */
+
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+# include <ctype.h>
+# include <errno.h>
+
+
+/*
+ * Constants...
+ */
+
+# define MXML_MAJOR_VERSION 2 /* Major version number */
+# define MXML_MINOR_VERSION 8 /* Minor version number */
+
+# define MXML_TAB 8 /* Tabs every N columns */
+
+# define MXML_NO_CALLBACK 0 /* Don't use a type callback */
+# define MXML_INTEGER_CALLBACK mxml_integer_cb
+ /* Treat all data as integers */
+# define MXML_OPAQUE_CALLBACK mxml_opaque_cb
+ /* Treat all data as opaque */
+# define MXML_REAL_CALLBACK mxml_real_cb
+ /* Treat all data as real numbers */
+# define MXML_TEXT_CALLBACK 0 /* Treat all data as text */
+# define MXML_IGNORE_CALLBACK mxml_ignore_cb
+ /* Ignore all non-element content */
+
+# define MXML_NO_PARENT 0 /* No parent for the node */
+
+# define MXML_DESCEND 1 /* Descend when finding/walking */
+# define MXML_NO_DESCEND 0 /* Don't descend when finding/walking */
+# define MXML_DESCEND_FIRST -1 /* Descend for first find */
+
+# define MXML_WS_BEFORE_OPEN 0 /* Callback for before open tag */
+# define MXML_WS_AFTER_OPEN 1 /* Callback for after open tag */
+# define MXML_WS_BEFORE_CLOSE 2 /* Callback for before close tag */
+# define MXML_WS_AFTER_CLOSE 3 /* Callback for after close tag */
+
+# define MXML_ADD_BEFORE 0 /* Add node before specified node */
+# define MXML_ADD_AFTER 1 /* Add node after specified node */
+# define MXML_ADD_TO_PARENT NULL /* Add node relative to parent */
+
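+/*
+ * For example, appending a new node to the end of a parent's child
+ * list uses the parent-relative form (as mxml_new() does internally):
+ *
+ *   mxmlAdd(parent, MXML_ADD_AFTER, MXML_ADD_TO_PARENT, node);
+ */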
+
+/*
+ * Data types...
+ */
+
+typedef enum mxml_sax_event_e /**** SAX event type. ****/
+{
+ MXML_SAX_CDATA, /* CDATA node */
+ MXML_SAX_COMMENT, /* Comment node */
+ MXML_SAX_DATA, /* Data node */
+ MXML_SAX_DIRECTIVE, /* Processing directive node */
+ MXML_SAX_ELEMENT_CLOSE, /* Element closed */
+ MXML_SAX_ELEMENT_OPEN /* Element opened */
+} mxml_sax_event_t;
+
+typedef enum mxml_type_e /**** The XML node type. ****/
+{
+ MXML_IGNORE = -1, /* Ignore/throw away node @since Mini-XML 2.3@ */
+ MXML_ELEMENT, /* XML element with attributes */
+ MXML_INTEGER, /* Integer value */
+ MXML_OPAQUE, /* Opaque string */
+ MXML_REAL, /* Real value */
+ MXML_TEXT, /* Text fragment */
+ MXML_CUSTOM /* Custom data @since Mini-XML 2.1@ */
+} mxml_type_t;
+
+typedef void (*mxml_custom_destroy_cb_t)(void *);
+ /**** Custom data destructor ****/
+
+typedef void (*mxml_error_cb_t)(const char *);
+ /**** Error callback function ****/
+
+typedef struct mxml_attr_s /**** An XML element attribute value. @private@ ****/
+{
+ char *name; /* Attribute name */
+ char *value; /* Attribute value */
+} mxml_attr_t;
+
+typedef struct mxml_element_s /**** An XML element value. @private@ ****/
+{
+ char *name; /* Name of element */
+ int num_attrs; /* Number of attributes */
+ mxml_attr_t *attrs; /* Attributes */
+} mxml_element_t;
+
+typedef struct mxml_text_s /**** An XML text value. @private@ ****/
+{
+ int whitespace; /* Leading whitespace? */
+ char *string; /* Fragment string */
+} mxml_text_t;
+
+typedef struct mxml_custom_s /**** An XML custom value. @private@ ****/
+{
+ void *data; /* Pointer to (allocated) custom data */
+ mxml_custom_destroy_cb_t destroy; /* Pointer to destructor function */
+} mxml_custom_t;
+
+typedef union mxml_value_u /**** An XML node value. @private@ ****/
+{
+ mxml_element_t element; /* Element */
+ int integer; /* Integer number */
+ char *opaque; /* Opaque string */
+ double real; /* Real number */
+ mxml_text_t text; /* Text fragment */
+ mxml_custom_t custom; /* Custom data @since Mini-XML 2.1@ */
+} mxml_value_t;
+
+struct mxml_node_s /**** An XML node. @private@ ****/
+{
+ mxml_type_t type; /* Node type */
+ struct mxml_node_s *next; /* Next node under same parent */
+ struct mxml_node_s *prev; /* Previous node under same parent */
+ struct mxml_node_s *parent; /* Parent node */
+ struct mxml_node_s *child; /* First child node */
+ struct mxml_node_s *last_child; /* Last child node */
+ mxml_value_t value; /* Node value */
+ int ref_count; /* Use count */
+ void *user_data; /* User data */
+};
+
+typedef struct mxml_node_s mxml_node_t; /**** An XML node. ****/
+
+struct mxml_index_s /**** An XML node index. @private@ ****/
+{
+ char *attr; /* Attribute used for indexing or NULL */
+ int num_nodes; /* Number of nodes in index */
+ int alloc_nodes; /* Allocated nodes in index */
+ int cur_node; /* Current node */
+ mxml_node_t **nodes; /* Node array */
+};
+
+typedef struct mxml_index_s mxml_index_t;
+ /**** An XML node index. ****/
+
+typedef int (*mxml_custom_load_cb_t)(mxml_node_t *, const char *);
+ /**** Custom data load callback function ****/
+
+typedef char *(*mxml_custom_save_cb_t)(mxml_node_t *);
+ /**** Custom data save callback function ****/
+
+typedef int (*mxml_entity_cb_t)(const char *);
+ /**** Entity callback function ****/
+
+typedef mxml_type_t (*mxml_load_cb_t)(mxml_node_t *);
+ /**** Load callback function ****/
+
+typedef const char *(*mxml_save_cb_t)(mxml_node_t *, int);
+ /**** Save callback function ****/
+
+typedef void (*mxml_sax_cb_t)(mxml_node_t *, mxml_sax_event_t, void *);
+ /**** SAX callback function ****/
+
+
+/*
+ * C++ support...
+ */
+
+# ifdef __cplusplus
+extern "C" {
+# endif /* __cplusplus */
+
+/*
+ * Prototypes...
+ */
+
+extern void mxmlAdd(mxml_node_t *parent, int where,
+ mxml_node_t *child, mxml_node_t *node);
+extern void mxmlDelete(mxml_node_t *node);
+extern void mxmlElementDeleteAttr(mxml_node_t *node,
+ const char *name);
+extern const char *mxmlElementGetAttr(mxml_node_t *node, const char *name);
+extern void mxmlElementSetAttr(mxml_node_t *node, const char *name,
+ const char *value);
+extern void mxmlElementSetAttrf(mxml_node_t *node, const char *name,
+ const char *format, ...)
+# ifdef __GNUC__
+__attribute__ ((__format__ (__printf__, 3, 4)))
+# endif /* __GNUC__ */
+;
+extern int mxmlEntityAddCallback(mxml_entity_cb_t cb);
+extern const char *mxmlEntityGetName(int val);
+extern int mxmlEntityGetValue(const char *name);
+extern void mxmlEntityRemoveCallback(mxml_entity_cb_t cb);
+extern mxml_node_t *mxmlFindElement(mxml_node_t *node, mxml_node_t *top,
+ const char *name, const char *attr,
+ const char *value, int descend);
+extern mxml_node_t *mxmlFindPath(mxml_node_t *node, const char *path);
+extern const char *mxmlGetCDATA(mxml_node_t *node);
+extern const void *mxmlGetCustom(mxml_node_t *node);
+extern const char *mxmlGetElement(mxml_node_t *node);
+extern mxml_node_t *mxmlGetFirstChild(mxml_node_t *node);
+extern int mxmlGetInteger(mxml_node_t *node);
+extern mxml_node_t *mxmlGetLastChild(mxml_node_t *node);
+extern mxml_node_t *mxmlGetNextSibling(mxml_node_t *node);
+extern const char *mxmlGetOpaque(mxml_node_t *node);
+extern mxml_node_t *mxmlGetParent(mxml_node_t *node);
+extern mxml_node_t *mxmlGetPrevSibling(mxml_node_t *node);
+extern double mxmlGetReal(mxml_node_t *node);
+extern int mxmlGetRefCount(mxml_node_t *node);
+extern const char *mxmlGetText(mxml_node_t *node, int *whitespace);
+extern mxml_type_t mxmlGetType(mxml_node_t *node);
+extern void *mxmlGetUserData(mxml_node_t *node);
+extern void mxmlIndexDelete(mxml_index_t *ind);
+extern mxml_node_t *mxmlIndexEnum(mxml_index_t *ind);
+extern mxml_node_t *mxmlIndexFind(mxml_index_t *ind,
+ const char *element,
+ const char *value);
+extern int mxmlIndexGetCount(mxml_index_t *ind);
+extern mxml_index_t *mxmlIndexNew(mxml_node_t *node, const char *element,
+ const char *attr);
+extern mxml_node_t *mxmlIndexReset(mxml_index_t *ind);
+extern mxml_node_t *mxmlLoadFd(mxml_node_t *top, int fd,
+ mxml_type_t (*cb)(mxml_node_t *));
+extern mxml_node_t *mxmlLoadFile(mxml_node_t *top, FILE *fp,
+ mxml_type_t (*cb)(mxml_node_t *));
+extern mxml_node_t *mxmlLoadString(mxml_node_t *top, const char *s,
+ mxml_type_t (*cb)(mxml_node_t *));
+extern mxml_node_t *mxmlNewCDATA(mxml_node_t *parent, const char *string);
+extern mxml_node_t *mxmlNewCustom(mxml_node_t *parent, void *data,
+ mxml_custom_destroy_cb_t destroy);
+extern mxml_node_t *mxmlNewElement(mxml_node_t *parent, const char *name);
+extern mxml_node_t *mxmlNewInteger(mxml_node_t *parent, int integer);
+extern mxml_node_t *mxmlNewOpaque(mxml_node_t *parent, const char *opaque);
+extern mxml_node_t *mxmlNewReal(mxml_node_t *parent, double real);
+extern mxml_node_t *mxmlNewText(mxml_node_t *parent, int whitespace,
+ const char *string);
+extern mxml_node_t *mxmlNewTextf(mxml_node_t *parent, int whitespace,
+ const char *format, ...)
+# ifdef __GNUC__
+__attribute__ ((__format__ (__printf__, 3, 4)))
+# endif /* __GNUC__ */
+;
+extern mxml_node_t *mxmlNewXML(const char *version);
+extern int mxmlRelease(mxml_node_t *node);
+extern void mxmlRemove(mxml_node_t *node);
+extern int mxmlRetain(mxml_node_t *node);
+extern char *mxmlSaveAllocString(mxml_node_t *node,
+ mxml_save_cb_t cb);
+extern int mxmlSaveFd(mxml_node_t *node, int fd,
+ mxml_save_cb_t cb);
+extern int mxmlSaveFile(mxml_node_t *node, FILE *fp,
+ mxml_save_cb_t cb);
+extern int mxmlSaveString(mxml_node_t *node, char *buffer,
+ int bufsize, mxml_save_cb_t cb);
+extern mxml_node_t *mxmlSAXLoadFd(mxml_node_t *top, int fd,
+ mxml_type_t (*cb)(mxml_node_t *),
+ mxml_sax_cb_t sax, void *sax_data);
+extern mxml_node_t *mxmlSAXLoadFile(mxml_node_t *top, FILE *fp,
+ mxml_type_t (*cb)(mxml_node_t *),
+ mxml_sax_cb_t sax, void *sax_data);
+extern mxml_node_t *mxmlSAXLoadString(mxml_node_t *top, const char *s,
+ mxml_type_t (*cb)(mxml_node_t *),
+ mxml_sax_cb_t sax, void *sax_data);
+extern int mxmlSetCDATA(mxml_node_t *node, const char *data);
+extern int mxmlSetCustom(mxml_node_t *node, void *data,
+ mxml_custom_destroy_cb_t destroy);
+extern void mxmlSetCustomHandlers(mxml_custom_load_cb_t load,
+ mxml_custom_save_cb_t save);
+extern int mxmlSetElement(mxml_node_t *node, const char *name);
+extern void mxmlSetErrorCallback(mxml_error_cb_t cb);
+extern int mxmlSetInteger(mxml_node_t *node, int integer);
+extern int mxmlSetOpaque(mxml_node_t *node, const char *opaque);
+extern int mxmlSetReal(mxml_node_t *node, double real);
+extern int mxmlSetText(mxml_node_t *node, int whitespace,
+ const char *string);
+extern int mxmlSetTextf(mxml_node_t *node, int whitespace,
+ const char *format, ...)
+# ifdef __GNUC__
+__attribute__ ((__format__ (__printf__, 3, 4)))
+# endif /* __GNUC__ */
+;
+extern int mxmlSetUserData(mxml_node_t *node, void *data);
+extern void mxmlSetWrapMargin(int column);
+extern mxml_node_t *mxmlWalkNext(mxml_node_t *node, mxml_node_t *top,
+ int descend);
+extern mxml_node_t *mxmlWalkPrev(mxml_node_t *node, mxml_node_t *top,
+ int descend);
+
+
+/*
+ * Semi-private functions...
+ */
+
+extern void mxml_error(const char *format, ...);
+extern mxml_type_t mxml_ignore_cb(mxml_node_t *node);
+extern mxml_type_t mxml_integer_cb(mxml_node_t *node);
+extern mxml_type_t mxml_opaque_cb(mxml_node_t *node);
+extern mxml_type_t mxml_real_cb(mxml_node_t *node);
+
+
+/*
+ * C++ support...
+ */
+
+# ifdef __cplusplus
+}
+# endif /* __cplusplus */
+#endif /* !_mxml_h_ */
+
+
+/*
+ * End of "$Id: mxml.h 451 2014-01-04 21:50:06Z msweet $".
+ */
diff --git a/tools/lib/lk/Makefile b/tools/lib/lk/Makefile
index 926cbf3efc7f..2c5a19733357 100644
--- a/tools/lib/lk/Makefile
+++ b/tools/lib/lk/Makefile
@@ -1,5 +1,8 @@
include ../../scripts/Makefile.include
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+
# guard against environment variables
LIB_H=
LIB_OBJS=
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 779262f59e25..fc0c5e603eb4 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@ config HAVE_KVM
config HAVE_KVM_IRQCHIP
bool
+config HAVE_KVM_IRQFD
+ bool
+
config HAVE_KVM_IRQ_ROUTING
bool
@@ -22,8 +25,15 @@ config KVM_MMIO
config KVM_ASYNC_PF
bool
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+ bool
+
config HAVE_KVM_MSI
bool
config HAVE_KVM_CPU_RELAX_INTERCEPT
bool
+
+config KVM_VFIO
+ bool
diff --git a/arch/arm/kvm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index c55b6089e923..5081e809821f 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -25,14 +25,12 @@
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
-#include <asm/kvm_vgic.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
static struct timecounter *timecounter;
static struct workqueue_struct *wqueue;
-static struct kvm_irq_level timer_irq = {
- .level = 1,
-};
+static unsigned int host_vtimer_irq;
static cycle_t kvm_phys_timer_read(void)
{
@@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- vcpu->arch.timer_cpu.irq->irq,
- vcpu->arch.timer_cpu.irq->level);
+ timer->irq->irq,
+ timer->irq->level);
}
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
timer_arm(timer, ns);
}
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ /*
+ * The vcpu timer irq number cannot be determined in
+ * kvm_timer_vcpu_init() because it is called much before
+ * kvm_vcpu_set_target(). To handle this, we determine
+ * vcpu timer irq number when the vcpu is reset.
+ */
+ timer->irq = irq;
+}
+
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -163,14 +175,47 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
timer->timer.function = kvm_timer_expire;
- timer->irq = &timer_irq;
}
static void kvm_timer_init_interrupt(void *info)
{
- enable_percpu_irq(timer_irq.irq, 0);
+ enable_percpu_irq(host_vtimer_irq, 0);
}
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ switch (regid) {
+ case KVM_REG_ARM_TIMER_CTL:
+ timer->cntv_ctl = value;
+ break;
+ case KVM_REG_ARM_TIMER_CNT:
+ vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
+ break;
+ case KVM_REG_ARM_TIMER_CVAL:
+ timer->cntv_cval = value;
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ switch (regid) {
+ case KVM_REG_ARM_TIMER_CTL:
+ return timer->cntv_ctl;
+ case KVM_REG_ARM_TIMER_CNT:
+ return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ case KVM_REG_ARM_TIMER_CVAL:
+ return timer->cntv_cval;
+ }
+ return (u64)-1;
+}
static int kvm_timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *cpu)
@@ -182,7 +227,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self,
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
- disable_percpu_irq(timer_irq.irq);
+ disable_percpu_irq(host_vtimer_irq);
break;
}
@@ -195,6 +240,7 @@ static struct notifier_block kvm_timer_cpu_nb = {
static const struct of_device_id arch_timer_of_match[] = {
{ .compatible = "arm,armv7-timer", },
+ { .compatible = "arm,armv8-timer", },
{},
};
@@ -229,7 +275,7 @@ int kvm_timer_hyp_init(void)
goto out;
}
- timer_irq.irq = ppi;
+ host_vtimer_irq = ppi;
err = register_cpu_notifier(&kvm_timer_cpu_nb);
if (err) {
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
new file mode 100644
index 000000000000..01124ef3690a
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ struct vgic_lr lr_desc;
+ u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+
+ lr_desc.irq = val & GICH_LR_VIRTUALID;
+ if (lr_desc.irq <= 15)
+ lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+ else
+ lr_desc.source = 0;
+ lr_desc.state = 0;
+
+ if (val & GICH_LR_PENDING_BIT)
+ lr_desc.state |= LR_STATE_PENDING;
+ if (val & GICH_LR_ACTIVE_BIT)
+ lr_desc.state |= LR_STATE_ACTIVE;
+ if (val & GICH_LR_EOI)
+ lr_desc.state |= LR_EOI_INT;
+
+ return lr_desc;
+}
+
+static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+
+ if (lr_desc.state & LR_STATE_PENDING)
+ lr_val |= GICH_LR_PENDING_BIT;
+ if (lr_desc.state & LR_STATE_ACTIVE)
+ lr_val |= GICH_LR_ACTIVE_BIT;
+ if (lr_desc.state & LR_EOI_INT)
+ lr_val |= GICH_LR_EOI;
+
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
+}
+
+static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ if (!(lr_desc.state & LR_STATE_MASK))
+ set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+}
+
+static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+#if BITS_PER_LONG == 64
+ val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
+ val <<= 32;
+ val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
+#else
+ val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+#endif
+ return val;
+}
+
+static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+#if BITS_PER_LONG == 64
+ val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
+ val <<= 32;
+ val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
+#else
+ val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+#endif
+ return val;
+}
+
+static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+ u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+ u32 ret = 0;
+
+ if (misr & GICH_MISR_EOI)
+ ret |= INT_STATUS_EOI;
+ if (misr & GICH_MISR_U)
+ ret |= INT_STATUS_UNDERFLOW;
+
+ return ret;
+}
+
+static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+}
+
+static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+}
+
+static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+ vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+ vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+ vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
+ vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
+}
+
+static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr;
+
+ vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+ vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
+ vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
+ vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
+
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
+static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+ /*
+ * By forcing VMCR to zero, the GIC will restore the binary
+ * points to their reset values. Anything else resets to zero
+ * anyway.
+ */
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+
+ /* Get the show on the road... */
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v2_ops = {
+ .get_lr = vgic_v2_get_lr,
+ .set_lr = vgic_v2_set_lr,
+ .sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
+ .get_elrsr = vgic_v2_get_elrsr,
+ .get_eisr = vgic_v2_get_eisr,
+ .get_interrupt_status = vgic_v2_get_interrupt_status,
+ .enable_underflow = vgic_v2_enable_underflow,
+ .disable_underflow = vgic_v2_disable_underflow,
+ .get_vmcr = vgic_v2_get_vmcr,
+ .set_vmcr = vgic_v2_set_vmcr,
+ .enable = vgic_v2_enable,
+};
+
+static struct vgic_params vgic_v2_params;
+
+/**
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
+ * @node: pointer to the DT node
+ * @ops: address of a pointer to the GICv2 operations
+ * @params: address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv2 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v2_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+ int ret;
+ struct resource vctrl_res;
+ struct resource vcpu_res;
+ struct vgic_params *vgic = &vgic_v2_params;
+
+ vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+ if (!vgic->maint_irq) {
+ kvm_err("error getting vgic maintenance irq from DT\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
+ if (ret) {
+ kvm_err("Cannot obtain GICH resource\n");
+ goto out;
+ }
+
+ vgic->vctrl_base = of_iomap(vgic_node, 2);
+ if (!vgic->vctrl_base) {
+ kvm_err("Cannot ioremap GICH\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
+ vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
+
+ ret = create_hyp_io_mappings(vgic->vctrl_base,
+ vgic->vctrl_base + resource_size(&vctrl_res),
+ vctrl_res.start);
+ if (ret) {
+ kvm_err("Cannot map VCTRL into hyp\n");
+ goto out_unmap;
+ }
+
+ if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+ kvm_err("Cannot obtain GICV resource\n");
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ if (!PAGE_ALIGNED(vcpu_res.start)) {
+ kvm_err("GICV physical address 0x%llx not page aligned\n",
+ (unsigned long long)vcpu_res.start);
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+ (unsigned long long)resource_size(&vcpu_res),
+ PAGE_SIZE);
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ vgic->vcpu_base = vcpu_res.start;
+
+ kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+ vctrl_res.start, vgic->maint_irq);
+
+ vgic->type = VGIC_V2;
+ *ops = &vgic_v2_ops;
+ *params = vgic;
+ goto out;
+
+out_unmap:
+ iounmap(vgic->vctrl_base);
+out:
+ of_node_put(vgic_node);
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
new file mode 100644
index 000000000000..1c2c8eef0599
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+/*
+ * LRs are stored in reverse order in memory. Make sure we index them
+ * correctly.
+ */
+#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
+
+static u32 ich_vtr_el2;
+
+static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ struct vgic_lr lr_desc;
+ u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
+
+ lr_desc.irq = val & GICH_LR_VIRTUALID;
+ if (lr_desc.irq <= 15)
+ lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+ else
+ lr_desc.source = 0;
+ lr_desc.state = 0;
+
+ if (val & ICH_LR_PENDING_BIT)
+ lr_desc.state |= LR_STATE_PENDING;
+ if (val & ICH_LR_ACTIVE_BIT)
+ lr_desc.state |= LR_STATE_ACTIVE;
+ if (val & ICH_LR_EOI)
+ lr_desc.state |= LR_EOI_INT;
+
+ return lr_desc;
+}
+
+static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
+ lr_desc.irq);
+
+ if (lr_desc.state & LR_STATE_PENDING)
+ lr_val |= ICH_LR_PENDING_BIT;
+ if (lr_desc.state & LR_STATE_ACTIVE)
+ lr_val |= ICH_LR_ACTIVE_BIT;
+ if (lr_desc.state & LR_EOI_INT)
+ lr_val |= ICH_LR_EOI;
+
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
+}
+
+static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ if (!(lr_desc.state & LR_STATE_MASK))
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+}
+
+static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr;
+}
+
+static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
+}
+
+static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+ u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
+ u32 ret = 0;
+
+ if (misr & ICH_MISR_EOI)
+ ret |= INT_STATUS_EOI;
+ if (misr & ICH_MISR_U)
+ ret |= INT_STATUS_UNDERFLOW;
+
+ return ret;
+}
+
+static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
+
+ vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
+ vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+ vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+ vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+}
+
+static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE;
+}
+
+static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE;
+}
+
+static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr;
+
+ vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
+ vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
+ vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
+ vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
+
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
+}
+
+static void vgic_v3_enable(struct kvm_vcpu *vcpu)
+{
+ /*
+ * By forcing VMCR to zero, the GIC will restore the binary
+ * points to their reset values. Anything else resets to zero
+ * anyway.
+ */
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;
+
+ /* Get the show on the road... */
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v3_ops = {
+ .get_lr = vgic_v3_get_lr,
+ .set_lr = vgic_v3_set_lr,
+ .sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
+ .get_elrsr = vgic_v3_get_elrsr,
+ .get_eisr = vgic_v3_get_eisr,
+ .get_interrupt_status = vgic_v3_get_interrupt_status,
+ .enable_underflow = vgic_v3_enable_underflow,
+ .disable_underflow = vgic_v3_disable_underflow,
+ .get_vmcr = vgic_v3_get_vmcr,
+ .set_vmcr = vgic_v3_set_vmcr,
+ .enable = vgic_v3_enable,
+};
+
+static struct vgic_params vgic_v3_params;
+
+/**
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
+ * @vgic_node: pointer to the DT node
+ * @ops: address of a pointer to the GICv3 operations
+ * @params: address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv3 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v3_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+ int ret = 0;
+ u32 gicv_idx;
+ struct resource vcpu_res;
+ struct vgic_params *vgic = &vgic_v3_params;
+
+ vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+ if (!vgic->maint_irq) {
+ kvm_err("error getting vgic maintenance irq from DT\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+
+ /*
+ * The ListRegs field is 5 bits, but there is an architectural
+ * maximum of 16 list registers. Just ignore bit 4...
+ */
+ vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
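+ /* e.g. an ICH_VTR_EL2.ListRegs value of 3 yields nr_lr = 4 */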
+
+ if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
+ gicv_idx = 1;
+
+ gicv_idx += 3; /* Also skip GICD, GICC, GICH */
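+ /* DT reg layout: GICD, the redistributor region(s), then GICC, GICH, GICV */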
+ if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+ kvm_err("Cannot obtain GICV region\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (!PAGE_ALIGNED(vcpu_res.start)) {
+ kvm_err("GICV physical address 0x%llx not page aligned\n",
+ (unsigned long long)vcpu_res.start);
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+ (unsigned long long)resource_size(&vcpu_res),
+ PAGE_SIZE);
+ ret = -ENXIO;
+ goto out;
+ }
+
+ vgic->vcpu_base = vcpu_res.start;
+ vgic->vctrl_base = NULL;
+ vgic->type = VGIC_V3;
+
+ kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+ vcpu_res.start, vgic->maint_irq);
+
+ *ops = &vgic_v3_ops;
+ *params = vgic;
+
+out:
+ of_node_put(vgic_node);
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
new file mode 100644
index 000000000000..8e1dc03342c3
--- /dev/null
+++ b/virt/kvm/arm/vgic.c
@@ -0,0 +1,2464 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/uaccess.h>
+
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * How the whole thing works (courtesy of Christoffer Dall):
+ *
+ * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
+ * something is pending on the CPU interface.
+ * - Interrupts that are pending on the distributor are stored on the
+ * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
+ * ioctls and guest mmio ops, and other in-kernel peripherals such as the
+ * arch. timers).
+ * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
+ * recalculated.
+ * - To calculate the oracle, we need info for each cpu from
+ * compute_pending_for_cpu, which considers:
+ * - PPI: dist->irq_pending & dist->irq_enable
+ * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
+ * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
+ * registers, stored on each vcpu. We only keep one bit of
+ * information per interrupt, making sure that only one vcpu can
+ * accept the interrupt.
+ * - If any of the above state changes, we must recalculate the oracle.
+ * - The same is true when injecting an interrupt, except that we only
+ * consider a single interrupt at a time. The irq_spi_cpu array
+ * contains the target CPU for each SPI.
+ *
+ * The handling of level interrupts adds some extra complexity. We
+ * need to track when the interrupt has been EOIed, so we can sample
+ * the 'line' again. This is achieved as such:
+ *
+ * - When a level interrupt is moved onto a vcpu, the corresponding
+ * bit in irq_queued is set. As long as this bit is set, the line
+ * will be ignored for further interrupts. The interrupt is injected
+ * into the vcpu with the GICH_LR_EOI bit set (generate a
+ * maintenance interrupt on EOI).
+ * - When the interrupt is EOIed, the maintenance interrupt fires,
+ * and clears the corresponding bit in irq_queued. This allows the
+ * interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from
+ * writes to GICD_ISPENDRn and lowering the external input line does not
+ * cause the interrupt to become inactive in such a situation.
+ * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
+ * inactive as long as the external input line is held high.
+ */
+
+#define VGIC_ADDR_UNDEF (-1)
+#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
+
+#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
+#define IMPLEMENTER_ARM 0x43b
+#define GICC_ARCH_VERSION_V2 0x2
+
+#define ACCESS_READ_VALUE (1 << 0)
+#define ACCESS_READ_RAZ (0 << 0)
+#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
+#define ACCESS_WRITE_IGNORED (0 << 1)
+#define ACCESS_WRITE_SETBIT (1 << 1)
+#define ACCESS_WRITE_CLEARBIT (2 << 1)
+#define ACCESS_WRITE_VALUE (3 << 1)
+#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
+
+static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
+static void vgic_update_state(struct kvm *kvm);
+static void vgic_kick_vcpus(struct kvm *kvm);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
+static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+
+static const struct vgic_ops *vgic_ops;
+static const struct vgic_params *vgic;
+
+/*
+ * struct vgic_bitmap contains a bitmap made of unsigned longs, but
+ * extracts u32s out of them.
+ *
+ * This does not work on 64-bit BE systems, because the bitmap access
+ * will store two consecutive 32-bit words with the higher-addressed
+ * register's bits at the lower index and the lower-addressed register's
+ * bits at the higher index.
+ *
+ * Therefore, swizzle the register index when accessing the 32-bit word
+ * registers to access the right register's value.
+ */
+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
+#define REG_OFFSET_SWIZZLE 1
+#else
+#define REG_OFFSET_SWIZZLE 0
+#endif
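+
+/*
+ * e.g. on a 64-bit BE host the shared-register word index is XORed with 1,
+ * so the 32-bit registers at word indices 0 and 1 swap places within the
+ * first unsigned long (see vgic_bitmap_get_reg() below).
+ */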
+
+static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
+{
+ int nr_longs;
+
+ nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+
+ b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
+ if (!b->private)
+ return -ENOMEM;
+
+ b->shared = b->private + nr_cpus;
+
+ return 0;
+}
+
+static void vgic_free_bitmap(struct vgic_bitmap *b)
+{
+ kfree(b->private);
+ b->private = NULL;
+ b->shared = NULL;
+}
+
+static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
+ int cpuid, u32 offset)
+{
+ offset >>= 2;
+ if (!offset)
+ return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
+ else
+ return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
+}
+
+static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
+ int cpuid, int irq)
+{
+ if (irq < VGIC_NR_PRIVATE_IRQS)
+ return test_bit(irq, x->private + cpuid);
+
+ return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
+}
+
+static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
+ int irq, int val)
+{
+ unsigned long *reg;
+
+ if (irq < VGIC_NR_PRIVATE_IRQS) {
+ reg = x->private + cpuid;
+ } else {
+ reg = x->shared;
+ irq -= VGIC_NR_PRIVATE_IRQS;
+ }
+
+ if (val)
+ set_bit(irq, reg);
+ else
+ clear_bit(irq, reg);
+}
+
+static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
+{
+ return x->private + cpuid;
+}
+
+static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
+{
+ return x->shared;
+}
+
+static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
+{
+ int size;
+
+ size = nr_cpus * VGIC_NR_PRIVATE_IRQS;
+ size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+ x->private = kzalloc(size, GFP_KERNEL);
+ if (!x->private)
+ return -ENOMEM;
+
+ x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
+ return 0;
+}
+
+static void vgic_free_bytemap(struct vgic_bytemap *b)
+{
+ kfree(b->private);
+ b->private = NULL;
+ b->shared = NULL;
+}
+
+static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
+{
+ u32 *reg;
+
+ if (offset < VGIC_NR_PRIVATE_IRQS) {
+ reg = x->private;
+ offset += cpuid * VGIC_NR_PRIVATE_IRQS;
+ } else {
+ reg = x->shared;
+ offset -= VGIC_NR_PRIVATE_IRQS;
+ }
+
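+ /* each 32-bit word of the map packs four per-IRQ bytes; offset / 4 selects the word */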
+ return reg + (offset / sizeof(u32));
+}
+
+#define VGIC_CFG_LEVEL 0
+#define VGIC_CFG_EDGE 1
+
+static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int irq_val;
+
+ irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
+ return irq_val == VGIC_CFG_EDGE;
+}
+
+static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
+}
+
+static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
+}
+
+static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
+}
+
+static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
+{
+ if (irq < VGIC_NR_PRIVATE_IRQS)
+ set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
+ else
+ set_bit(irq - VGIC_NR_PRIVATE_IRQS,
+ vcpu->arch.vgic_cpu.pending_shared);
+}
+
+static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
+{
+ if (irq < VGIC_NR_PRIVATE_IRQS)
+ clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
+ else
+ clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
+ vcpu->arch.vgic_cpu.pending_shared);
+}
+
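+/*
+ * Edge interrupts can always be sampled; a level interrupt cannot be
+ * sampled again while it is queued on a vcpu, i.e. until the guest has
+ * EOIed it and the maintenance handling has cleared irq_queued.
+ */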
+static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
+{
+ return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+}
+
+static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
+{
+ return le32_to_cpu(*((u32 *)mmio->data)) & mask;
+}
+
+static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
+{
+ *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
+}
+
+/**
+ * vgic_reg_access - access vgic register
+ * @mmio: pointer to the data describing the mmio access
+ * @reg: pointer to the virtual backing of vgic distributor data
+ * @offset: least significant 2 bits used for word offset
+ * @mode: ACCESS_ mode (see defines above)
+ *
+ * Helper to make vgic register access easier using one of the access
+ * modes defined for vgic register access
+ * (read,raz,write-ignored,setbit,clearbit,write)
+ */
+static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
+ phys_addr_t offset, int mode)
+{
+ int word_offset = (offset & 3) * 8;
+ u32 mask = (1UL << (mmio->len * 8)) - 1;
+ u32 regval;
+
+ /*
+ * Any alignment fault should have been delivered to the guest
+ * directly (ARM ARM B3.12.7 "Prioritization of aborts").
+ */
+
+ if (reg) {
+ regval = *reg;
+ } else {
+ BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
+ regval = 0;
+ }
+
+ if (mmio->is_write) {
+ u32 data = mmio_data_read(mmio, mask) << word_offset;
+ switch (ACCESS_WRITE_MASK(mode)) {
+ case ACCESS_WRITE_IGNORED:
+ return;
+
+ case ACCESS_WRITE_SETBIT:
+ regval |= data;
+ break;
+
+ case ACCESS_WRITE_CLEARBIT:
+ regval &= ~data;
+ break;
+
+ case ACCESS_WRITE_VALUE:
+ regval = (regval & ~(mask << word_offset)) | data;
+ break;
+ }
+ *reg = regval;
+ } else {
+ switch (ACCESS_READ_MASK(mode)) {
+ case ACCESS_READ_RAZ:
+ regval = 0;
+ /* fall through */
+
+ case ACCESS_READ_VALUE:
+ mmio_data_write(mmio, mask, regval >> word_offset);
+ }
+ }
+}
+
+static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ u32 reg;
+ u32 word_offset = offset & 3;
+
+ switch (offset & ~3) {
+ case 0: /* GICD_CTLR */
+ reg = vcpu->kvm->arch.vgic.enabled;
+ vgic_reg_access(mmio, &reg, word_offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+ if (mmio->is_write) {
+ vcpu->kvm->arch.vgic.enabled = reg & 1;
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+ break;
+
+ case 4: /* GICD_TYPER */
+ reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
+ reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
+ vgic_reg_access(mmio, &reg, word_offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+ break;
+
+ case 8: /* GICD_IIDR */
+ reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+ vgic_reg_access(mmio, &reg, word_offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+ break;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ vgic_reg_access(mmio, NULL, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+ return false;
+}
+
+static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+ if (mmio->is_write) {
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+ if (mmio->is_write) {
+ if (offset < 4) /* Force SGI enabled */
+ *reg |= 0xffff;
+ vgic_retire_disabled_irqs(vcpu);
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *reg, orig;
+ u32 level_mask;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+ level_mask = (~(*reg));
+
+ /* Mark both level and edge triggered irqs as pending */
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
+ if (mmio->is_write) {
+ /* Set the soft-pending flag only for level-triggered irqs */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+ *reg &= level_mask;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *level_active;
+ u32 *reg, orig;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+ if (mmio->is_write) {
+ /* Re-set level triggered level-active interrupts */
+ level_active = vgic_bitmap_get_reg(&dist->irq_level,
+ vcpu->vcpu_id, offset);
+ reg = vgic_bitmap_get_reg(&dist->irq_pending,
+ vcpu->vcpu_id, offset);
+ *reg |= *level_active;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
+ /* Clear soft-pending flags */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+ return false;
+}
+
+#define GICD_ITARGETSR_SIZE 32
+#define GICD_CPUTARGETS_BITS 8
+#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
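+/* i.e. each 32-bit GICD_ITARGETSRn covers 4 IRQs, one 8-bit CPU-targets byte each */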
+static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ int i;
+ u32 val = 0;
+
+ irq -= VGIC_NR_PRIVATE_IRQS;
+
+ for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
+ val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
+
+ return val;
+}
+
+static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int i, c;
+ unsigned long *bmap;
+ u32 target;
+
+ irq -= VGIC_NR_PRIVATE_IRQS;
+
+ /*
+ * Pick the LSB in each byte. This ensures we target exactly
+ * one vcpu per IRQ. If the byte is null, assume we target
+ * CPU0.
+ */
+ for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
+ int shift = i * GICD_CPUTARGETS_BITS;
+ target = ffs((val >> shift) & 0xffU);
+ target = target ? (target - 1) : 0;
+ dist->irq_spi_cpu[irq + i] = target;
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
+ if (c == target)
+ set_bit(irq + i, bmap);
+ else
+ clear_bit(irq + i, bmap);
+ }
+ }
+}
+
+static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 reg;
+
+ /* We treat the banked interrupt targets as read-only */
+ if (offset < 32) {
+ u32 roreg = 1 << vcpu->vcpu_id;
+ roreg |= roreg << 8;
+ roreg |= roreg << 16;
+
+ vgic_reg_access(mmio, &roreg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+ return false;
+ }
+
+ reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
+ vgic_reg_access(mmio, &reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+ if (mmio->is_write) {
+ vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static u32 vgic_cfg_expand(u16 val)
+{
+ u32 res = 0;
+ int i;
+
+ /*
+ * Turn a 16bit value like abcd...mnop into a 32bit word
+ * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
+ */
+ for (i = 0; i < 16; i++)
+ res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
+
+ return res;
+}
+
+static u16 vgic_cfg_compress(u32 val)
+{
+ u16 res = 0;
+ int i;
+
+ /*
+ * Turn a 32bit word a0b0c0d0...m0n0o0p0 into a 16bit value like
+ * abcd...mnop, which is what we really care about.
+ */
+ for (i = 0; i < 16; i++)
+ res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
+
+ return res;
+}
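+
+/* e.g. vgic_cfg_expand(0x0003) == 0x0000000a and vgic_cfg_compress(0x000a) == 0x0003 */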
+
+/*
+ * The distributor uses 2 bits per IRQ for the CFG register, but the
+ * LSB is always 0. As such, we only keep the upper bit, and use the
+ * two functions above to compress/expand the bits.
+ */
+static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ u32 val;
+ u32 *reg;
+
+ reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+ vcpu->vcpu_id, offset >> 1);
+
+ if (offset & 4)
+ val = *reg >> 16;
+ else
+ val = *reg & 0xffff;
+
+ val = vgic_cfg_expand(val);
+ vgic_reg_access(mmio, &val, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+ if (mmio->is_write) {
+ if (offset < 8) {
+ *reg = ~0U; /* Force PPIs/SGIs to 1 */
+ return false;
+ }
+
+ val = vgic_cfg_compress(val);
+ if (offset & 4) {
+ *reg &= 0xffff;
+ *reg |= val << 16;
+ } else {
+ *reg &= 0xffff << 16;
+ *reg |= val;
+ }
+ }
+
+ return false;
+}
+
+static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ u32 reg;
+ vgic_reg_access(mmio, &reg, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
+ if (mmio->is_write) {
+ vgic_dispatch_sgi(vcpu, reg);
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
+ *
+ * Move any pending IRQs that have already been assigned to LRs back to the
+ * emulated distributor state so that the complete emulated state can be read
+ * from the main emulation structures without investigating the LRs.
+ *
+ * Note that IRQs in the active state in the LRs get their pending state moved
+ * to the distributor but the active state stays in the LRs, because we don't
+ * track the active state on the distributor side.
+ */
+static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ int vcpu_id = vcpu->vcpu_id;
+ int i;
+
+ for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+ struct vgic_lr lr = vgic_get_lr(vcpu, i);
+
+ /*
+ * There are three options for the state bits:
+ *
+ * 01: pending
+ * 10: active
+ * 11: pending and active
+ *
+ * If the LR holds only an active interrupt (not pending) then
+ * just leave it alone.
+ */
+ if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
+ continue;
+
+ /*
+ * Reestablish the pending state on the distributor and the
+ * CPU interface. It may have already been pending, but that
+ * is fine, then we are only setting a few bits that were
+ * already set.
+ */
+ vgic_dist_irq_set_pending(vcpu, lr.irq);
+ if (lr.irq < VGIC_NR_SGIS)
+ *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
+ lr.state &= ~LR_STATE_PENDING;
+ vgic_set_lr(vcpu, i, lr);
+
+ /*
+ * If there's no state left on the LR (it could still be
+ * active), then the LR does not hold any useful info and can
+ * be marked as free for other use.
+ */
+ if (!(lr.state & LR_STATE_MASK)) {
+ vgic_retire_lr(i, lr.irq, vcpu);
+ vgic_irq_clear_queued(vcpu, lr.irq);
+ }
+
+ /* Finally update the VGIC state. */
+ vgic_update_state(vcpu->kvm);
+ }
+}
+
+/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
+static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int sgi;
+ int min_sgi = (offset & ~0x3);
+ int max_sgi = min_sgi + 3;
+ int vcpu_id = vcpu->vcpu_id;
+ u32 reg = 0;
+
+ /* Copy source SGIs from distributor side */
+ for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+ int shift = 8 * (sgi - min_sgi);
+ reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
+ }
+
+ mmio_data_write(mmio, ~0, reg);
+ return false;
+}
+
+static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, bool set)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int sgi;
+ int min_sgi = (offset & ~0x3);
+ int max_sgi = min_sgi + 3;
+ int vcpu_id = vcpu->vcpu_id;
+ u32 reg;
+ bool updated = false;
+
+ reg = mmio_data_read(mmio, ~0);
+
+ /* Clear pending SGIs on the distributor */
+ for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+ u8 mask = reg >> (8 * (sgi - min_sgi));
+ u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
+ if (set) {
+ if ((*src & mask) != mask)
+ updated = true;
+ *src |= mask;
+ } else {
+ if (*src & mask)
+ updated = true;
+ *src &= ~mask;
+ }
+ }
+
+ if (updated)
+ vgic_update_state(vcpu->kvm);
+
+ return updated;
+}
+
+static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ if (!mmio->is_write)
+ return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+ else
+ return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
+}
+
+static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ if (!mmio->is_write)
+ return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+ else
+ return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
+}
+
+/*
+ * I would have liked to use the kvm_bus_io_*() API instead, but it
+ * cannot cope with banked registers (only the VM pointer is passed
+ * around, and we need the vcpu). One of these days, someone please
+ * fix it!
+ */
+struct mmio_range {
+ phys_addr_t base;
+ unsigned long len;
+ int bits_per_irq;
+ bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
+ phys_addr_t offset);
+};
+
+static const struct mmio_range vgic_dist_ranges[] = {
+ {
+ .base = GIC_DIST_CTRL,
+ .len = 12,
+ .bits_per_irq = 0,
+ .handle_mmio = handle_mmio_misc,
+ },
+ {
+ .base = GIC_DIST_IGROUP,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_DIST_ENABLE_SET,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_set_enable_reg,
+ },
+ {
+ .base = GIC_DIST_ENABLE_CLEAR,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_clear_enable_reg,
+ },
+ {
+ .base = GIC_DIST_PENDING_SET,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_set_pending_reg,
+ },
+ {
+ .base = GIC_DIST_PENDING_CLEAR,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_clear_pending_reg,
+ },
+ {
+ .base = GIC_DIST_ACTIVE_SET,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_DIST_ACTIVE_CLEAR,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_DIST_PRI,
+ .len = VGIC_MAX_IRQS,
+ .bits_per_irq = 8,
+ .handle_mmio = handle_mmio_priority_reg,
+ },
+ {
+ .base = GIC_DIST_TARGET,
+ .len = VGIC_MAX_IRQS,
+ .bits_per_irq = 8,
+ .handle_mmio = handle_mmio_target_reg,
+ },
+ {
+ .base = GIC_DIST_CONFIG,
+ .len = VGIC_MAX_IRQS / 4,
+ .bits_per_irq = 2,
+ .handle_mmio = handle_mmio_cfg_reg,
+ },
+ {
+ .base = GIC_DIST_SOFTINT,
+ .len = 4,
+ .handle_mmio = handle_mmio_sgi_reg,
+ },
+ {
+ .base = GIC_DIST_SGI_PENDING_CLEAR,
+ .len = VGIC_NR_SGIS,
+ .handle_mmio = handle_mmio_sgi_clear,
+ },
+ {
+ .base = GIC_DIST_SGI_PENDING_SET,
+ .len = VGIC_NR_SGIS,
+ .handle_mmio = handle_mmio_sgi_set,
+ },
+ {}
+};
+
+static const
+struct mmio_range *find_matching_range(const struct mmio_range *ranges,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ const struct mmio_range *r = ranges;
+
+ while (r->len) {
+ if (offset >= r->base &&
+ (offset + mmio->len) <= (r->base + r->len))
+ return r;
+ r++;
+ }
+
+ return NULL;
+}
+
+static bool vgic_validate_access(const struct vgic_dist *dist,
+ const struct mmio_range *range,
+ unsigned long offset)
+{
+ int irq;
+
+ if (!range->bits_per_irq)
+ return true; /* Not an irq-based access */
+
+ irq = offset * 8 / range->bits_per_irq;
+ if (irq >= dist->nr_irqs)
+ return false;
+
+ return true;
+}
+
+/**
+ * vgic_handle_mmio - handle an in-kernel MMIO access
+ * @vcpu: pointer to the vcpu performing the access
+ * @run: pointer to the kvm_run structure
+ * @mmio: pointer to the data describing the access
+ *
+ * returns true if the MMIO access has been performed in kernel space,
+ * and false if it needs to be emulated in user space.
+ */
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_exit_mmio *mmio)
+{
+ const struct mmio_range *range;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long base = dist->vgic_dist_base;
+ bool updated_state;
+ unsigned long offset;
+
+ if (!irqchip_in_kernel(vcpu->kvm) ||
+ mmio->phys_addr < base ||
+ (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
+ return false;
+
+ /* We don't support ldrd / strd or ldm / stm to the emulated vgic */
+ if (mmio->len > 4) {
+ kvm_inject_dabt(vcpu, mmio->phys_addr);
+ return true;
+ }
+
+ offset = mmio->phys_addr - base;
+ range = find_matching_range(vgic_dist_ranges, mmio, offset);
+ if (unlikely(!range || !range->handle_mmio)) {
+ pr_warn("Unhandled access %d %08llx %d\n",
+ mmio->is_write, mmio->phys_addr, mmio->len);
+ return false;
+ }
+
+ spin_lock(&vcpu->kvm->arch.vgic.lock);
+ offset = mmio->phys_addr - range->base - base;
+ if (vgic_validate_access(dist, range, offset)) {
+ updated_state = range->handle_mmio(vcpu, mmio, offset);
+ } else {
+ vgic_reg_access(mmio, NULL, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+ updated_state = false;
+ }
+ spin_unlock(&vcpu->kvm->arch.vgic.lock);
+ kvm_prepare_mmio(run, mmio);
+ kvm_handle_mmio_return(vcpu, run);
+
+ if (updated_state)
+ vgic_kick_vcpus(vcpu->kvm);
+
+ return true;
+}
+
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+ return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
+static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ int nrcpus = atomic_read(&kvm->online_vcpus);
+ u8 target_cpus;
+ int sgi, mode, c, vcpu_id;
+
+ vcpu_id = vcpu->vcpu_id;
+
+ sgi = reg & 0xf;
+ target_cpus = (reg >> 16) & 0xff;
+ mode = (reg >> 24) & 3;
+
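+ /* GICD_SGIR.TargetListFilter: 0 = use target list, 1 = all but self, 2 = self only */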
+ switch (mode) {
+ case 0:
+ if (!target_cpus)
+ return;
+ break;
+
+ case 1:
+ target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
+ break;
+
+ case 2:
+ target_cpus = 1 << vcpu_id;
+ break;
+ }
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (target_cpus & 1) {
+ /* Flag the SGI as pending */
+ vgic_dist_irq_set_pending(vcpu, sgi);
+ *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
+ kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
+ }
+
+ target_cpus >>= 1;
+ }
+}
+
+static int vgic_nr_shared_irqs(struct vgic_dist *dist)
+{
+ return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+}
+
+static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
+ unsigned long pending_private, pending_shared;
+ int nr_shared = vgic_nr_shared_irqs(dist);
+ int vcpu_id;
+
+ vcpu_id = vcpu->vcpu_id;
+ pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
+ pend_shared = vcpu->arch.vgic_cpu.pending_shared;
+
+ pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
+ enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
+ bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
+
+ pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
+ enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
+ bitmap_and(pend_shared, pending, enabled, nr_shared);
+ bitmap_and(pend_shared, pend_shared,
+ vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
+ nr_shared);
+
+ pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
+ pending_shared = find_first_bit(pend_shared, nr_shared);
+ return (pending_private < VGIC_NR_PRIVATE_IRQS ||
+ pending_shared < vgic_nr_shared_irqs(dist));
+}
+
+/*
+ * Update the interrupt state and determine which CPUs have pending
+ * interrupts. Must be called with distributor lock held.
+ */
+static void vgic_update_state(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int c;
+
+ if (!dist->enabled) {
+ set_bit(0, dist->irq_pending_on_cpu);
+ return;
+ }
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (compute_pending_for_cpu(vcpu)) {
+ pr_debug("CPU%d has pending interrupts\n", c);
+ set_bit(c, dist->irq_pending_on_cpu);
+ }
+ }
+}
+
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ return vgic_ops->get_lr(vcpu, lr);
+}
+
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr vlr)
+{
+ vgic_ops->set_lr(vcpu, lr, vlr);
+}
+
+static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr vlr)
+{
+ vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
+}
+
+static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_elrsr(vcpu);
+}
+
+static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_eisr(vcpu);
+}
+
+static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_interrupt_status(vcpu);
+}
+
+static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->enable_underflow(vcpu);
+}
+
+static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->disable_underflow(vcpu);
+}
+
+static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ vgic_ops->get_vmcr(vcpu, vmcr);
+}
+
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ vgic_ops->set_vmcr(vcpu, vmcr);
+}
+
+static inline void vgic_enable(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->enable(vcpu);
+}
+
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+
+ vlr.state = 0;
+ vgic_set_lr(vcpu, lr_nr, vlr);
+ clear_bit(lr_nr, vgic_cpu->lr_used);
+ vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+}
+
+/*
+ * An interrupt may have been disabled after being made pending on the
+ * CPU interface (the classic case is a timer running while we're
+ * rebooting the guest - the interrupt would kick as soon as the CPU
+ * interface gets enabled, with deadly consequences).
+ *
+ * The solution is to examine already active LRs, and check that the
+ * interrupt is still enabled. If not, just retire it.
+ */
+static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ int lr;
+
+ for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+
+ if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
+ vgic_retire_lr(lr, vlr.irq, vcpu);
+ if (vgic_irq_is_queued(vcpu, vlr.irq))
+ vgic_irq_clear_queued(vcpu, vlr.irq);
+ }
+ }
+}
+
+/*
+ * Queue an interrupt to a CPU virtual interface. Return true on success,
+ * or false if it wasn't possible to queue it.
+ */
+static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct vgic_lr vlr;
+ int lr;
+
+ /* Sanitize the input... */
+ BUG_ON(sgi_source_id & ~7);
+ BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
+ BUG_ON(irq >= dist->nr_irqs);
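+ /* the source is the 3-bit CPUID field of a GICv2 LR, hence the ~7 check above */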
+
+ kvm_debug("Queue IRQ%d\n", irq);
+
+ lr = vgic_cpu->vgic_irq_lr_map[irq];
+
+ /* Do we have an active interrupt for the same CPUID? */
+ if (lr != LR_EMPTY) {
+ vlr = vgic_get_lr(vcpu, lr);
+ if (vlr.source == sgi_source_id) {
+ kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
+ BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
+ vlr.state |= LR_STATE_PENDING;
+ vgic_set_lr(vcpu, lr, vlr);
+ return true;
+ }
+ }
+
+ /* Try to use another LR for this interrupt */
+ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
+ vgic->nr_lr);
+ if (lr >= vgic->nr_lr)
+ return false;
+
+ kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
+ vgic_cpu->vgic_irq_lr_map[irq] = lr;
+ set_bit(lr, vgic_cpu->lr_used);
+
+ vlr.irq = irq;
+ vlr.source = sgi_source_id;
+ vlr.state = LR_STATE_PENDING;
+ if (!vgic_irq_is_edge(vcpu, irq))
+ vlr.state |= LR_EOI_INT;
+
+ vgic_set_lr(vcpu, lr, vlr);
+
+ return true;
+}
+
+static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long sources;
+ int vcpu_id = vcpu->vcpu_id;
+ int c;
+
+ sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
+
+ for_each_set_bit(c, &sources, dist->nr_cpus) {
+ if (vgic_queue_irq(vcpu, c, irq))
+ clear_bit(c, &sources);
+ }
+
+ *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
+
+ /*
+ * If the sources bitmap has been cleared it means that we
+ * could queue all the SGIs onto link registers (see the
+ * clear_bit above), and therefore we are done with them in
+ * our emulated gic and can get rid of them.
+ */
+ if (!sources) {
+ vgic_dist_irq_clear_pending(vcpu, irq);
+ vgic_cpu_irq_clear(vcpu, irq);
+ return true;
+ }
+
+ return false;
+}
+
+static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
+{
+ if (!vgic_can_sample_irq(vcpu, irq))
+ return true; /* level interrupt, already queued */
+
+ if (vgic_queue_irq(vcpu, 0, irq)) {
+ if (vgic_irq_is_edge(vcpu, irq)) {
+ vgic_dist_irq_clear_pending(vcpu, irq);
+ vgic_cpu_irq_clear(vcpu, irq);
+ } else {
+ vgic_irq_set_queued(vcpu, irq);
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Fill the list registers with pending interrupts before running the
+ * guest.
+ */
+static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int i, vcpu_id;
+ int overflow = 0;
+
+ vcpu_id = vcpu->vcpu_id;
+
+ /*
+ * We may not have any pending interrupt, or the interrupts
+ * may have been serviced from another vcpu. In all cases,
+ * move along.
+ */
+ if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
+ pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
+ goto epilog;
+ }
+
+ /* SGIs */
+ for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
+ if (!vgic_queue_sgi(vcpu, i))
+ overflow = 1;
+ }
+
+ /* PPIs */
+ for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
+ if (!vgic_queue_hwirq(vcpu, i))
+ overflow = 1;
+ }
+
+ /* SPIs */
+ for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
+ if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
+ overflow = 1;
+ }
+
+epilog:
+ if (overflow) {
+ vgic_enable_underflow(vcpu);
+ } else {
+ vgic_disable_underflow(vcpu);
+ /*
+ * We're about to run this VCPU, and we've consumed
+ * everything the distributor had in store for
+ * us. Claim we don't have anything pending. We'll
+ * adjust that if needed while exiting.
+ */
+ clear_bit(vcpu_id, dist->irq_pending_on_cpu);
+ }
+}
+
+static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
+{
+ u32 status = vgic_get_interrupt_status(vcpu);
+ bool level_pending = false;
+
+ kvm_debug("STATUS = %08x\n", status);
+
+ if (status & INT_STATUS_EOI) {
+ /*
+ * Some level interrupts have been EOIed. Clear their
+ * active bit.
+ */
+ u64 eisr = vgic_get_eisr(vcpu);
+ unsigned long *eisr_ptr = (unsigned long *)&eisr;
+ int lr;
+
+ for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+ WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
+
+ vgic_irq_clear_queued(vcpu, vlr.irq);
+ WARN_ON(vlr.state & LR_STATE_MASK);
+ vlr.state = 0;
+ vgic_set_lr(vcpu, lr, vlr);
+
+ /*
+ * If the IRQ was EOIed it was also ACKed and we
+ * therefore assume we can clear the soft pending
+ * state (should it have been set) for this interrupt.
+ *
+ * Note: if the IRQ soft pending state was set after
+ * the IRQ was acked, it actually shouldn't be
+ * cleared, but we have no way of knowing that unless
+ * we start trapping ACKs when the soft-pending state
+ * is set.
+ */
+ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
+ /* Any additional pending interrupt? */
+ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+ vgic_cpu_irq_set(vcpu, vlr.irq);
+ level_pending = true;
+ } else {
+ vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+ vgic_cpu_irq_clear(vcpu, vlr.irq);
+ }
+
+ /*
+ * Despite being EOIed, the LR may not have
+ * been marked as empty.
+ */
+ vgic_sync_lr_elrsr(vcpu, lr, vlr);
+ }
+ }
+
+ if (status & INT_STATUS_UNDERFLOW)
+ vgic_disable_underflow(vcpu);
+
+ return level_pending;
+}
+
+/*
+ * Sync back the VGIC state after a guest run. The distributor lock is
+ * needed so we don't get preempted in the middle of the state processing.
+ */
+static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ u64 elrsr;
+ unsigned long *elrsr_ptr;
+ int lr, pending;
+ bool level_pending;
+
+ level_pending = vgic_process_maintenance(vcpu);
+ elrsr = vgic_get_elrsr(vcpu);
+ elrsr_ptr = (unsigned long *)&elrsr;
+
+ /* Clear mappings for empty LRs */
+ for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
+ struct vgic_lr vlr;
+
+ if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
+ continue;
+
+ vlr = vgic_get_lr(vcpu, lr);
+
+ BUG_ON(vlr.irq >= dist->nr_irqs);
+ vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
+ }
+
+ /* Check if we still have something up our sleeve... */
+ pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
+ if (level_pending || pending < vgic->nr_lr)
+ set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+}
+
+void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ spin_lock(&dist->lock);
+ __kvm_vgic_flush_hwstate(vcpu);
+ spin_unlock(&dist->lock);
+}
+
+void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ spin_lock(&dist->lock);
+ __kvm_vgic_sync_hwstate(vcpu);
+ spin_unlock(&dist->lock);
+}
+
+int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return 0;
+
+ return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+}
+
+static void vgic_kick_vcpus(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int c;
+
+ /*
+ * We've injected an interrupt, time to find out who deserves
+ * a good kick...
+ */
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (kvm_vgic_vcpu_pending_irq(vcpu))
+ kvm_vcpu_kick(vcpu);
+ }
+}
+
+static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
+{
+ int edge_triggered = vgic_irq_is_edge(vcpu, irq);
+
+ /*
+ * Only inject an interrupt if:
+ * - edge triggered and we have a rising edge
+ * - level triggered and we change level
+ */
+ if (edge_triggered) {
+ int state = vgic_dist_irq_is_pending(vcpu, irq);
+ return level > state;
+ } else {
+ int state = vgic_dist_irq_get_level(vcpu, irq);
+ return level != state;
+ }
+}
+
+static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+ unsigned int irq_num, bool level)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int edge_triggered, level_triggered;
+ int enabled;
+ bool ret = true;
+
+ spin_lock(&dist->lock);
+
+ vcpu = kvm_get_vcpu(kvm, cpuid);
+ edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
+ level_triggered = !edge_triggered;
+
+ if (!vgic_validate_injection(vcpu, irq_num, level)) {
+ ret = false;
+ goto out;
+ }
+
+ if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
+ cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
+ vcpu = kvm_get_vcpu(kvm, cpuid);
+ }
+
+ kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
+
+ if (level) {
+ if (level_triggered)
+ vgic_dist_irq_set_level(vcpu, irq_num);
+ vgic_dist_irq_set_pending(vcpu, irq_num);
+ } else {
+ if (level_triggered) {
+ vgic_dist_irq_clear_level(vcpu, irq_num);
+ if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ } else {
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ }
+ }
+
+ enabled = vgic_irq_is_enabled(vcpu, irq_num);
+
+ if (!enabled) {
+ ret = false;
+ goto out;
+ }
+
+ if (!vgic_can_sample_irq(vcpu, irq_num)) {
+ /*
+ * Level interrupt in progress, will be picked up
+ * when EOIed.
+ */
+ ret = false;
+ goto out;
+ }
+
+ if (level) {
+ vgic_cpu_irq_set(vcpu, irq_num);
+ set_bit(cpuid, dist->irq_pending_on_cpu);
+ }
+
+out:
+ spin_unlock(&dist->lock);
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
+ * @kvm: The VM structure pointer
+ * @cpuid: The CPU for PPIs
+ * @irq_num: The IRQ number that is assigned to the device
+ * @level: Edge-triggered: true: to trigger the interrupt
+ * false: to ignore the call
+ * Level-sensitive true: activates an interrupt
+ * false: deactivates an interrupt
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+ bool level)
+{
+ if (likely(vgic_initialized(kvm)) &&
+ vgic_update_irq_pending(kvm, cpuid, irq_num, level))
+ vgic_kick_vcpus(kvm);
+
+ return 0;
+}
+
+static irqreturn_t vgic_maintenance_handler(int irq, void *data)
+{
+ /*
+ * We cannot rely on the vgic maintenance interrupt to be
+ * delivered synchronously. This means we can only use it to
+ * exit the VM, and we perform the handling of EOIed
+ * interrupts on the exit path (see vgic_process_maintenance).
+ */
+ return IRQ_HANDLED;
+}
+
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ kfree(vgic_cpu->pending_shared);
+ kfree(vgic_cpu->vgic_irq_lr_map);
+ vgic_cpu->pending_shared = NULL;
+ vgic_cpu->vgic_irq_lr_map = NULL;
+}
+
+static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+ vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+ vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+
+ if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+ kvm_vgic_vcpu_destroy(vcpu);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
+ * @vcpu: pointer to the vcpu struct
+ *
+ * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
+ * this vcpu and enable the VGIC for this VCPU
+ */
+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int i;
+
+ for (i = 0; i < dist->nr_irqs; i++) {
+ if (i < VGIC_NR_PPIS)
+ vgic_bitmap_set_irq_val(&dist->irq_enabled,
+ vcpu->vcpu_id, i, 1);
+ if (i < VGIC_NR_PRIVATE_IRQS)
+ vgic_bitmap_set_irq_val(&dist->irq_cfg,
+ vcpu->vcpu_id, i, VGIC_CFG_EDGE);
+
+ vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
+ }
+
+ /*
+ * Store the number of LRs per vcpu, so we don't have to go
+ * all the way to the distributor structure to find out. Only
+ * assembly code should use this one.
+ */
+ vgic_cpu->nr_lr = vgic->nr_lr;
+
+ vgic_enable(vcpu);
+}
+
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_destroy(vcpu);
+
+ vgic_free_bitmap(&dist->irq_enabled);
+ vgic_free_bitmap(&dist->irq_level);
+ vgic_free_bitmap(&dist->irq_pending);
+ vgic_free_bitmap(&dist->irq_soft_pend);
+ vgic_free_bitmap(&dist->irq_queued);
+ vgic_free_bitmap(&dist->irq_cfg);
+ vgic_free_bytemap(&dist->irq_priority);
+ if (dist->irq_spi_target) {
+ for (i = 0; i < dist->nr_cpus; i++)
+ vgic_free_bitmap(&dist->irq_spi_target[i]);
+ }
+ kfree(dist->irq_sgi_sources);
+ kfree(dist->irq_spi_cpu);
+ kfree(dist->irq_spi_target);
+ kfree(dist->irq_pending_on_cpu);
+ dist->irq_sgi_sources = NULL;
+ dist->irq_spi_cpu = NULL;
+ dist->irq_spi_target = NULL;
+ dist->irq_pending_on_cpu = NULL;
+}
+
+/*
+ * Allocate and initialize the various data structures. Must be called
+ * with kvm->lock held!
+ */
+static int vgic_init_maps(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int nr_cpus, nr_irqs;
+ int ret, i;
+
+ if (dist->nr_cpus) /* Already allocated */
+ return 0;
+
+ nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+ if (!nr_cpus) /* No vcpus? Can't be good... */
+ return -EINVAL;
+
+ /*
+ * If nobody configured the number of interrupts, use the
+ * legacy one.
+ */
+ if (!dist->nr_irqs)
+ dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
+
+ nr_irqs = dist->nr_irqs;
+
+ ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
+ ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
+
+ if (ret)
+ goto out;
+
+ dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
+ dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
+ dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
+ GFP_KERNEL);
+ dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+ GFP_KERNEL);
+ if (!dist->irq_sgi_sources ||
+ !dist->irq_spi_cpu ||
+ !dist->irq_spi_target ||
+ !dist->irq_pending_on_cpu) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
+ nr_cpus, nr_irqs);
+
+ if (ret)
+ goto out;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
+ if (ret) {
+ kvm_err("VGIC: Failed to allocate vcpu memory\n");
+ break;
+ }
+ }
+
+ for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+ vgic_set_target_reg(kvm, 0, i);
+
+out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * @kvm: pointer to the kvm struct
+ *
+ * Map the virtual CPU interface into the VM before running any VCPUs. We
+ * can't do this at creation time, because user space must first set the
+ * virtual CPU interface address in the guest physical address space. Also
+ * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ */
+int kvm_vgic_init(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int ret = 0, i;
+
+ if (!irqchip_in_kernel(kvm))
+ return 0;
+
+ mutex_lock(&kvm->lock);
+
+ if (vgic_initialized(kvm))
+ goto out;
+
+ if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
+ IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+ kvm_err("Need to set vgic cpu and dist addresses first\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ ret = vgic_init_maps(kvm);
+ if (ret) {
+ kvm_err("Unable to allocate maps\n");
+ goto out;
+ }
+
+ ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+ vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+ if (ret) {
+ kvm_err("Unable to remap VGIC CPU to VCPU\n");
+ goto out;
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_init(vcpu);
+
+ kvm->arch.vgic.ready = true;
+out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+int kvm_vgic_create(struct kvm *kvm)
+{
+ int i, vcpu_lock_idx = -1, ret = 0;
+ struct kvm_vcpu *vcpu;
+
+ mutex_lock(&kvm->lock);
+
+ if (kvm->arch.vgic.vctrl_base) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ /*
+ * Any time a vcpu is run, vcpu_load is called which tries to grab the
+ * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
+ * that no other VCPUs are run while we create the vgic.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!mutex_trylock(&vcpu->mutex))
+ goto out_unlock;
+ vcpu_lock_idx = i;
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.has_run_once) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ }
+
+ spin_lock_init(&kvm->arch.vgic.lock);
+ kvm->arch.vgic.in_kernel = true;
+ kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
+ kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
+ kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+
+out_unlock:
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
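Note: the trylock loop in kvm_vgic_create() is the lock-all-VCPUs pattern: taking every vcpu->mutex guarantees no VCPU can pass vcpu_load() while the vgic is instantiated, and vcpu_lock_idx records how far to unwind on failure. A condensed sketch of just that pattern (hypothetical helper, same primitives as above):

        static int lock_all_vcpus(struct kvm *kvm)
        {
                struct kvm_vcpu *vcpu;
                int i, locked = -1;

                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (!mutex_trylock(&vcpu->mutex))
                                goto unwind;
                        locked = i;
                }
                return 0;

        unwind:
                for (; locked >= 0; locked--)
                        mutex_unlock(&kvm_get_vcpu(kvm, locked)->mutex);
                return -EBUSY;
        }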
+static int vgic_ioaddr_overlap(struct kvm *kvm)
+{
+ phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
+ phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
+
+ if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
+ return 0;
+ if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
+ (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
+ return -EBUSY;
+ return 0;
+}
+
+static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
+ phys_addr_t addr, phys_addr_t size)
+{
+ int ret;
+
+ if (addr & ~KVM_PHYS_MASK)
+ return -E2BIG;
+
+ if (addr & (SZ_4K - 1))
+ return -EINVAL;
+
+ if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
+ return -EEXIST;
+ if (addr + size < addr)
+ return -EINVAL;
+
+ *ioaddr = addr;
+ ret = vgic_ioaddr_overlap(kvm);
+ if (ret)
+ *ioaddr = VGIC_ADDR_UNDEF;
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_addr - set or get vgic VM base addresses
+ * @kvm: pointer to the vm struct
+ * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
+ * @addr: pointer to address value
+ * @write: if true set the address in the VM address space, if false read the
+ * address
+ *
+ * Set or get the vgic base addresses for the distributor and the virtual CPU
+ * interface in the VM physical address space. These addresses are properties
+ * of the emulated core/SoC and therefore user space initially knows this
+ * information.
+ */
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
+{
+ int r = 0;
+ struct vgic_dist *vgic = &kvm->arch.vgic;
+
+ mutex_lock(&kvm->lock);
+ switch (type) {
+ case KVM_VGIC_V2_ADDR_TYPE_DIST:
+ if (write) {
+ r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
+ *addr, KVM_VGIC_V2_DIST_SIZE);
+ } else {
+ *addr = vgic->vgic_dist_base;
+ }
+ break;
+ case KVM_VGIC_V2_ADDR_TYPE_CPU:
+ if (write) {
+ r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
+ *addr, KVM_VGIC_V2_CPU_SIZE);
+ } else {
+ *addr = vgic->vgic_cpu_base;
+ }
+ break;
+ default:
+ r = -ENODEV;
+ }
+
+ mutex_unlock(&kvm->lock);
+ return r;
+}
+
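Note: user space reaches kvm_vgic_addr() through the device-attribute ioctls handled by vgic_set_attr()/vgic_get_attr() below. A hedged userspace sketch — `vgic_fd` is assumed to come from KVM_CREATE_DEVICE, and the base address is purely illustrative:

        #include <err.h>
        #include <stdint.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        uint64_t dist_base = 0x08000000;        /* illustrative, 4K-aligned */
        struct kvm_device_attr attr = {
                .group = KVM_DEV_ARM_VGIC_GRP_ADDR,
                .attr  = KVM_VGIC_V2_ADDR_TYPE_DIST,
                .addr  = (uint64_t)(uintptr_t)&dist_base,
        };

        if (ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
                err(1, "KVM_SET_DEVICE_ATTR");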
+static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ bool updated = false;
+ struct vgic_vmcr vmcr;
+ u32 *vmcr_field;
+ u32 reg;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+
+ switch (offset & ~0x3) {
+ case GIC_CPU_CTRL:
+ vmcr_field = &vmcr.ctlr;
+ break;
+ case GIC_CPU_PRIMASK:
+ vmcr_field = &vmcr.pmr;
+ break;
+ case GIC_CPU_BINPOINT:
+ vmcr_field = &vmcr.bpr;
+ break;
+ case GIC_CPU_ALIAS_BINPOINT:
+ vmcr_field = &vmcr.abpr;
+ break;
+ default:
+ BUG();
+ }
+
+ if (!mmio->is_write) {
+ reg = *vmcr_field;
+ mmio_data_write(mmio, ~0, reg);
+ } else {
+ reg = mmio_data_read(mmio, ~0);
+ if (reg != *vmcr_field) {
+ *vmcr_field = reg;
+ vgic_set_vmcr(vcpu, &vmcr);
+ updated = true;
+ }
+ }
+ return updated;
+}
+
+static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
+}
+
+static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 reg;
+
+ if (mmio->is_write)
+ return false;
+
+ /* GICC_IIDR */
+ reg = (PRODUCT_ID_KVM << 20) |
+ (GICC_ARCH_VERSION_V2 << 16) |
+ (IMPLEMENTER_ARM << 0);
+ mmio_data_write(mmio, ~0, reg);
+ return false;
+}
+
+/*
+ * CPU Interface Register accesses - these are not accessed by the VM, but by
+ * user space for saving and restoring VGIC state.
+ */
+static const struct mmio_range vgic_cpu_ranges[] = {
+ {
+ .base = GIC_CPU_CTRL,
+ .len = 12,
+ .handle_mmio = handle_cpu_mmio_misc,
+ },
+ {
+ .base = GIC_CPU_ALIAS_BINPOINT,
+ .len = 4,
+ .handle_mmio = handle_mmio_abpr,
+ },
+ {
+ .base = GIC_CPU_ACTIVEPRIO,
+ .len = 16,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_CPU_IDENT,
+ .len = 4,
+ .handle_mmio = handle_cpu_mmio_ident,
+ },
+};
+
+static int vgic_attr_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ u32 *reg, bool is_write)
+{
+ const struct mmio_range *r = NULL, *ranges;
+ phys_addr_t offset;
+ int ret, cpuid, c;
+ struct kvm_vcpu *vcpu, *tmp_vcpu;
+ struct vgic_dist *vgic;
+ struct kvm_exit_mmio mmio;
+
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
+ KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+
+ mutex_lock(&dev->kvm->lock);
+
+ ret = vgic_init_maps(dev->kvm);
+ if (ret)
+ goto out;
+
+ if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+ vgic = &dev->kvm->arch.vgic;
+
+ mmio.len = 4;
+ mmio.is_write = is_write;
+ if (is_write)
+ mmio_data_write(&mmio, ~0, *reg);
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ mmio.phys_addr = vgic->vgic_dist_base + offset;
+ ranges = vgic_dist_ranges;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ mmio.phys_addr = vgic->vgic_cpu_base + offset;
+ ranges = vgic_cpu_ranges;
+ break;
+ default:
+ BUG();
+ }
+ r = find_matching_range(ranges, &mmio, offset);
+
+ if (unlikely(!r || !r->handle_mmio)) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+ spin_lock(&vgic->lock);
+
+ /*
+ * Ensure that no other VCPU is running by checking the vcpu->cpu
+	 * field. If no other VCPUs are running, we can safely access the VGIC
+	 * state, because even if another VCPU is run after this point, that
+ * VCPU will not touch the vgic state, because it will block on
+ * getting the vgic->lock in kvm_vgic_sync_hwstate().
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
+ if (unlikely(tmp_vcpu->cpu != -1)) {
+ ret = -EBUSY;
+ goto out_vgic_unlock;
+ }
+ }
+
+ /*
+ * Move all pending IRQs from the LRs on all VCPUs so the pending
+ * state can be properly represented in the register state accessible
+ * through this API.
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
+ vgic_unqueue_irqs(tmp_vcpu);
+
+ offset -= r->base;
+ r->handle_mmio(vcpu, &mmio, offset);
+
+ if (!is_write)
+ *reg = mmio_data_read(&mmio, ~0);
+
+ ret = 0;
+out_vgic_unlock:
+ spin_unlock(&vgic->lock);
+out:
+ mutex_unlock(&dev->kvm->lock);
+ return ret;
+}
+
+static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
+ if (copy_from_user(&addr, uaddr, sizeof(addr)))
+ return -EFAULT;
+
+ r = kvm_vgic_addr(dev->kvm, type, &addr, true);
+ return (r == -ENODEV) ? -ENXIO : r;
+ }
+
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 reg;
+
+ if (get_user(reg, uaddr))
+ return -EFAULT;
+
+ return vgic_attr_regs_access(dev, attr, &reg, true);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 val;
+ int ret = 0;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ /*
+ * We require:
+ * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
+ * - at most 1024 interrupts
+ * - a multiple of 32 interrupts
+ */
+ if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
+ val > VGIC_MAX_IRQS ||
+ (val & 31))
+ return -EINVAL;
+
+ mutex_lock(&dev->kvm->lock);
+
+ if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+ ret = -EBUSY;
+ else
+ dev->kvm->arch.vgic.nr_irqs = val;
+
+ mutex_unlock(&dev->kvm->lock);
+
+ return ret;
+ }
+
+ }
+
+ return -ENXIO;
+}
+
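Note: concretely, with VGIC_NR_PRIVATE_IRQS at 32 (the 16 SGIs plus 16 PPIs mentioned in the comment), the smallest value this check accepts is 64, and 96, 128, and so on up to 1024 pass as well, while 48 (fewer than 32 SPIs) or 100 (not a multiple of 32) fail with -EINVAL.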
+static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
+ r = kvm_vgic_addr(dev->kvm, type, &addr, false);
+ if (r)
+ return (r == -ENODEV) ? -ENXIO : r;
+
+ if (copy_to_user(uaddr, &addr, sizeof(addr)))
+ return -EFAULT;
+ break;
+ }
+
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 reg = 0;
+
+ r = vgic_attr_regs_access(dev, attr, &reg, false);
+ if (r)
+ return r;
+ r = put_user(reg, uaddr);
+ break;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
+ break;
+ }
+
+ }
+
+ return r;
+}
+
+static int vgic_has_attr_regs(const struct mmio_range *ranges,
+ phys_addr_t offset)
+{
+ struct kvm_exit_mmio dev_attr_mmio;
+
+ dev_attr_mmio.len = 4;
+ if (find_matching_range(ranges, &dev_attr_mmio, offset))
+ return 0;
+ else
+ return -ENXIO;
+}
+
+static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ phys_addr_t offset;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR:
+ switch (attr->attr) {
+ case KVM_VGIC_V2_ADDR_TYPE_DIST:
+ case KVM_VGIC_V2_ADDR_TYPE_CPU:
+ return 0;
+ }
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ return vgic_has_attr_regs(vgic_dist_ranges, offset);
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+ return 0;
+ }
+ return -ENXIO;
+}
+
+static void vgic_destroy(struct kvm_device *dev)
+{
+ kfree(dev);
+}
+
+static int vgic_create(struct kvm_device *dev, u32 type)
+{
+ return kvm_vgic_create(dev->kvm);
+}
+
+static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+ .name = "kvm-arm-vgic",
+ .create = vgic_create,
+ .destroy = vgic_destroy,
+ .set_attr = vgic_set_attr,
+ .get_attr = vgic_get_attr,
+ .has_attr = vgic_has_attr,
+};
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+ enable_percpu_irq(vgic->maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *cpu)
+{
+ switch (action) {
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ vgic_init_maintenance_interrupt(NULL);
+ break;
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ disable_percpu_irq(vgic->maint_irq);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+ .notifier_call = vgic_cpu_notify,
+};
+
+static const struct of_device_id vgic_ids[] = {
+ { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+ { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+ {},
+};
+
+int kvm_vgic_hyp_init(void)
+{
+ const struct of_device_id *matched_id;
+	int (*vgic_probe)(struct device_node *, const struct vgic_ops **,
+ const struct vgic_params **);
+ struct device_node *vgic_node;
+ int ret;
+
+ vgic_node = of_find_matching_node_and_match(NULL,
+ vgic_ids, &matched_id);
+ if (!vgic_node) {
+ kvm_err("error: no compatible GIC node found\n");
+ return -ENODEV;
+ }
+
+ vgic_probe = matched_id->data;
+ ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+ if (ret)
+ return ret;
+
+ ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
+ "vgic", kvm_get_running_vcpus());
+ if (ret) {
+ kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+ return ret;
+ }
+
+ ret = register_cpu_notifier(&vgic_cpu_nb);
+ if (ret) {
+ kvm_err("Cannot register vgic CPU notifier\n");
+ goto out_free_irq;
+ }
+
+	/* Callback into arch code for setup */
+ vgic_arch_setup(vgic);
+
+ on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+ return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+ KVM_DEV_TYPE_ARM_VGIC_V2);
+
+out_free_irq:
+ free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
+ return ret;
+}
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index ea475cd03511..d6a3d0993d88 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
#include "async_pf.h"
#include <trace/events/kvm.h>
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
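Note: these two stubs choose where the "page present" notification fires: with CONFIG_KVM_ASYNC_PF_SYNC it runs synchronously from the workqueue in async_pf_execute() below, otherwise from the VCPU's own completion path in kvm_check_async_pf_completion(). (The synchronous variant appears aimed at architectures, s390 in particular, that inject the notification from the worker; that attribution is inferred from the config name, not stated in this hunk.)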
static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void)
@@ -56,7 +71,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
static void async_pf_execute(struct work_struct *work)
{
- struct page *page = NULL;
struct kvm_async_pf *apf =
container_of(work, struct kvm_async_pf, work);
struct mm_struct *mm = apf->mm;
@@ -66,16 +80,13 @@ static void async_pf_execute(struct work_struct *work)
might_sleep();
- use_mm(mm);
down_read(&mm->mmap_sem);
- get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+ get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
- unuse_mm(mm);
+ kvm_async_page_present_sync(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
- apf->page = page;
- apf->done = true;
spin_unlock(&vcpu->async_pf.lock);
/*
@@ -83,12 +94,12 @@ static void async_pf_execute(struct work_struct *work)
* this point
*/
- trace_kvm_async_pf_completed(addr, page, gva);
+ trace_kvm_async_pf_completed(addr, gva);
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
- mmdrop(mm);
+ mmput(mm);
kvm_put_kvm(vcpu->kvm);
}
@@ -99,10 +110,17 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
struct kvm_async_pf *work =
list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue);
- cancel_work_sync(&work->work);
list_del(&work->queue);
- if (!work->done) /* work was canceled */
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ flush_work(&work->work);
+#else
+ if (cancel_work_sync(&work->work)) {
+ mmput(work->mm);
+ kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
kmem_cache_free(async_pf_cache, work);
+ }
+#endif
}
spin_lock(&vcpu->async_pf.lock);
@@ -111,8 +129,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
list_entry(vcpu->async_pf.done.next,
typeof(*work), link);
list_del(&work->link);
- if (!is_error_page(work->page))
- kvm_release_page_clean(work->page);
kmem_cache_free(async_pf_cache, work);
}
spin_unlock(&vcpu->async_pf.lock);
@@ -132,19 +148,16 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
list_del(&work->link);
spin_unlock(&vcpu->async_pf.lock);
- if (work->page)
- kvm_arch_async_page_ready(vcpu, work);
- kvm_arch_async_page_present(vcpu, work);
+ kvm_arch_async_page_ready(vcpu, work);
+ kvm_async_page_present_async(vcpu, work);
list_del(&work->queue);
vcpu->async_pf.queued--;
- if (!is_error_page(work->page))
- kvm_release_page_clean(work->page);
kmem_cache_free(async_pf_cache, work);
}
}
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
@@ -162,14 +175,13 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
if (!work)
return 0;
- work->page = NULL;
- work->done = false;
+ work->wakeup_all = false;
work->vcpu = vcpu;
work->gva = gva;
- work->addr = gfn_to_hva(vcpu->kvm, gfn);
+ work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
- atomic_inc(&work->mm->mm_count);
+ atomic_inc(&work->mm->mm_users);
kvm_get_kvm(work->vcpu->kvm);
/* this can't really happen otherwise gfn_to_pfn_async
@@ -187,7 +199,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
return 1;
retry_sync:
kvm_put_kvm(work->vcpu->kvm);
- mmdrop(work->mm);
+ mmput(work->mm);
kmem_cache_free(async_pf_cache, work);
return 0;
}
@@ -203,7 +215,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
if (!work)
return -ENOMEM;
- work->page = KVM_ERR_PTR_BAD_PAGE;
+ work->wakeup_all = true;
INIT_LIST_HEAD(&work->queue); /* for list_del to work */
spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 64ee720b75c7..71ed39941b9c 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,11 +31,14 @@
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
+#include <linux/srcu.h>
#include <linux/slab.h>
+#include <linux/seqlock.h>
+#include <trace/events/kvm.h>
#include "iodev.h"
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
/*
* --------------------------------------------------------------------
* irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -74,7 +77,8 @@ struct _irqfd {
struct kvm *kvm;
wait_queue_t wait;
/* Update side is protected by irqfds.lock */
- struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+ struct kvm_kernel_irq_routing_entry irq_entry;
+ seqcount_t irq_entry_sc;
/* Used for level IRQ fast-path */
int gsi;
struct work_struct inject;
@@ -118,19 +122,22 @@ static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
struct _irqfd_resampler *resampler;
+ struct kvm *kvm;
struct _irqfd *irqfd;
+ int idx;
resampler = container_of(kian, struct _irqfd_resampler, notifier);
+ kvm = resampler->kvm;
- kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
resampler->notifier.gsi, 0, false);
- rcu_read_lock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
eventfd_signal(irqfd->resamplefd, 1);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
static void
@@ -142,7 +149,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
mutex_lock(&kvm->irqfds.resampler_lock);
list_del_rcu(&irqfd->resampler_link);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
if (list_empty(&resampler->list)) {
list_del(&resampler->link);
@@ -219,19 +226,24 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
unsigned long flags = (unsigned long)key;
- struct kvm_kernel_irq_routing_entry *irq;
+ struct kvm_kernel_irq_routing_entry irq;
struct kvm *kvm = irqfd->kvm;
+ unsigned seq;
+ int idx;
if (flags & POLLIN) {
- rcu_read_lock();
- irq = rcu_dereference(irqfd->irq_entry);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ do {
+ seq = read_seqcount_begin(&irqfd->irq_entry_sc);
+ irq = irqfd->irq_entry;
+ } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
/* An event has been signaled, inject an interrupt */
- if (irq)
- kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+ if (irq.type == KVM_IRQ_ROUTING_MSI)
+ kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
false);
else
schedule_work(&irqfd->inject);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
if (flags & POLLHUP) {
@@ -267,34 +279,37 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
}
/* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
- struct kvm_irq_routing_table *irq_rt)
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
{
struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+ int i, n_entries;
- if (irqfd->gsi >= irq_rt->nr_rt_entries) {
- rcu_assign_pointer(irqfd->irq_entry, NULL);
- return;
- }
+ n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
+
+ write_seqcount_begin(&irqfd->irq_entry_sc);
+
+ irqfd->irq_entry.type = 0;
- hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
+ e = entries;
+ for (i = 0; i < n_entries; ++i, ++e) {
/* Only fast-path MSI. */
if (e->type == KVM_IRQ_ROUTING_MSI)
- rcu_assign_pointer(irqfd->irq_entry, e);
- else
- rcu_assign_pointer(irqfd->irq_entry, NULL);
+ irqfd->irq_entry = *e;
}
+
+ write_seqcount_end(&irqfd->irq_entry_sc);
}
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
- struct kvm_irq_routing_table *irq_rt;
struct _irqfd *irqfd, *tmp;
struct file *file = NULL;
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
int ret;
unsigned int events;
+ int idx;
irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
@@ -305,6 +320,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
INIT_LIST_HEAD(&irqfd->list);
INIT_WORK(&irqfd->inject, irqfd_inject);
INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+ seqcount_init(&irqfd->irq_entry_sc);
file = eventfd_fget(args->fd);
if (IS_ERR(file)) {
@@ -363,7 +379,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
}
list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
mutex_unlock(&kvm->irqfds.resampler_lock);
}
@@ -387,9 +403,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
goto fail;
}
- irq_rt = rcu_dereference_protected(kvm->irq_routing,
- lockdep_is_held(&kvm->irqfds.lock));
- irqfd_update(kvm, irqfd, irq_rt);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irqfd_update(kvm, irqfd);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
events = file->f_op->poll(file, &irqfd->pt);
@@ -428,12 +444,73 @@ fail:
kfree(irqfd);
return ret;
}
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+ struct kvm_irq_ack_notifier *kian;
+ int gsi, idx;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+ if (gsi != -1)
+ hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+ link)
+ if (kian->gsi == gsi) {
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return true;
+ }
+
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+ struct kvm_irq_ack_notifier *kian;
+ int gsi, idx;
+
+ trace_kvm_ack_irq(irqchip, pin);
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+ if (gsi != -1)
+ hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+ link)
+ if (kian->gsi == gsi)
+ kian->irq_acked(kian);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ mutex_lock(&kvm->irq_lock);
+ hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+ mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+ kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ mutex_lock(&kvm->irq_lock);
+ hlist_del_init_rcu(&kian->link);
+ mutex_unlock(&kvm->irq_lock);
+ synchronize_srcu(&kvm->irq_srcu);
+#ifdef __KVM_HAVE_IOAPIC
+ kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
#endif
void
kvm_eventfd_init(struct kvm *kvm)
{
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
spin_lock_init(&kvm->irqfds.lock);
INIT_LIST_HEAD(&kvm->irqfds.items);
INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -442,7 +519,7 @@ kvm_eventfd_init(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->ioeventfds);
}
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
/*
* shutdown any irqfd's that match fd+gsi
*/
@@ -461,14 +538,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
/*
- * This rcu_assign_pointer is needed for when
+ * This clearing of irq_entry.type is needed for when
* another thread calls kvm_irq_routing_update before
* we flush workqueue below (we synchronize with
* kvm_irq_routing_update using irqfds.lock).
- * It is paired with synchronize_rcu done by caller
- * of that function.
*/
- rcu_assign_pointer(irqfd->irq_entry, NULL);
+ write_seqcount_begin(&irqfd->irq_entry_sc);
+ irqfd->irq_entry.type = 0;
+ write_seqcount_end(&irqfd->irq_entry_sc);
irqfd_deactivate(irqfd);
}
}
@@ -523,20 +600,17 @@ kvm_irqfd_release(struct kvm *kvm)
}
/*
- * Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Take note of a change in irq routing.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
*/
-void kvm_irq_routing_update(struct kvm *kvm,
- struct kvm_irq_routing_table *irq_rt)
+void kvm_irq_routing_update(struct kvm *kvm)
{
struct _irqfd *irqfd;
spin_lock_irq(&kvm->irqfds.lock);
- rcu_assign_pointer(kvm->irq_routing, irq_rt);
-
list_for_each_entry(irqfd, &kvm->irqfds.items, list)
- irqfd_update(kvm, irqfd, irq_rt);
+ irqfd_update(kvm, irqfd);
spin_unlock_irq(&kvm->irqfds.lock);
}
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 5eaf18f90e83..b47541dd798f 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -519,7 +519,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
return 0;
}
-void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{
int i;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 615d8c995c3c..90d43e95dcf8 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -91,7 +91,6 @@ void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
int level, bool line_status);
void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
-void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, unsigned long *dest_map);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index a650aa48c786..36f4e82c6b24 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -203,11 +203,7 @@ int kvm_assign_device(struct kvm *kvm,
pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
- printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
- assigned_dev->host_segnr,
- assigned_dev->host_busnr,
- PCI_SLOT(assigned_dev->host_devfn),
- PCI_FUNC(assigned_dev->host_devfn));
+ dev_info(&pdev->dev, "kvm assign device\n");
return 0;
out_unmap:
@@ -233,11 +229,7 @@ int kvm_deassign_device(struct kvm *kvm,
pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
- printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
- assigned_dev->host_segnr,
- assigned_dev->host_busnr,
- PCI_SLOT(assigned_dev->host_devfn),
- PCI_FUNC(assigned_dev->host_devfn));
+ dev_info(&pdev->dev, "kvm deassign device\n");
return 0;
}
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..963b8995a9e8 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,9 +160,10 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
*/
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
{
+ struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
struct kvm_kernel_irq_routing_entry *e;
int ret = -EINVAL;
- struct kvm_irq_routing_table *irq_rt;
+ int idx;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -174,17 +175,15 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
* Since there's no easy way to do this, we only support injecting MSI
* which is limited to 1:1 GSI mapping.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
- if (irq < irq_rt->nr_rt_entries)
- hlist_for_each_entry(e, &irq_rt->map[irq], link) {
- if (likely(e->type == KVM_IRQ_ROUTING_MSI))
- ret = kvm_set_msi_inatomic(e, kvm);
- else
- ret = -EWOULDBLOCK;
- break;
- }
- rcu_read_unlock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
+ e = &entries[0];
+ if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+ ret = kvm_set_msi_inatomic(e, kvm);
+ else
+ ret = -EWOULDBLOCK;
+ }
+ srcu_read_unlock(&kvm->irq_srcu, idx);
return ret;
}
@@ -253,26 +252,25 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
mutex_lock(&kvm->irq_lock);
hlist_del_rcu(&kimn->link);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
}
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask)
{
struct kvm_irq_mask_notifier *kimn;
- int gsi;
+ int idx, gsi;
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
if (gsi != -1)
hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
if (kimn->irq == gsi)
kimn->func(kimn, mask);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
- struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
@@ -303,7 +301,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
e->irqchip.pin = ue->u.irqchip.pin + delta;
if (e->irqchip.pin >= max_pin)
goto out;
- rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -322,13 +319,13 @@ out:
#define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
- .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
+ .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
#ifdef CONFIG_X86
# define PIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
- .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
+ .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
# define ROUTING_ENTRY2(irq) \
IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
#else
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..7f256f31df10 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,69 +26,47 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
+#include <linux/srcu.h>
#include <linux/export.h>
#include <trace/events/kvm.h>
#include "irq.h"
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
- struct kvm_irq_ack_notifier *kian;
- int gsi;
-
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
- if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
- if (kian->gsi == gsi) {
- rcu_read_unlock();
- return true;
- }
-
- rcu_read_unlock();
-
- return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+struct kvm_irq_routing_table {
+ int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+ struct kvm_kernel_irq_routing_entry *rt_entries;
+ u32 nr_rt_entries;
+ /*
+ * Array indexed by gsi. Each entry contains list of irq chips
+ * the gsi is connected to.
+ */
+ struct hlist_head map[0];
+};
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+int kvm_irq_map_gsi(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
- struct kvm_irq_ack_notifier *kian;
- int gsi;
-
- trace_kvm_ack_irq(irqchip, pin);
-
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
- if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
- if (kian->gsi == gsi)
- kian->irq_acked(kian);
- rcu_read_unlock();
-}
+ struct kvm_irq_routing_table *irq_rt;
+ struct kvm_kernel_irq_routing_entry *e;
+ int n = 0;
+
+ irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
+ lockdep_is_held(&kvm->irq_lock));
+ if (gsi < irq_rt->nr_rt_entries) {
+ hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
+ entries[n] = *e;
+ ++n;
+ }
+ }
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
- struct kvm_irq_ack_notifier *kian)
-{
- mutex_lock(&kvm->irq_lock);
- hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
- mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
- kvm_vcpu_request_scan_ioapic(kvm);
-#endif
+ return n;
}
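Note: callers copy routing entries out under SRCU instead of holding references into the table, which is exactly the shape kvm_set_irq() takes below:

        struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
        int idx, n;

        idx = srcu_read_lock(&kvm->irq_srcu);
        n = kvm_irq_map_gsi(kvm, entries, gsi);
        srcu_read_unlock(&kvm->irq_srcu, idx);
        /* entries[0..n-1] stay valid private copies after the unlock */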
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
- struct kvm_irq_ack_notifier *kian)
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
- mutex_lock(&kvm->irq_lock);
- hlist_del_init_rcu(&kian->link);
- mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
-#ifdef __KVM_HAVE_IOAPIC
- kvm_vcpu_request_scan_ioapic(kvm);
-#endif
+ struct kvm_irq_routing_table *irq_rt;
+
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+ return irq_rt->chip[irqchip][pin];
}
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
@@ -114,9 +92,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
- struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
- int ret = -1, i = 0;
- struct kvm_irq_routing_table *irq_rt;
+ struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
+ int ret = -1, i, idx;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +101,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
- if (irq < irq_rt->nr_rt_entries)
- hlist_for_each_entry(e, &irq_rt->map[irq], link)
- irq_set[i++] = *e;
- rcu_read_unlock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ i = kvm_irq_map_gsi(kvm, irq_set, irq);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
while(i--) {
int r;
@@ -170,9 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
e->gsi = ue->gsi;
e->type = ue->type;
- r = kvm_set_routing_entry(rt, e, ue);
+ r = kvm_set_routing_entry(e, ue);
if (r)
goto out;
+ if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
+ rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
hlist_add_head(&e->link, &rt->map[e->gsi]);
r = 0;
@@ -223,10 +199,11 @@ int kvm_set_irq_routing(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
- kvm_irq_routing_update(kvm, new);
+ rcu_assign_pointer(kvm->irq_routing, new);
+ kvm_irq_routing_update(kvm);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu_expedited(&kvm->irq_srcu);
new = old;
r = 0;
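Note: the update now has the classic publish-then-wait shape: rcu_assign_pointer() publishes the new table, kvm_irq_routing_update() refreshes the irqfd caches while irq_lock is still held, and synchronize_srcu_expedited() guarantees no SRCU reader still sees the old table; the `new = old` swap then lets the shared exit path free the superseded table.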
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a17f190be58e..4eed4cd9b58b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -71,7 +71,8 @@ MODULE_LICENSE("GPL");
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
@@ -95,36 +96,22 @@ static int hardware_enable_all(void);
static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+static void update_memslots(struct kvm_memslots *slots,
+ struct kvm_memory_slot *new, u64 last_generation);
-bool kvm_rebooting;
+static void kvm_release_pfn_dirty(pfn_t pfn);
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot, gfn_t gfn);
+
+__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
bool kvm_is_mmio_pfn(pfn_t pfn)
{
- if (pfn_valid(pfn)) {
- int reserved;
- struct page *tail = pfn_to_page(pfn);
- struct page *head = compound_head(tail);
- reserved = PageReserved(head);
- if (head != tail) {
- /*
- * "head" is not a dangling pointer
- * (compound_head takes care of that)
- * but the hugepage may have been splitted
- * from under us (and we may not hold a
- * reference count on the head page so it can
- * be reused before we run PageReferenced), so
- * we've to check PageTail before returning
- * what we just read.
- */
- smp_rmb();
- if (PageTail(tail))
- return reserved;
- }
- return PageReserved(tail);
- }
+ if (pfn_valid(pfn))
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
return true;
}
@@ -143,7 +130,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
struct pid *oldpid = vcpu->pid;
struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
rcu_assign_pointer(vcpu->pid, newpid);
- synchronize_rcu();
+ if (oldpid)
+ synchronize_rcu();
put_pid(oldpid);
}
cpu = get_cpu();
@@ -470,14 +458,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = kvm_arch_init_vm(kvm, type);
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
r = hardware_enable_all();
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
@@ -486,10 +476,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = -ENOMEM;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
@@ -511,17 +503,19 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return kvm;
out_err:
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
kfree(kvm->memslots);
@@ -561,24 +555,24 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
/*
* Free any memory in @free but not in @dont.
*/
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
- kvm_arch_free_memslot(free, dont);
+ kvm_arch_free_memslot(kvm, free, dont);
free->npages = 0;
}
-void kvm_free_physmem(struct kvm *kvm)
+static void kvm_free_physmem(struct kvm *kvm)
{
struct kvm_memslots *slots = kvm->memslots;
struct kvm_memory_slot *memslot;
kvm_for_each_memslot(memslot, slots)
- kvm_free_physmem_slot(memslot, NULL);
+ kvm_free_physmem_slot(kvm, memslot, NULL);
kfree(kvm->memslots);
}
@@ -602,9 +596,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
@@ -617,6 +611,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
kvm_free_physmem(kvm);
+ cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
hardware_disable_all();
@@ -694,8 +689,9 @@ static void sort_memslots(struct kvm_memslots *slots)
slots->id_to_index[slots->memslots[i].id] = i;
}
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
- u64 last_generation)
+static void update_memslots(struct kvm_memslots *slots,
+ struct kvm_memory_slot *new,
+ u64 last_generation)
{
if (new) {
int id = new->id;
@@ -714,7 +710,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
-#ifdef KVM_CAP_READONLY_MEM
+#ifdef __KVM_HAVE_READONLY_MEM
valid_flags |= KVM_MEM_READONLY;
#endif
@@ -732,7 +728,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
update_memslots(slots, new, kvm->memslots->generation);
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
- return old_memslots;
+
+ kvm_arch_memslots_updated(kvm);
+
+ return old_memslots;
}
/*
@@ -780,7 +779,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
- r = -EINVAL;
if (npages > KVM_MEM_MAX_NR_PAGES)
goto out;
@@ -794,7 +792,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
new.npages = npages;
new.flags = mem->flags;
- r = -EINVAL;
if (npages) {
if (!old.npages)
change = KVM_MR_CREATE;
@@ -839,7 +836,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (change == KVM_MR_CREATE) {
new.userspace_addr = mem->userspace_addr;
- if (kvm_arch_create_memslot(&new, npages))
+ if (kvm_arch_create_memslot(kvm, &new, npages))
goto out_free;
}
@@ -850,7 +847,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
- r = -ENOMEM;
slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
GFP_KERNEL);
if (!slots)
@@ -890,6 +886,19 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
+ /* actual memory is freed via old in kvm_free_physmem_slot below */
+ if (change == KVM_MR_DELETE) {
+ new.dirty_bitmap = NULL;
+ memset(&new.arch, 0, sizeof(new.arch));
+ }
+
+ old_memslots = install_new_memslots(kvm, slots, &new);
+
+ kvm_arch_commit_memory_region(kvm, mem, &old, change);
+
+ kvm_free_physmem_slot(kvm, &old, &new);
+ kfree(old_memslots);
+
/*
* IOMMU mapping: New slots need to be mapped. Old slots need to be
* un-mapped and re-mapped if their base changes. Since base change
@@ -901,29 +910,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
*/
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
r = kvm_iommu_map_pages(kvm, &new);
- if (r)
- goto out_slots;
- }
-
- /* actual memory is freed via old in kvm_free_physmem_slot below */
- if (change == KVM_MR_DELETE) {
- new.dirty_bitmap = NULL;
- memset(&new.arch, 0, sizeof(new.arch));
+ return r;
}
- old_memslots = install_new_memslots(kvm, slots, &new);
-
- kvm_arch_commit_memory_region(kvm, mem, &old, change);
-
- kvm_free_physmem_slot(&old, &new);
- kfree(old_memslots);
-
return 0;
out_slots:
kfree(slots);
out_free:
- kvm_free_physmem_slot(&new, &old);
+ kvm_free_physmem_slot(kvm, &new, &old);
out:
return r;
}
@@ -941,8 +936,8 @@ int kvm_set_memory_region(struct kvm *kvm,
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
{
if (mem->slot >= KVM_USER_MEM_SLOTS)
return -EINVAL;
@@ -1063,7 +1058,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
}
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
- gfn_t gfn)
+ gfn_t gfn)
{
return gfn_to_hva_many(slot, gfn, NULL);
}
@@ -1076,12 +1071,25 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
EXPORT_SYMBOL_GPL(gfn_to_hva);
/*
- * The hva returned by this function is only allowed to be read.
- * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ * If writable is set to false, the hva returned by this function is only
+ * allowed to be read.
*/
-static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+ gfn_t gfn, bool *writable)
{
- return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+ unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+ if (!kvm_is_error_hva(hva) && writable)
+ *writable = !memslot_is_readonly(slot);
+
+ return hva;
+}
+
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+ return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
static int kvm_read_hva(void *data, void __user *hva, int len)
@@ -1397,18 +1405,11 @@ void kvm_release_page_dirty(struct page *page)
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
-void kvm_release_pfn_dirty(pfn_t pfn)
+static void kvm_release_pfn_dirty(pfn_t pfn)
{
kvm_set_pfn_dirty(pfn);
kvm_release_pfn_clean(pfn);
}
-EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
-
-void kvm_set_page_dirty(struct page *page)
-{
- kvm_set_pfn_dirty(page_to_pfn(page));
-}
-EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
void kvm_set_pfn_dirty(pfn_t pfn)
{
@@ -1448,7 +1449,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int r;
unsigned long addr;
- addr = gfn_to_hva_read(kvm, gfn);
+ addr = gfn_to_hva_prot(kvm, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = kvm_read_hva(data, (void __user *)addr + offset, len);
@@ -1486,7 +1487,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
gfn_t gfn = gpa >> PAGE_SHIFT;
int offset = offset_in_page(gpa);
- addr = gfn_to_hva_read(kvm, gfn);
+ addr = gfn_to_hva_prot(kvm, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
pagefault_disable();
@@ -1625,8 +1626,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
- return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
- offset, len);
+ const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+ return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
@@ -1649,8 +1651,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
- gfn_t gfn)
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ gfn_t gfn)
{
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
@@ -1717,14 +1720,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
#endif /* !CONFIG_S390 */
-void kvm_resched(struct kvm_vcpu *vcpu)
-{
- if (!need_resched())
- return;
- cond_resched();
-}
-EXPORT_SYMBOL_GPL(kvm_resched);
-
bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
{
struct pid *pid;
@@ -1734,7 +1729,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
rcu_read_lock();
pid = rcu_dereference(target->pid);
if (pid)
- task = get_pid_task(target->pid, PIDTYPE_PID);
+ task = get_pid_task(pid, PIDTYPE_PID);
rcu_read_unlock();
if (!task)
return ret;
@@ -1749,7 +1744,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
-#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
/*
* Helper that checks whether a VCPU is eligible for directed yield.
* Most eligible candidate to yield is decided by following heuristics:
@@ -1772,20 +1766,22 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
* locking does not harm. It may result in trying to yield to same VCPU, fail
* and continue with next VCPU and so on.
*/
-bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
{
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
bool eligible;
eligible = !vcpu->spin_loop.in_spin_loop ||
- (vcpu->spin_loop.in_spin_loop &&
- vcpu->spin_loop.dy_eligible);
+ vcpu->spin_loop.dy_eligible;
if (vcpu->spin_loop.in_spin_loop)
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
return eligible;
-}
+#else
+ return true;
#endif
+}
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
@@ -1816,7 +1812,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
@@ -1894,7 +1890,7 @@ static struct file_operations kvm_vcpu_fops = {
*/
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
- return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
+ return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
}
/*
@@ -2269,6 +2265,29 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
return filp->private_data;
}
+static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+#ifdef CONFIG_KVM_MPIC
+ [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
+ [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
+#endif
+
+#ifdef CONFIG_KVM_XICS
+ [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
+#endif
+};
+
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+{
+ if (type >= ARRAY_SIZE(kvm_device_ops_table))
+ return -ENOSPC;
+
+ if (kvm_device_ops_table[type] != NULL)
+ return -EEXIST;
+
+ kvm_device_ops_table[type] = ops;
+ return 0;
+}
+
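Note: this is the hook the ARM VGIC uses earlier in this series: kvm_vgic_hyp_init() ends with kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2), the intended pattern for device types that cannot appear statically in kvm_device_ops_table.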
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
@@ -2277,21 +2296,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int ret;
- switch (cd->type) {
-#ifdef CONFIG_KVM_MPIC
- case KVM_DEV_TYPE_FSL_MPIC_20:
- case KVM_DEV_TYPE_FSL_MPIC_42:
- ops = &kvm_mpic_ops;
- break;
-#endif
-#ifdef CONFIG_KVM_XICS
- case KVM_DEV_TYPE_XICS:
- ops = &kvm_xics_ops;
- break;
-#endif
- default:
+ if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+ return -ENODEV;
+
+ ops = kvm_device_ops_table[cd->type];
+ if (ops == NULL)
return -ENODEV;
- }
if (test)
return 0;
@@ -2309,7 +2319,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
return ret;
}
- ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+ ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
ops->destroy(dev);
return ret;
@@ -2321,6 +2331,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
return 0;
}
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+{
+ switch (arg) {
+ case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+ case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+ case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
+ case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_CAP_SIGNAL_MSI:
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
+ case KVM_CAP_IRQFD_RESAMPLE:
+#endif
+ case KVM_CAP_CHECK_EXTENSION_VM:
+ return 1;
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+ case KVM_CAP_IRQ_ROUTING:
+ return KVM_MAX_IRQ_ROUTES;
+#endif
+ default:
+ break;
+ }
+ return kvm_vm_ioctl_check_extension(kvm, arg);
+}
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2484,6 +2522,9 @@ static long kvm_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_CHECK_EXTENSION:
+ r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
+ break;
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
@@ -2537,44 +2578,12 @@ out:
}
#endif
-static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *page[1];
- unsigned long addr;
- int npages;
- gfn_t gfn = vmf->pgoff;
- struct kvm *kvm = vma->vm_file->private_data;
-
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr))
- return VM_FAULT_SIGBUS;
-
- npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
- NULL);
- if (unlikely(npages != 1))
- return VM_FAULT_SIGBUS;
-
- vmf->page = page[0];
- return 0;
-}
-
-static const struct vm_operations_struct kvm_vm_vm_ops = {
- .fault = kvm_vm_fault,
-};
-
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-{
- vma->vm_ops = &kvm_vm_vm_ops;
- return 0;
-}
-
static struct file_operations kvm_vm_fops = {
.release = kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = kvm_vm_compat_ioctl,
#endif
- .mmap = kvm_vm_mmap,
.llseek = noop_llseek,
};
@@ -2593,40 +2602,13 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
return r;
}
#endif
- r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+ r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
if (r < 0)
kvm_put_kvm(kvm);
return r;
}
-static long kvm_dev_ioctl_check_extension_generic(long arg)
-{
- switch (arg) {
- case KVM_CAP_USER_MEMORY:
- case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
- case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
- case KVM_CAP_SET_BOOT_CPU_ID:
-#endif
- case KVM_CAP_INTERNAL_ERROR_DATA:
-#ifdef CONFIG_HAVE_KVM_MSI
- case KVM_CAP_SIGNAL_MSI:
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
- case KVM_CAP_IRQFD_RESAMPLE:
-#endif
- return 1;
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
- case KVM_CAP_IRQ_ROUTING:
- return KVM_MAX_IRQ_ROUTES;
-#endif
- default:
- break;
- }
- return kvm_dev_ioctl_check_extension(arg);
-}
-
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2634,7 +2616,6 @@ static long kvm_dev_ioctl(struct file *filp,
switch (ioctl) {
case KVM_GET_API_VERSION:
- r = -EINVAL;
if (arg)
goto out;
r = KVM_API_VERSION;
@@ -2643,10 +2624,9 @@ static long kvm_dev_ioctl(struct file *filp,
r = kvm_dev_ioctl_create_vm(arg);
break;
case KVM_CHECK_EXTENSION:
- r = kvm_dev_ioctl_check_extension_generic(arg);
+ r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
break;
case KVM_GET_VCPU_MMAP_SIZE:
- r = -EINVAL;
if (arg)
goto out;
r = PAGE_SIZE; /* struct kvm_run */
@@ -2691,7 +2671,7 @@ static void hardware_enable_nolock(void *junk)
cpumask_set_cpu(cpu, cpus_hardware_enabled);
- r = kvm_arch_hardware_enable(NULL);
+ r = kvm_arch_hardware_enable();
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2701,11 +2681,12 @@ static void hardware_enable_nolock(void *junk)
}
}
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
{
- raw_spin_lock(&kvm_lock);
- hardware_enable_nolock(junk);
- raw_spin_unlock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
+ if (kvm_usage_count)
+ hardware_enable_nolock(NULL);
+ raw_spin_unlock(&kvm_count_lock);
}
static void hardware_disable_nolock(void *junk)
@@ -2715,14 +2696,15 @@ static void hardware_disable_nolock(void *junk)
if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
- kvm_arch_hardware_disable(NULL);
+ kvm_arch_hardware_disable();
}
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
{
- raw_spin_lock(&kvm_lock);
- hardware_disable_nolock(junk);
- raw_spin_unlock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
+ if (kvm_usage_count)
+ hardware_disable_nolock(NULL);
+ raw_spin_unlock(&kvm_count_lock);
}
static void hardware_disable_all_nolock(void)
@@ -2736,16 +2718,16 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
- raw_spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
hardware_disable_all_nolock();
- raw_spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_count_lock);
}
static int hardware_enable_all(void)
{
int r = 0;
- raw_spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -2758,7 +2740,7 @@ static int hardware_enable_all(void)
}
}
- raw_spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_count_lock);
return r;
}
@@ -2768,20 +2750,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
{
int cpu = (long)v;
- if (!kvm_usage_count)
- return NOTIFY_OK;
-
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
cpu);
- hardware_disable(NULL);
+ hardware_disable();
break;
case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
- hardware_enable(NULL);
+ hardware_enable();
break;
}
return NOTIFY_OK;
@@ -2994,10 +2973,10 @@ static int vm_stat_get(void *_offset, u64 *val)
struct kvm *kvm;
*val = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
*val += *(u32 *)((void *)kvm + offset);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return 0;
}
@@ -3011,12 +2990,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
int i;
*val = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
*val += *(u32 *)((void *)vcpu + offset);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return 0;
}
@@ -3029,7 +3008,7 @@ static const struct file_operations *stat_fops[] = {
static int kvm_init_debug(void)
{
- int r = -EFAULT;
+ int r = -EEXIST;
struct kvm_stats_debugfs_item *p;
kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
@@ -3071,7 +3050,7 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
if (kvm_usage_count) {
- WARN_ON(raw_spin_is_locked(&kvm_lock));
+ WARN_ON(raw_spin_is_locked(&kvm_count_lock));
hardware_enable_nolock(NULL);
}
}
@@ -3093,6 +3072,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
if (vcpu->preempted)
vcpu->preempted = false;
+ kvm_arch_sched_in(vcpu, cpu);
+
kvm_arch_vcpu_load(vcpu, cpu);
}
@@ -3188,6 +3169,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
out_undebugfs:
unregister_syscore_ops(&kvm_syscore_ops);
+ misc_deregister(&kvm_dev);
out_unreg:
kvm_async_pf_deinit();
out_free:
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
new file mode 100644
index 000000000000..475487e238e1
--- /dev/null
+++ b/virt/kvm/vfio.c
@@ -0,0 +1,228 @@
+/*
+ * VFIO-KVM bridge pseudo device
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All rights reserved.
+ * Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
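+/*
+ * Device-private state: a mutex-protected list of the VFIO groups that
+ * userspace has attached to this VM.
+ */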
+struct kvm_vfio_group {
+ struct list_head node;
+ struct vfio_group *vfio_group;
+};
+
+struct kvm_vfio {
+ struct list_head group_list;
+ struct mutex lock;
+};
+
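+/*
+ * VFIO may be built as a module, so resolve its exported symbols at run
+ * time with symbol_get()/symbol_put() rather than creating a hard module
+ * dependency from kvm.
+ */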
+static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
+{
+ struct vfio_group *vfio_group;
+ struct vfio_group *(*fn)(struct file *);
+
+ fn = symbol_get(vfio_group_get_external_user);
+ if (!fn)
+ return ERR_PTR(-EINVAL);
+
+ vfio_group = fn(filep);
+
+ symbol_put(vfio_group_get_external_user);
+
+ return vfio_group;
+}
+
+static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
+{
+ void (*fn)(struct vfio_group *);
+
+ fn = symbol_get(vfio_group_put_external_user);
+ if (!fn)
+ return;
+
+ fn(vfio_group);
+
+ symbol_put(vfio_group_put_external_user);
+}
+
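+/*
+ * Handler for the KVM_DEV_VFIO_GROUP attribute group.  'arg' is a
+ * userspace pointer to an int32_t VFIO group file descriptor.
+ * Illustrative VMM-side sketch (fd names are placeholders; see
+ * Documentation/virtual/kvm/devices/vfio.txt):
+ *
+ *	int32_t group_fd = open("/dev/vfio/<group>", O_RDWR);
+ *	struct kvm_device_attr attr = {
+ *		.group = KVM_DEV_VFIO_GROUP,
+ *		.attr  = KVM_DEV_VFIO_GROUP_ADD,
+ *		.addr  = (__u64)(unsigned long)&group_fd,
+ *	};
+ *	ioctl(device_fd, KVM_SET_DEVICE_ATTR, &attr);
+ */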
+static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+{
+ struct kvm_vfio *kv = dev->private;
+ struct vfio_group *vfio_group;
+ struct kvm_vfio_group *kvg;
+ void __user *argp = (void __user *)arg;
+ struct fd f;
+ int32_t fd;
+ int ret;
+
+ switch (attr) {
+ case KVM_DEV_VFIO_GROUP_ADD:
+ if (get_user(fd, (int32_t __user *)argp))
+ return -EFAULT;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ vfio_group = kvm_vfio_group_get_external_user(f.file);
+ fdput(f);
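+ /* The group reference taken above is independent of the fd. */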
+
+ if (IS_ERR(vfio_group))
+ return PTR_ERR(vfio_group);
+
+ mutex_lock(&kv->lock);
+
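+ /* Each group can be attached to the VM only once. */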
+ list_for_each_entry(kvg, &kv->group_list, node) {
+ if (kvg->vfio_group == vfio_group) {
+ mutex_unlock(&kv->lock);
+ kvm_vfio_group_put_external_user(vfio_group);
+ return -EEXIST;
+ }
+ }
+
+ kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
+ if (!kvg) {
+ mutex_unlock(&kv->lock);
+ kvm_vfio_group_put_external_user(vfio_group);
+ return -ENOMEM;
+ }
+
+ list_add_tail(&kvg->node, &kv->group_list);
+ kvg->vfio_group = vfio_group;
+
+ mutex_unlock(&kv->lock);
+
+ return 0;
+
+ case KVM_DEV_VFIO_GROUP_DEL:
+ if (get_user(fd, (int32_t __user *)argp))
+ return -EFAULT;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ vfio_group = kvm_vfio_group_get_external_user(f.file);
+ fdput(f);
+
+ if (IS_ERR(vfio_group))
+ return PTR_ERR(vfio_group);
+
+ ret = -ENOENT;
+
+ mutex_lock(&kv->lock);
+
+ list_for_each_entry(kvg, &kv->group_list, node) {
+ if (kvg->vfio_group != vfio_group)
+ continue;
+
+ list_del(&kvg->node);
+ kvm_vfio_group_put_external_user(kvg->vfio_group);
+ kfree(kvg);
+ ret = 0;
+ break;
+ }
+
+ mutex_unlock(&kv->lock);
+
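+ /*
+  * Drop the reference taken by the lookup above; the reference held by
+  * the list entry was already dropped inside the loop on a match.
+  */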
+ kvm_vfio_group_put_external_user(vfio_group);
+
+ return ret;
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_vfio_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_VFIO_GROUP:
+ return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_vfio_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_VFIO_GROUP:
+ switch (attr->attr) {
+ case KVM_DEV_VFIO_GROUP_ADD:
+ case KVM_DEV_VFIO_GROUP_DEL:
+ return 0;
+ }
+
+ break;
+ }
+
+ return -ENXIO;
+}
+
+static void kvm_vfio_destroy(struct kvm_device *dev)
+{
+ struct kvm_vfio *kv = dev->private;
+ struct kvm_vfio_group *kvg, *tmp;
+
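+ /* Release every group still attached when the device is destroyed. */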
+ list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+ kvm_vfio_group_put_external_user(kvg->vfio_group);
+ list_del(&kvg->node);
+ kfree(kvg);
+ }
+
+ kfree(kv);
+ kfree(dev); /* allocated by kvm_ioctl_create_device, freed here in .destroy */
+}
+
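+/*
+ * Forward-declared so kvm_vfio_ops can be defined before kvm_vfio_create(),
+ * which in turn refers back to kvm_vfio_ops.
+ */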
+static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+
+static struct kvm_device_ops kvm_vfio_ops = {
+ .name = "kvm-vfio",
+ .create = kvm_vfio_create,
+ .destroy = kvm_vfio_destroy,
+ .set_attr = kvm_vfio_set_attr,
+ .has_attr = kvm_vfio_has_attr,
+};
+
+static int kvm_vfio_create(struct kvm_device *dev, u32 type)
+{
+ struct kvm_device *tmp;
+ struct kvm_vfio *kv;
+
+ /* Only one VFIO "device" per VM */
+ list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
+ if (tmp->ops == &kvm_vfio_ops)
+ return -EBUSY;
+
+ kv = kzalloc(sizeof(*kv), GFP_KERNEL);
+ if (!kv)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&kv->group_list);
+ mutex_init(&kv->lock);
+
+ dev->private = kv;
+
+ return 0;
+}
+
+static int __init kvm_vfio_ops_init(void)
+{
+ return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
+}
+module_init(kvm_vfio_ops_init);