-rw-r--r--  Documentation/00-INDEX | 2
-rw-r--r--  Documentation/devicetree/bindings/arm/cpus.txt | 17
-rw-r--r--  Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt | 31
-rw-r--r--  Documentation/devicetree/bindings/display/hisilicon/dw-dsi.txt | 72
-rw-r--r--  Documentation/devicetree/bindings/display/hisilicon/hisi-ade.txt | 64
-rw-r--r--  Documentation/devicetree/bindings/mailbox/hisilicon,hi6220-mailbox.txt | 57
-rw-r--r--  Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt | 27
-rw-r--r--  Documentation/devicetree/bindings/misc/ramoops.txt | 43
-rw-r--r--  Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt | 6
-rw-r--r--  Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt | 16
-rw-r--r--  Documentation/devicetree/bindings/regulator/hisilicon,hi655x-regulator.txt | 28
-rw-r--r--  Documentation/devicetree/bindings/reset/hisilicon,hi6220-reset.txt | 36
-rw-r--r--  Documentation/devicetree/bindings/scheduler/sched-energy-costs.txt | 360
-rw-r--r--  Documentation/devicetree/bindings/usb/dwc2.txt | 1
-rw-r--r--  Documentation/devicetree/bindings/vendor-prefixes.txt | 1
-rw-r--r--  Documentation/ioctl/ioctl-number.txt | 1
-rw-r--r--  Documentation/ramoops.txt | 6
-rw-r--r--  Documentation/scheduler/sched-energy.txt | 362
-rw-r--r--  Documentation/scheduler/sched-tune.txt | 366
-rw-r--r--  Documentation/tee.txt | 118
-rw-r--r--  MAINTAINERS | 23
-rw-r--r--  arch/arm/Kconfig | 3
-rw-r--r--  arch/arm/include/asm/topology.h | 7
-rw-r--r--  arch/arm/kernel/Makefile | 3
-rw-r--r--  arch/arm/kernel/armksyms.c | 6
-rw-r--r--  arch/arm/kernel/psci-call.S | 31
-rw-r--r--  arch/arm/kernel/smccc-call.S | 62
-rw-r--r--  arch/arm/kernel/topology.c | 23
-rw-r--r--  arch/arm64/Kconfig | 1
-rw-r--r--  arch/arm64/Kconfig.platforms | 1
-rw-r--r--  arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 388
-rw-r--r--  arch/arm64/boot/dts/hisilicon/hi6220-sched-energy.dtsi | 69
-rw-r--r--  arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 820
-rw-r--r--  arch/arm64/boot/dts/hisilicon/hikey-pinctrl.dtsi | 727
-rw-r--r--  arch/arm64/configs/defconfig | 106
-rw-r--r--  arch/arm64/include/asm/topology.h | 9
-rw-r--r--  arch/arm64/kernel/Makefile | 4
-rw-r--r--  arch/arm64/kernel/arm64ksyms.c | 5
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 3
-rw-r--r--  arch/arm64/kernel/psci-call.S | 28
-rw-r--r--  arch/arm64/kernel/smccc-call.S | 43
-rw-r--r--  arch/arm64/kernel/topology.c | 81
-rw-r--r--  drivers/Kconfig | 2
-rw-r--r--  drivers/Makefile | 1
-rw-r--r--  drivers/bluetooth/btwilink.c | 11
-rw-r--r--  drivers/clk/hisilicon/clk-hi6220.c | 73
-rw-r--r--  drivers/clk/hisilicon/clkgate-separated.c | 1
-rw-r--r--  drivers/cpufreq/Kconfig | 21
-rw-r--r--  drivers/cpufreq/Kconfig.arm | 2
-rw-r--r--  drivers/cpufreq/arm_big_little.c | 41
-rw-r--r--  drivers/cpufreq/cpufreq-dt.c | 9
-rw-r--r--  drivers/cpufreq/cpufreq.c | 58
-rw-r--r--  drivers/cpuidle/cpuidle.c | 4
-rw-r--r--  drivers/dma/Kconfig | 1
-rw-r--r--  drivers/dma/k3dma.c | 162
-rw-r--r--  drivers/firmware/Kconfig | 13
-rw-r--r--  drivers/firmware/Makefile | 1
-rw-r--r--  drivers/firmware/psci.c | 23
-rw-r--r--  drivers/firmware/reboot_reason_sram.c | 107
-rw-r--r--  drivers/gpu/Makefile | 2
-rw-r--r--  drivers/gpu/arm/Kconfig | 1
-rw-r--r--  drivers/gpu/arm/Makefile | 1
-rw-r--r--  drivers/gpu/arm/utgard/Kbuild | 207
-rw-r--r--  drivers/gpu/arm/utgard/Kconfig | 118
-rw-r--r--  drivers/gpu/arm/utgard/Makefile | 190
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_broadcast.c | 142
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_broadcast.h | 57
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_control_timer.c | 128
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_control_timer.h | 28
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_dlbu.c | 213
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_dlbu.h | 45
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_dvfs_policy.c | 308
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_dvfs_policy.h | 34
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_executor.c | 2642
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_executor.h | 104
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_gp.c | 357
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_gp.h | 127
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_gp_job.c | 301
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_gp_job.h | 325
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_group.c | 1816
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_group.h | 467
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_hw_core.c | 47
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_hw_core.h | 111
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_kernel_common.h | 181
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_kernel_core.c | 1314
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_kernel_core.h | 57
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_kernel_utilization.c | 440
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_kernel_utilization.h | 72
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_kernel_vsync.c | 45
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_l2_cache.c | 534
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_l2_cache.h | 124
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_mem_validation.c | 65
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_mem_validation.h | 19
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_mmu.c | 433
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_mmu.h | 123
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_mmu_page_directory.c | 495
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_mmu_page_directory.h | 110
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_osk.h | 1397
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_osk_bitops.h | 162
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_osk_list.h | 273
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_osk_mali.h | 97
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_osk_profiling.h | 146
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_osk_types.h | 471
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pm.c | 1362
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pm.h | 91
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pm_domain.c | 209
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pm_domain.h | 104
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pmu.c | 270
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pmu.h | 123
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pp.c | 501
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pp.h | 137
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pp_job.c | 308
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_pp_job.h | 574
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_scheduler.c | 1354
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_scheduler.h | 130
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_scheduler_types.h | 29
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_session.c | 144
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_session.h | 127
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_soft_job.c | 438
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_soft_job.h | 190
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_spinlock_reentrant.c | 77
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_spinlock_reentrant.h | 70
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_timeline.c | 1586
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_timeline.h | 535
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_timeline_fence_wait.c | 202
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_timeline_fence_wait.h | 67
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_timeline_sync_fence.c | 158
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_timeline_sync_fence.h | 51
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_ukk.h | 551
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_user_settings_db.c | 147
-rw-r--r--  drivers/gpu/arm/utgard/common/mali_user_settings_db.h | 39
-rw-r--r--  drivers/gpu/arm/utgard/include/linux/mali/mali_utgard.h | 507
-rw-r--r--  drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_ioctl.h | 100
-rw-r--r--  drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_profiling_events.h | 200
-rw-r--r--  drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_profiling_gator_api.h | 315
-rw-r--r--  drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_uk_types.h | 1106
-rw-r--r--  drivers/gpu/arm/utgard/linux/license/gpl/mali_kernel_license.h | 30
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_device_pause_resume.c | 36
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_kernel_linux.c | 941
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_kernel_linux.h | 30
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_kernel_sysfs.c | 1410
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_kernel_sysfs.h | 29
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_linux_trace.h | 162
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory.c | 510
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory.h | 143
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_block_alloc.c | 362
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_block_alloc.h | 58
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_cow.c | 754
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_cow.h | 48
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_defer_bind.c | 246
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_defer_bind.h | 65
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_dma_buf.c | 369
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_dma_buf.h | 53
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_external.c | 91
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_external.h | 29
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_manager.c | 965
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_manager.h | 51
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_os_alloc.c | 805
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_os_alloc.h | 54
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_swap_alloc.c | 942
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_swap_alloc.h | 121
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_types.h | 208
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_ump.c | 154
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_ump.h | 29
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_util.c | 149
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_util.h | 20
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_virtual.c | 127
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_memory_virtual.h | 35
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_atomics.c | 59
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_bitmap.c | 152
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_irq.c | 200
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_locks.c | 287
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_locks.h | 326
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_low_level_mem.c | 146
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_mali.c | 390
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_math.c | 27
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_memory.c | 61
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_misc.c | 92
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_notification.c | 182
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_pm.c | 83
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_profiling.c | 1272
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_specific.h | 72
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_time.c | 59
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_timers.c | 76
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_wait_queue.c | 78
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_osk_wq.c | 240
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_pmu_power_up_down.c | 23
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_profiling_events.h | 17
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_profiling_gator_api.h | 17
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_profiling_internal.c | 275
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_profiling_internal.h | 35
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_sync.c | 447
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_sync.h | 111
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_uk_types.h | 17
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_core.c | 146
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_gp.c | 91
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_mem.c | 333
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_pp.c | 105
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_profiling.c | 177
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_soft_job.c | 90
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_timeline.c | 88
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_vsync.c | 39
-rw-r--r--  drivers/gpu/arm/utgard/linux/mali_ukk_wrappers.h | 75
-rw-r--r--  drivers/gpu/arm/utgard/platform/arm/arm.c | 439
-rw-r--r--  drivers/gpu/arm/utgard/platform/arm/arm_core_scaling.c | 122
-rw-r--r--  drivers/gpu/arm/utgard/platform/arm/arm_core_scaling.h | 44
-rw-r--r--  drivers/gpu/arm/utgard/platform/hikey/mali_hikey.c | 683
-rw-r--r--  drivers/gpu/arm/utgard/platform/hikey/mali_hikey_hi6220_registers_gpu.h | 66
-rw-r--r--  drivers/gpu/arm/utgard/readme.txt | 28
-rw-r--r--  drivers/gpu/arm/utgard/regs/mali_200_regs.h | 131
-rw-r--r--  drivers/gpu/arm/utgard/regs/mali_gp_regs.h | 172
-rw-r--r--  drivers/gpu/arm/utgard/timestamp-arm11-cc/mali_timestamp.c | 13
-rw-r--r--  drivers/gpu/arm/utgard/timestamp-arm11-cc/mali_timestamp.h | 48
-rw-r--r--  drivers/gpu/arm/utgard/timestamp-default/mali_timestamp.c | 13
-rw-r--r--  drivers/gpu/arm/utgard/timestamp-default/mali_timestamp.h | 26
-rw-r--r--  drivers/gpu/drm/Kconfig | 10
-rw-r--r--  drivers/gpu/drm/Makefile | 1
-rw-r--r--  drivers/gpu/drm/drm_fb_cma_helper.c | 8
-rw-r--r--  drivers/gpu/drm/drm_mipi_dsi.c | 161
-rw-r--r--  drivers/gpu/drm/hisilicon/Kconfig | 5
-rw-r--r--  drivers/gpu/drm/hisilicon/Makefile | 5
-rw-r--r--  drivers/gpu/drm/hisilicon/kirin/Kconfig | 20
-rw-r--r--  drivers/gpu/drm/hisilicon/kirin/Makefile | 6
-rw-r--r--  drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c | 1267
-rw-r--r--  drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h | 145
-rw-r--r--  drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h | 230
-rw-r--r--  drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c | 1057
-rw-r--r--  drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c | 369
-rw-r--r--  drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h | 32
-rw-r--r--  drivers/gpu/drm/i2c/Kconfig | 8
-rw-r--r--  drivers/gpu/drm/i2c/Makefile | 4
-rw-r--r--  drivers/gpu/drm/i2c/adv7511.c | 708
-rw-r--r--  drivers/gpu/drm/i2c/adv7511.h | 60
-rw-r--r--  drivers/gpu/drm/i2c/adv7511_audio.c | 312
-rw-r--r--  drivers/gpu/drm/i2c/icn_6201.c | 297
-rw-r--r--  drivers/gpu/drm/panel/Kconfig | 8
-rw-r--r--  drivers/gpu/drm/panel/Makefile | 1
-rw-r--r--  drivers/gpu/drm/panel/panel-hikey.c | 529
-rw-r--r--  drivers/i2c/busses/i2c-designware-core.c | 22
-rw-r--r--  drivers/i2c/busses/i2c-designware-platdrv.c | 98
-rw-r--r--  drivers/input/touchscreen/Kconfig | 12
-rw-r--r--  drivers/input/touchscreen/Makefile | 3
-rw-r--r--  drivers/input/touchscreen/ft5x06_ts.c | 498
-rw-r--r--  drivers/mailbox/Kconfig | 8
-rw-r--r--  drivers/mailbox/Makefile | 2
-rw-r--r--  drivers/mailbox/hi6220-mailbox.c | 371
-rw-r--r--  drivers/mfd/Kconfig | 10
-rw-r--r--  drivers/mfd/Makefile | 1
-rw-r--r--  drivers/mfd/hi655x-pmic.c | 176
-rw-r--r--  drivers/misc/Kconfig | 8
-rw-r--r--  drivers/misc/Makefile | 1
-rw-r--r--  drivers/misc/hi6220-sysconfig.c | 72
-rw-r--r--  drivers/misc/ti-st/Kconfig | 8
-rw-r--r--  drivers/misc/ti-st/Makefile | 3
-rw-r--r--  drivers/misc/ti-st/st_kim.c | 94
-rw-r--r--  drivers/misc/ti-st/st_ll.c | 17
-rw-r--r--  drivers/misc/ti-st/tty_hci.c | 593
-rw-r--r--  drivers/mmc/card/block.c | 3
-rw-r--r--  drivers/mmc/host/dw_mmc-k3.c | 13
-rw-r--r--  drivers/mmc/host/dw_mmc.c | 41
-rw-r--r--  drivers/pci/pci.c | 3
-rw-r--r--  drivers/phy/Kconfig | 9
-rw-r--r--  drivers/phy/Makefile | 1
-rw-r--r--  drivers/phy/phy-hi6220-usb.c | 168
-rw-r--r--  drivers/regulator/Kconfig | 15
-rw-r--r--  drivers/regulator/Makefile | 2
-rw-r--r--  drivers/regulator/hi6220-mtcmos.c | 269
-rw-r--r--  drivers/regulator/hi655x-regulator.c | 226
-rw-r--r--  drivers/reset/Kconfig | 1
-rw-r--r--  drivers/reset/Makefile | 1
-rw-r--r--  drivers/reset/hisilicon/Kconfig | 5
-rw-r--r--  drivers/reset/hisilicon/Makefile | 1
-rw-r--r--  drivers/reset/hisilicon/hi6220_reset.c | 164
-rw-r--r--  drivers/tee/Kconfig | 19
-rw-r--r--  drivers/tee/Makefile | 4
-rw-r--r--  drivers/tee/optee/Kconfig | 8
-rw-r--r--  drivers/tee/optee/Makefile | 5
-rw-r--r--  drivers/tee/optee/call.c | 400
-rw-r--r--  drivers/tee/optee/core.c | 522
-rw-r--r--  drivers/tee/optee/optee_msg.h | 435
-rw-r--r--  drivers/tee/optee/optee_private.h | 146
-rw-r--r--  drivers/tee/optee/optee_smc.h | 418
-rw-r--r--  drivers/tee/optee/rpc.c | 386
-rw-r--r--  drivers/tee/optee/supp.c | 212
-rw-r--r--  drivers/tee/tee.c | 946
-rw-r--r--  drivers/tee/tee_private.h | 87
-rw-r--r--  drivers/tee/tee_shm.c | 286
-rw-r--r--  drivers/tee/tee_shm_pool.c | 133
-rw-r--r--  drivers/thermal/hisi_thermal.c | 49
-rw-r--r--  drivers/usb/core/hub.c | 20
-rw-r--r--  drivers/usb/core/usb.c | 2
-rw-r--r--  drivers/usb/dwc2/hcd.c | 45
-rw-r--r--  drivers/usb/dwc2/platform.c | 32
-rw-r--r--  drivers/video/Kconfig | 1
-rw-r--r--  fs/pstore/ram.c | 109
-rw-r--r--  include/drm/drm_mipi_dsi.h | 36
-rw-r--r--  include/dt-bindings/clock/hi6220-clock.h | 4
-rwxr-xr-x  include/dt-bindings/pinctrl/hisi.h | 59
-rw-r--r--  include/dt-bindings/reset/hisi,hi6220-resets.h | 75
-rw-r--r--  include/linux/arm-smccc.h | 104
-rw-r--r--  include/linux/cgroup_subsys.h | 4
-rw-r--r--  include/linux/cpufreq.h | 16
-rw-r--r--  include/linux/cpuidle.h | 2
-rw-r--r--  include/linux/ft5x06_ts.h | 42
-rw-r--r--  include/linux/mfd/hi655x-pmic.h | 55
-rw-r--r--  include/linux/mmc/dw_mmc.h | 6
-rw-r--r--  include/linux/pci.h | 5
-rw-r--r--  include/linux/sched.h | 30
-rw-r--r--  include/linux/sched/sysctl.h | 16
-rw-r--r--  include/linux/sched_energy.h | 36
-rw-r--r--  include/linux/tee_drv.h | 288
-rw-r--r--  include/linux/ti_wilink_st.h | 1
-rw-r--r--  include/linux/usb/hcd.h | 1
-rw-r--r--  include/net/bluetooth/hci_core.h | 5
-rw-r--r--  include/trace/events/cpufreq_sched.h | 87
-rw-r--r--  include/trace/events/power.h | 7
-rw-r--r--  include/trace/events/sched.h | 202
-rw-r--r--  include/uapi/linux/tee.h | 386
-rw-r--r--  init/Kconfig | 43
-rw-r--r--  kernel/sched/Makefile | 4
-rw-r--r--  kernel/sched/core.c | 206
-rw-r--r--  kernel/sched/cpufreq_sched.c | 367
-rw-r--r--  kernel/sched/deadline.c | 33
-rw-r--r--  kernel/sched/energy.c | 124
-rw-r--r--  kernel/sched/fair.c | 983
-rw-r--r--  kernel/sched/features.h | 5
-rw-r--r--  kernel/sched/idle.c | 3
-rw-r--r--  kernel/sched/rt.c | 49
-rw-r--r--  kernel/sched/sched.h | 154
-rw-r--r--  kernel/sched/tune.c | 747
-rw-r--r--  kernel/sched/tune.h | 31
-rw-r--r--  kernel/sysctl.c | 15
-rw-r--r--  net/bluetooth/Kconfig | 11
-rw-r--r--  net/bluetooth/Makefile | 3
-rw-r--r--  net/bluetooth/hci_core.c | 10
-rw-r--r--  net/bluetooth/led.c | 72
-rw-r--r--  net/bluetooth/led.h | 37
-rw-r--r--  sound/soc/Kconfig | 1
-rw-r--r--  sound/soc/Makefile | 1
-rw-r--r--  sound/soc/hisilicon/Kconfig | 5
-rw-r--r--  sound/soc/hisilicon/Makefile | 2
-rw-r--r--  sound/soc/hisilicon/hi6210-hdmi-card.c | 130
-rw-r--r--  sound/soc/hisilicon/hi6210-i2s.c | 636
-rw-r--r--  sound/soc/hisilicon/hi6210-i2s.h | 273
344 files changed, 64815 insertions, 441 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index cd077ca0e1b8..bd3f803a4e06 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -435,6 +435,8 @@ sysrq.txt
- info on the magic SysRq key.
target/
- directory with info on generating TCM v4 fabric .ko modules
+tee.txt
+ - info on the TEE subsystem and drivers
this_cpu_ops.txt
- List rationale behind and the way to use this_cpu operations.
thermal/
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
index 3a07a87fef20..6aca64f289b6 100644
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -242,6 +242,23 @@ nodes to be present and contain the properties described below.
Definition: Specifies the syscon node controlling the cpu core
power domains.
+ - dynamic-power-coefficient
+ Usage: optional
+ Value type: <prop-encoded-array>
+ Definition: A u32 value that represents the running time dynamic
+ power coefficient in units of mW/MHz/uVolt^2. The
+ coefficient can either be calculated from power
+ measurements or derived by analysis.
+
+ The dynamic power consumption of the CPU is
+ proportional to the square of the Voltage (V) and
+ the clock frequency (f). The coefficient is used to
+ calculate the dynamic power as below -
+
+ Pdyn = dynamic-power-coefficient * V^2 * f
+
+ where voltage is in uV, frequency is in MHz.
+
Example 1 (dual-cluster big.LITTLE system 32-bit):
cpus {
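For illustration, a minimal sketch of a cpu node carrying the new property (the coefficient value below is made up, not measured; units are as defined above):

	cpu@0 {
		device_type = "cpu";
		compatible = "arm,cortex-a53";
		reg = <0x0>;
		/* illustrative coefficient in mW/MHz/uV^2 */
		dynamic-power-coefficient = <110>;
	};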
diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
new file mode 100644
index 000000000000..d38834c67dff
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
@@ -0,0 +1,31 @@
+OP-TEE Device Tree Bindings
+
+OP-TEE is a piece of software using hardware features to provide a Trusted
+Execution Environment. The security can be provided with ARM TrustZone, but
+also by virtualization or a separate chip.
+
+We're using "linaro" as the first part of the compatible property for
+the reference implementation maintained by Linaro.
+
+* OP-TEE based on ARM TrustZone required properties:
+
+- compatible : should contain "linaro,optee-tz"
+
+- method : The method of calling the OP-TEE Trusted OS. Permitted
+ values are:
+
+ "smc" : SMC #0, with the register assignments specified
+ in drivers/tee/optee/optee_smc.h
+
+ "hvc" : HVC #0, with the register assignments specified
+ in drivers/tee/optee/optee_smc.h
+
+Example:
+ firmware {
+ optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/hisilicon/dw-dsi.txt b/Documentation/devicetree/bindings/display/hisilicon/dw-dsi.txt
new file mode 100644
index 000000000000..d270bfe4e4e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/hisilicon/dw-dsi.txt
@@ -0,0 +1,72 @@
+Device-Tree bindings for DesignWare DSI Host Controller v1.20a driver
+
+A DSI Host Controller resides between the display controller and an external
+HDMI converter or panel.
+
+Required properties:
+- compatible: value should be "hisilicon,hi6220-dsi".
+- reg: physical base address and length of dsi controller's registers.
+- clocks: contains APB clock phandle + clock-specifier pair.
+- clock-names: should be "pclk".
+- ports: contains DSI controller input and output sub port.
+  The input port, with reg value "0", connects to the ADE output port.
+  The output port, with reg value "1", can connect to a panel or any
+  other bridge endpoints.
+ See Documentation/devicetree/bindings/graph.txt for more device graph info.
+
+Example (hi6220 SoC and board-specific DT entries for the HiKey board):
+
+SoC specific:
+ dsi: dsi@f4107800 {
+ compatible = "hisilicon,hi6220-dsi";
+ reg = <0x0 0xf4107800 0x0 0x100>;
+ clocks = <&media_ctrl HI6220_DSI_PCLK>;
+ clock-names = "pclk";
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* 0 for input port */
+ port@0 {
+ reg = <0>;
+ dsi_in: endpoint {
+ remote-endpoint = <&ade_out>;
+ };
+ };
+ };
+ };
+
+
+Board specific:
+ &dsi {
+ status = "ok";
+
+ ports {
+ /* 1 for output port */
+ port@1 {
+ reg = <1>;
+
+ dsi_out0: endpoint@0 {
+ remote-endpoint = <&adv7533_in>;
+ };
+ };
+ };
+ };
+
+ &i2c2 {
+ ...
+
+ adv7533: adv7533@39 {
+ ...
+
+ port {
+ adv7533_in: endpoint {
+ remote-endpoint = <&dsi_out0>;
+ };
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/display/hisilicon/hisi-ade.txt b/Documentation/devicetree/bindings/display/hisilicon/hisi-ade.txt
new file mode 100644
index 000000000000..38dc9d60eef8
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/hisilicon/hisi-ade.txt
@@ -0,0 +1,64 @@
+Device-Tree bindings for hisilicon ADE display controller driver
+
+ADE (Advanced Display Engine) is the display controller. It grabs image
+data from memory, does composition and post image processing, generates the
+RGB timing stream and transfers it to DSI.
+
+Required properties:
+- compatible: value should be "hisilicon,hi6220-ade".
+- reg: physical base address and length of the ADE controller's registers.
+- hisilicon,noc-syscon: ADE NOC QoS syscon.
+- resets: The ADE reset controller node.
+- interrupts: the ldi vblank interrupt number used.
+- clocks: a list of phandle + clock-specifier pairs, one for each entry
+ in clock-names.
+- clock-names: should contain:
+ "clk_ade_core" for the ADE core clock.
+  "clk_codec_jpeg" for the media NOC QoS clock, which uses the same clock as
+  the jpeg codec.
+  "clk_ade_pix" for the ADE pixel clock.
+- assigned-clocks: Should contain "clk_ade_core" and "clk_codec_jpeg" clocks'
+ phandle + clock-specifier pairs.
+- assigned-clock-rates: clock rates, one for each entry in assigned-clocks.
+  The rate of "clk_ade_core" can be "360000000" or "180000000";
+  the rate of "clk_codec_jpeg" can be "1440000000" or less.
+  These rates can be configured according to the desired balance of
+  performance and power consumption.
+- port: the output port. This contains one endpoint subnode, with its
+ remote-endpoint set to the phandle of the connected DSI input endpoint.
+ See Documentation/devicetree/bindings/graph.txt for more device graph info.
+
+Optional properties:
+- dma-coherent: Present if dma operations are coherent.
+
+
+Example (hi6220 SoC specific DT entry for the HiKey board):
+
+ ade: ade@f4100000 {
+ compatible = "hisilicon,hi6220-ade";
+ reg = <0x0 0xf4100000 0x0 0x7800>;
+ reg-names = "ade_base";
+ hisilicon,noc-syscon = <&medianoc_ade>;
+ resets = <&media_ctrl MEDIA_ADE>;
+ interrupts = <0 115 4>; /* ldi interrupt */
+
+ clocks = <&media_ctrl HI6220_ADE_CORE>,
+ <&media_ctrl HI6220_CODEC_JPEG>,
+ <&media_ctrl HI6220_ADE_PIX_SRC>;
+ /*clock name*/
+ clock-names = "clk_ade_core",
+ "clk_codec_jpeg",
+ "clk_ade_pix";
+
+ assigned-clocks = <&media_ctrl HI6220_ADE_CORE>,
+ <&media_ctrl HI6220_CODEC_JPEG>;
+ assigned-clock-rates = <360000000>, <288000000>;
+ dma-coherent;
+
+ port {
+ ade_out: endpoint {
+ remote-endpoint = <&dsi_in>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mailbox/hisilicon,hi6220-mailbox.txt b/Documentation/devicetree/bindings/mailbox/hisilicon,hi6220-mailbox.txt
new file mode 100644
index 000000000000..3dfb0b0ecd33
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/hisilicon,hi6220-mailbox.txt
@@ -0,0 +1,57 @@
+Hisilicon Hi6220 Mailbox Driver
+===============================
+
+Hisilicon Hi6220 mailbox supports up to 32 channels. Each channel
+is unidirectional with a maximum message size of 8 words. I/O is
+performed using register access (there is no DMA) and the cell
+raises an interrupt when messages are received.
+
+Mailbox Device Node:
+====================
+
+Required properties:
+--------------------
+- compatible: Shall be "hisilicon,hi6220-mbox"
+- reg: Contains the mailbox register address range (base
+ address and length); the first item is for IPC
+ registers, the second item is shared buffer for
+ slots.
+- #mbox-cells:	Common mailbox binding property to identify the number
+ of cells required for the mailbox specifier. Should be 1.
+- interrupts: Contains the interrupt information for the mailbox
+ device. The format is dependent on which interrupt
+ controller the SoCs use.
+
+Example:
+--------
+
+ mailbox: mailbox@F7510000 {
+ #mbox-cells = <1>;
+ compatible = "hisilicon,hi6220-mbox";
+ reg = <0x0 0xF7510000 0x0 0x1000>, /* IPC_S */
+ <0x0 0x06DFF800 0x0 0x0800>; /* Mailbox */
+ interrupt-parent = <&gic>;
+ interrupts = <0 94 4>;
+ };
+
+
+Mailbox client
+===============
+
+Mailbox clients require the "mboxes" property and the optional "mbox-names"
+property (please see Documentation/devicetree/bindings/mailbox/mailbox.txt for
+details). Each value of the mboxes property should contain a phandle to the
+mailbox controller device node; the second argument is the channel index. It
+must be 0 (the hardware supports only one channel). The equivalent "mbox-names"
+property value can be used to name the communication channel used by the client.
+
+Example:
+--------
+
+ stub_clock: stub_clock {
+ compatible = "hisilicon,hi6220-stub-clk";
+ hisilicon,hi6220-clk-sram = <&sram>;
+ #clock-cells = <1>;
+ mbox-names = "mbox-tx";
+ mboxes = <&mailbox 1>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
new file mode 100644
index 000000000000..5edc310470b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
@@ -0,0 +1,27 @@
+Hisilicon hi655x Power Management Integrated Circuit (PMIC)
+
+The hardware layout for accessing the Hi655x PMIC from the Hi6220 AP SoC:
+between the Hi655x PMIC and the Hi6220, the physical signal channel is SSI,
+and memory-mapped I/O is used to communicate.
+
++----------------+ +-------------+
+| | | |
+| Hi6220 | SSI bus | Hi655x |
+| |-------------| |
+| |(REGMAP_MMIO)| |
++----------------+ +-------------+
+
+Required properties:
+- compatible: Should be "hisilicon,hi655x-pmic"
+- reg: Base address of the PMIC on the hi6220 SoC
+- interrupt-controller: Hi655x has internal IRQs (it has its own IRQ domain).
+- pmic-gpios: The GPIO used by the PMIC IRQ.
+
+Example:
+ pmic: pmic@f8000000 {
+ compatible = "hisilicon,hi655x-pmic";
+ reg = <0x0 0xf8000000 0x0 0x1000>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/misc/ramoops.txt b/Documentation/devicetree/bindings/misc/ramoops.txt
new file mode 100644
index 000000000000..5a475fae4aab
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/ramoops.txt
@@ -0,0 +1,43 @@
+Ramoops oops/panic logger
+=========================
+
+ramoops provides persistent RAM storage for oops and panics, so they can be
+recovered after a reboot.
+
+Parts of this storage may be set aside for other persistent log buffers, such
+as kernel log messages, or for optional ECC error-correction data. The total
+size of these optional buffers must fit in the reserved region.
+
+Any remaining space will be used for a circular buffer of oops and panic
+records. These records have a configurable size, with a size of 0 indicating
+that they should be disabled.
+
+
+Required properties:
+
+- compatible: must be "ramoops"
+
+- memory-region: phandle to a region of memory that is preserved between reboots
+
+
+Optional properties:
+
+- ecc-size: enables ECC support and specifies ECC buffer size in bytes
+ (defaults to no ECC)
+
+- record-size: maximum size in bytes of each dump done on oops/panic
+ (defaults to 0)
+
+- console-size: size in bytes of log buffer reserved for kernel messages
+ (defaults to 0)
+
+- ftrace-size: size in bytes of log buffer reserved for function tracing and
+ profiling (defaults to 0)
+
+- pmsg-size: size in bytes of log buffer reserved for userspace messages
+ (defaults to 0)
+
+- unbuffered: if present, use unbuffered mappings to map the reserved region
+ (defaults to buffered mappings)
+
+- no-dump-oops: if present, only dump panics (defaults to panics and oops)
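As an illustration of how these properties combine, a minimal sketch (the reserved-memory address and all sizes below are made up):

	reserved-memory {
		#address-cells = <2>;
		#size-cells = <2>;
		ranges;

		/* illustrative 1 MiB region preserved across reboots */
		ramoops_mem: ramoops@8f000000 {
			reg = <0x0 0x8f000000 0x0 0x100000>;
			no-map;
		};
	};

	ramoops {
		compatible = "ramoops";
		memory-region = <&ramoops_mem>;
		record-size = <0x20000>;
		console-size = <0x20000>;
		ecc-size = <16>;
	};

The ramoops node points at the preserved region via memory-region; space left over after the console and ECC buffers holds the circular buffer of dump records.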
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
index 8636f5ae97e5..9b4896c11716 100644
--- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
@@ -39,6 +39,10 @@ Required Properties:
Optional properties:
+* resets: phandle + reset specifier pair, intended to represent hardware
+ reset signal present internally in some host controller IC designs.
+ See Documentation/devicetree/bindings/reset/reset.txt for details.
+
* clocks: from common clock binding: handle to biu and ciu clocks for the
bus interface unit clock and the card interface unit clock.
@@ -48,7 +52,7 @@ Optional properties:
clock-frequency. It is an error to omit both the ciu clock and the
clock-frequency.
-* clock-frequency: should be the frequency (in Hz) of the ciu clock. If this
+* clock-frequency: should be the frequency (in Hz) of the ciu clock. If this
is specified and the ciu clock is specified then we'll try to set the ciu
clock to this at probe time.
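As a sketch, the new resets property might be used in a host controller node along these lines (the address, clock phandles and reset specifier are made up and SoC-dependent):

	mmc@fcd03000 {
		compatible = "snps,dw-mshc";
		reg = <0xfcd03000 0x1000>;
		clocks = <&clock 0>, <&clock 1>;
		clock-names = "biu", "ciu";
		/* illustrative reset controller phandle + specifier */
		resets = <&rst 20>;
	};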
diff --git a/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt b/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt
new file mode 100644
index 000000000000..f17a56e2152f
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt
@@ -0,0 +1,16 @@
+Hisilicon hi6220 USB PHY
+------------------------
+
+Required properties:
+- compatible: should be "hisilicon,hi6220-usb-phy"
+- #phy-cells: must be 0
+- hisilicon,peripheral-syscon: phandle of syscon used to control phy.
+Refer to phy/phy-bindings.txt for the generic PHY binding properties.
+
+Example:
+ usb_phy: usbphy {
+ compatible = "hisilicon,hi6220-usb-phy";
+ #phy-cells = <0>;
+ phy-supply = <&fixed_5v_hub>;
+ hisilicon,peripheral-syscon = <&sys_ctrl>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/hisilicon,hi655x-regulator.txt b/Documentation/devicetree/bindings/regulator/hisilicon,hi655x-regulator.txt
new file mode 100644
index 000000000000..09d3884e7cc2
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/hisilicon,hi655x-regulator.txt
@@ -0,0 +1,28 @@
+Hisilicon Hi655x Voltage regulators
+
+Note:
+The hi655x regulators are controlled by the hi655x power IC, so the
+regulator nodes must be child nodes of the hi655x PMIC node.
+
+The driver uses the regulator core framework, so please also refer
+to the bindings in regulator.txt.
+
+The valid names for regulators are:
+
+LDO2 LDO7 LDO10 LDO13 LDO14 LDO15 LDO17 LDO19 LDO21 LDO22
+
+Example:
+ pmic: pmic@f8000000 {
+ compatible = "hisilicon,hi655x-pmic";
+ ...
+ regulators {
+ ldo2: LDO2@a21 {
+ regulator-compatible = "LDO2_2V8";
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <3200000>;
+ regulator-enable-ramp-delay = <120>;
+ };
+ ...
+ };
+ };
diff --git a/Documentation/devicetree/bindings/reset/hisilicon,hi6220-reset.txt b/Documentation/devicetree/bindings/reset/hisilicon,hi6220-reset.txt
new file mode 100644
index 000000000000..d0f91c557e27
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/hisilicon,hi6220-reset.txt
@@ -0,0 +1,36 @@
+Hisilicon System Reset Controller
+=================================
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+The reset controller registers are part of the system-ctl block on
+hi6220 SoC.
+
+Required properties:
+- compatible: should be one of the following:
+  "hisilicon,hi6220-sysctrl", "syscon" for peripheral reset,
+ "hisilicon,hi6220-pmctrl", "syscon" for media system reset.
+- reg: should be register base and length as documented in the
+ datasheet
+- #reset-cells: 1, see below
+
+Example:
+sys_ctrl: sys_ctrl@f7030000 {
+ compatible = "hisilicon,hi6220-sysctrl", "syscon";
+ reg = <0x0 0xf7030000 0x0 0x2000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+};
+
+Specifying reset lines connected to IP modules
+==============================================
+example:
+
+ uart1: serial@..... {
+ ...
+ resets = <&sys_ctrl PERIPH_RSTEN3_UART1>;
+ ...
+ };
+
+The indices can be found in <dt-bindings/reset/hisi,hi6220-resets.h>.
diff --git a/Documentation/devicetree/bindings/scheduler/sched-energy-costs.txt b/Documentation/devicetree/bindings/scheduler/sched-energy-costs.txt
new file mode 100644
index 000000000000..11216f09e596
--- /dev/null
+++ b/Documentation/devicetree/bindings/scheduler/sched-energy-costs.txt
@@ -0,0 +1,360 @@
+===========================================================
+Energy cost bindings for Energy Aware Scheduling
+===========================================================
+
+===========================================================
+1 - Introduction
+===========================================================
+
+This note specifies bindings required for energy-aware scheduling
+(EAS)[1]. Historically, the scheduler's primary objective has been
+performance. EAS aims to provide an alternative objective - energy
+efficiency. EAS relies on a simple platform energy cost model to
+guide scheduling decisions. The model only considers the CPU
+subsystem.
+
+This note is aligned with the definition of the layout of physical
+CPUs in the system as described in the ARM topology binding
+description [2]. The concept is applicable to any system so long as
+the cost model data is provided for those processing elements in
+that system's topology that EAS is required to service.
+
+Processing elements refer to hardware threads, CPUs and clusters of
+related CPUs in increasing order of hierarchy.
+
+EAS requires two key cost metrics - busy costs and idle costs. Busy
+costs comprise a list of compute capacities for the processing
+element in question and the corresponding power consumption at that
+capacity. Idle costs comprise a list of power consumption values
+for each idle state [C-state] that the processing element supports.
+For a detailed description of these metrics, their derivation and
+their use see [3].
+
+These cost metrics are required for processing elements in all
+scheduling domain levels that EAS is required to service.
+
+===========================================================
+2 - energy-costs node
+===========================================================
+
+Energy costs for the processing elements in scheduling domains that
+EAS is required to service are defined in the energy-costs node
+which acts as a container for the actual per processing element cost
+nodes. A single energy-costs node is required for a given system.
+
+- energy-costs node
+
+ Usage: Required
+
+ Description: The energy-costs node is a container node and
+	its sub-nodes describe costs for each processing element at
+ all scheduling domain levels that EAS is required to
+ service.
+
+ Node name must be "energy-costs".
+
+ The energy-costs node's parent node must be the cpus node.
+
+ The energy-costs node's child nodes can be:
+
+ - one or more cost nodes.
+
+ Any other configuration is considered invalid.
+
+The energy-costs node can only contain a single type of child node
+whose bindings are described in paragraph 4.
+
+===========================================================
+3 - energy-costs node child nodes naming convention
+===========================================================
+
+energy-costs child nodes must follow a naming convention where the
+node name must be "thread-costN", "core-costN", "cluster-costN"
+depending on whether the costs in the node are for a thread, core or
+cluster. N (where N = {0, 1, ...}) is the node number and has no
+bearing on the OS's logical thread, core or cluster index.
+
+===========================================================
+4 - cost node bindings
+===========================================================
+
+Bindings for cost nodes are defined as follows:
+
+- cluster-cost node
+
+ Description: must be declared within an energy-costs node. A
+ system can contain multiple clusters and each cluster
+	serviced by EAS must have a corresponding cluster-cost
+ node.
+
+ The cluster-cost node name must be "cluster-costN" as
+ described in 3 above.
+
+ A cluster-cost node must be a leaf node with no children.
+
+ Properties for cluster-cost nodes are described in paragraph
+ 5 below.
+
+ Any other configuration is considered invalid.
+
+- core-cost node
+
+ Description: must be declared within an energy-costs node. A
+ system can contain multiple cores and each core serviced by
+ EAS must have a corresponding core-cost node.
+
+ The core-cost node name must be "core-costN" as described in
+ 3 above.
+
+ A core-cost node must be a leaf node with no children.
+
+ Properties for core-cost nodes are described in paragraph
+ 5 below.
+
+ Any other configuration is considered invalid.
+
+- thread-cost node
+
+ Description: must be declared within an energy-costs node. A
+ system can contain cores with multiple hardware threads and
+ each thread serviced by EAS must have a corresponding
+ thread-cost node.
+
+	The thread-cost node name must be "thread-costN" as described
+	in 3 above.
+
+	A thread-cost node must be a leaf node with no children.
+
+ Properties for thread-cost nodes are described in paragraph
+ 5 below.
+
+ Any other configuration is considered invalid.
+
+===========================================================
+5 - Cost node properties
+==========================================================
+
+All cost node types must have only the following properties:
+
+- busy-cost-data
+
+ Usage: required
+ Value type: An array of 2-item tuples. Each item is of type
+ u32.
+ Definition: The first item in the tuple is the capacity
+ value as described in [3]. The second item in the tuple is
+ the energy cost value as described in [3].
+
+- idle-cost-data
+
+ Usage: required
+ Value type: An array of 1-item tuples. The item is of type
+ u32.
+ Definition: The item in the tuple is the energy cost value
+ as described in [3].
+
+===========================================================
+6 - Extensions to the cpu node
+===========================================================
+
+The cpu node is extended with a property that establishes the
+connection between the processing element represented by the cpu
+node and the cost-nodes associated with this processing element.
+
+The connection is expressed in line with the topological hierarchy
+that this processing element belongs to starting with the level in
+the hierarchy that this processing element itself belongs to through
+to the highest level that EAS is required to service. The
+connection cannot be sparse and must be contiguous from the
+processing element's level through to the highest desired level. The
+highest desired level must be the same for all processing elements.
+
+Example: Given that a cpu node may represent a thread that is a part
+of a core, this property may contain multiple elements which
+associate the thread with cost nodes describing the costs for the
+thread itself, the core the thread belongs to, the cluster the core
+belongs to and so on. The elements must be ordered from the lowest
+level nodes to the highest desired level that EAS must service. The
+highest desired level must be the same for all cpu nodes. The
+elements must not be sparse: there must be elements for the current
+thread, the next level of hierarchy (core) and so on without any
+'holes'.
+
+Example: Given that a cpu node may represent a core that is a part
+of a cluster of related cpus this property may contain multiple
+elements which associate the core with cost nodes describing the
+costs for the core itself, the cluster the core belongs to and so
+on. The elements must be ordered from the lowest level nodes to the
+highest desired level that EAS must service. The highest desired
+level must be the same for all cpu nodes. The elements must not be
+sparse: there must be elements for the current core, the next
+level of hierarchy (cluster) and so on without any 'holes'.
+
+If the system comprises hierarchical clusters of clusters, this
+property will contain multiple associations with the relevant number
+of cluster elements in hierarchical order.
+
+Property added to the cpu node:
+
+- sched-energy-costs
+
+ Usage: required
+ Value type: List of phandles
+ Definition: a list of phandles to specific cost nodes in the
+ energy-costs parent node that correspond to the processing
+ element represented by this cpu node in hierarchical order
+ of topology.
+
+ The order of phandles in the list is significant. The first
+ phandle is to the current processing element's own cost
+ node. Subsequent phandles are to higher hierarchical level
+ cost nodes up until the maximum level that EAS is to
+ service.
+
+ All cpu nodes must have the same highest level cost node.
+
+ The phandle list must not be sparsely populated with handles
+ to non-contiguous hierarchical levels. See commentary above
+ for clarity.
+
+ Any other configuration is invalid.
+
+===========================================================
+7 - Example dts
+===========================================================
+
+Example 1 (ARM 64-bit, 6-cpu system, two clusters of cpus, one
+cluster of 2 Cortex-A57 cpus, one cluster of 4 Cortex-A53 cpus):
+
+cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+ .
+ .
+ .
+ A57_0: cpu@0 {
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x0>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A57_L2>;
+ clocks = <&scpi_dvfs 0>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
+ };
+
+ A57_1: cpu@1 {
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x1>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A57_L2>;
+ clocks = <&scpi_dvfs 0>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
+ };
+
+ A53_0: cpu@100 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x100>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
+ };
+
+ A53_1: cpu@101 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x101>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
+ };
+
+ A53_2: cpu@102 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x102>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
+ };
+
+ A53_3: cpu@103 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x103>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
+ };
+
+ energy-costs {
+ CPU_COST_0: core-cost0 {
+ busy-cost-data = <
+ 417 168
+ 579 251
+ 744 359
+ 883 479
+ 1024 616
+ >;
+ idle-cost-data = <
+ 15
+ 0
+ >;
+ };
+ CPU_COST_1: core-cost1 {
+ busy-cost-data = <
+ 235 33
+ 302 46
+ 368 61
+ 406 76
+ 447 93
+ >;
+ idle-cost-data = <
+ 6
+ 0
+ >;
+ };
+ CLUSTER_COST_0: cluster-cost0 {
+ busy-cost-data = <
+ 417 24
+ 579 32
+ 744 43
+ 883 49
+ 1024 64
+ >;
+ idle-cost-data = <
+ 65
+ 24
+ >;
+ };
+ CLUSTER_COST_1: cluster-cost1 {
+ busy-cost-data = <
+ 235 26
+ 303 30
+ 368 39
+ 406 47
+ 447 57
+ >;
+ idle-cost-data = <
+ 56
+ 17
+ >;
+ };
+ };
+};
+
+===============================================================================
+[1] https://lkml.org/lkml/2015/5/12/728
+[2] Documentation/devicetree/bindings/topology.txt
+[3] Documentation/scheduler/sched-energy.txt
diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
index fd132cbee70e..221368207ca4 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -4,6 +4,7 @@ Platform DesignWare HS OTG USB 2.0 controller
Required properties:
- compatible : One of:
- brcm,bcm2835-usb: The DWC2 USB controller instance in the BCM2835 SoC.
+ - hisilicon,hi6220-usb: The DWC2 USB controller instance in the hi6220 SoC.
- rockchip,rk3066-usb: The DWC2 USB controller instance in the rk3066 Soc;
- "rockchip,rk3188-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3188 Soc;
- "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc;
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 55df1d444e9f..569e9c89080f 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -127,6 +127,7 @@ lacie LaCie
lantiq Lantiq Semiconductor
lenovo Lenovo Group Ltd.
lg LG Corporation
+linaro Linaro Limited
linux Linux-specific binding
lsi LSI Corp. (LSI Logic)
lltc Linear Technology Corporation
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 91261a32a573..b5ce7b6c3576 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -307,6 +307,7 @@ Code Seq#(hex) Include File Comments
0xA3 80-8F Port ACL in development:
<mailto:tlewis@mindspring.com>
0xA3 90-9F linux/dtlk.h
+0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem
0xAA 00-3F linux/uapi/linux/userfaultfd.h
0xAB 00-1F linux/nbd.h
0xAC 00-1F linux/raw.h
diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt
index 5d8675615e59..9264bcab4099 100644
--- a/Documentation/ramoops.txt
+++ b/Documentation/ramoops.txt
@@ -45,7 +45,7 @@ corrupt, but usually it is restorable.
2. Setting the parameters
-Setting the ramoops parameters can be done in 2 different manners:
+Setting the ramoops parameters can be done in 3 different manners:
1. Use the module parameters (which have the names of the variables described
as before).
For quick debugging, you can also reserve parts of memory during boot
@@ -54,7 +54,9 @@ Setting the ramoops parameters can be done in 2 different manners:
kernel to use only the first 128 MB of memory, and place ECC-protected ramoops
region at 128 MB boundary:
"mem=128M ramoops.mem_address=0x8000000 ramoops.ecc=1"
- 2. Use a platform device and set the platform data. The parameters can then
+ 2. Use Device Tree bindings, as described in
+   Documentation/devicetree/bindings/misc/ramoops.txt.
+ 3. Use a platform device and set the platform data. The parameters can then
be set through that platform data. An example of doing that is:
#include <linux/pstore_ram.h>
diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt
new file mode 100644
index 000000000000..dab2f9088b33
--- /dev/null
+++ b/Documentation/scheduler/sched-energy.txt
@@ -0,0 +1,362 @@
+Energy cost model for energy-aware scheduling (EXPERIMENTAL)
+
+Introduction
+=============
+
+The basic energy model uses platform energy data stored in sched_group_energy
+data structures attached to the sched_groups in the sched_domain hierarchy. The
+energy cost model offers two functions that can be used to guide scheduling
+decisions:
+
+1. static unsigned int sched_group_energy(struct energy_env *eenv)
+2. static int energy_diff(struct energy_env *eenv)
+
+sched_group_energy() estimates the energy consumed by all cpus in a specific
+sched_group including any shared resources owned exclusively by this group of
+cpus. Resources shared with other cpus are excluded (e.g. later level caches).
+
+energy_diff() estimates the total energy impact of a utilization change. That
+is, adding, removing, or migrating utilization (tasks).
+
+Both functions use a struct energy_env to specify the scenario to be evaluated:
+
+ struct energy_env {
+ struct sched_group *sg_top;
+ struct sched_group *sg_cap;
+ int cap_idx;
+ int util_delta;
+ int src_cpu;
+ int dst_cpu;
+ int energy;
+ };
+
+sg_top: sched_group to be evaluated. Not used by energy_diff().
+
+sg_cap: sched_group covering the cpus in the same frequency domain. Set by
+sched_group_energy().
+
+cap_idx: Capacity state to be used for energy calculations. Set by
+find_new_capacity().
+
+util_delta: Amount of utilization to be added, removed, or migrated.
+
+src_cpu: Source cpu from where 'util_delta' utilization is removed. Should be
+-1 if no source (e.g. task wake-up).
+
+dst_cpu: Destination cpu where 'util_delta' utilization is added. Should be -1
+if utilization is removed (e.g. terminating tasks).
+
+energy: Result of sched_group_energy().
+
+The metric used to represent utilization is the actual per-entity running time
+averaged over time using a geometric series. It is very similar to the existing
+per-entity load-tracking, but _not_ scaled by task priority and is capped by the
+capacity of the cpu. The latter property does mean that utilization may
+underestimate the compute requirements for tasks on fully/over-utilized cpus.
+The greatest potential for energy savings without affecting performance too much
+is in scenarios where the system isn't fully utilized. If the system is deemed
+fully utilized, load-balancing should be done with task load (which includes
+task priority) instead, in the interest of fairness and performance.
+
+
+Background and Terminology
+===========================
+
+To make it clear from the start:
+
+energy = [joule] (resource like a battery on powered devices)
+power = energy/time = [joule/second] = [watt]
+
+The goal of energy-aware scheduling is to minimize energy, while still getting
+the job done. That is, we want to maximize:
+
+ performance [inst/s]
+ --------------------
+ power [W]
+
+which is equivalent to minimizing:
+
+ energy [J]
+ -----------
+ instruction
+
+while still getting 'good' performance. It is essentially an alternative
+optimization objective to the current performance-only objective for the
+scheduler. This alternative considers two objectives: energy-efficiency and
+performance. Hence, there needs to be a user controllable knob to switch the
+objective. Since it is early days, this is currently a sched_feature
+(ENERGY_AWARE).
+
+The idea behind introducing an energy cost model is to allow the scheduler to
+evaluate the implications of its decisions rather than blindly applying
+energy-saving techniques that may only have positive effects on some platforms. At
+the same time, the energy cost model must be as simple as possible to minimize
+the scheduler latency impact.
+
+Platform topology
+------------------
+
+The system topology (cpus, caches, and NUMA information, not peripherals) is
+represented in the scheduler by the sched_domain hierarchy which has
+sched_groups attached at each level that covers one or more cpus (see
+sched-domains.txt for more details). To add energy awareness to the scheduler
+we need to consider power and frequency domains.
+
+Power domain:
+
+A power domain is a part of the system that can be powered on/off
+independently. Power domains are typically organized in a hierarchy where you
+may be able to power down just a cpu or a group of cpus along with any
+associated resources (e.g. shared caches). Powering up a cpu means that all
+power domains it is a part of in the hierarchy must be powered up. Hence, it is
+more expensive to power up the first cpu that belongs to a higher level power
+domain than powering up additional cpus in the same high level domain. Two
+level power domain hierarchy example:
+
+ Power source
+ +-------------------------------+----...
+per group PD G G
+ | +----------+ |
+ +--------+-------| Shared | (other groups)
+per-cpu PD G G | resource |
+ | | +----------+
+ +-------+ +-------+
+ | CPU 0 | | CPU 1 |
+ +-------+ +-------+
+
+Frequency domain:
+
+Frequency domains (P-states) typically cover the same group of cpus as one of
+the power domain levels. That is, there might be several smaller power domains
+sharing the same frequency (P-state) or there might be a power domain spanning
+multiple frequency domains.
+
+From a scheduling point of view there is no need to know the actual frequencies
+[Hz]. All the scheduler cares about is the compute capacity available at the
+current state (P-state) the cpu is in and any other available states. For that
+reason, and to also factor in any cpu micro-architecture differences, compute
+capacity scaling states are called 'capacity states' in this document. For SMP
+systems this is equivalent to P-states. For mixed micro-architecture systems
+(like ARM big.LITTLE) it is P-states scaled according to the micro-architecture
+performance relative to the other cpus in the system.
+
+Energy modelling:
+------------------
+
+Due to the hierarchical nature of the power domains, the most obvious way to
+model energy costs is to associate power and energy costs with
+domains (groups of cpus). Energy costs of shared resources are associated with
+the group of cpus that share the resources; only the cost of powering the
+cpu itself and any private resources (e.g. private L1 caches) is associated
+with the per-cpu groups (lowest level).
+
+For example, for an SMP system with per-cpu power domains and a cluster level
+(group of cpus) power domain we get the overall energy costs to be:
+
+ energy = energy_cluster + n * energy_cpu
+
+where 'n' is the number of cpus powered up and energy_cluster is the cost paid
+as soon as any cpu in the cluster is powered up.
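For instance, with made-up normalized numbers energy_cluster = 25 and
energy_cpu = 60, powering up a single cpu costs 25 + 1 * 60 = 85, while a
second cpu in the same cluster adds only 60 for a total of 25 + 2 * 60 = 145;
the cluster cost is paid once regardless of how many of its cpus are up.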
+
+The power and frequency domains can naturally be mapped onto the existing
+sched_domain hierarchy and sched_groups by adding the necessary data to the
+existing data structures.
+
+The energy model considers energy consumption from two contributors (shown in
+the illustration below):
+
+1. Busy energy: Energy consumed while a cpu and the higher level groups that it
+belongs to are busy running tasks. Busy energy is associated with the state of
+the cpu, not an event. The time the cpu spends in this state varies. Thus, the
+most obvious platform parameter for this contribution is busy power
+(energy/time).
+
+2. Idle energy: Energy consumed while a cpu and higher level groups that it
+belongs to are idle (in a C-state). Like busy energy, idle energy is associated
+with the state of the cpu. Thus, the platform parameter for this contribution
+is idle power (energy/time).
+
+Energy consumed during transitions from an idle-state (C-state) to a busy state
+(P-state) or going the other way is ignored by the model to simplify the energy
+model calculations.
+
+
+ Power
+ ^
+ | busy->idle idle->busy
+ | transition transition
+ |
+ | _ __
+ | / \ / \__________________
+ |______________/ \ /
+ | \ /
+ | Busy \ Idle / Busy
+ | low P-state \____________/ high P-state
+ |
+ +------------------------------------------------------------> time
+
+Busy |--------------| |-----------------|
+
+Wakeup |------| |------|
+
+Idle |------------|
+
+
+The basic algorithm
+====================
+
+The basic idea is to determine the total energy impact when utilization is
+added or removed by estimating the impact at each level in the sched_domain
+hierarchy starting from the bottom (sched_group contains just a single cpu).
+The energy cost comes from busy time (sched_group is awake because one or more
+cpus are busy) and idle time (in an idle-state). Energy model numbers account
+for energy costs associated with all cpus in the sched_group as a group.
+
+ for_each_domain(cpu, sd) {
+ sg = sched_group_of(cpu)
+ energy_before = curr_util(sg) * busy_power(sg)
+ + (1-curr_util(sg)) * idle_power(sg)
+ energy_after = new_util(sg) * busy_power(sg)
+ + (1-new_util(sg)) * idle_power(sg)
+ energy_diff += energy_before - energy_after
+ }
+
+ return energy_diff
+
+{curr, new}_util: The cpu utilization at the lowest level and the overall
+non-idle time for the entire group for higher levels. Utilization is in the
+range 0.0 to 1.0 in the pseudo-code.
+
+busy_power: The power consumption of the sched_group.
+
+idle_power: The power consumption of the sched_group when idle.
+
+Note: It is a fundamental assumption that the utilization is (roughly) scale
+invariant. Task utilization tracking factors in any frequency scaling and
+performance scaling differences due to different cpu microarchitectures such
+that task utilization can be used across the entire system.
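+
+As an illustration only, the walk above can be written as a small
+self-contained C program (hypothetical group data, floating point for clarity;
+the kernel implementation uses scaled integer arithmetic):
+
+	#include <stdio.h>
+
+	/* Hypothetical model data for one sched_group at each level. */
+	struct group {
+		double curr_util;	/* utilization before the change */
+		double new_util;	/* utilization after the change */
+		double busy_power;	/* busy power of the group */
+		double idle_power;	/* idle power of the group */
+	};
+
+	/* Accumulate the estimated energy difference bottom-up. */
+	static double energy_diff(const struct group *g, int nr_levels)
+	{
+		double diff = 0.0;
+		int i;
+
+		for (i = 0; i < nr_levels; i++) {
+			double before = g[i].curr_util * g[i].busy_power
+				      + (1.0 - g[i].curr_util) * g[i].idle_power;
+			double after = g[i].new_util * g[i].busy_power
+				     + (1.0 - g[i].new_util) * g[i].idle_power;
+			diff += before - after;
+		}
+		return diff;
+	}
+
+	int main(void)
+	{
+		/* cpu-level group followed by its cluster-level group */
+		struct group levels[] = {
+			{ 0.2, 0.5, 670.0, 15.0 },
+			{ 0.3, 0.5, 112.0, 47.0 },
+		};
+
+		printf("energy_diff = %.1f\n", energy_diff(levels, 2));
+		return 0;
+	}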
+
+
+Platform energy data
+=====================
+
+struct sched_group_energy can be attached to sched_groups in the sched_domain
+hierarchy and has the following members:
+
+cap_states:
+	List of struct capacity_state representing the supported capacity states
+	(P-states). struct capacity_state has two members: cap and power, which
+	represent the compute capacity and the busy power of the state. The
+	list must be ordered by capacity low->high.
+
+nr_cap_states:
+ Number of capacity states in cap_states list.
+
+idle_states:
+	List of struct idle_state containing idle_state power cost for each
+	idle-state supported by the system, ordered by shallowest state first.
+	All states must be included at all levels in the hierarchy, i.e. a
+	sched_group spanning just a single cpu must also include coupled
+	idle-states (cluster states). In addition to the cpuidle idle-states,
+	the list must also contain an entry for idling using the arch default
+	idle (arch_idle_cpu()). Although this state may not be a true hardware
+	idle-state, it is considered the shallowest idle-state in the energy
+	model and must be the first entry. cpus may enter this state (possibly
+	'active idling') if cpuidle decides not to enter a cpuidle idle-state.
+	Default idle may not be used when cpuidle is enabled. In this case, it
+	should just be a copy of the first cpuidle idle-state.
+
+nr_idle_states:
+ Number of idle states in idle_states list.
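+
+A minimal C sketch of these structures, following the description above (the
+exact kernel definitions may differ in detail):
+
+	struct capacity_state {
+		unsigned long cap;	/* compute capacity at this state */
+		unsigned long power;	/* busy power at this state */
+	};
+
+	struct idle_state {
+		unsigned long power;	/* power in this idle-state */
+	};
+
+	struct sched_group_energy {
+		unsigned int nr_idle_states;	   /* entries in idle_states */
+		struct idle_state *idle_states;	   /* shallowest first */
+		unsigned int nr_cap_states;	   /* entries in cap_states */
+		struct capacity_state *cap_states; /* ordered low->high cap */
+	};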
+
+There are no unit requirements for the energy cost data. Data can be
+normalized to any reference; however, the normalization must be consistent
+across all energy cost data. That is, one bogo-joule/watt must represent the
+same quantity for all data, but we don't care what it is.
+
+A recipe for platform characterization
+=======================================
+
+Obtaining the actual model data for a particular platform requires some way of
+measuring power/energy. There isn't a tool to help with this (yet). This
+section provides a recipe for use as reference. It covers the steps used to
+characterize the ARM TC2 development platform. This sort of measurement is
+expected to be done anyway when tuning cpuidle and cpufreq for a given
+platform.
+
+The energy model needs two types of data (struct sched_group_energy holds
+these) for each sched_group where energy costs should be taken into account:
+
+1. Capacity state information
+
+A list containing, for each available capacity state, the compute capacity and
+power consumption attributed to the group as a whole when fully utilized.
+At the lowest level (group contains just a single cpu) this is the power of the
+cpu alone without including power consumed by resources shared with other cpus.
+It needs to fit the basic modelling approach described in the "Background and
+Terminology" section:
+
+ energy_system = energy_shared + n * energy_cpu
+
+for a system containing 'n' busy cpus. Only 'energy_cpu' should be included at
+the lowest level. 'energy_shared' is included at the next level which
+represents the group of cpus among which the resources are shared.
+
+This model is, of course, a simplification of reality. Thus, power/energy
+attributions might not always exactly represent how the hardware is designed.
+Also, busy power is likely to depend on the workload. It is therefore
+recommended to use a representative mix of workloads when characterizing the
+capacity states.
+
+If the group has no capacity scaling support, the list will contain a single
+state where power is the busy power attributed to the group. The capacity
+should be set to a default value (1024).
+
+When frequency domains include multiple power domains, the group representing
+the frequency domain and all child groups share capacity states. This must be
+indicated by setting the SD_SHARE_CAP_STATES sched_domain flag. All groups at
+all levels that share the capacity state must have the list of capacity states
+with the power set to the contribution of the individual group.
+
+2. Idle power information
+
+Stored in the idle_states list. The power number is the group idle power
+consumption in each idle state, as well as when the group is idle but has not
+entered an idle-state ('active idle' as mentioned earlier). Due to the way the
+energy model is defined, the idle power of the deepest group idle state can
+alternatively be accounted for in the parent group busy power. In that case the
+group idle state power values are offset such that the idle power of the
+deepest state is zero. This is less intuitive, but it is easier to measure, as
+the idle power consumed by the group and the busy/idle power of the parent
+group cannot be distinguished without per-group measurement points.
+
+Measuring capacity states and idle power:
+
+The capacity states' capacity and power can be estimated by running a benchmark
+workload at each available capacity state. By restricting the benchmark to run
+on subsets of cpus it is possible to extrapolate the power consumption of
+shared resources.
+
+ARM TC2 has two clusters of two and three cpus respectively. Each cluster has a
+shared L2 cache. TC2 has on-chip energy counters per cluster. Running a
+benchmark workload on just one cpu in a cluster means that power is consumed in
+the cluster (higher level group) and a single cpu (lowest level group). Adding
+another benchmark task to another cpu increases the power consumption by the
+amount consumed by the additional cpu. Hence, it is possible to extrapolate the
+cluster busy power.
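+
+With hypothetical counter readings: if the cluster energy counter reports 300
+bogo-watts with one cpu running the benchmark and 400 bogo-watts with two,
+then energy_cpu = 400 - 300 = 100 and energy_cluster = 300 - 100 = 200.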
+
+For platforms that don't have energy counters or equivalent instrumentation
+built-in, it may be possible to use an external DAQ to acquire similar data.
+
+If the benchmark includes some performance score (for example, the sysbench cpu
+benchmark), this can be used to record the compute capacity.
+
+Measuring idle power requires insight into the idle state implementation on the
+particular platform, specifically whether the platform has coupled idle-states
+(or package states). To measure non-coupled per-cpu idle-states it is necessary
+to keep one cpu busy so that any shared resources stay alive; this isolates the
+idle power of the cpu from the idle/busy power of the shared resources. The cpu
+can be tricked into different per-cpu idle states by disabling the other
+states. Based on various combinations of measurements, with specific cpus busy
+and specific idle-states disabled, it is possible to extrapolate the idle-state
+power.
diff --git a/Documentation/scheduler/sched-tune.txt b/Documentation/scheduler/sched-tune.txt
new file mode 100644
index 000000000000..9bd2231c01b1
--- /dev/null
+++ b/Documentation/scheduler/sched-tune.txt
@@ -0,0 +1,366 @@
+ Central, scheduler-driven, power-performance control
+ (EXPERIMENTAL)
+
+Abstract
+========
+
+The topic of a single, simple power-performance tunable that is wholly
+scheduler-centric and has well-defined and predictable properties has come up
+on several occasions in the past [1,2]. With techniques such as scheduler
+driven DVFS [3], we now have a good framework for implementing such a tunable.
+This document describes the overall ideas behind its design and implementation.
+
+
+Table of Contents
+=================
+
+1. Motivation
+2. Introduction
+3. Signal Boosting Strategy
+4. OPP selection using boosted CPU utilization
+5. Per task group boosting
+6. Question and Answers
+ - What about "auto" mode?
+ - What about boosting on a congested system?
+   - How are multiple groups of tasks with different boost values managed?
+7. References
+
+
+1. Motivation
+=============
+
+Sched-DVFS [3] is a new event-driven cpufreq governor which allows the
+scheduler to select the optimal DVFS operating point (OPP) for running a task
+allocated to a CPU. The introduction of sched-DVFS enables running workloads at
+the most energy efficient OPPs.
+
+However, sometimes it may be desired to intentionally boost the performance of
+a workload even if that could imply a reasonable increase in energy
+consumption. For example, in order to reduce the response time of a task, we
+may want to run the task at a higher OPP than the one actually required by
+its CPU bandwidth demand.
+
+This last requirement is especially important if we consider that one of the
+main goals of the sched-DVFS component is to replace all currently available
+CPUFreq policies. Since sched-DVFS is event based, as opposed to the sampling
+driven governors we currently have, it is already more responsive at selecting
+the optimal OPP to run tasks allocated to a CPU. However, just tracking the
+actual task load demand may not be enough from a performance standpoint. For
+example, it is not possible to get behaviors similar to those provided by the
+"performance" and "interactive" CPUFreq governors.
+
+This document describes an implementation of a tunable, stacked on top of the
+sched-DVFS which extends its functionality to support task performance
+boosting.
+
+By "performance boosting" we mean the reduction of the time required to
+complete a task activation, i.e. the time elapsed from a task wakeup to its
+next deactivation (e.g. because it goes back to sleep or it terminates). For
+example, if we consider a simple periodic task which executes the same workload
+for 5[s] every 20[s] while running at a certain OPP, a boosted execution of
+that task must complete each of its activations in less than 5[s].
+
+A previous attempt [5] to introduce such a boosting feature has not been
+successful mainly because of the complexity of the proposed solution. The
+approach described in this document exposes a single simple interface to
+user-space. This single tunable knob allows the tuning of system-wide
+scheduler behaviours ranging from energy efficiency at one end through to
+incremental performance boosting at the other end. This first tunable affects
+all tasks. However, a more advanced extension of the concept is also provided
+which uses CGroups to boost the performance of only selected tasks while using
+the energy efficient default for all others.
+
+The rest of this document introduces in more detail the proposed solution,
+which has been named SchedTune.
+
+
+2. Introduction
+===============
+
+SchedTune exposes a simple user-space interface with a single power-performance
+tunable:
+
+ /proc/sys/kernel/sched_cfs_boost
+
+This permits expressing a boost value as an integer in the range [0..100].
+
+A value of 0 (default) configures the CFS scheduler for maximum energy
+efficiency. This means that sched-DVFS runs the tasks at the minimum OPP
+required to satisfy their workload demand.
+A value of 100 configures the scheduler for maximum performance, which
+translates to the selection of the maximum OPP on that CPU.
+
+Values between 0 and 100 can be set to suit other scenarios, for example to
+improve interactive response, or depending on other system events (battery
+level, etc.).
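+
+For example, a user-space policy daemon (hypothetical) could request a mild
+system-wide boost with:
+
+	root@linaro-nano:~# echo 25 > /proc/sys/kernel/sched_cfs_boost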
+
+A CGroup based extension is also provided, which permits further user-space
+defined task classification to tune the scheduler for different goals depending
+on the specific nature of the task, e.g. background vs interactive vs
+low-priority.
+
+The overall design of the SchedTune module is built on top of "Per-Entity Load
+Tracking" (PELT) signals and sched-DVFS by introducing a bias on the Operating
+Performance Point (OPP) selection.
+Each time a task is allocated on a CPU, sched-DVFS has the opportunity to tune
+the operating frequency of that CPU to better match the workload demand. The
+selection of the actual OPP being activated is influenced by the global boost
+value, or the boost value for the task CGroup when in use.
+
+This simple biasing approach leverages existing frameworks, which means minimal
+modifications to the scheduler, and yet it allows a range of different
+behaviours to be achieved, all from a single simple tunable knob.
+The only new concept introduced is that of signal boosting.
+
+
+3. Signal Boosting Strategy
+===========================
+
+The whole PELT machinery works based on the value of a few load tracking signals
+which basically track the CPU bandwidth requirements for tasks and the capacity
+of CPUs. The basic idea behind the SchedTune knob is to artificially inflate
+some of these load tracking signals to make a task or RQ appear more demanding
+than it actually is.
+
+Which signals have to be inflated depends on the specific "consumer". However,
+independently of the specific (signal, consumer) pair, it is important to
+define a simple and possibly consistent strategy for the concept of boosting a
+signal.
+
+A boosting strategy defines how the "abstract" user-space defined
+sched_cfs_boost value is translated into an internal "margin" value to be added
+to a signal to get its inflated value:
+
+ margin := boosting_strategy(sched_cfs_boost, signal)
+ boosted_signal := signal + margin
+
+Different boosting strategies were identified and analyzed before selecting the
+one found to be most effective.
+
+Signal Proportional Compensation (SPC)
+--------------------------------------
+
+In this boosting strategy the sched_cfs_boost value is used to compute a
+margin which is proportional to the complement of the original signal.
+When a signal has a maximum possible value, its complement is defined as
+the delta between its actual value and that possible maximum.
+
+Since the tunable implementation uses signals which have SCHED_LOAD_SCALE as
+the maximum possible value, the margin becomes:
+
+ margin := sched_cfs_boost * (SCHED_LOAD_SCALE - signal)
+
+Using this boosting strategy:
+- a 100% sched_cfs_boost means that the signal is scaled to the maximum value
+- each value in the range of sched_cfs_boost effectively inflates the signal in
+  question by a margin which is proportional to the signal's complement (its
+  headroom below the maximum value).
+
+For example, by applying the SPC boosting strategy to the selection of the OPP
+to run a task it is possible to achieve these behaviors:
+
+- 0% boosting: run the task at the minimum OPP required by its workload
+- 100% boosting: run the task at the maximum OPP available for the CPU
+- 50% boosting: run at the half-way OPP between minimum and maximum
+
+This means that, at 50% boosting, a task will be scheduled to run at half of
+the maximum theoretically achievable performance on the specific target
+platform.
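+
+As a small C sketch of the SPC computation (integer math, assuming
+SCHED_LOAD_SCALE = 1024 and a boost value in [0..100]; illustrative only, not
+the kernel code):
+
+	#define SCHED_LOAD_SCALE 1024UL
+
+	/* SPC: the margin is proportional to the signal's headroom. */
+	static unsigned long spc_boost(unsigned long signal,
+				       unsigned long boost_pct)
+	{
+		unsigned long margin;
+
+		margin = boost_pct * (SCHED_LOAD_SCALE - signal) / 100;
+		return signal + margin;
+	}
+
+	/*
+	 * spc_boost(512, 0)   == 512   (unmodified)
+	 * spc_boost(512, 50)  == 768   (midway to the maximum)
+	 * spc_boost(512, 100) == 1024  (saturated at SCHED_LOAD_SCALE)
+	 */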
+
+A graphical representation of an SPC boosted signal is shown in the
+following figure, where:
+ a) "-" represents the original signal
+ b) "b" represents a 50% boosted signal
+ c) "p" represents a 100% boosted signal
+
+
+ ^
+ | SCHED_LOAD_SCALE
+ +-----------------------------------------------------------------+
+ |pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp
+ |
+ | boosted_signal
+ | bbbbbbbbbbbbbbbbbbbbbbbb
+ |
+ | original signal
+ | bbbbbbbbbbbbbbbbbbbbbbbb+----------------------+
+ | |
+ |bbbbbbbbbbbbbbbbbb |
+ | |
+ | |
+ | |
+ | +-----------------------+
+ | |
+ | |
+ | |
+ |------------------+
+ |
+ |
+ +----------------------------------------------------------------------->
+
+The plot above shows a ramped load signal (the 'original signal') and its
+boosted equivalent. For each step of the original signal the boosted signal
+corresponding to a 50% boost is midway between the original signal and the
+upper bound. Boosting by 100% generates a boosted signal which is always
+saturated at the upper bound.
+
+
+4. OPP selection using boosted CPU utilization
+==============================================
+
+It is worth calling out that the implementation does not introduce any new load
+signals. Instead, it provides an API to tune existing signals. This tuning is
+done on demand and only in scheduler code paths where it is sensible to do so.
+The new API calls are defined to return either the default signal or a boosted
+one, depending on the value of sched_cfs_boost. This is a clean and
+non-invasive modification of the existing code paths.
+
+The signal representing a CPU's utilization is boosted according to the
+previously described SPC boosting strategy. To sched-DVFS, this allows a CPU
+(i.e. a CFS run-queue) to appear more utilized than it actually is.
+
+Thus, with sched_cfs_boost enabled, we have the following main functions to
+get the current utilization of a CPU:
+
+ cpu_util()
+ boosted_cpu_util()
+
+The new boosted_cpu_util() is similar to cpu_util() but returns a boosted
+utilization signal which is a function of the sched_cfs_boost value.
+
+This function is used in the CFS scheduler code paths where sched-DVFS needs to
+decide the OPP to run a CPU at.
+For example, this allows selecting the highest OPP for a CPU which has
+the boost value set to 100%.
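+
+Conceptually, the OPP selection step then picks the lowest OPP whose capacity
+covers the boosted utilization. A sketch with hypothetical helper names (not
+the kernel API):
+
+	/* Pick the lowest OPP whose capacity covers the boosted utilization. */
+	static int select_opp(const unsigned long *opp_caps, int nr_opps,
+			      unsigned long boosted_util)
+	{
+		int i;
+
+		for (i = 0; i < nr_opps; i++)
+			if (opp_caps[i] >= boosted_util)
+				return i;
+
+		return nr_opps - 1;	/* fall back to the highest OPP */
+	}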
+
+
+5. Per task group boosting
+==========================
+
+The availability of a single knob which is used to boost all tasks in the
+system is certainly a simple solution but it quite likely doesn't fit many
+utilization scenarios, especially in the mobile device space.
+
+For example, on battery powered devices there usually are many background
+services which are long running and need energy efficient scheduling. On the
+other hand, some applications are more performance sensitive and require an
+interactive response and/or maximum performance, regardless of the energy cost.
+To better service such scenarios, the SchedTune implementation has an extension
+that provides a more fine grained boosting interface.
+
+A new CGroup controller, namely "schedtune", can be enabled, which allows the
+definition and configuration of task groups with different boosting values.
+Tasks that require special performance can be put into separate CGroups.
+The value of the boost associated with the tasks in this group can be specified
+using a single knob exposed by the CGroup controller:
+
+ schedtune.boost
+
+This knob allows the definition of a boost value that is to be used for
+SPC boosting of all tasks attached to this group.
+
+The current schedtune controller implementation is really simple and has these
+main characteristics:
+
+ 1) It is only possible to create 1 level depth hierarchies
+
+   The root control group defines the system-wide boost value to be applied
+   by default to all tasks. Its direct subgroups are named "boost groups" and
+   they define the boost value for a specific set of tasks.
+ Further nested subgroups are not allowed since they do not have a sensible
+ meaning from a user-space standpoint.
+
+ 2) It is possible to define only a limited number of "boost groups"
+
+   This number is defined at compile time and is configured to 16 by default.
+   This is a design decision motivated by two main reasons:
+   a) In a real system we do not expect utilization scenarios with more than a
+      few boost groups. For example, a reasonable collection of groups could be
+      just "background", "interactive" and "performance".
+ b) It simplifies the implementation considerably, especially for the code
+ which has to compute the per CPU boosting once there are multiple
+ RUNNABLE tasks with different boost values.
+
+Such a simple design should allow servicing the main utilization scenarios
+identified so far. It provides a simple interface which can be used to manage
+the power-performance of all tasks or only selected tasks.
+Moreover, this interface can be easily integrated by user-space run-times (e.g.
+Android, ChromeOS) to implement a QoS solution for task boosting based on tasks
+classification, which has been a long standing requirement.
+
+Setup and usage
+---------------
+
+0. Use a kernel with CGROUP_SCHEDTUNE support enabled
+
+1. Check that the "schedtune" CGroup controller is available:
+
+ root@linaro-nano:~# cat /proc/cgroups
+ #subsys_name hierarchy num_cgroups enabled
+ cpuset 0 1 1
+ cpu 0 1 1
+ schedtune 0 1 1
+
+2. Mount a tmpfs to create the CGroups mount point (Optional)
+
+ root@linaro-nano:~# sudo mount -t tmpfs cgroups /sys/fs/cgroup
+
+3. Mount the "schedtune" controller
+
+ root@linaro-nano:~# mkdir /sys/fs/cgroup/stune
+ root@linaro-nano:~# sudo mount -t cgroup -o schedtune stune /sys/fs/cgroup/stune
+
+4. Setup the system-wide boost value (Optional)
+
+   If not configured, the root control group has a 0% boost value, which
+   basically disables boosting for all tasks in the system, leaving them
+   running in an energy-efficient mode.
+
+ root@linaro-nano:~# echo $SYSBOOST > /sys/fs/cgroup/stune/schedtune.boost
+
+5. Create task groups and configure their specific boost value (Optional)
+
+   For example, here we create a "performance" boost group configured to boost
+   all its tasks to 100%:
+
+ root@linaro-nano:~# mkdir /sys/fs/cgroup/stune/performance
+ root@linaro-nano:~# echo 100 > /sys/fs/cgroup/stune/performance/schedtune.boost
+
+6. Move tasks into the boost group
+
+   For example, the following moves the task with PID $TASKPID (and all its
+   threads) into the "performance" boost group.
+
+   root@linaro-nano:~# echo $TASKPID > /sys/fs/cgroup/stune/performance/cgroup.procs
+
+This simple configuration allows only the threads of the $TASKPID task to run,
+when needed, at the highest OPP on the most capable CPU of the system.
+
+
+6. Question and Answers
+=======================
+
+What about "auto" mode?
+-----------------------
+
+The 'auto' mode as described in [5] can be implemented by interfacing SchedTune
+with some suitable user-space element. This element could use the exposed
+system-wide or cgroup based interface.
+
+How are multiple groups of tasks with different boost values managed?
+---------------------------------------------------------------------
+
+The current SchedTune implementation keeps track of the boosted RUNNABLE tasks
+on a CPU. Once sched-DVFS selects the OPP to run a CPU at, the CPU utilization
+is boosted with a value which is the maximum of the boost values of the
+currently RUNNABLE tasks in its RQ.
+
+This allows sched-DVFS to boost a CPU only while there are boosted tasks ready
+to run and switch back to the energy efficient mode as soon as the last boosted
+task is dequeued.
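+
+A sketch of that aggregation (hypothetical types; the real implementation
+tracks the maximum incrementally at enqueue/dequeue time):
+
+	/* Boost for a CPU: the maximum boost of its RUNNABLE tasks. */
+	static unsigned int cpu_boost(const unsigned int *task_boost,
+				      int nr_runnable)
+	{
+		unsigned int max_boost = 0;	/* no boosted tasks: no boost */
+		int i;
+
+		for (i = 0; i < nr_runnable; i++)
+			if (task_boost[i] > max_boost)
+				max_boost = task_boost[i];
+
+		return max_boost;
+	}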
+
+
+7. References
+=============
+[1] http://lwn.net/Articles/552889
+[2] http://lkml.org/lkml/2012/5/18/91
+[3] http://lkml.org/lkml/2015/6/26/620
diff --git a/Documentation/tee.txt b/Documentation/tee.txt
new file mode 100644
index 000000000000..718599357596
--- /dev/null
+++ b/Documentation/tee.txt
@@ -0,0 +1,118 @@
+TEE subsystem
+=============
+
+This document describes the TEE subsystem in Linux.
+
+A TEE (Trusted Execution Environment) is a trusted OS running in some
+secure environment, for example, TrustZone on ARM CPUs, or a separate
+secure co-processor etc. A TEE driver handles the details needed to
+communicate with the TEE.
+
+This subsystem deals with:
+
+- Registration of TEE drivers
+
+- Managing shared memory between Linux and the TEE
+
+- Providing a generic API to the TEE
+
+The TEE interface
+=================
+
+include/uapi/linux/tee.h defines the generic interface to a TEE.
+
+User space (the client) connects to the driver by opening /dev/tee[0-9]* or
+/dev/teepriv[0-9]*.
+
+- TEE_IOC_SHM_ALLOC allocates shared memory and returns a file descriptor
+ which user space can mmap. When user space doesn't need the file
+ descriptor any more, it should be closed. When shared memory isn't needed
+ any longer it should be unmapped with munmap() to allow the reuse of
+ memory.
+
+- TEE_IOC_VERSION lets user space know which TEE this driver handles and
+  its capabilities.
+
+- TEE_IOC_OPEN_SESSION opens a new session to a Trusted Application.
+
+- TEE_IOC_INVOKE invokes a function in a Trusted Application.
+
+- TEE_IOC_CANCEL may cancel an ongoing TEE_IOC_OPEN_SESSION or TEE_IOC_INVOKE.
+
+- TEE_IOC_CLOSE_SESSION closes a session to a Trusted Application.
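+
+As a minimal illustration, a client could query the driver with
+TEE_IOC_VERSION like this (a sketch; the struct fields follow
+include/uapi/linux/tee.h, and error handling is kept to a minimum):
+
+	#include <fcntl.h>
+	#include <stdio.h>
+	#include <sys/ioctl.h>
+	#include <unistd.h>
+	#include <linux/tee.h>
+
+	int main(void)
+	{
+		struct tee_ioctl_version_data vers;
+		int fd = open("/dev/tee0", O_RDWR);
+
+		if (fd < 0 || ioctl(fd, TEE_IOC_VERSION, &vers) < 0) {
+			perror("tee");
+			return 1;
+		}
+		printf("impl_id %u, gen_caps 0x%x\n", vers.impl_id,
+		       vers.gen_caps);
+		close(fd);
+		return 0;
+	}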
+
+There are two classes of clients, normal clients and supplicants. The latter is
+a helper process for the TEE to access resources in Linux, for example file
+system access. A normal client opens /dev/tee[0-9]* and a supplicant opens
+/dev/teepriv[0-9]*.
+
+Much of the communication between clients and the TEE is opaque to the
+driver. The main job for the driver is to receive requests from the
+clients, forward them to the TEE and send back the results. In the case of
+supplicants the communication goes in the other direction: the TEE sends
+requests to the supplicant, which then sends back the result.
+
+OP-TEE driver
+=============
+
+The OP-TEE driver handles OP-TEE [1] based TEEs. Currently only the ARM
+TrustZone based OP-TEE solution is supported.
+
+The lowest level of communication with OP-TEE builds on the ARM SMC Calling
+Convention (SMCCC) [2], which is the foundation for OP-TEE's SMC interface
+[3] used internally by the driver. Stacked on top of that is the OP-TEE
+Message Protocol [4].
+
+The OP-TEE SMC interface provides the basic functions required by SMCCC and
+some additional functions specific to OP-TEE. The most interesting functions
+are:
+
+- OPTEE_SMC_FUNCID_CALLS_UID (part of SMCCC) returns the version information
+ which is then returned by TEE_IOC_VERSION
+
+- OPTEE_SMC_CALL_GET_OS_UUID returns the particular OP-TEE implementation, used
+ to tell, for instance, a TrustZone OP-TEE apart from an OP-TEE running on a
+ separate secure co-processor.
+
+- OPTEE_SMC_CALL_WITH_ARG drives the OP-TEE message protocol
+
+- OPTEE_SMC_GET_SHM_CONFIG lets the driver and OP-TEE agree on which memory
+  range to use for shared memory between Linux and OP-TEE.
+
+The GlobalPlatform TEE Client API [5] is implemented on top of the generic
+TEE API.
+
+The picture below shows the relationship between the different components in
+the OP-TEE architecture:
+
+ User space Kernel Secure world
+ ~~~~~~~~~~ ~~~~~~ ~~~~~~~~~~~~
+ +--------+ +-------------+
+ | Client | | Trusted |
+ +--------+ | Application |
+ /\ +-------------+
+ || +----------+ /\
+ || |tee- | ||
+ || |supplicant| \/
+ || +----------+ +-------------+
+ \/ /\ | TEE Internal|
+ +-------+ || | API |
+ + TEE | || +--------+--------+ +-------------+
+ | Client| || | TEE | OP-TEE | | OP-TEE |
+ | API | \/ | subsys | driver | | Trusted OS |
+ +-------+----------------+----+-------+----+-----------+-------------+
+ | Generic TEE API | | OP-TEE MSG |
+ | IOCTL (TEE_IOC_*) | | SMCCC (OPTEE_SMC_CALL_*) |
+ +-----------------------------+ +------------------------------+
+
+RPCs (Remote Procedure Calls) are requests from secure world to the kernel
+driver or tee-supplicant. An RPC is identified by a special range of SMCCC
+return values from OPTEE_SMC_CALL_WITH_ARG. RPC messages which are intended for
+the kernel are handled by the kernel driver. Other RPC messages will be
+forwarded to tee-supplicant without further involvement of the driver, except
+for switching the shared memory buffer representation.
+
+References:
+[1] https://github.com/OP-TEE/optee_os
+[2] http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
+[3] drivers/tee/optee/optee_smc.h
+[4] drivers/tee/optee/optee_msg.h
+[5] http://www.globalplatform.org/specificationsdevice.asp look for
+ "TEE Client API Specification v1.0" and click download.
diff --git a/MAINTAINERS b/MAINTAINERS
index ab65bbecb159..c40ab482dd3e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3705,6 +3705,16 @@ S: Maintained
F: drivers/gpu/drm/gma500
F: include/drm/gma500*
+DRM DRIVERS FOR HISILICON
+M: Xinliang Liu <z.liuxinliang@hisilicon.com>
+R: Xinwei Kong <kong.kongxinwei@hisilicon.com>
+R: Chen Feng <puck.chen@hisilicon.com>
+L: dri-devel@lists.freedesktop.org
+T: git git://github.com/xin3liang/linux.git
+S: Maintained
+F: drivers/gpu/drm/hisilicon/
+F: Documentation/devicetree/bindings/display/hisilicon/
+
DRM DRIVERS FOR NVIDIA TEGRA
M: Thierry Reding <thierry.reding@gmail.com>
M: Terje Bergström <tbergstrom@nvidia.com>
@@ -7935,6 +7945,11 @@ F: arch/*/oprofile/
F: drivers/oprofile/
F: include/linux/oprofile.h
+OP-TEE DRIVER
+M: Jens Wiklander <jens.wiklander@linaro.org>
+S: Maintained
+F: drivers/tee/optee/
+
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
M: Mark Fasheh <mfasheh@suse.com>
M: Joel Becker <jlbec@evilplan.org>
@@ -9361,6 +9376,14 @@ F: drivers/hwtracing/stm/
F: include/linux/stm.h
F: include/uapi/linux/stm.h
+TEE SUBSYSTEM
+M: Jens Wiklander <jens.wiklander@linaro.org>
+S: Maintained
+F: include/linux/tee_drv.h
+F: include/uapi/linux/tee.h
+F: drivers/tee/
+F: Documentation/tee.txt
+
THUNDERBOLT DRIVER
M: Andreas Noever <andreas.noever@gmail.com>
S: Maintained
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 34e1569a11ee..2687c934c6c0 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -37,6 +37,7 @@ config ARM
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARM_SMCCC if CPU_V7
select HAVE_BPF_JIT
select HAVE_CC_STACKPROTECTOR
select HAVE_CONTEXT_TRACKING
@@ -1481,7 +1482,7 @@ config HOTPLUG_CPU
config ARM_PSCI
bool "Support for the ARM Power State Coordination Interface (PSCI)"
- depends on CPU_V7
+ depends on HAVE_ARM_SMCCC
select ARM_PSCI_FW
help
Say Y here if you want Linux to communicate with system firmware
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 370f7a732900..e3e596cbb1a7 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -24,6 +24,13 @@ void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+#ifdef CONFIG_CPU_FREQ
+#include <linux/cpufreq.h>
+#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+#endif
+#define arch_scale_cpu_capacity scale_cpu_capacity
+extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu);
+
#else
static inline void init_cpu_topology(void) { }
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 3c789496297f..82bdac0f2804 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -87,8 +87,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o
ifeq ($(CONFIG_ARM_PSCI),y)
-obj-y += psci-call.o
obj-$(CONFIG_SMP) += psci_smp.o
endif
+obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o
+
extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index f89811fb9a55..7e45f69a0ddc 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -16,6 +16,7 @@
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/arm-smccc.h>
#include <asm/checksum.h>
#include <asm/ftrace.h>
@@ -175,3 +176,8 @@ EXPORT_SYMBOL(__gnu_mcount_nc);
EXPORT_SYMBOL(__pv_phys_pfn_offset);
EXPORT_SYMBOL(__pv_offset);
#endif
+
+#ifdef CONFIG_HAVE_ARM_SMCCC
+EXPORT_SYMBOL(arm_smccc_smc);
+EXPORT_SYMBOL(arm_smccc_hvc);
+#endif
diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S
deleted file mode 100644
index a78e9e1e206d..000000000000
--- a/arch/arm/kernel/psci-call.S
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2015 ARM Limited
- *
- * Author: Mark Rutland <mark.rutland@arm.com>
- */
-
-#include <linux/linkage.h>
-
-#include <asm/opcodes-sec.h>
-#include <asm/opcodes-virt.h>
-
-/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */
-ENTRY(__invoke_psci_fn_hvc)
- __HVC(0)
- bx lr
-ENDPROC(__invoke_psci_fn_hvc)
-
-/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */
-ENTRY(__invoke_psci_fn_smc)
- __SMC(0)
- bx lr
-ENDPROC(__invoke_psci_fn_smc)
diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S
new file mode 100644
index 000000000000..2e48b674aab1
--- /dev/null
+++ b/arch/arm/kernel/smccc-call.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/linkage.h>
+
+#include <asm/opcodes-sec.h>
+#include <asm/opcodes-virt.h>
+#include <asm/unwind.h>
+
+ /*
+ * Wrap c macros in asm macros to delay expansion until after the
+ * SMCCC asm macro is expanded.
+ */
+ .macro SMCCC_SMC
+ __SMC(0)
+ .endm
+
+ .macro SMCCC_HVC
+ __HVC(0)
+ .endm
+
+ .macro SMCCC instr
+UNWIND( .fnstart)
+ mov r12, sp
+ push {r4-r7}
+UNWIND( .save {r4-r7})
+ ldm r12, {r4-r7}
+ \instr
+ pop {r4-r7}
+ ldr r12, [sp, #(4 * 4)]
+ stm r12, {r0-r3}
+ bx lr
+UNWIND( .fnend)
+ .endm
+
+/*
+ * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
+ *		unsigned long a3, unsigned long a4, unsigned long a5,
+ *		unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ */
+ENTRY(arm_smccc_smc)
+ SMCCC SMCCC_SMC
+ENDPROC(arm_smccc_smc)
+
+/*
+ * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2,
+ *		unsigned long a3, unsigned long a4, unsigned long a5,
+ *		unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ */
+ENTRY(arm_smccc_hvc)
+ SMCCC SMCCC_HVC
+ENDPROC(arm_smccc_hvc)
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 08b7847bf912..0308342def8c 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -42,9 +42,15 @@
*/
static DEFINE_PER_CPU(unsigned long, cpu_scale);
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
+#ifdef CONFIG_CPU_FREQ
+ unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
+
+ return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
+#else
return per_cpu(cpu_scale, cpu);
+#endif
}
static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
@@ -153,6 +159,8 @@ static void __init parse_dt_topology(void)
}
+static const struct sched_group_energy * const cpu_core_energy(int cpu);
+
/*
* Look for a customed capacity of a CPU in the cpu_capacity table during the
* boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
@@ -160,10 +168,14 @@ static void __init parse_dt_topology(void)
*/
static void update_cpu_capacity(unsigned int cpu)
{
- if (!cpu_capacity(cpu))
- return;
+ unsigned long capacity = SCHED_CAPACITY_SCALE;
+
+ if (cpu_core_energy(cpu)) {
+ int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1;
+ capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap;
+ }
- set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+ set_capacity_scale(cpu, capacity);
pr_info("CPU%u: update cpu_capacity %lu\n",
cpu, arch_scale_cpu_capacity(NULL, cpu));
@@ -277,7 +289,8 @@ void store_cpu_topology(unsigned int cpuid)
static inline int cpu_corepower_flags(void)
{
- return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN;
+ return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \
+ SD_SHARE_CAP_STATES;
}
static struct sched_domain_topology_level arm_topology[] = {
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 14cdc6dea493..0df138c41bdf 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -92,6 +92,7 @@ config ARM64
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select HAVE_CONTEXT_TRACKING
+ select HAVE_ARM_SMCCC
help
ARM 64-bit (AArch64) Linux support.
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 4043c35962cc..94c410d4cd84 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -36,6 +36,7 @@ config ARCH_LAYERSCAPE
config ARCH_HISI
bool "Hisilicon SoC Family"
+ select ARM_TIMER_SP804
help
This enables support for Hisilicon ARMv8 SoC family
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 8d43a0fce522..c36bd3aeea82 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -6,11 +6,10 @@
*/
/dts-v1/;
-
-/*Reserved 1MB memory for MCU*/
-/memreserve/ 0x05e00000 0x00100000;
+#include <dt-bindings/gpio/gpio.h>
#include "hi6220.dtsi"
+#include "hikey-pinctrl.dtsi"
/ {
model = "HiKey Development Board";
@@ -27,8 +26,389 @@
stdout-path = "serial3:115200n8";
};
+ /*
+ * Reserve below regions from memory node:
+ *
+ * 0x05e0,0000 - 0x05ef,ffff: MCU firmware runtime using
+ * 0x05f0,1000 - 0x05f0,1fff: Reboot reason
+ * 0x06df,f000 - 0x06df,ffff: Mailbox message data
+ * 0x0740,f000 - 0x0740,ffff: MCU firmware section
+ * 0x21f0,0000 - 0x21ff,ffff: pstore/ramoops buffer
+ * 0x3e00,0000 - 0x3fff,ffff: OP-TEE
+ */
memory@0 {
device_type = "memory";
- reg = <0x0 0x0 0x0 0x40000000>;
+ reg = <0x00000000 0x00000000 0x00000000 0x05e00000>,
+ <0x00000000 0x05f00000 0x00000000 0x00001000>,
+ <0x00000000 0x05f02000 0x00000000 0x00efd000>,
+ <0x00000000 0x06e00000 0x00000000 0x0060f000>,
+ <0x00000000 0x07410000 0x00000000 0x1aaf0000>,
+ <0x00000000 0x22000000 0x00000000 0x1c000000>;
+ };
+
+ firmware {
+ optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ };
+ };
+
+	pstore: pstore@21f00000 {
+ no-map;
+ reg = <0x0 0x21f00000 0x0 0x00100000>; /* pstore/ramoops buffer */
+ };
+
+ ramoops {
+ compatible = "ramoops";
+ memory-region = <&pstore>;
+ record-size = <0x0 0x00020000>;
+ console-size = <0x0 0x00020000>;
+ ftrace-size = <0x0 0x00020000>;
+ };
+
+ reboot_reason: reboot-reason@05f01000 {
+ compatible = "linux,reboot-reason-sram";
+ reg = <0x0 0x05F01000 0x0 0x4>;
+ reason,none = <0x77665501>;
+ reason,bootloader = <0x77665500>;
+ reason,recovery = <0x77665502>;
+ reason,oem = <0x6f656d00>;
+ };
+
+ soc {
+ i2c0: i2c@f7100000 {
+ status = "ok";
+ };
+
+ i2c1: i2c@f7101000 {
+ status = "ok";
+ };
+
+ uart1: uart@f7111000 {
+ status = "ok";
+ };
+
+ uart2: uart@f7112000 {
+ status = "ok";
+ };
+
+ uart3: uart@f7113000 {
+ status = "ok";
+ };
+
+ dwmmc_2: dwmmc2@f723f000 {
+ ti,non-removable;
+ non-removable;
+ /* WL_EN */
+ vmmc-supply = <&wlan_en_reg>;
+
+ #address-cells = <0x1>;
+ #size-cells = <0x0>;
+ wlcore: wlcore@2 {
+ compatible = "ti,wl1835";
+ reg = <2>; /* sdio func num */
+ /* WL_IRQ, WL_HOST_WAKE_GPIO1_3 */
+ interrupt-parent = <&gpio1>;
+ interrupts = <3 IRQ_TYPE_EDGE_RISING>;
+ };
+ };
+
+ wlan_en_reg: fixedregulator@1 {
+ compatible = "regulator-fixed";
+ regulator-name = "wlan-en-regulator";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ /* WLAN_EN GPIO */
+ gpio = <&gpio0 5 0>;
+ /* WLAN card specific delay */
+ startup-delay-us = <70000>;
+ enable-active-high;
+ };
+
+ hisi-ion@0 {
+ compatible = "hisilicon,ion";
+
+ heap_sys_user@0 {
+ heap-name = "sys_user";
+ heap-range = <0x0 0x0>;
+ heap-type = "ion_system";
+ };
+
+ heap_sys_contig@0 {
+ heap-name = "sys_contig";
+ heap-range = <0x0 0x0>;
+ heap-type = "ion_system_contig";
+ };
+
+ heap_cma@0 {
+ heap-name = "cma";
+ heap-range = <0x0 0x0>;
+ heap-type = "ion_cma";
+ };
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+ user_led1 {
+ label = "user_led4";
+ gpios = <&gpio4 0 0>; /* <&gpio_user_led_1>; */
+ linux,default-trigger = "heartbeat";
+ };
+
+ user_led2 {
+ label = "user_led3";
+ gpios = <&gpio4 1 0>; /* <&gpio_user_led_2>; */
+ linux,default-trigger = "mmc0";
+ };
+
+ user_led3 {
+ label = "user_led2";
+ gpios = <&gpio4 2 0>; /* <&gpio_user_led_3>; */
+ linux,default-trigger = "mmc1";
+ };
+
+ user_led4 {
+ label = "user_led1";
+ gpios = <&gpio4 3 0>; /* <&gpio_user_led_4>; */
+ linux,default-trigger = "cpu0";
+ };
+
+ wlan_active_led {
+ label = "wifi_active";
+ gpios = <&gpio3 5 0>; /* <&gpio_wlan_active_led>; */
+ linux,default-trigger = "phy0tx";
+ default-state = "off";
+ };
+
+ bt_active_led {
+ label = "bt_active";
+ gpios = <&gpio4 7 0>; /* <&gpio_bt_active_led>; */
+ linux,default-trigger = "hci0rx";
+ default-state = "off";
+ };
+ };
+
+ kim {
+ compatible = "kim";
+ pinctrl-names = "default";
+ pinctrl-0 = <>; /* FIXME: add BT PCM pinctrl here */
+ /*
+ * FIXME: The following is complete CRAP since
+ * the vendor driver doesn't follow the gpio
+ * binding. Passing in a magic Linux gpio number
+ * here until we fix the vendor driver.
+ */
+ /* BT_EN: BT_REG_ON_GPIO1_7 */
+ nshutdown_gpio = <503>;
+ dev_name = "/dev/ttyAMA1";
+ flow_cntrl = <1>;
+ baud_rate = <3000000>;
+ };
+
+ btwilink {
+ compatible = "btwilink";
+ };
+
+ pmic: pmic@f8000000 {
+ compatible = "hisilicon,hi655x-pmic";
+ reg = <0x0 0xf8000000 0x0 0x1000>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+
+		ponkey: ponkey@b1 {
+ compatible = "hisilicon,hi6552-powerkey";
+ interrupt-parent = <&pmic>;
+ interrupts = <6 0>, <5 0>, <4 0>;
+ interrupt-names = "down", "up", "hold 4s";
+ };
+
+ regulators {
+ ldo2: LDO2 {
+ regulator-name = "LDO2_2V8";
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <3200000>;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ ldo7: LDO7 {
+ regulator-name = "LDO7_SDIO";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ ldo10: LDO10 {
+ regulator-name = "LDO10_2V85";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-enable-ramp-delay = <360>;
+ };
+
+ ldo13: LDO13 {
+ regulator-name = "LDO13_1V8";
+ regulator-min-microvolt = <1600000>;
+ regulator-max-microvolt = <1950000>;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ ldo14: LDO14 {
+ regulator-name = "LDO14_2V8";
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <3200000>;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ ldo15: LDO15 {
+ regulator-name = "LDO15_1V8";
+ regulator-min-microvolt = <1600000>;
+ regulator-max-microvolt = <1950000>;
+ regulator-boot-on;
+ regulator-always-on;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ ldo17: LDO17 {
+ regulator-name = "LDO17_2V5";
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <3200000>;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ ldo19: LDO19 {
+ regulator-name = "LDO19_3V0";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-enable-ramp-delay = <360>;
+ };
+
+ ldo21: LDO21 {
+ regulator-name = "LDO21_1V8";
+ regulator-min-microvolt = <1650000>;
+ regulator-max-microvolt = <2000000>;
+ regulator-always-on;
+ regulator-enable-ramp-delay = <120>;
+ };
+
+ ldo22: LDO22 {
+ regulator-name = "LDO22_1V2";
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-boot-on;
+ regulator-always-on;
+ regulator-enable-ramp-delay = <120>;
+ };
+ };
+ };
+};
+
+&ade {
+ status = "ok";
+};
+
+&dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ mux-gpio = <&gpio0 1 0>;
+ status = "ok";
+
+ ports {
+ /* 1 for output port */
+ port@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ dsi_out0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&adv7533_in>;
+ };
+
+ dsi_out1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&panel0_in>;
+ };
+ };
+ };
+
+	/* For panels, the reg value should be >= 1 */
+ panel@1 {
+ compatible = "innolux,n070icn-pb1";
+ reg = <1>;
+ power-on-delay= <50>;
+ reset-delay = <100>;
+ init-delay = <100>;
+ panel-width-mm = <58>;
+ panel-height-mm = <103>;
+ pwr-en-gpio = <&gpio2 1 0>;
+ bl-en-gpio = <&gpio2 3 0>;
+ pwm-gpio = <&gpio12 7 0>;
+
+ port {
+ panel0_in: endpoint {
+ remote-endpoint = <&dsi_out1>;
+ };
+ };
+ };
+};
+
+&i2c2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "ok";
+
+ adv7533: adv7533@39 {
+ compatible = "adi,adv7533";
+ reg = <0x39>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <1 2>;
+ pd-gpio = <&gpio0 4 0>;
+ adi,dsi-lanes = <4>;
+ adi,disable-timing-generator;
+
+ port {
+ adv7533_in: endpoint {
+ remote-endpoint = <&dsi_out0>;
+ };
+ };
+ };
+ };
+
+&i2c0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "ok";
+
+	tp: tp@38 {
+ compatible = "FocalTech,ft5506";
+ reg = <0x38>;
+ interrupt-parent = <&gpio2>;
+ interrupts = <7 2>;
+ rst-gpio = <&gpio9 1 1>;
+ };
+};
+
+&i2c1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "ok";
+
+	icn6201: icn6201@5a {
+ compatible = "ChipOne,icn6201";
+ reg = <0x2d>;
+ };
+};
+
+&spi0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "ok";
+
+ spidev@0 {
+ compatible = "linux,spidev";
+ spi-max-frequency = <500000>;
+ reg = <0>;
};
};
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-sched-energy.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220-sched-energy.dtsi
new file mode 100644
index 000000000000..6dfc49332b4f
--- /dev/null
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-sched-energy.dtsi
@@ -0,0 +1,69 @@
+/*
+ * Hikey specific energy cost model data.
+ */
+
+/* static struct idle_state idle_states_cluster_a53[] = { */
+/* { .power = 47 }, /\* arch_cpu_idle() (active idle) = WFI *\/ */
+/* { .power = 47 }, /\* WFI *\/ */
+/* { .power = 47 }, /\* cpu-sleep-0 *\/ */
+/* { .power = 0 }, /\* cluster-sleep-0 *\/ */
+/* }; */
+
+/* static struct capacity_state cap_states_cluster_a53[] = { */
+/* /\* Power per cluster *\/ */
+/* { .cap = 178, .power = 16, }, /\* 200 MHz *\/ */
+/* { .cap = 369, .power = 29, }, /\* 432 MHz *\/ */
+/* { .cap = 622, .power = 47, }, /\* 729 MHz *\/ */
+/* { .cap = 819, .power = 75, }, /\* 960 MHz *\/ */
+/* { .cap = 1024, .power = 112, }, /\* 1200 Mhz *\/ */
+/* }; */
+
+/* static struct idle_state idle_states_core_a53[] = { */
+/* { .power = 15 }, /\* arch_cpu_idle() (active idle) = WFI *\/ */
+/* { .power = 15 }, /\* WFI *\/ */
+/* { .power = 0 }, /\* cpu-sleep-0 *\/ */
+/* { .power = 0 }, /\* cluster-sleep-0 *\/ */
+/* }; */
+
+/* static struct capacity_state cap_states_core_a53[] = { */
+/* /\* Power per cpu *\/ */
+/* { .cap = 178, .power = 69, }, /\* 200 MHz *\/ */
+/* { .cap = 369, .power = 124, }, /\* 432 MHz *\/ */
+/* { .cap = 622, .power = 224, }, /\* 729 MHz *\/ */
+/* { .cap = 819, .power = 367, }, /\* 960 MHz *\/ */
+/* { .cap = 1024, .power = 670, }, /\* 1200 Mhz *\/ */
+/* }; */
+
+energy-costs {
+ CPU_COST: core-cost {
+ busy-cost-data = <
+ 178 69
+ 369 124
+ 622 224
+ 819 367
+ 1024 670
+ >;
+ idle-cost-data = <
+ 15
+ 15
+ 0
+ 0
+ >;
+ };
+
+ CLUSTER_COST: cluster-cost {
+ busy-cost-data = <
+ 178 16
+ 369 29
+ 622 47
+ 819 75
+ 1024 112
+ >;
+ idle-cost-data = <
+ 47
+ 47
+ 47
+ 0
+ >;
+ };
+};
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
index 82d2488a0e86..18c92b3daf5b 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -6,6 +6,9 @@
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/clock/hi6220-clock.h>
+#include <dt-bindings/pinctrl/hisi.h>
+#include <dt-bindings/thermal/thermal.h>
+#include <dt-bindings/reset/hisi,hi6220-resets.h>
/ {
compatible = "hisilicon,hi6220";
@@ -53,11 +56,43 @@
};
};
+ idle-states {
+ entry-method = "psci";
+
+ CPU_SLEEP: cpu-sleep {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <700>;
+ exit-latency-us = <250>;
+ min-residency-us = <1000>;
+ };
+
+ CLUSTER_SLEEP: cluster-sleep {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <1000>;
+ exit-latency-us = <700>;
+ min-residency-us = <2700>;
+ wakeup-latency-us = <1500>;
+ };
+ };
+
cpu0: cpu@0 {
compatible = "arm,cortex-a53", "arm,armv8";
device_type = "cpu";
reg = <0x0 0x0>;
enable-method = "psci";
+ next-level-cache = <&CLUSTER0_L2>;
+ clocks = <&stub_clock 0>;
+ operating-points-v2 = <&cpu_opp_table>;
+ cooling-min-level = <4>;
+ cooling-max-level = <0>;
+ #cooling-cells = <2>; /* min followed by max */
+ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ sched-energy-costs = <&CPU_COST &CLUSTER_COST>;
+ dynamic-power-coefficient = <311>;
};
cpu1: cpu@1 {
@@ -65,6 +100,10 @@
device_type = "cpu";
reg = <0x0 0x1>;
enable-method = "psci";
+ next-level-cache = <&CLUSTER0_L2>;
+ operating-points-v2 = <&cpu_opp_table>;
+ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ sched-energy-costs = <&CPU_COST &CLUSTER_COST>;
};
cpu2: cpu@2 {
@@ -72,6 +111,10 @@
device_type = "cpu";
reg = <0x0 0x2>;
enable-method = "psci";
+ next-level-cache = <&CLUSTER0_L2>;
+ operating-points-v2 = <&cpu_opp_table>;
+ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ sched-energy-costs = <&CPU_COST &CLUSTER_COST>;
};
cpu3: cpu@3 {
@@ -79,6 +122,10 @@
device_type = "cpu";
reg = <0x0 0x3>;
enable-method = "psci";
+ next-level-cache = <&CLUSTER0_L2>;
+ operating-points-v2 = <&cpu_opp_table>;
+ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ sched-energy-costs = <&CPU_COST &CLUSTER_COST>;
};
cpu4: cpu@100 {
@@ -86,6 +133,10 @@
device_type = "cpu";
reg = <0x0 0x100>;
enable-method = "psci";
+ next-level-cache = <&CLUSTER1_L2>;
+ operating-points-v2 = <&cpu_opp_table>;
+ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ sched-energy-costs = <&CPU_COST &CLUSTER_COST>;
};
cpu5: cpu@101 {
@@ -93,6 +144,10 @@
device_type = "cpu";
reg = <0x0 0x101>;
enable-method = "psci";
+ next-level-cache = <&CLUSTER1_L2>;
+ operating-points-v2 = <&cpu_opp_table>;
+ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ sched-energy-costs = <&CPU_COST &CLUSTER_COST>;
};
cpu6: cpu@102 {
@@ -100,6 +155,10 @@
device_type = "cpu";
reg = <0x0 0x102>;
enable-method = "psci";
+ next-level-cache = <&CLUSTER1_L2>;
+ operating-points-v2 = <&cpu_opp_table>;
+ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ sched-energy-costs = <&CPU_COST &CLUSTER_COST>;
};
cpu7: cpu@103 {
@@ -107,6 +166,51 @@
device_type = "cpu";
reg = <0x0 0x103>;
enable-method = "psci";
+ next-level-cache = <&CLUSTER1_L2>;
+ operating-points-v2 = <&cpu_opp_table>;
+ cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>;
+ sched-energy-costs = <&CPU_COST &CLUSTER_COST>;
+ };
+
+ CLUSTER0_L2: l2-cache0 {
+ compatible = "cache";
+ };
+
+ CLUSTER1_L2: l2-cache1 {
+ compatible = "cache";
+ };
+
+ /include/ "hi6220-sched-energy.dtsi"
+ };
+
+ cpu_opp_table: cpu_opp_table {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp00 {
+ opp-hz = /bits/ 64 <208000000>;
+ opp-microvolt = <1040000>;
+ clock-latency-ns = <500000>;
+ };
+ opp01 {
+ opp-hz = /bits/ 64 <432000000>;
+ opp-microvolt = <1040000>;
+ clock-latency-ns = <500000>;
+ };
+ opp02 {
+ opp-hz = /bits/ 64 <729000000>;
+ opp-microvolt = <1090000>;
+ clock-latency-ns = <500000>;
+ };
+ opp03 {
+ opp-hz = /bits/ 64 <960000000>;
+ opp-microvolt = <1180000>;
+ clock-latency-ns = <500000>;
+ };
+ opp04 {
+ opp-hz = /bits/ 64 <1200000000>;
+ opp-microvolt = <1330000>;
+ clock-latency-ns = <500000>;
};
};
@@ -135,8 +239,15 @@
compatible = "simple-bus";
#address-cells = <2>;
#size-cells = <2>;
+ #sound-dai-cells = <0>;
+ interrupt-parent = <&gic>;
ranges;
+ sram: sram@fff80000 {
+ compatible = "hisilicon,hi6220-sramctrl", "syscon";
+ reg = <0x0 0xfff80000 0x0 0x12000>;
+ };
+
ao_ctrl: ao_ctrl@f7800000 {
compatible = "hisilicon,hi6220-aoctrl", "syscon";
reg = <0x0 0xf7800000 0x0 0x2000>;
@@ -147,12 +258,14 @@
compatible = "hisilicon,hi6220-sysctrl", "syscon";
reg = <0x0 0xf7030000 0x0 0x2000>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
media_ctrl: media_ctrl@f4410000 {
compatible = "hisilicon,hi6220-mediactrl", "syscon";
reg = <0x0 0xf4410000 0x0 0x1000>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
pm_ctrl: pm_ctrl@f7032000 {
@@ -161,6 +274,19 @@
#clock-cells = <1>;
};
+ stub_clock: stub_clock {
+ compatible = "hisilicon,hi6220-stub-clk";
+ hisilicon,hi6220-clk-sram = <&sram>;
+ #clock-cells = <1>;
+ mbox-names = "mbox-tx";
+ mboxes = <&mailbox 1 0 11>;
+ };
+
+ medianoc_ade: medianoc_ade@f4520000 {
+ compatible = "syscon";
+ reg = <0x0 0xf4520000 0x0 0x4000>;
+ };
+
uart0: uart@f8015000 { /* console */
compatible = "arm,pl011", "arm,primecell";
reg = <0x0 0xf8015000 0x0 0x1000>;
@@ -177,6 +303,8 @@
clocks = <&sys_ctrl HI6220_UART1_PCLK>,
<&sys_ctrl HI6220_UART1_PCLK>;
clock-names = "uartclk", "apb_pclk";
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart1_pmx_func &uart1_cfg_func1 &uart1_cfg_func2>;
status = "disabled";
};
@@ -187,6 +315,8 @@
clocks = <&sys_ctrl HI6220_UART2_PCLK>,
<&sys_ctrl HI6220_UART2_PCLK>;
clock-names = "uartclk", "apb_pclk";
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart2_pmx_func &uart2_cfg_func>;
status = "disabled";
};
@@ -197,6 +327,9 @@
clocks = <&sys_ctrl HI6220_UART3_PCLK>,
<&sys_ctrl HI6220_UART3_PCLK>;
clock-names = "uartclk", "apb_pclk";
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart3_pmx_func &uart3_cfg_func>;
+ status = "disabled";
};
uart4: uart@f7114000 {
@@ -206,7 +339,694 @@
clocks = <&sys_ctrl HI6220_UART4_PCLK>,
<&sys_ctrl HI6220_UART4_PCLK>;
clock-names = "uartclk", "apb_pclk";
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart4_pmx_func &uart4_cfg_func>;
status = "disabled";
};
+
+ dma0: dma@f7370000 {
+ compatible = "hisilicon,k3-dma-1.0";
+ reg = <0x0 0xf7370000 0x0 0x1000>;
+ #dma-cells = <1>;
+ dma-channels = <15>;
+ dma-requests = <32>;
+ interrupts = <0 84 4>;
+ clocks = <&sys_ctrl HI6220_EDMAC_ACLK>;
+ dma-no-cci;
+ dma-type = "hi6220_dma";
+ status = "ok";
+ };
+
+ dual_timer0: dual_timer@f8008000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x0 0xf8008000 0x0 0x1000>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ao_ctrl 27>;
+ clock-names = "apb_pclk";
+ };
+
+ rtc0: rtc@170000 {
+ compatible = "arm,pl031", "arm,primecell";
+ reg = <0x0 0xf8003000 0x0 0x1000>;
+ interrupts = <0 12 4>;
+ clocks = <&ao_ctrl HI6220_RTC0_PCLK>;
+ clock-names = "apb_pclk";
+ };
+
+ pmx0: pinmux@f7010000 {
+ compatible = "pinctrl-single";
+ reg = <0x0 0xf7010000 0x0 0x27c>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #gpio-range-cells = <3>;
+ pinctrl-single,register-width = <32>;
+ pinctrl-single,function-mask = <7>;
+ pinctrl-single,gpio-range = <
+ &range 80 8 MUX_M0 /* gpio 3: [0..7] */
+ &range 88 8 MUX_M0 /* gpio 4: [0..7] */
+ &range 96 8 MUX_M0 /* gpio 5: [0..7] */
+ &range 104 8 MUX_M0 /* gpio 6: [0..7] */
+ &range 112 8 MUX_M0 /* gpio 7: [0..7] */
+ &range 120 2 MUX_M0 /* gpio 8: [0..1] */
+ &range 2 6 MUX_M1 /* gpio 8: [2..7] */
+ &range 8 8 MUX_M1 /* gpio 9: [0..7] */
+ &range 0 1 MUX_M1 /* gpio 10: [0] */
+ &range 16 7 MUX_M1 /* gpio 10: [1..7] */
+ &range 23 3 MUX_M1 /* gpio 11: [0..2] */
+ &range 28 5 MUX_M1 /* gpio 11: [3..7] */
+ &range 33 3 MUX_M1 /* gpio 12: [0..2] */
+ &range 43 5 MUX_M1 /* gpio 12: [3..7] */
+ &range 48 8 MUX_M1 /* gpio 13: [0..7] */
+ &range 56 8 MUX_M1 /* gpio 14: [0..7] */
+ &range 74 6 MUX_M1 /* gpio 15: [0..5] */
+ &range 122 1 MUX_M1 /* gpio 15: [6] */
+ &range 126 1 MUX_M1 /* gpio 15: [7] */
+ &range 127 8 MUX_M1 /* gpio 16: [0..7] */
+ &range 135 8 MUX_M1 /* gpio 17: [0..7] */
+ &range 143 8 MUX_M1 /* gpio 18: [0..7] */
+ &range 151 8 MUX_M1 /* gpio 19: [0..7] */
+ >;
+ range: gpio-range {
+ #pinctrl-single,gpio-range-cells = <3>;
+ };
+ };
+
+ pmx1: pinmux@f7010800 {
+ compatible = "pinconf-single";
+ reg = <0x0 0xf7010800 0x0 0x28c>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ pinctrl-single,register-width = <32>;
+ };
+
+ pmx2: pinmux@f8001800 {
+ compatible = "pinconf-single";
+ reg = <0x0 0xf8001800 0x0 0x78>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ pinctrl-single,register-width = <32>;
+ };
+
+ gpio0: gpio@f8011000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf8011000 0x0 0x1000>;
+ interrupts = <0 52 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio1: gpio@f8012000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf8012000 0x0 0x1000>;
+ interrupts = <0 53 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio2: gpio@f8013000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf8013000 0x0 0x1000>;
+ interrupts = <0 54 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio3: gpio@f8014000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf8014000 0x0 0x1000>;
+ interrupts = <0 55 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 80 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio4: gpio@f7020000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7020000 0x0 0x1000>;
+ interrupts = <0 56 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 88 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio5: gpio@f7021000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7021000 0x0 0x1000>;
+ interrupts = <0 57 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 96 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio6: gpio@f7022000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7022000 0x0 0x1000>;
+ interrupts = <0 58 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 104 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio7: gpio@f7023000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7023000 0x0 0x1000>;
+ interrupts = <0 59 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 112 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio8: gpio@f7024000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7024000 0x0 0x1000>;
+ interrupts = <0 60 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 120 2 &pmx0 2 2 6>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio9: gpio@f7025000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7025000 0x0 0x1000>;
+ interrupts = <0 61 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 8 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio10: gpio@f7026000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7026000 0x0 0x1000>;
+ interrupts = <0 62 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 0 1 &pmx0 1 16 7>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio11: gpio@f7027000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7027000 0x0 0x1000>;
+ interrupts = <0 63 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 23 3 &pmx0 3 28 5>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio12: gpio@f7028000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7028000 0x0 0x1000>;
+ interrupts = <0 64 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 33 3 &pmx0 3 43 5>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio13: gpio@f7029000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf7029000 0x0 0x1000>;
+ interrupts = <0 65 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 48 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio14: gpio@f702a000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf702a000 0x0 0x1000>;
+ interrupts = <0 66 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 56 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio15: gpio@f702b000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf702b000 0x0 0x1000>;
+ interrupts = <0 67 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <
+ &pmx0 0 74 6
+ &pmx0 6 122 1
+ &pmx0 7 126 1
+ >;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio16: gpio@f702c000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf702c000 0x0 0x1000>;
+ interrupts = <0 68 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 127 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio17: gpio@f702d000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf702d000 0x0 0x1000>;
+ interrupts = <0 69 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 135 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio18: gpio@f702e000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf702e000 0x0 0x1000>;
+ interrupts = <0 70 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 143 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ gpio19: gpio@f702f000 {
+ compatible = "arm,pl061", "arm,primecell";
+ reg = <0x0 0xf702f000 0x0 0x1000>;
+ interrupts = <0 71 0x4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-ranges = <&pmx0 0 151 8>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&ao_ctrl 2>;
+ clock-names = "apb_pclk";
+ status = "ok";
+ };
+
+ spi0: spi@f7106000 {
+ compatible = "arm,pl022", "arm,primecell";
+ reg = <0x0 0xf7106000 0x0 0x1000>;
+ interrupts = <0 50 4>;
+ bus-id = <0>;
+ enable-dma = <0>;
+ clocks = <&sys_ctrl HI6220_SPI_CLK>;
+ clock-names = "apb_pclk";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi0_pmx_func &spi0_cfg_func>;
+ num-cs = <1>;
+ cs-gpios = <&gpio6 2 0>;
+ status = "disabled";
+ };
+
+ i2c0: i2c@f7100000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0xf7100000 0x0 0x1000>;
+ interrupts = <0 44 4>;
+ clocks = <&sys_ctrl HI6220_I2C0_CLK>;
+ i2c-sda-hold-time-ns = <300>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_pmx_func &i2c0_cfg_func>;
+ status = "disabled";
+ };
+
+ i2c1: i2c@f7101000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0xf7101000 0x0 0x1000>;
+ clocks = <&sys_ctrl HI6220_I2C1_CLK>;
+ interrupts = <0 45 4>;
+ i2c-sda-hold-time-ns = <300>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_pmx_func &i2c1_cfg_func>;
+ status = "disabled";
+ };
+
+ i2c2: i2c@f7102000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0xf7102000 0x0 0x1000>;
+ clocks = <&sys_ctrl HI6220_I2C2_CLK>;
+ interrupts = <0 46 4>;
+ i2c-sda-hold-time-ns = <300>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c2_pmx_func &i2c2_cfg_func>;
+ status = "disabled";
+ };
+
+ fixed_5v_hub: regulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "fixed_5v_hub";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-boot-on;
+ gpio = <&gpio0 7 0>;
+ regulator-always-on;
+ };
+
+ usb_phy: usbphy {
+ compatible = "hisilicon,hi6220-usb-phy";
+ #phy-cells = <0>;
+ phy-supply = <&fixed_5v_hub>;
+ hisilicon,peripheral-syscon = <&sys_ctrl>;
+ };
+
+ usb: usb@f72c0000 {
+ compatible = "hisilicon,hi6220-usb";
+ reg = <0x0 0xf72c0000 0x0 0x40000>;
+ phys = <&usb_phy>;
+ phy-names = "usb2-phy";
+ clocks = <&sys_ctrl HI6220_USBOTG_HCLK>;
+ clock-names = "otg";
+ dr_mode = "otg";
+ g-use-dma;
+ g-rx-fifo-size = <512>;
+ g-np-tx-fifo-size = <128>;
+ g-tx-fifo-size = <128 128 128 128 128 128>;
+ interrupts = <0 77 0x4>;
+ };
+
+ mailbox: mailbox@f7510000 {
+ compatible = "hisilicon,hi6220-mbox";
+ reg = <0x0 0xf7510000 0x0 0x1000>, /* IPC_S */
+ <0x0 0x06dff800 0x0 0x0800>; /* Mailbox buffer */
+ interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+ #mbox-cells = <3>;
+ };
+
+ dwmmc_0: dwmmc0@f723d000 {
+ compatible = "hisilicon,hi6220-dw-mshc";
+ num-slots = <0x1>;
+ cap-mmc-highspeed;
+ non-removable;
+ reg = <0x0 0xf723d000 0x0 0x1000>;
+ interrupts = <0x0 0x48 0x4>;
+ clocks = <&sys_ctrl 2>, <&sys_ctrl 1>;
+ clock-names = "ciu", "biu";
+ resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>;
+ bus-width = <0x8>;
+ vmmc-supply = <&ldo19>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func
+ &emmc_cfg_func &emmc_rst_cfg_func>;
+ };
+
+ dwmmc_1: dwmmc1@f723e000 {
+ compatible = "hisilicon,hi6220-dw-mshc";
+ num-slots = <0x1>;
+ card-detect-delay = <200>;
+ hisilicon,peripheral-syscon = <&ao_ctrl>;
+ cap-sd-highspeed;
+ sd-uhs-sdr12;
+ sd-uhs-sdr25;
+ sd-uhs-sdr50;
+ reg = <0x0 0xf723e000 0x0 0x1000>;
+ interrupts = <0x0 0x49 0x4>;
+ #address-cells = <0x1>;
+ #size-cells = <0x0>;
+ clocks = <&sys_ctrl 4>, <&sys_ctrl 3>;
+ clock-names = "ciu", "biu";
+ resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>;
+ vqmmc-supply = <&ldo7>;
+ vmmc-supply = <&ldo10>;
+ bus-width = <0x4>;
+ disable-wp;
+ cd-gpios = <&gpio1 0 1>;
+ pinctrl-names = "default", "idle";
+ pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>;
+ pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>;
+ };
+
+ dwmmc_2: dwmmc2@f723f000 {
+ compatible = "hisilicon,hi6220-dw-mshc";
+ status = "okay";
+ num-slots = <0x1>;
+ reg = <0x0 0xf723f000 0x0 0x1000>;
+ interrupts = <0x0 0x4a 0x4>;
+ clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>;
+ clock-names = "ciu", "biu";
+ resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>;
+ bus-width = <0x4>;
+ broken-cd;
+ pinctrl-names = "default", "idle";
+ pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>;
+ pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>;
+ };
+
+ tsensor: tsensor@f7030700 {
+ compatible = "hisilicon,tsensor";
+ reg = <0x0 0xf7030700 0x0 0x1000>;
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&sys_ctrl 22>;
+ clock-names = "thermal_clk";
+ #thermal-sensor-cells = <1>;
+ };
+
+ thermal-zones {
+
+ cls0: cls0 {
+ polling-delay = <1000>;
+ polling-delay-passive = <100>;
+ sustainable-power = <3326>;
+
+ /* sensor ID */
+ thermal-sensors = <&tsensor 2>;
+
+ trips {
+ threshold: trip-point@0 {
+ temperature = <65000>;
+ hysteresis = <1000>;
+ type = "passive";
+ };
+
+ target: trip-point@1 {
+ temperature = <75000>;
+ hysteresis = <1000>;
+ type = "passive";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&target>;
+ contribution = <1024>;
+ cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+ };
+
+ mtcmos {
+ compatible = "hisilicon,hi6220-mtcmos-driver";
+ hisilicon,mtcmos-steady-us = <10>;
+ hisilicon,mtcmos-sc-on-base = <0xf7800000>;
+ hisilicon,mtcmos-acpu-on-base = <0xf65a0000>;
+
+ g3d_vdd: regulator@a1 {
+ regulator-name = "G3D_PD_VDD";
+ regulator-compatible = "mtcmos1";
+ hisilicon,ctrl-regs = <0x830 0x834 0x83c>;
+ hisilicon,ctrl-data = <1 0x1>;
+ };
+
+ soc_med: regulator@a2 {
+ regulator-name = "SOC_MED";
+ regulator-compatible = "mtcmos2";
+ hisilicon,ctrl-regs = <0x830 0x834 0x83c>;
+ hisilicon,ctrl-data = <2 0x1>;
+ };
+ };
+
+ ade: ade@f4100000 {
+ compatible = "hisilicon,hi6220-ade";
+ reg = <0x0 0xf4100000 0x0 0x7800>;
+ reg-names = "ade_base";
+ hisilicon,noc-syscon = <&medianoc_ade>;
+ resets = <&media_ctrl MEDIA_ADE>;
+ interrupts = <0 115 4>; /* ldi interrupt */
+
+ clocks = <&media_ctrl HI6220_ADE_CORE>,
+ <&media_ctrl HI6220_CODEC_JPEG>,
+ <&media_ctrl HI6220_ADE_PIX_SRC>;
+ /* clock names */
+ clock-names = "clk_ade_core",
+ "clk_codec_jpeg",
+ "clk_ade_pix";
+
+ assigned-clocks = <&media_ctrl HI6220_ADE_CORE>,
+ <&media_ctrl HI6220_CODEC_JPEG>;
+ assigned-clock-rates = <360000000>, <288000000>;
+ status = "disabled";
+
+ port {
+ ade_out: endpoint {
+ remote-endpoint = <&dsi_in>;
+ };
+ };
+ };
+
+ dsi: dsi@f4107800 {
+ compatible = "hisilicon,hi6220-dsi";
+ reg = <0x0 0xf4107800 0x0 0x100>;
+ clocks = <&media_ctrl HI6220_DSI_PCLK>;
+ clock-names = "pclk";
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* 0 for input port */
+ port@0 {
+ reg = <0>;
+ dsi_in: endpoint {
+ remote-endpoint = <&ade_out>;
+ };
+ };
+ };
+ };
+
+ mali: mali@f4080000 {
+ compatible = "arm,mali-450", "arm,mali-utgard";
+ reg = <0x0 0x3f100000 0x0 0x00708000>;
+ clocks = <&media_ctrl HI6220_G3D_CLK>,
+ <&media_ctrl HI6220_G3D_PCLK>;
+ clock-names = "clk_g3d", "pclk_g3d";
+ G3D_PD_VDD-supply = <&g3d_vdd>;
+ mali_def_freq = <500>;
+ pclk_freq = <144>;
+ dfs_steps = <2>;
+ dfs_lockprf = <1>;
+ dfs_limit_max_prf = <1>;
+ dfs_profile_num = <2>;
+ dfs_profiles = <250 3 0>, <500 1 0>;
+ mali_type = <2>;
+
+ interrupt-parent = <&gic>;
+ interrupts = <1 126 4>, /*gp*/
+ <1 126 4>, /*gp mmu*/
+ <1 126 4>, /*pp bc*/
+ <1 126 4>, /*pmu*/
+ <1 126 4>, /*pp0*/
+ <1 126 4>,
+ <1 126 4>, /*pp1*/
+ <1 126 4>,
+ <1 126 4>, /*pp2*/
+ <1 126 4>,
+ <1 126 4>, /*pp4*/
+ <1 126 4>,
+ <1 126 4>, /*pp5*/
+ <1 126 4>,
+ <1 126 4>, /*pp6*/
+ <1 126 4>;
+ interrupt-names = "IRQGP", "IRQGPMMU", "IRQPP", "IRQPMU",
+ "IRQPP0", "IRQPPMMU0", "IRQPP1", "IRQPPMMU1",
+ "IRQPP2", "IRQPPMMU2","IRQPP4", "IRQPPMMU4",
+ "IRQPP5", "IRQPPMMU5", "IRQPP6", "IRQPPMMU6";
+ };
+
+ i2s0: hi6210_i2s {
+ compatible = "hisilicon,hi6210-i2s";
+ reg = <0x0 0xf7118000 0x0 0x8000>, /* i2s unit */
+ <0x0 0xf7030000 0x0 0x400>, /* syscon */
+ <0x0 0xf7032000 0x0 0x400>; /* pmctrl */
+ interrupts = <0 123 0x4>; /* 155 "DigACodec_intr" - 32 */
+ pinctrl-names = "default";
+ pinctrl-0 = <&bt_pmx_func &bt_cfg_func>;
+ clocks = <&sys_ctrl HI6220_DACODEC_PCLK>,
+ <&sys_ctrl HI6220_BBPPLL0_DIV>;
+ clock-names = "dacodec", "i2s-base";
+ dmas = <&dma0 15 &dma0 14>;
+ dma-names = "rx", "tx";
+ };
+
+ hi6210_hdmi_card: hi6210_hdmi_card {
+ compatible = "hisilicon,hi6210-hdmi-audio-card";
+ reg = <0 0 0 0>;
+ sound-dai = <&i2s0>;
+ };
};
};
diff --git a/arch/arm64/boot/dts/hisilicon/hikey-pinctrl.dtsi b/arch/arm64/boot/dts/hisilicon/hikey-pinctrl.dtsi
new file mode 100644
index 000000000000..d103cf10f0fa
--- /dev/null
+++ b/arch/arm64/boot/dts/hisilicon/hikey-pinctrl.dtsi
@@ -0,0 +1,727 @@
+/*
+ * pinctrl dts file for the Hisilicon HiKey development board
+ *
+ */
+#include <dt-bindings/pinctrl/hisi.h>
+
+/ {
+ soc {
+ pmx0: pinmux@f7010000 {
+ pinctrl-names = "default";
+ pinctrl-0 = <
+ &boot_sel_pmx_func
+ &hkadc_ssi_pmx_func
+ &codec_clk_pmx_func
+ &pwm_in_pmx_func
+ &bl_pwm_pmx_func
+ >;
+
+ boot_sel_pmx_func: boot_sel_pmx_func {
+ pinctrl-single,pins = <
+ 0x0 MUX_M0 /* BOOT_SEL (IOMG000) */
+ >;
+ };
+
+ emmc_pmx_func: emmc_pmx_func {
+ pinctrl-single,pins = <
+ 0x100 MUX_M0 /* EMMC_CLK (IOMG064) */
+ 0x104 MUX_M0 /* EMMC_CMD (IOMG065) */
+ 0x108 MUX_M0 /* EMMC_DATA0 (IOMG066) */
+ 0x10c MUX_M0 /* EMMC_DATA1 (IOMG067) */
+ 0x110 MUX_M0 /* EMMC_DATA2 (IOMG068) */
+ 0x114 MUX_M0 /* EMMC_DATA3 (IOMG069) */
+ 0x118 MUX_M0 /* EMMC_DATA4 (IOMG070) */
+ 0x11c MUX_M0 /* EMMC_DATA5 (IOMG071) */
+ 0x120 MUX_M0 /* EMMC_DATA6 (IOMG072) */
+ 0x124 MUX_M0 /* EMMC_DATA7 (IOMG073) */
+ >;
+ };
+
+ sd_pmx_func: sd_pmx_func {
+ pinctrl-single,pins = <
+ 0xc MUX_M0 /* SD_CLK (IOMG003) */
+ 0x10 MUX_M0 /* SD_CMD (IOMG004) */
+ 0x14 MUX_M0 /* SD_DATA0 (IOMG005) */
+ 0x18 MUX_M0 /* SD_DATA1 (IOMG006) */
+ 0x1c MUX_M0 /* SD_DATA2 (IOMG007) */
+ 0x20 MUX_M0 /* SD_DATA3 (IOMG008) */
+ >;
+ };
+ sd_pmx_idle: sd_pmx_idle {
+ pinctrl-single,pins = <
+ 0xc MUX_M1 /* SD_CLK (IOMG003) */
+ 0x10 MUX_M1 /* SD_CMD (IOMG004) */
+ 0x14 MUX_M1 /* SD_DATA0 (IOMG005) */
+ 0x18 MUX_M1 /* SD_DATA1 (IOMG006) */
+ 0x1c MUX_M1 /* SD_DATA2 (IOMG007) */
+ 0x20 MUX_M1 /* SD_DATA3 (IOMG008) */
+ >;
+ };
+
+ sdio_pmx_func: sdio_pmx_func {
+ pinctrl-single,pins = <
+ 0x128 MUX_M0 /* SDIO_CLK (IOMG074) */
+ 0x12c MUX_M0 /* SDIO_CMD (IOMG075) */
+ 0x130 MUX_M0 /* SDIO_DATA0 (IOMG076) */
+ 0x134 MUX_M0 /* SDIO_DATA1 (IOMG077) */
+ 0x138 MUX_M0 /* SDIO_DATA2 (IOMG078) */
+ 0x13c MUX_M0 /* SDIO_DATA3 (IOMG079) */
+ >;
+ };
+ sdio_pmx_idle: sdio_pmx_idle {
+ pinctrl-single,pins = <
+ 0x128 MUX_M1 /* SDIO_CLK (IOMG074) */
+ 0x12c MUX_M1 /* SDIO_CMD (IOMG075) */
+ 0x130 MUX_M1 /* SDIO_DATA0 (IOMG076) */
+ 0x134 MUX_M1 /* SDIO_DATA1 (IOMG077) */
+ 0x138 MUX_M1 /* SDIO_DATA2 (IOMG078) */
+ 0x13c MUX_M1 /* SDIO_DATA3 (IOMG079) */
+ >;
+ };
+
+ isp_pmx_func: isp_pmx_func {
+ pinctrl-single,pins = <
+ 0x24 MUX_M0 /* ISP_PWDN0 (IOMG009) */
+ 0x28 MUX_M0 /* ISP_PWDN1 (IOMG010) */
+ 0x2c MUX_M0 /* ISP_PWDN2 (IOMG011) */
+ 0x30 MUX_M1 /* ISP_SHUTTER0 (IOMG012) */
+ 0x34 MUX_M1 /* ISP_SHUTTER1 (IOMG013) */
+ 0x38 MUX_M1 /* ISP_PWM (IOMG014) */
+ 0x3c MUX_M0 /* ISP_CCLK0 (IOMG015) */
+ 0x40 MUX_M0 /* ISP_CCLK1 (IOMG016) */
+ 0x44 MUX_M0 /* ISP_RESETB0 (IOMG017) */
+ 0x48 MUX_M0 /* ISP_RESETB1 (IOMG018) */
+ 0x4c MUX_M1 /* ISP_STROBE0 (IOMG019) */
+ 0x50 MUX_M1 /* ISP_STROBE1 (IOMG020) */
+ 0x54 MUX_M0 /* ISP_SDA0 (IOMG021) */
+ 0x58 MUX_M0 /* ISP_SCL0 (IOMG022) */
+ 0x5c MUX_M0 /* ISP_SDA1 (IOMG023) */
+ 0x60 MUX_M0 /* ISP_SCL1 (IOMG024) */
+ >;
+ };
+
+ hkadc_ssi_pmx_func: hkadc_ssi_pmx_func {
+ pinctrl-single,pins = <
+ 0x68 MUX_M0 /* HKADC_SSI (IOMG026) */
+ >;
+ };
+
+ codec_clk_pmx_func: codec_clk_pmx_func {
+ pinctrl-single,pins = <
+ 0x6c MUX_M0 /* CODEC_CLK (IOMG027) */
+ >;
+ };
+
+ codec_pmx_func: codec_pmx_func {
+ pinctrl-single,pins = <
+ 0x70 MUX_M1 /* DMIC_CLK (IOMG028) */
+ 0x74 MUX_M0 /* CODEC_SYNC (IOMG029) */
+ 0x78 MUX_M0 /* CODEC_DI (IOMG030) */
+ 0x7c MUX_M0 /* CODEC_DO (IOMG031) */
+ >;
+ };
+
+ fm_pmx_func: fm_pmx_func {
+ pinctrl-single,pins = <
+ 0x80 MUX_M1 /* FM_XCLK (IOMG032) */
+ 0x84 MUX_M1 /* FM_XFS (IOMG033) */
+ 0x88 MUX_M1 /* FM_DI (IOMG034) */
+ 0x8c MUX_M1 /* FM_DO (IOMG035) */
+ >;
+ };
+
+ bt_pmx_func: bt_pmx_func {
+ pinctrl-single,pins = <
+ 0x90 MUX_M0 /* BT_XCLK (IOMG036) */
+ 0x94 MUX_M0 /* BT_XFS (IOMG037) */
+ 0x98 MUX_M0 /* BT_DI (IOMG038) */
+ 0x9c MUX_M0 /* BT_DO (IOMG039) */
+ >;
+ };
+
+ pwm_in_pmx_func: pwm_in_pmx_func {
+ pinctrl-single,pins = <
+ 0xb8 MUX_M1 /* PWM_IN (IOMG046) */
+ >;
+ };
+
+ bl_pwm_pmx_func: bl_pwm_pmx_func {
+ pinctrl-single,pins = <
+ 0xbc MUX_M1 /* BL_PWM (IOMG047) */
+ >;
+ };
+
+ uart0_pmx_func: uart0_pmx_func {
+ pinctrl-single,pins = <
+ 0xc0 MUX_M0 /* UART0_RXD (IOMG048) */
+ 0xc4 MUX_M0 /* UART0_TXD (IOMG049) */
+ >;
+ };
+
+ uart1_pmx_func: uart1_pmx_func {
+ pinctrl-single,pins = <
+ 0xc8 MUX_M0 /* UART1_CTS_N (IOMG050) */
+ 0xcc MUX_M0 /* UART1_RTS_N (IOMG051) */
+ 0xd0 MUX_M0 /* UART1_RXD (IOMG052) */
+ 0xd4 MUX_M0 /* UART1_TXD (IOMG053) */
+ >;
+ };
+
+ uart2_pmx_func: uart2_pmx_func {
+ pinctrl-single,pins = <
+ 0xd8 MUX_M0 /* UART2_CTS_N (IOMG054) */
+ 0xdc MUX_M0 /* UART2_RTS_N (IOMG055) */
+ 0xe0 MUX_M0 /* UART2_RXD (IOMG056) */
+ 0xe4 MUX_M0 /* UART2_TXD (IOMG057) */
+ >;
+ };
+
+ uart3_pmx_func: uart3_pmx_func {
+ pinctrl-single,pins = <
+ 0x180 MUX_M1 /* UART3_CTS_N (IOMG096) */
+ 0x184 MUX_M1 /* UART3_RTS_N (IOMG097) */
+ 0x188 MUX_M1 /* UART3_RXD (IOMG098) */
+ 0x18c MUX_M1 /* UART3_TXD (IOMG099) */
+ >;
+ };
+
+ uart4_pmx_func: uart4_pmx_func {
+ pinctrl-single,pins = <
+ 0x1d0 MUX_M1 /* UART4_CTS_N (IOMG116) */
+ 0x1d4 MUX_M1 /* UART4_RTS_N (IOMG117) */
+ 0x1d8 MUX_M1 /* UART4_RXD (IOMG118) */
+ 0x1dc MUX_M1 /* UART4_TXD (IOMG119) */
+ >;
+ };
+
+ uart5_pmx_func: uart5_pmx_func {
+ pinctrl-single,pins = <
+ 0x1c8 MUX_M1 /* UART5_RXD (IOMG114) */
+ 0x1cc MUX_M1 /* UART5_TXD (IOMG115) */
+ >;
+ };
+
+ i2c0_pmx_func: i2c0_pmx_func {
+ pinctrl-single,pins = <
+ 0xe8 MUX_M0 /* I2C0_SCL (IOMG058) */
+ 0xec MUX_M0 /* I2C0_SDA (IOMG059) */
+ >;
+ };
+
+ i2c1_pmx_func: i2c1_pmx_func {
+ pinctrl-single,pins = <
+ 0xf0 MUX_M0 /* I2C1_SCL (IOMG060) */
+ 0xf4 MUX_M0 /* I2C1_SDA (IOMG061) */
+ >;
+ };
+
+ i2c2_pmx_func: i2c2_pmx_func {
+ pinctrl-single,pins = <
+ 0xf8 MUX_M0 /* I2C2_SCL (IOMG062) */
+ 0xfc MUX_M0 /* I2C2_SDA (IOMG063) */
+ >;
+ };
+
+ spi0_pmx_func: spi0_pmx_func {
+ pinctrl-single,pins = <
+ 0x1a0 MUX_M1 /* SPI0_DI (IOMG104) */
+ 0x1a4 MUX_M1 /* SPI0_DO (IOMG105) */
+ 0x1a8 MUX_M1 /* SPI0_CS_N (IOMG106) */
+ 0x1ac MUX_M1 /* SPI0_CLK (IOMG107) */
+ >;
+ };
+
+ modem_pcm_pmx_func: modem_pcm_pmx_func {
+ pinctrl-single,pins = <
+ 0x198 MUX_M3 /* MODEM_PCM_XCLK (IOMG102) */
+ 0x19c MUX_M3 /* MODEM_PCM_XFS (IOMG103) */
+ 0x1d0 MUX_M3 /* MODEM_PCM_DI (IOMG116) */
+ 0x1d4 MUX_M3 /* MODEM_PCM_DO (IOMG117) */
+ >;
+ };
+
+ };
+
+ pmx1: pinmux@f7010800 {
+
+ pinctrl-names = "default";
+ pinctrl-0 = <
+ &boot_sel_cfg_func
+ &hkadc_ssi_cfg_func
+ &codec_clk_cfg_func
+ &pwm_in_cfg_func
+ &bl_pwm_cfg_func
+ >;
+
+ boot_sel_cfg_func: boot_sel_cfg_func {
+ pinctrl-single,pins = <
+ 0x0 0x0 /* BOOT_SEL (IOCFG000) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_UP PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ hkadc_ssi_cfg_func: hkadc_ssi_cfg_func {
+ pinctrl-single,pins = <
+ 0x6c 0x0 /* HKADC_SSI (IOCFG027) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ emmc_clk_cfg_func: emmc_clk_cfg_func {
+ pinctrl-single,pins = <
+ 0x104 0x0 /* EMMC_CLK (IOCFG065) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_08MA DRIVE_MASK>;
+ };
+
+ emmc_cfg_func: emmc_cfg_func {
+ pinctrl-single,pins = <
+ 0x108 0x0 /* EMMC_CMD (IOCFG066) */
+ 0x10c 0x0 /* EMMC_DATA0 (IOCFG067) */
+ 0x110 0x0 /* EMMC_DATA1 (IOCFG068) */
+ 0x114 0x0 /* EMMC_DATA2 (IOCFG069) */
+ 0x118 0x0 /* EMMC_DATA3 (IOCFG070) */
+ 0x11c 0x0 /* EMMC_DATA4 (IOCFG071) */
+ 0x120 0x0 /* EMMC_DATA5 (IOCFG072) */
+ 0x124 0x0 /* EMMC_DATA6 (IOCFG073) */
+ 0x128 0x0 /* EMMC_DATA7 (IOCFG074) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_UP PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_04MA DRIVE_MASK>;
+ };
+
+ emmc_rst_cfg_func: emmc_rst_cfg_func {
+ pinctrl-single,pins = <
+ 0x12c 0x0 /* EMMC_RST_N (IOCFG075) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_04MA DRIVE_MASK>;
+ };
+
+ sd_clk_cfg_func: sd_clk_cfg_func {
+ pinctrl-single,pins = <
+ 0xc 0x0 /* SD_CLK (IOCFG003) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_10MA DRIVE_MASK>;
+ };
+ sd_clk_cfg_idle: sd_clk_cfg_idle {
+ pinctrl-single,pins = <
+ 0xc 0x0 /* SD_CLK (IOCFG003) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ sd_cfg_func: sd_cfg_func {
+ pinctrl-single,pins = <
+ 0x10 0x0 /* SD_CMD (IOCFG004) */
+ 0x14 0x0 /* SD_DATA0 (IOCFG005) */
+ 0x18 0x0 /* SD_DATA1 (IOCFG006) */
+ 0x1c 0x0 /* SD_DATA2 (IOCFG007) */
+ 0x20 0x0 /* SD_DATA3 (IOCFG008) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_08MA DRIVE_MASK>;
+ };
+ sd_cfg_idle: sd_cfg_idle {
+ pinctrl-single,pins = <
+ 0x10 0x0 /* SD_CMD (IOCFG004) */
+ 0x14 0x0 /* SD_DATA0 (IOCFG005) */
+ 0x18 0x0 /* SD_DATA1 (IOCFG006) */
+ 0x1c 0x0 /* SD_DATA2 (IOCFG007) */
+ 0x20 0x0 /* SD_DATA3 (IOCFG008) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ sdio_clk_cfg_func: sdio_clk_cfg_func {
+ pinctrl-single,pins = <
+ 0x134 0x0 /* SDIO_CLK (IOCFG077) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_08MA DRIVE_MASK>;
+ };
+ sdio_clk_cfg_idle: sdio_clk_cfg_idle {
+ pinctrl-single,pins = <
+ 0x134 0x0 /* SDIO_CLK (IOCFG077) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ sdio_cfg_func: sdio_cfg_func {
+ pinctrl-single,pins = <
+ 0x138 0x0 /* SDIO_CMD (IOCFG078) */
+ 0x13c 0x0 /* SDIO_DATA0 (IOCFG079) */
+ 0x140 0x0 /* SDIO_DATA1 (IOCFG080) */
+ 0x144 0x0 /* SDIO_DATA2 (IOCFG081) */
+ 0x148 0x0 /* SDIO_DATA3 (IOCFG082) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_UP PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_04MA DRIVE_MASK>;
+ };
+ sdio_cfg_idle: sdio_cfg_idle {
+ pinctrl-single,pins = <
+ 0x138 0x0 /* SDIO_CMD (IOCFG078) */
+ 0x13c 0x0 /* SDIO_DATA0 (IOCFG079) */
+ 0x140 0x0 /* SDIO_DATA1 (IOCFG080) */
+ 0x144 0x0 /* SDIO_DATA2 (IOCFG081) */
+ 0x148 0x0 /* SDIO_DATA3 (IOCFG082) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_UP PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ isp_cfg_func1: isp_cfg_func1 {
+ pinctrl-single,pins = <
+ 0x28 0x0 /* ISP_PWDN0 (IOCFG010) */
+ 0x2c 0x0 /* ISP_PWDN1 (IOCFG011) */
+ 0x30 0x0 /* ISP_PWDN2 (IOCFG012) */
+ 0x34 0x0 /* ISP_SHUTTER0 (IOCFG013) */
+ 0x38 0x0 /* ISP_SHUTTER1 (IOCFG014) */
+ 0x3c 0x0 /* ISP_PWM (IOCFG015) */
+ 0x40 0x0 /* ISP_CCLK0 (IOCFG016) */
+ 0x44 0x0 /* ISP_CCLK1 (IOCFG017) */
+ 0x48 0x0 /* ISP_RESETB0 (IOCFG018) */
+ 0x4c 0x0 /* ISP_RESETB1 (IOCFG019) */
+ 0x50 0x0 /* ISP_STROBE0 (IOCFG020) */
+ 0x58 0x0 /* ISP_SDA0 (IOCFG022) */
+ 0x5c 0x0 /* ISP_SCL0 (IOCFG023) */
+ 0x60 0x0 /* ISP_SDA1 (IOCFG024) */
+ 0x64 0x0 /* ISP_SCL1 (IOCFG025) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+ isp_cfg_idle1: isp_cfg_idle1 {
+ pinctrl-single,pins = <
+ 0x34 0x0 /* ISP_SHUTTER0 (IOCFG013) */
+ 0x38 0x0 /* ISP_SHUTTER1 (IOCFG014) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ isp_cfg_func2: isp_cfg_func2 {
+ pinctrl-single,pins = <
+ 0x54 0x0 /* ISP_STROBE1 (IOCFG021) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ codec_clk_cfg_func: codec_clk_cfg_func {
+ pinctrl-single,pins = <
+ 0x70 0x0 /* CODEC_CLK (IOCFG028) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_04MA DRIVE_MASK>;
+ };
+ codec_clk_cfg_idle: codec_clk_cfg_idle {
+ pinctrl-single,pins = <
+ 0x70 0x0 /* CODEC_CLK (IOCFG028) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ codec_cfg_func1: codec_cfg_func1 {
+ pinctrl-single,pins = <
+ 0x74 0x0 /* DMIC_CLK (IOCFG029) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ codec_cfg_func2: codec_cfg_func2 {
+ pinctrl-single,pins = <
+ 0x78 0x0 /* CODEC_SYNC (IOCFG030) */
+ 0x7c 0x0 /* CODEC_DI (IOCFG031) */
+ 0x80 0x0 /* CODEC_DO (IOCFG032) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_04MA DRIVE_MASK>;
+ };
+ codec_cfg_idle2: codec_cfg_idle2 {
+ pinctrl-single,pins = <
+ 0x78 0x0 /* CODEC_SYNC (IOCFG030) */
+ 0x7c 0x0 /* CODEC_DI (IOCFG031) */
+ 0x80 0x0 /* CODEC_DO (IOCFG032) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ fm_cfg_func: fm_cfg_func {
+ pinctrl-single,pins = <
+ 0x84 0x0 /* FM_XCLK (IOCFG033) */
+ 0x88 0x0 /* FM_XFS (IOCFG034) */
+ 0x8c 0x0 /* FM_DI (IOCFG035) */
+ 0x90 0x0 /* FM_DO (IOCFG036) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ bt_cfg_func: bt_cfg_func {
+ pinctrl-single,pins = <
+ 0x94 0x0 /* BT_XCLK (IOCFG037) */
+ 0x98 0x0 /* BT_XFS (IOCFG038) */
+ 0x9c 0x0 /* BT_DI (IOCFG039) */
+ 0xa0 0x0 /* BT_DO (IOCFG040) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+ bt_cfg_idle: bt_cfg_idle {
+ pinctrl-single,pins = <
+ 0x94 0x0 /* BT_XCLK (IOCFG037) */
+ 0x98 0x0 /* BT_XFS (IOCFG038) */
+ 0x9c 0x0 /* BT_DI (IOCFG039) */
+ 0xa0 0x0 /* BT_DO (IOCFG040) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ pwm_in_cfg_func: pwm_in_cfg_func {
+ pinctrl-single,pins = <
+ 0xbc 0x0 /* PWM_IN (IOCFG047) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ bl_pwm_cfg_func: bl_pwm_cfg_func {
+ pinctrl-single,pins = <
+ 0xc0 0x0 /* BL_PWM (IOCFG048) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ uart0_cfg_func1: uart0_cfg_func1 {
+ pinctrl-single,pins = <
+ 0xc4 0x0 /* UART0_RXD (IOCFG049) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_UP PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ uart0_cfg_func2: uart0_cfg_func2 {
+ pinctrl-single,pins = <
+ 0xc8 0x0 /* UART0_TXD (IOCFG050) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_04MA DRIVE_MASK>;
+ };
+
+ uart1_cfg_func1: uart1_cfg_func1 {
+ pinctrl-single,pins = <
+ 0xcc 0x0 /* UART1_CTS_N (IOCFG051) */
+ 0xd4 0x0 /* UART1_RXD (IOCFG053) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_UP PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ uart1_cfg_func2: uart1_cfg_func2 {
+ pinctrl-single,pins = <
+ 0xd0 0x0 /* UART1_RTS_N (IOCFG052) */
+ 0xd8 0x0 /* UART1_TXD (IOCFG054) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ uart2_cfg_func: uart2_cfg_func {
+ pinctrl-single,pins = <
+ 0xdc 0x0 /* UART2_CTS_N (IOCFG055) */
+ 0xe0 0x0 /* UART2_RTS_N (IOCFG056) */
+ 0xe4 0x0 /* UART2_RXD (IOCFG057) */
+ 0xe8 0x0 /* UART2_TXD (IOCFG058) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ uart3_cfg_func: uart3_cfg_func {
+ pinctrl-single,pins = <
+ 0x190 0x0 /* UART3_CTS_N (IOCFG100) */
+ 0x194 0x0 /* UART3_RTS_N (IOCFG101) */
+ 0x198 0x0 /* UART3_RXD (IOCFG102) */
+ 0x19c 0x0 /* UART3_TXD (IOCFG103) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ uart4_cfg_func: uart4_cfg_func {
+ pinctrl-single,pins = <
+ 0x1e0 0x0 /* UART4_CTS_N (IOCFG120) */
+ 0x1e4 0x0 /* UART4_RTS_N (IOCFG121) */
+ 0x1e8 0x0 /* UART4_RXD (IOCFG122) */
+ 0x1ec 0x0 /* UART4_TXD (IOCFG123) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ uart5_cfg_func: uart5_cfg_func {
+ pinctrl-single,pins = <
+ 0x1d8 0x0 /* UART5_RXD (IOCFG118) */
+ 0x1dc 0x0 /* UART5_TXD (IOCFG119) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ i2c0_cfg_func: i2c0_cfg_func {
+ pinctrl-single,pins = <
+ 0xec 0x0 /* I2C0_SCL (IOCFG059) */
+ 0xf0 0x0 /* I2C0_SDA (IOCFG060) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ i2c1_cfg_func: i2c1_cfg_func {
+ pinctrl-single,pins = <
+ 0xf4 0x0 /* I2C1_SCL (IOCFG061) */
+ 0xf8 0x0 /* I2C1_SDA (IOCFG062) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ i2c2_cfg_func: i2c2_cfg_func {
+ pinctrl-single,pins = <
+ 0xfc 0x0 /* I2C2_SCL (IOCFG063) */
+ 0x100 0x0 /* I2C2_SDA (IOCFG064) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ spi0_cfg_func: spi0_cfg_func {
+ pinctrl-single,pins = <
+ 0x1b0 0x0 /* SPI0_DI (IOCFG108) */
+ 0x1b4 0x0 /* SPI0_DO (IOCFG109) */
+ 0x1b8 0x0 /* SPI0_CS_N (IOCFG110) */
+ 0x1bc 0x0 /* SPI0_CLK (IOCFG111) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ modem_pcm_cfg_func: modem_pcm_cfg_func {
+ pinctrl-single,pins = <
+ 0x1a8 0x0 /* MODEM_PCM_XCLK (IOCFG106) */
+ 0x1ac 0x0 /* MODEM_PCM_XFS (IOCFG107) */
+ 0x1e0 0x0 /* MODEM_PCM_DI (IOCFG120) */
+ 0x1e4 0x0 /* MODEM_PCM_DO (IOCFG121) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DOWN PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE2_02MA DRIVE_MASK>;
+ };
+ };
+
+ pmx2: pinmux@f8001800 {
+
+ pinctrl-names = "default";
+ pinctrl-0 = <
+ &rstout_n_cfg_func
+ >;
+
+ rstout_n_cfg_func: rstout_n_cfg_func {
+ pinctrl-single,pins = <
+ 0x0 0x0 /* RSTOUT_N (IOCFG000) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ pmu_peri_en_cfg_func: pmu_peri_en_cfg_func {
+ pinctrl-single,pins = <
+ 0x4 0x0 /* PMU_PERI_EN (IOCFG001) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ sysclk0_en_cfg_func: sysclk0_en_cfg_func {
+ pinctrl-single,pins = <
+ 0x8 0x0 /* SYSCLK0_EN (IOCFG002) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+
+ jtag_tdo_cfg_func: jtag_tdo_cfg_func {
+ pinctrl-single,pins = <
+ 0xc 0x0 /* JTAG_TDO (IOCFG003) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_08MA DRIVE_MASK>;
+ };
+
+ rf_reset_cfg_func: rf_reset_cfg_func {
+ pinctrl-single,pins = <
+ 0x70 0x0 /* RF_RESET0 (IOCFG028) */
+ 0x74 0x0 /* RF_RESET1 (IOCFG029) */
+ >;
+ pinctrl-single,bias-pulldown = <PULL_DIS PULL_DOWN PULL_DIS PULL_DOWN>;
+ pinctrl-single,bias-pullup = <PULL_DIS PULL_UP PULL_DIS PULL_UP>;
+ pinctrl-single,drive-strength = <DRIVE1_02MA DRIVE_MASK>;
+ };
+ };
+ };
+};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index bdd7aa358d2a..8cc09af319eb 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,3 +1,5 @@
+CONFIG_TEE=y
+CONFIG_OPTEE=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -58,11 +60,25 @@ CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
+CONFIG_CMA_SIZE_MBYTES=128
CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_CPUIDLE=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_COMMON=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPUFREQ_DT=y
+CONFIG_ARM_HISI_ACPU_CPUFREQ=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -72,8 +88,25 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
+# CONFIG_ANDROID_PARANOID_NETWORK is not set
CONFIG_BPF_JIT=y
-# CONFIG_WIRELESS is not set
+CONFIG_BT=m
+CONFIG_BT_LEDS=y
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+CONFIG_BT_SELFTEST=y
+CONFIG_BT_HCIUART=m
+CONFIG_BT_WILINK=m
+CONFIG_CFG80211=m
+# CONFIG_CFG80211_DEFAULT_PS is not set
+CONFIG_MAC80211=m
+CONFIG_MAC80211_LEDS=y
+CONFIG_RFKILL=m
+# CONFIG_RFKILL_PM is not set
CONFIG_NET_9P=y
CONFIG_NET_9P_VIRTIO=y
# CONFIG_TEGRA_AHB is not set
@@ -83,6 +116,8 @@ CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DMA_CMA=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_VIRTIO_BLK=y
+CONFIG_TI_ST=m
+CONFIG_ST_HCI=m
# CONFIG_SCSI_PROC_FS is not set
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
@@ -100,9 +135,26 @@ CONFIG_NET_XGENE=y
CONFIG_SKY2=y
CONFIG_SMC91X=y
CONFIG_SMSC911X=y
-# CONFIG_WLAN is not set
+CONFIG_USB_NET_DRIVERS=y
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_RTL8152=m
+CONFIG_USB_USBNET=m
+CONFIG_USB_NET_DM9601=m
+CONFIG_USB_NET_SR9800=m
+CONFIG_USB_NET_SMSC75XX=m
+CONFIG_USB_NET_SMSC95XX=m
+CONFIG_USB_NET_PLUSB=m
+CONFIG_USB_NET_MCS7830=m
+CONFIG_WL_TI=y
+CONFIG_WL18XX=m
+CONFIG_WLCORE_SDIO=m
CONFIG_INPUT_EVDEV=y
CONFIG_KEYBOARD_GPIO=y
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_FT5X0X=y
+CONFIG_INPUT_MISC=y
+CONFIG_HISI_POWERKEY=y
# CONFIG_SERIO_SERPORT is not set
CONFIG_SERIO_AMBAKMI=y
CONFIG_LEGACY_PTY_COUNT=16
@@ -124,34 +176,73 @@ CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
CONFIG_VIRTIO_CONSOLE=y
# CONFIG_HW_RANDOM is not set
CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
CONFIG_I2C_QUP=y
CONFIG_SPI=y
CONFIG_SPI_PL022=y
CONFIG_SPI_QUP=y
+CONFIG_SPI_SPIDEV=m
+CONFIG_PINCTRL_SINGLE=y
CONFIG_PINCTRL_MSM8916=y
+CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_PL061=y
CONFIG_GPIO_XGENE=y
CONFIG_POWER_RESET_XGENE=y
CONFIG_POWER_RESET_SYSCON=y
# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_THERMAL_OF=y
+CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR=y
+CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
+CONFIG_CPU_THERMAL=y
+CONFIG_HISI_THERMAL=y
+CONFIG_MFD_HI655X_PMIC=y
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_REGULATOR_HI6220_MTCMOS=y
+CONFIG_REGULATOR_HI655X=y
CONFIG_REGULATOR_QCOM_SMD_RPM=y
CONFIG_FB=y
+CONFIG_DRM=y
+CONFIG_DRM_HISI_KIRIN=y
+CONFIG_HISI_KIRIN_DW_DSI=y
+CONFIG_DRM_CMA_FBDEV_BUFFER_NUM=2
+CONFIG_DRM_I2C_ADV7511=y
+CONFIG_DRM_PANEL_HIKEY=y
+CONFIG_ION_DUMMY=y
+CONFIG_MALI400=y
+CONFIG_MALI450=y
+CONFIG_MALI400_DEBUG=y
+# CONFIG_MALI400_PROFILING is not set
+# CONFIG_MALI_DVFS is not set
+CONFIG_MALI_SHARED_INTERRUPTS=y
+CONFIG_MALI_DT=y
+CONFIG_MALI_PLAT_SPECIFIC_DT=y
+CONFIG_DRM_ICN_6201=y
CONFIG_FB_ARMCLCD=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_DEBUG=y
+CONFIG_SND_DEBUG_VERBOSE=y
+CONFIG_SND_SOC=y
+CONFIG_SND_I2S_HI6210_I2S=y
CONFIG_USB=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
+CONFIG_USB_DWC2=y
CONFIG_USB_ISP1760=y
CONFIG_USB_ULPI=y
CONFIG_MMC=y
+CONFIG_MMC_BLOCK_MINORS=16
CONFIG_MMC_ARMMMCI=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
@@ -160,8 +251,10 @@ CONFIG_MMC_DW=y
CONFIG_MMC_DW_IDMAC=y
CONFIG_MMC_DW_PLTFM=y
CONFIG_MMC_DW_EXYNOS=y
+CONFIG_MMC_DW_K3=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
CONFIG_LEDS_SYSCON=y
CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_HEARTBEAT=y
@@ -170,17 +263,23 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_EFI=y
CONFIG_RTC_DRV_XGENE=y
CONFIG_DMADEVICES=y
+CONFIG_K3_DMA=y
CONFIG_QCOM_BAM_DMA=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_MMIO=y
+CONFIG_STUB_CLK_HI6220=y
CONFIG_COMMON_CLK_QCOM=y
CONFIG_MSM_GCC_8916=y
CONFIG_HWSPINLOCK_QCOM=y
+CONFIG_MAILBOX=y
+CONFIG_HI6220_MBOX=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_QCOM_SMEM=y
CONFIG_QCOM_SMD=y
CONFIG_QCOM_SMD_RPM=y
+CONFIG_COMMON_RESET_HI6220=y
+CONFIG_PHY_HI6220_USB=y
CONFIG_PHY_XGENE=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
@@ -197,7 +296,8 @@ CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
CONFIG_EFIVAR_FS=y
-# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_PSTORE=m
+CONFIG_PSTORE_RAM=m
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index a3e9d6fdbf21..bbd362cd1ed1 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -22,6 +22,15 @@ void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+struct sched_domain;
+#ifdef CONFIG_CPU_FREQ
+#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
+extern unsigned long cpufreq_scale_max_freq_capacity(int cpu);
+#endif
+#define arch_scale_cpu_capacity scale_cpu_capacity
+extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu);
+
#include <asm-generic/topology.h>
#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 474691f8b13a..27bf1e5180a1 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -14,10 +14,10 @@ CFLAGS_REMOVE_return_address.o = -pg
arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \
+ hyp-stub.o psci.o cpu_ops.o insn.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
- smp.o smp_spin_table.o topology.o
+ smp.o smp_spin_table.o topology.o smccc-call.o
extra-$(CONFIG_EFI) := efi-entry.o
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 3b6d8cc9dfe0..678f30b05a45 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -26,6 +26,7 @@
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/arm-smccc.h>
#include <asm/checksum.h>
@@ -68,3 +69,7 @@ EXPORT_SYMBOL(test_and_change_bit);
#ifdef CONFIG_FUNCTION_TRACER
EXPORT_SYMBOL(_mcount);
#endif
+
+ /* arm-smccc */
+EXPORT_SYMBOL(arm_smccc_smc);
+EXPORT_SYMBOL(arm_smccc_hvc);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 087cf9a65359..7c4146a4257b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -28,6 +28,7 @@
#include <asm/suspend.h>
#include <asm/vdso_datapage.h>
#include <linux/kbuild.h>
+#include <linux/arm-smccc.h>
int main(void)
{
@@ -162,5 +163,7 @@ int main(void)
DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
#endif
+ DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0));
+ DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
return 0;
}
diff --git a/arch/arm64/kernel/psci-call.S b/arch/arm64/kernel/psci-call.S
deleted file mode 100644
index cf83e61cd3b5..000000000000
--- a/arch/arm64/kernel/psci-call.S
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2015 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#include <linux/linkage.h>
-
-/* int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */
-ENTRY(__invoke_psci_fn_hvc)
- hvc #0
- ret
-ENDPROC(__invoke_psci_fn_hvc)
-
-/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */
-ENTRY(__invoke_psci_fn_smc)
- smc #0
- ret
-ENDPROC(__invoke_psci_fn_smc)
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
new file mode 100644
index 000000000000..ae0496fa4235
--- /dev/null
+++ b/arch/arm64/kernel/smccc-call.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License Version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+
+ .macro SMCCC instr
+ .cfi_startproc
+ \instr #0
+ ldr x4, [sp]
+ stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
+ stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
+ ret
+ .cfi_endproc
+ .endm
+
+/*
+ * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
+ * unsigned long a3, unsigned long a4, unsigned long a5,
+ * unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ */
+ENTRY(arm_smccc_smc)
+ SMCCC smc
+ENDPROC(arm_smccc_smc)
+
+/*
+ * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2,
+ * unsigned long a3, unsigned long a4, unsigned long a5,
+ * unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
+ */
+ENTRY(arm_smccc_hvc)
+ SMCCC hvc
+ENDPROC(arm_smccc_hvc)
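
The macro above stores the x0-x3 values returned by the secure world into
the caller-supplied struct arm_smccc_res (the res pointer is the ninth
argument, fetched from the stack). A minimal usage sketch in C, assuming
the standard PSCI 0.2 PSCI_VERSION function ID; the helper name itself is
hypothetical:

    #include <linux/arm-smccc.h>

    static u32 query_psci_version(void)
    {
            struct arm_smccc_res res;

            /* function ID goes in a0, unused arguments are zeroed */
            arm_smccc_smc(0x84000000, 0, 0, 0, 0, 0, 0, 0, &res);

            /* the conduit returns the result in x0, i.e. res.a0 */
            return res.a0;
    }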
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 694f6deedbab..5b2c67a510d8 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,10 +19,30 @@
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/sched_energy.h>
#include <asm/cputype.h>
#include <asm/topology.h>
+static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+
+unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+#ifdef CONFIG_CPU_FREQ
+ unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
+
+ return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
+#else
+ return per_cpu(cpu_scale, cpu);
+#endif
+}
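
The shift is plain Q10 fixed-point arithmetic: SCHED_CAPACITY_SHIFT is 10,
so both factors are scaled by 1024. A sketch with illustrative values (not
taken from this patch):

    /* a core currently capped at half its absolute max frequency */
    unsigned long cpu_scale = 1024;      /* nominal capacity, Q10 */
    unsigned long max_freq_scale = 512;  /* cur-max/abs-max ratio, Q10 */

    /* 1024 * 512 >> 10 == 512: the scheduler sees half capacity */
    unsigned long cap = cpu_scale * max_freq_scale >> 10;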
+
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+{
+ per_cpu(cpu_scale, cpu) = capacity;
+}
+
static int __init get_cpu_for_node(struct device_node *node)
{
struct device_node *cpu_node;
@@ -206,11 +226,67 @@ out:
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
+/* sd energy functions */
+static inline
+const struct sched_group_energy * const cpu_cluster_energy(int cpu)
+{
+ struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL1];
+
+ if (!sge) {
+ pr_warn("Invalid sched_group_energy for Cluster%d\n", cpu);
+ return NULL;
+ }
+
+ return sge;
+}
+
+static inline
+const struct sched_group_energy * const cpu_core_energy(int cpu)
+{
+ struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL0];
+
+ if (!sge) {
+ pr_warn("Invalid sched_group_energy for CPU%d\n", cpu);
+ return NULL;
+ }
+
+ return sge;
+}
+
const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_topology[cpu].core_sibling;
}
+static inline int cpu_corepower_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN |
+ SD_SHARE_CAP_STATES;
+}
+
+static struct sched_domain_topology_level arm64_topology[] = {
+#ifdef CONFIG_SCHED_MC
+ { cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) },
+#endif
+ { cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
+static void update_cpu_capacity(unsigned int cpu)
+{
+ unsigned long capacity = SCHED_CAPACITY_SCALE;
+
+ if (cpu_core_energy(cpu)) {
+ int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1;
+ capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap;
+ }
+
+ set_capacity_scale(cpu, capacity);
+
+ pr_info("CPU%d: update cpu_capacity %lu\n",
+ cpu, arch_scale_cpu_capacity(NULL, cpu));
+}
+
static void update_siblings_masks(unsigned int cpuid)
{
struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
@@ -272,6 +348,7 @@ void store_cpu_topology(unsigned int cpuid)
topology_populated:
update_siblings_masks(cpuid);
+ update_cpu_capacity(cpuid);
}
static void __init reset_cpu_topology(void)
@@ -302,4 +379,8 @@ void __init init_cpu_topology(void)
*/
if (of_have_populated_dt() && parse_dt_topology())
reset_cpu_topology();
+ else
+ set_sched_topology(arm64_topology);
+
+ init_sched_energy_costs();
}
diff --git a/drivers/Kconfig b/drivers/Kconfig
index d2ac339de85f..63baceb6c118 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -198,4 +198,6 @@ source "drivers/hwtracing/intel_th/Kconfig"
source "drivers/fpga/Kconfig"
+source "drivers/tee/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 098997f2cc3a..a2a1fbd53bbe 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -173,3 +173,4 @@ obj-$(CONFIG_STM) += hwtracing/stm/
obj-$(CONFIG_ANDROID) += android/
obj-$(CONFIG_NVMEM) += nvmem/
obj-$(CONFIG_FPGA) += fpga/
+obj-$(CONFIG_TEE) += tee/
diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c
index 57eb935aedc7..f03792e89ae1 100644
--- a/drivers/bluetooth/btwilink.c
+++ b/drivers/bluetooth/btwilink.c
@@ -30,6 +30,7 @@
#include <linux/ti_wilink_st.h>
#include <linux/module.h>
+#include <linux/of.h>
/* Bluetooth Driver Version */
#define VERSION "1.0"
@@ -273,6 +274,14 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
return 0;
}
+static const struct of_device_id btwilink_of_match[] = {
+ {
+ .compatible = "btwilink",
+ },
+ {}
+};
+MODULE_DEVICE_TABLE(of, btwilink_of_match);
+
static int bt_ti_probe(struct platform_device *pdev)
{
static struct ti_st *hst;
@@ -336,6 +345,8 @@ static struct platform_driver btwilink_driver = {
.remove = bt_ti_remove,
.driver = {
.name = "btwilink",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(btwilink_of_match),
},
};
diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c
index 4563343b6420..fe5ec1d0a11b 100644
--- a/drivers/clk/hisilicon/clk-hi6220.c
+++ b/drivers/clk/hisilicon/clk-hi6220.c
@@ -11,9 +11,12 @@
*/
#include <linux/kernel.h>
+#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/clkdev.h>
+#include <linux/delay.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
@@ -34,8 +37,8 @@ static struct hisi_fixed_rate_clock hi6220_fixed_rate_clks[] __initdata = {
{ HI6220_PLL_BBP, "bbppll0", NULL, CLK_IS_ROOT, 245760000, },
{ HI6220_PLL_GPU, "gpupll", NULL, CLK_IS_ROOT, 1000000000,},
{ HI6220_PLL1_DDR, "ddrpll1", NULL, CLK_IS_ROOT, 1066000000,},
- { HI6220_PLL_SYS, "syspll", NULL, CLK_IS_ROOT, 1200000000,},
- { HI6220_PLL_SYS_MEDIA, "media_syspll", NULL, CLK_IS_ROOT, 1200000000,},
+ { HI6220_PLL_SYS, "syspll", NULL, CLK_IS_ROOT, 1190494208,},
+ { HI6220_PLL_SYS_MEDIA, "media_syspll", NULL, CLK_IS_ROOT, 1190494208,},
{ HI6220_DDR_SRC, "ddr_sel_src", NULL, CLK_IS_ROOT, 1200000000,},
{ HI6220_PLL_MEDIA, "media_pll", NULL, CLK_IS_ROOT, 1440000000,},
{ HI6220_PLL_DDR, "ddrpll0", NULL, CLK_IS_ROOT, 1600000000,},
@@ -68,24 +71,75 @@ static struct hisi_gate_clock hi6220_separated_gate_clks_ao[] __initdata = {
{ HI6220_TIMER7_PCLK, "timer7_pclk", "clk_tcxo", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x630, 22, 0, },
{ HI6220_TIMER8_PCLK, "timer8_pclk", "clk_tcxo", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x630, 23, 0, },
{ HI6220_UART0_PCLK, "uart0_pclk", "clk_tcxo", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x630, 24, 0, },
+ { HI6220_RTC0_PCLK, "rtc0_pclk", "clk_tcxo", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x630, 25, 0, },
};
+#define SOC_PERI_SCTRL_BASE_ADDR 0xF7030000 /* peri ctrl base addr */
+#define SC_PERIPH_CTRL14 0x02C
+#define SC_PERIPH_STAT1 0x094
+#define SOC_PMCTRL_BASE_ADDR 0xF7032000 /* pm ctrl base addr */
+#define SC_PM_DDRPLL_STAT 0x18
+#define SC_PM_SYSPLL_STAT 0x28
+#define SC_PM_MEDPLL_STAT 0x38
+
+static struct hisi_clock_data *clk_data_ao;
+
static void __init hi6220_clk_ao_init(struct device_node *np)
{
- struct hisi_clock_data *clk_data_ao;
+ void __iomem *peri_base, *pm_base;
+ unsigned int freq_u, freq_l, freq, pll_stat;
+ int i;
clk_data_ao = hisi_clk_init(np, HI6220_AO_NR_CLKS);
if (!clk_data_ao)
return;
+ peri_base = ioremap(SOC_PERI_SCTRL_BASE_ADDR, 0x1000);
+ pm_base = ioremap(SOC_PMCTRL_BASE_ADDR, 0x1000);
+ /* SYSPLL is set by the bootloader; read back its enablement status */
+ pll_stat = readl(pm_base + SC_PM_SYSPLL_STAT);
+ pr_info("SYSPLL: syspll PM status: 0x%x\n", pll_stat);
+ /* writing 0x2101 starts measurement of clk_sys_pll */
+ writew(0x2101, peri_base + SC_PERIPH_CTRL14);
+ mdelay(1);
+ /* read back the calculated value */
+ freq_l = readw(peri_base + SC_PERIPH_STAT1);
+ freq_u = readw(peri_base + SC_PERIPH_STAT1 + 2);
+ mdelay(1);
+ freq = freq_u << 16 | freq_l;
+ pr_info("SYSPLL: syspll is read: l: 0x%04X, u: 0x%04X\n",
+ freq_l, freq_u);
+ pr_info("SYSPLL: syspll is read: 0x%X, %d\n", freq, freq);
+ if (freq == 0x00020000 || freq == 0) {
+ pr_info("SYSPLL: ERROR: read returns misterious value.\n");
+ freq = 1200000000;
+ }
+
+ /* mask off freq */
+ freq -= (freq % 100000);
+ pr_info("SYSPLL: set syspll medpll: %d\n", freq);
+
+ for (i = 0; i < ARRAY_SIZE(hi6220_fixed_rate_clks); i++) {
+ if (hi6220_fixed_rate_clks[i].id == HI6220_PLL_SYS ||
+ hi6220_fixed_rate_clks[i].id == HI6220_PLL_SYS_MEDIA) {
+ hi6220_fixed_rate_clks[i].fixed_rate = freq;
+ pr_info("SYSPLL: modified fix_rate[%d], id=%d, f=%d\n",
+ i, hi6220_fixed_rate_clks[i].id, freq);
+ }
+ }
+
hisi_clk_register_fixed_rate(hi6220_fixed_rate_clks,
- ARRAY_SIZE(hi6220_fixed_rate_clks), clk_data_ao);
+ ARRAY_SIZE(hi6220_fixed_rate_clks),
+ clk_data_ao);
hisi_clk_register_fixed_factor(hi6220_fixed_factor_clks,
- ARRAY_SIZE(hi6220_fixed_factor_clks), clk_data_ao);
+ ARRAY_SIZE(hi6220_fixed_factor_clks),
+ clk_data_ao);
hisi_clk_register_gate_sep(hi6220_separated_gate_clks_ao,
- ARRAY_SIZE(hi6220_separated_gate_clks_ao), clk_data_ao);
+ ARRAY_SIZE(hi6220_separated_gate_clks_ao),
+ clk_data_ao);
}
CLK_OF_DECLARE(hi6220_clk_ao, "hisilicon,hi6220-aoctrl", hi6220_clk_ao_init);
@@ -192,6 +246,13 @@ static void __init hi6220_clk_sys_init(struct device_node *np)
hi6220_clk_register_divider(hi6220_div_clks_sys,
ARRAY_SIZE(hi6220_div_clks_sys), clk_data);
+
+ if (!clk_data_ao)
+ return;
+
+ /* enable high speed clock on UART1 mux */
+ clk_set_parent(clk_data->clk_data.clks[HI6220_UART1_SRC],
+ clk_data_ao->clk_data.clks[HI6220_150M]);
}
CLK_OF_DECLARE(hi6220_clk_sys, "hisilicon,hi6220-sysctrl", hi6220_clk_sys_init);
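
For reference, the SYSPLL measurement sequence in hi6220_clk_ao_init() can be
read as a standalone helper. A minimal sketch, assuming the register layout
defined above (the function name is illustrative, not part of the patch):

	static unsigned long hi6220_measure_syspll(void __iomem *peri_base)
	{
		unsigned long freq;

		/* 0x2101 asks the peripheral controller to measure clk_sys_pll */
		writew(0x2101, peri_base + SC_PERIPH_CTRL14);
		mdelay(1);

		/* the 32-bit result is split across two 16-bit status registers */
		freq = readw(peri_base + SC_PERIPH_STAT1);
		freq |= (unsigned long)readw(peri_base + SC_PERIPH_STAT1 + 2) << 16;

		/* round down to a 100 kHz boundary, e.g. 1190494208 -> 1190400000 */
		return freq - (freq % 100000);
	}
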
diff --git a/drivers/clk/hisilicon/clkgate-separated.c b/drivers/clk/hisilicon/clkgate-separated.c
index a47812f56a17..7908bc3c9ec7 100644
--- a/drivers/clk/hisilicon/clkgate-separated.c
+++ b/drivers/clk/hisilicon/clkgate-separated.c
@@ -120,6 +120,7 @@ struct clk *hisi_register_clkgate_sep(struct device *dev, const char *name,
sclk->bit_idx = bit_idx;
sclk->flags = clk_gate_flags;
sclk->hw.init = &init;
+ sclk->lock = lock;
clk = clk_register(dev, &sclk->hw);
if (IS_ERR(clk))
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 659879a56dba..af523c539af0 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -102,6 +102,15 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
Be aware that not all cpufreq drivers support the conservative
governor. If unsure have a look at the help section of the
driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_SCHED
+ bool "sched"
+ select CPU_FREQ_GOV_SCHED
+ help
+	  Use the CPUfreq governor 'sched' as default. This scales
+	  CPU frequency using utilization estimates from the
+	  scheduler.
+
endchoice
config CPU_FREQ_GOV_PERFORMANCE
@@ -183,6 +192,18 @@ config CPU_FREQ_GOV_CONSERVATIVE
If in doubt, say N.
+config CPU_FREQ_GOV_SCHED
+ bool "'sched' cpufreq governor"
+ depends on CPU_FREQ
+ select CPU_FREQ_GOV_COMMON
+ help
+	  'sched' - this governor scales CPU frequency from the
+	  scheduler as a function of CPU capacity utilization. It does
+	  not evaluate utilization on a periodic basis (as ondemand
+	  does) but is instead driven by scheduler events.
+
+ If in doubt, say N.
+
comment "CPU frequency scaling drivers"
config CPUFREQ_DT
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index b1f8a73e5a94..eb315b6ea7fd 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -6,6 +6,8 @@
config ARM_BIG_LITTLE_CPUFREQ
tristate "Generic ARM big LITTLE CPUfreq driver"
depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK
+	# if CPU_THERMAL is on and THERMAL=m, ARM_BIG_LITTLE_CPUFREQ cannot be =y
+ depends on !CPU_THERMAL || THERMAL
select PM_OPP
help
This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index c5d256caa664..c251247ae661 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -23,6 +23,7 @@
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
+#include <linux/cpu_cooling.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -55,6 +56,7 @@ static bool bL_switching_enabled;
#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq)
#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
+static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
static struct cpufreq_arm_bL_ops *arm_bL_ops;
static struct clk *clk[MAX_CLUSTERS];
static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
@@ -493,6 +495,12 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
static int bL_cpufreq_exit(struct cpufreq_policy *policy)
{
struct device *cpu_dev;
+ int cur_cluster = cpu_to_cluster(policy->cpu);
+
+ if (cur_cluster < MAX_CLUSTERS) {
+ cpufreq_cooling_unregister(cdev[cur_cluster]);
+ cdev[cur_cluster] = NULL;
+ }
cpu_dev = get_cpu_device(policy->cpu);
if (!cpu_dev) {
@@ -507,6 +515,38 @@ static int bL_cpufreq_exit(struct cpufreq_policy *policy)
return 0;
}
+static void bL_cpufreq_ready(struct cpufreq_policy *policy)
+{
+ struct device *cpu_dev = get_cpu_device(policy->cpu);
+ int cur_cluster = cpu_to_cluster(policy->cpu);
+ struct device_node *np;
+
+ /* Do not register a cpu_cooling device if we are in IKS mode */
+ if (cur_cluster >= MAX_CLUSTERS)
+ return;
+
+ np = of_node_get(cpu_dev->of_node);
+ if (WARN_ON(!np))
+ return;
+
+ if (of_find_property(np, "#cooling-cells", NULL)) {
+ u32 power_coefficient = 0;
+
+ of_property_read_u32(np, "dynamic-power-coefficient",
+ &power_coefficient);
+
+ cdev[cur_cluster] = of_cpufreq_power_cooling_register(np,
+ policy->related_cpus, power_coefficient, NULL);
+ if (IS_ERR(cdev[cur_cluster])) {
+ dev_err(cpu_dev,
+ "running cpufreq without cooling device: %ld\n",
+ PTR_ERR(cdev[cur_cluster]));
+ cdev[cur_cluster] = NULL;
+ }
+ }
+ of_node_put(np);
+}
+
static struct cpufreq_driver bL_cpufreq_driver = {
.name = "arm-big-little",
.flags = CPUFREQ_STICKY |
@@ -517,6 +557,7 @@ static struct cpufreq_driver bL_cpufreq_driver = {
.get = bL_cpufreq_get_rate,
.init = bL_cpufreq_init,
.exit = bL_cpufreq_exit,
+ .ready = bL_cpufreq_ready,
.attr = cpufreq_generic_attr,
};
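
The "dynamic-power-coefficient" read above is the capacitance term C (in
uW/MHz/V^2) of the simple dynamic power model used by the power-aware cooling
device. A sketch of that model, assuming it follows Pdyn = C * V^2 * f (the
function name and example values are illustrative):

	/* returns dynamic power in mW for voltage in mV and frequency in MHz */
	static u32 dyn_power_mw(u32 capacitance, u32 voltage_mv, u32 freq_mhz)
	{
		u64 power = (u64)capacitance * voltage_mv * voltage_mv * freq_mhz;

		do_div(power, 1000000000);	/* uW/MHz/V^2 * mV^2 * MHz -> mW */
		return power;
	}

	/* e.g. C = 530, V = 900 mV, f = 1200 MHz gives ~515 mW */
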
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 90d64081ddb3..1ceece9d6711 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -407,8 +407,13 @@ static void cpufreq_ready(struct cpufreq_policy *policy)
* thermal DT code takes care of matching them.
*/
if (of_find_property(np, "#cooling-cells", NULL)) {
- priv->cdev = of_cpufreq_cooling_register(np,
- policy->related_cpus);
+ u32 power_coefficient = 0;
+
+ of_property_read_u32(np, "dynamic-power-coefficient",
+ &power_coefficient);
+
+ priv->cdev = of_cpufreq_power_cooling_register(np,
+ policy->related_cpus, power_coefficient, NULL);
if (IS_ERR(priv->cdev)) {
dev_err(priv->cpu_dev,
"running cpufreq without cooling device: %ld\n",
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ebed319657e7..18b2e1a82825 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -29,6 +29,7 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/tick.h>
+#include <linux/sched.h>
#include <trace/events/power.h>
static LIST_HEAD(cpufreq_policy_list);
@@ -154,6 +155,12 @@ bool have_governor_per_policy(void)
}
EXPORT_SYMBOL_GPL(have_governor_per_policy);
+bool cpufreq_driver_is_slow(void)
+{
+ return !(cpufreq_driver->flags & CPUFREQ_DRIVER_FAST);
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_is_slow);
+
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
if (have_governor_per_policy())
@@ -347,6 +354,50 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
#endif
}
+/*********************************************************************
+ * FREQUENCY INVARIANT CPU CAPACITY *
+ *********************************************************************/
+
+static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void
+scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs)
+{
+ unsigned long cur = freqs ? freqs->new : policy->cur;
+ unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max;
+ struct cpufreq_cpuinfo *cpuinfo = &policy->cpuinfo;
+ int cpu;
+
+ pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n",
+ cpumask_pr_args(policy->cpus), cur, policy->max, scale);
+
+ for_each_cpu(cpu, policy->cpus)
+ per_cpu(freq_scale, cpu) = scale;
+
+ if (freqs)
+ return;
+
+ scale = (policy->max << SCHED_CAPACITY_SHIFT) / cpuinfo->max_freq;
+
+ pr_debug("cpus %*pbl cur max/max freq %u/%u kHz max freq scale %lu\n",
+ cpumask_pr_args(policy->cpus), policy->max, cpuinfo->max_freq,
+ scale);
+
+ for_each_cpu(cpu, policy->cpus)
+ per_cpu(max_freq_scale, cpu) = scale;
+}
+
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(freq_scale, cpu);
+}
+
+unsigned long cpufreq_scale_max_freq_capacity(int cpu)
+{
+ return per_cpu(max_freq_scale, cpu);
+}
+
static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, unsigned int state)
{
@@ -423,6 +474,7 @@ static void cpufreq_notify_post_transition(struct cpufreq_policy *policy,
void cpufreq_freq_transition_begin(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs)
{
+ int cpu;
/*
* Catch double invocations of _begin() which lead to self-deadlock.
@@ -450,6 +502,10 @@ wait:
spin_unlock(&policy->transition_lock);
+ scale_freq_capacity(policy, freqs);
+ for_each_cpu(cpu, policy->cpus)
+ trace_cpu_capacity(capacity_curr_of(cpu), cpu);
+
cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE);
}
EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin);
@@ -2131,6 +2187,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_NOTIFY, new_policy);
+ scale_freq_capacity(new_policy, NULL);
+
policy->min = new_policy->min;
policy->max = new_policy->max;
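
The scale factors above are fixed-point ratios against SCHED_CAPACITY_SCALE
(1024). A quick sketch of the arithmetic (frequencies in kHz, values
illustrative):

	static unsigned long freq_to_scale(unsigned long cur, unsigned long max)
	{
		return (cur << SCHED_CAPACITY_SHIFT) / max;	/* SHIFT == 10 */
	}

	/*
	 * freq_to_scale(600000, 1200000)  == 512   (half capacity)
	 * freq_to_scale(1200000, 1200000) == 1024  (full capacity)
	 * freq_to_scale(900000, 1200000)  == 768   (thermally capped max)
	 */
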
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index d40b2c077746..151971627757 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -192,7 +192,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
}
/* Take note of the planned idle state. */
- sched_idle_set_state(target_state);
+ sched_idle_set_state(target_state, index);
trace_cpu_idle_rcuidle(index, dev->cpu);
time_start = ktime_get();
@@ -205,7 +205,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
/* The cpu is no longer idle or about to enter idle. */
- sched_idle_set_state(NULL);
+ sched_idle_set_state(NULL, -1);
if (broadcast) {
if (WARN_ON_ONCE(!irqs_disabled()))
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index e6cd1a32025a..9f98a499ee64 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -279,7 +279,6 @@ config INTEL_MIC_X100_DMA
config K3_DMA
tristate "Hisilicon K3 DMA support"
- depends on ARCH_HI3xxx
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
help
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index 1ba2fd73852d..31333d554e08 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 Linaro Ltd.
+ * Copyright (c) 2013 - 2015 Linaro Ltd.
* Copyright (c) 2013 Hisilicon Limited.
*
* This program is free software; you can redistribute it and/or modify
@@ -28,19 +28,23 @@
#define INT_STAT 0x00
#define INT_TC1 0x04
+#define INT_TC2 0x08
#define INT_ERR1 0x0c
#define INT_ERR2 0x10
#define INT_TC1_MASK 0x18
+#define INT_TC2_MASK 0x1c
#define INT_ERR1_MASK 0x20
#define INT_ERR2_MASK 0x24
#define INT_TC1_RAW 0x600
-#define INT_ERR1_RAW 0x608
-#define INT_ERR2_RAW 0x610
+#define INT_TC2_RAW 0x608
+#define INT_ERR1_RAW 0x610
+#define INT_ERR2_RAW 0x618
#define CH_PRI 0x688
#define CH_STAT 0x690
#define CX_CUR_CNT 0x704
#define CX_LLI 0x800
-#define CX_CNT 0x810
+#define CX_CNT1 0x80c
+#define CX_CNT0 0x810
#define CX_SRC 0x814
#define CX_DST 0x818
#define CX_CFG 0x81c
@@ -49,6 +53,7 @@
#define CX_LLI_CHAIN_EN 0x2
#define CX_CFG_EN 0x1
+#define CX_CFG_NODEIRQ BIT(1)
#define CX_CFG_MEM2PER (0x1 << 2)
#define CX_CFG_PER2MEM (0x2 << 2)
#define CX_CFG_SRCINCR (0x1 << 31)
@@ -81,6 +86,7 @@ struct k3_dma_chan {
enum dma_transfer_direction dir;
dma_addr_t dev_addr;
enum dma_status status;
+ bool cyclic;
};
struct k3_dma_phy {
@@ -134,6 +140,7 @@ static void k3_dma_terminate_chan(struct k3_dma_phy *phy, struct k3_dma_dev *d)
val = 0x1 << phy->idx;
writel_relaxed(val, d->base + INT_TC1_RAW);
+ writel_relaxed(val, d->base + INT_TC2_RAW);
writel_relaxed(val, d->base + INT_ERR1_RAW);
writel_relaxed(val, d->base + INT_ERR2_RAW);
}
@@ -141,11 +148,14 @@ static void k3_dma_terminate_chan(struct k3_dma_phy *phy, struct k3_dma_dev *d)
static void k3_dma_set_desc(struct k3_dma_phy *phy, struct k3_desc_hw *hw)
{
writel_relaxed(hw->lli, phy->base + CX_LLI);
- writel_relaxed(hw->count, phy->base + CX_CNT);
+ writel_relaxed(hw->count, phy->base + CX_CNT0);
writel_relaxed(hw->saddr, phy->base + CX_SRC);
writel_relaxed(hw->daddr, phy->base + CX_DST);
writel_relaxed(AXI_CFG_DEFAULT, phy->base + AXI_CFG);
writel_relaxed(hw->config, phy->base + CX_CFG);
+	/* ensure the descriptor writes are visible before the channel starts */
+	wmb();
+	pr_debug("%s: desc %p: ch idx = %d, lli: 0x%x, count: 0x%x, saddr: 0x%x, daddr: 0x%x, cfg: 0x%x\n",
+		 __func__, (void *)hw, phy->idx, hw->lli, hw->count,
+		 hw->saddr, hw->daddr, hw->config);
}
static u32 k3_dma_get_curr_cnt(struct k3_dma_dev *d, struct k3_dma_phy *phy)
@@ -175,11 +185,13 @@ static void k3_dma_enable_dma(struct k3_dma_dev *d, bool on)
/* unmask irq */
writel_relaxed(0xffff, d->base + INT_TC1_MASK);
+ writel_relaxed(0xffff, d->base + INT_TC2_MASK);
writel_relaxed(0xffff, d->base + INT_ERR1_MASK);
writel_relaxed(0xffff, d->base + INT_ERR2_MASK);
} else {
/* mask irq */
writel_relaxed(0x0, d->base + INT_TC1_MASK);
+ writel_relaxed(0x0, d->base + INT_TC2_MASK);
writel_relaxed(0x0, d->base + INT_ERR1_MASK);
writel_relaxed(0x0, d->base + INT_ERR2_MASK);
}
@@ -192,24 +204,31 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
struct k3_dma_chan *c;
u32 stat = readl_relaxed(d->base + INT_STAT);
u32 tc1 = readl_relaxed(d->base + INT_TC1);
+ u32 tc2 = readl_relaxed(d->base + INT_TC2);
u32 err1 = readl_relaxed(d->base + INT_ERR1);
u32 err2 = readl_relaxed(d->base + INT_ERR2);
u32 i, irq_chan = 0;
while (stat) {
i = __ffs(stat);
- stat &= (stat - 1);
- if (likely(tc1 & BIT(i))) {
+ stat &= ~BIT(i);
+ if (likely(tc1 & BIT(i)) || (tc2 & BIT(i))) {
+ unsigned long flags;
+
p = &d->phy[i];
c = p->vchan;
- if (c) {
- unsigned long flags;
-
+ if (c && (tc1 & BIT(i))) {
spin_lock_irqsave(&c->vc.lock, flags);
vchan_cookie_complete(&p->ds_run->vd);
p->ds_done = p->ds_run;
spin_unlock_irqrestore(&c->vc.lock, flags);
}
+ if (c && (tc2 & BIT(i))) {
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (p->ds_run != NULL)
+ vchan_cyclic_callback(&p->ds_run->vd);
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+ }
irq_chan |= BIT(i);
}
if (unlikely((err1 & BIT(i)) || (err2 & BIT(i))))
@@ -217,14 +236,17 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
}
writel_relaxed(irq_chan, d->base + INT_TC1_RAW);
+ writel_relaxed(irq_chan, d->base + INT_TC2_RAW);
writel_relaxed(err1, d->base + INT_ERR1_RAW);
writel_relaxed(err2, d->base + INT_ERR2_RAW);
- if (irq_chan) {
+ if (irq_chan)
tasklet_schedule(&d->task);
+
+ if (irq_chan || err1 || err2)
return IRQ_HANDLED;
- } else
- return IRQ_NONE;
+
+ return IRQ_NONE;
}
static int k3_dma_start_txd(struct k3_dma_chan *c)
@@ -257,6 +279,21 @@ static int k3_dma_start_txd(struct k3_dma_chan *c)
return -EAGAIN;
}
+
+static void
+k3_dma_set_cyclic(struct k3_dma_chan *c, struct k3_dma_dev *d, int cyclic)
+{
+ int mask = 1 << c->phy->idx;
+
+	/* clear any stale TC2 status for this channel */
+	writel_relaxed(mask, d->base + INT_TC2_RAW);
+ if (cyclic)
+ writel_relaxed(readl(d->base + INT_TC2_MASK) | mask,
+ d->base + INT_TC2_MASK);
+ else
+ writel_relaxed(readl(d->base + INT_TC2_MASK) & ~mask,
+ d->base + INT_TC2_MASK);
+}
+
static void k3_dma_tasklet(unsigned long arg)
{
struct k3_dma_dev *d = (struct k3_dma_dev *)arg;
@@ -272,6 +309,7 @@ static void k3_dma_tasklet(unsigned long arg)
if (k3_dma_start_txd(c)) {
/* No current txd associated with this channel */
dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx);
+ k3_dma_set_cyclic(c, d, 0);
/* Mark this channel free */
c->phy = NULL;
p->vchan = NULL;
@@ -294,6 +332,7 @@ static void k3_dma_tasklet(unsigned long arg)
/* Mark this channel allocated */
p->vchan = c;
c->phy = p;
+ k3_dma_set_cyclic(c, d, c->cyclic);
dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc);
}
}
@@ -350,7 +389,7 @@ static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
* its total size.
*/
vd = vchan_find_desc(&c->vc, cookie);
- if (vd) {
+ if (vd && !c->cyclic) {
bytes = container_of(vd, struct k3_dma_desc_sw, vd)->size;
} else if ((!p) || (!p->ds_run)) {
bytes = 0;
@@ -360,7 +399,7 @@ static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
bytes = k3_dma_get_curr_cnt(d, p);
clli = k3_dma_get_curr_lli(p);
- index = (clli - ds->desc_hw_lli) / sizeof(struct k3_desc_hw);
+ index = ((clli - ds->desc_hw_lli) / sizeof(struct k3_desc_hw)) + 1;
for (; index < ds->desc_num; index++) {
bytes += ds->desc_hw[index].count;
/* end of lli */
@@ -401,14 +440,20 @@ static void k3_dma_issue_pending(struct dma_chan *chan)
static void k3_dma_fill_desc(struct k3_dma_desc_sw *ds, dma_addr_t dst,
dma_addr_t src, size_t len, u32 num, u32 ccfg)
{
- if ((num + 1) < ds->desc_num)
+ if (num != ds->desc_num - 1)
ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
sizeof(struct k3_desc_hw);
+
ds->desc_hw[num].lli |= CX_LLI_CHAIN_EN;
ds->desc_hw[num].count = len;
ds->desc_hw[num].saddr = src;
ds->desc_hw[num].daddr = dst;
ds->desc_hw[num].config = ccfg;
+
+	pr_debug("%s: k3_dma_desc_sw = %p, desc_hw = %p (num = %d) lli: 0x%x, count: 0x%x, saddr: 0x%x, daddr: 0x%x, cfg: 0x%x\n",
+		 __func__, (void *)ds, &ds->desc_hw[num], num,
+		 ds->desc_hw[num].lli, ds->desc_hw[num].count,
+		 ds->desc_hw[num].saddr, ds->desc_hw[num].daddr,
+		 ds->desc_hw[num].config);
+
}
static struct dma_async_tx_descriptor *k3_dma_prep_memcpy(
@@ -429,6 +474,7 @@ static struct dma_async_tx_descriptor *k3_dma_prep_memcpy(
dev_dbg(chan->device->dev, "vchan %p: kzalloc fail\n", &c->vc);
return NULL;
}
+ c->cyclic = 0;
ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
ds->size = len;
ds->desc_num = num;
@@ -474,15 +520,18 @@ static struct dma_async_tx_descriptor *k3_dma_prep_slave_sg(
if (sgl == NULL)
return NULL;
+ c->cyclic = 0;
+
for_each_sg(sgl, sg, sglen, i) {
avail = sg_dma_len(sg);
+		pr_debug("%s: avail = 0x%x\n", __func__, (int)avail);
if (avail > DMA_MAX_SIZE)
num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
}
ds = kzalloc(sizeof(*ds) + num * sizeof(ds->desc_hw[0]), GFP_ATOMIC);
if (!ds) {
- dev_dbg(chan->device->dev, "vchan %p: kzalloc fail\n", &c->vc);
+ dev_err(chan->device->dev, "vchan %p: kzalloc fail\n", &c->vc);
return NULL;
}
ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
@@ -517,6 +566,81 @@ static struct dma_async_tx_descriptor *k3_dma_prep_slave_sg(
return vchan_tx_prep(&c->vc, &ds->vd, flags);
}
+
+
+static struct dma_async_tx_descriptor *
+k3_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction dir,
+ unsigned long flags)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_desc_sw *ds;
+ size_t len, avail, total = 0;
+ dma_addr_t addr, src = 0, dst = 0;
+ int num = 1, since = 0;
+ struct k3_dma_dev *dev = to_k3_dma(c->vc.chan.device);
+	size_t modulo = 0x1000;	/* per-descriptor chunk size (below DMA_MAX_SIZE) */
+ u32 en_tc2 = 0;
+
+ pr_debug("%s: buf %p, dst %p, buf len %d, period_len = %d, dir %d\n",
+ __func__, (void *)buf_addr, (void *)to_k3_chan(chan)->dev_addr,
+ (int)buf_len, (int)period_len, (int)dir);
+
+ avail = buf_len;
+ if (avail > modulo)
+ num += DIV_ROUND_UP(avail, modulo) - 1;
+
+ ds = kzalloc(sizeof(*ds) + num * sizeof(ds->desc_hw[0]), GFP_ATOMIC);
+ if (!ds) {
+ dev_err(chan->device->dev, "vchan %p: kzalloc fail\n", &c->vc);
+ return NULL;
+ }
+ ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
+ ds->desc_num = num;
+
+ c->cyclic = 1;
+ addr = buf_addr;
+ avail = buf_len;
+ total = avail;
+ num = 0;
+
+ if (period_len < modulo)
+ modulo = period_len;
+
+ do {
+ len = min_t(size_t, avail, modulo);
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = addr;
+ dst = c->dev_addr;
+ } else if (dir == DMA_DEV_TO_MEM) {
+ src = c->dev_addr;
+ dst = addr;
+ }
+ since += len;
+ if (since >= period_len) {
+ /* descriptor asks for TC2 interrupt on completion */
+ en_tc2 = CX_CFG_NODEIRQ;
+ since -= period_len;
+		} else {
+			en_tc2 = 0;
+		}
+
+ k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg | en_tc2);
+
+ addr += len;
+ avail -= len;
+ } while (avail);
+
+ /* "Cyclic" == end of link points back to start of link */
+ ds->desc_hw[num - 1].lli |= ds->desc_hw_lli;
+
+ ds->size = total;
+
+ return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+
static int k3_dma_config(struct dma_chan *chan,
struct dma_slave_config *cfg)
{
@@ -552,7 +676,7 @@ static int k3_dma_config(struct dma_chan *chan,
c->ccfg |= (val << 12) | (val << 16);
if ((maxburst == 0) || (maxburst > 16))
- val = 16;
+ val = 15;
else
val = maxburst - 1;
c->ccfg |= (val << 20) | (val << 24);
@@ -721,11 +845,13 @@ static int k3_dma_probe(struct platform_device *op)
INIT_LIST_HEAD(&d->slave.channels);
dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
d->slave.dev = &op->dev;
d->slave.device_free_chan_resources = k3_dma_free_chan_resources;
d->slave.device_tx_status = k3_dma_tx_status;
d->slave.device_prep_dma_memcpy = k3_dma_prep_memcpy;
d->slave.device_prep_slave_sg = k3_dma_prep_slave_sg;
+ d->slave.device_prep_dma_cyclic = k3_dma_prep_dma_cyclic;
d->slave.device_issue_pending = k3_dma_issue_pending;
d->slave.device_config = k3_dma_config;
d->slave.device_pause = k3_dma_transfer_pause;
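
With DMA_CYCLIC advertised, peripheral drivers can set up circular buffers
through the standard dmaengine API. A minimal client-side sketch, assuming a
channel has already been requested and buf_dma/buf_len/period_len describe a
coherent ring buffer (my_period_callback and my_ctx are placeholders):

	struct dma_async_tx_descriptor *desc;

	desc = dmaengine_prep_dma_cyclic(chan, buf_dma, buf_len, period_len,
					 DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
	if (!desc)
		return -ENOMEM;

	/* vchan_cyclic_callback() above invokes this once per period (TC2) */
	desc->callback = my_period_callback;
	desc->callback_param = my_ctx;

	dmaengine_submit(desc);
	dma_async_issue_pending(chan);
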
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index cf478fe6b335..1368e97e0cd7 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -173,7 +173,20 @@ config QCOM_SCM_64
def_bool y
depends on QCOM_SCM && ARM64
+config HAVE_ARM_SMCCC
+ bool
+
source "drivers/firmware/broadcom/Kconfig"
+
+config REBOOT_REASON_SRAM
+	bool "Pass reboot reason to bootloader via SRAM"
+	default n
+	help
+	  Many systems need to pass a reboot reason to the bootloader,
+	  so that the bootloader can boot into a desired mode on the
+	  next boot. This option enables support for communicating this
+	  reason to the bootloader via SRAM.
+
source "drivers/firmware/google/Kconfig"
source "drivers/firmware/efi/Kconfig"
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 48dd4175297e..c1ac84532c2d 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_QCOM_SCM) += qcom_scm.o
obj-$(CONFIG_QCOM_SCM_64) += qcom_scm-64.o
obj-$(CONFIG_QCOM_SCM_32) += qcom_scm-32.o
CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch armv7-a\n.arch_extension sec,-DREQUIRES_SEC=1) -march=armv7-a
+obj-$(CONFIG_REBOOT_REASON_SRAM) += reboot_reason_sram.o
obj-y += broadcom/
obj-$(CONFIG_GOOGLE_FIRMWARE) += google/
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index d24f35d74b27..f25cd79c8a79 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -13,6 +13,7 @@
#define pr_fmt(fmt) "psci: " fmt
+#include <linux/arm-smccc.h>
#include <linux/errno.h>
#include <linux/linkage.h>
#include <linux/of.h>
@@ -58,8 +59,6 @@ struct psci_operations psci_ops;
typedef unsigned long (psci_fn)(unsigned long, unsigned long,
unsigned long, unsigned long);
-asmlinkage psci_fn __invoke_psci_fn_hvc;
-asmlinkage psci_fn __invoke_psci_fn_smc;
static psci_fn *invoke_psci_fn;
enum psci_function {
@@ -107,6 +106,26 @@ bool psci_power_state_is_valid(u32 state)
return !(state & ~valid_mask);
}
+static unsigned long __invoke_psci_fn_hvc(unsigned long function_id,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_hvc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res);
+ return res.a0;
+}
+
+static unsigned long __invoke_psci_fn_smc(unsigned long function_id,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res);
+ return res.a0;
+}
+
static int psci_to_linux_errno(int errno)
{
switch (errno) {
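
Every PSCI call funnels through one of these two helpers via the
invoke_psci_fn pointer. For illustration, a CPU_ON call looks like this
(it mirrors the existing callers in this file, it is not new code):

	static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
	{
		int err;
		u32 fn = psci_function_id[PSCI_FN_CPU_ON];

		err = invoke_psci_fn(fn, cpuid, entry_point, 0);
		return psci_to_linux_errno(err);
	}
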
diff --git a/drivers/firmware/reboot_reason_sram.c b/drivers/firmware/reboot_reason_sram.c
new file mode 100644
index 000000000000..af87b6d34f77
--- /dev/null
+++ b/drivers/firmware/reboot_reason_sram.c
@@ -0,0 +1,107 @@
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/reboot.h>
+
+/* Types of reasons */
+enum {
+ NONE,
+ BOOTLOADER,
+ RECOVERY,
+ OEM,
+ MAX_REASONS
+};
+
+static u32 reasons[MAX_REASONS];
+static void __iomem *reboot_reason_val_addr;
+static struct notifier_block reboot_nb;
+
+static int reboot_reason(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ char *cmd = (char *)data;
+ u32 reason = reasons[NONE];
+
+ if (!reboot_reason_val_addr)
+ return NOTIFY_DONE;
+
+ if (cmd != NULL) {
+ if (!strncmp(cmd, "bootloader", 10))
+ reason = reasons[BOOTLOADER];
+ else if (!strncmp(cmd, "recovery", 8))
+ reason = reasons[RECOVERY];
+ else if (!strncmp(cmd, "oem-", 4)) {
+ unsigned long code;
+
+ if (!kstrtoul(cmd+4, 0, &code))
+ reason = reasons[OEM] | (code & 0xff);
+ }
+ }
+
+ if (reason != -1)
+ writel(reason, reboot_reason_val_addr);
+ return NOTIFY_DONE;
+}
+
+static int reboot_reason_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ u32 val;
+ int i;
+
+ /* initialize the reasons */
+ for (i = 0; i < MAX_REASONS; i++)
+ reasons[i] = -1;
+
+ /* Try to grab the reason io address */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ reboot_reason_val_addr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(reboot_reason_val_addr))
+ return PTR_ERR(reboot_reason_val_addr);
+
+ /* initialize specified reasons from DT */
+ if (!of_property_read_u32(pdev->dev.of_node, "reason,none", &val))
+ reasons[NONE] = val;
+ if (!of_property_read_u32(pdev->dev.of_node, "reason,bootloader", &val))
+ reasons[BOOTLOADER] = val;
+ if (!of_property_read_u32(pdev->dev.of_node, "reason,recovery", &val))
+ reasons[RECOVERY] = val;
+ if (!of_property_read_u32(pdev->dev.of_node, "reason,oem", &val))
+ reasons[OEM] = val;
+
+ /* Install the notifier */
+ reboot_nb.notifier_call = reboot_reason;
+ reboot_nb.priority = 256;
+	if (register_reboot_notifier(&reboot_nb))
+		dev_err(&pdev->dev, "failed to register reboot notifier\n");
+ return 0;
+}
+
+static int reboot_reason_remove(struct platform_device *pdev)
+{
+ unregister_reboot_notifier(&reboot_nb);
+ return 0;
+}
+
+static const struct of_device_id reboot_reason_of_match[] = {
+ { .compatible = "linux,reboot-reason-sram", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, reboot_reason_of_match);
+
+static struct platform_driver reboot_reason_driver = {
+ .driver = {
+ .name = "reboot-reason-sram",
+ .of_match_table = reboot_reason_of_match,
+ },
+ .probe = reboot_reason_probe,
+ .remove = reboot_reason_remove,
+};
+module_platform_driver(reboot_reason_driver);
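
The command strings matched in reboot_reason() are the ones userspace hands to
the reboot syscall; the kernel forwards the string to reboot notifiers. A
sketch of triggering a "bootloader" reboot from userspace (standard Linux API,
not part of this patch):

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/reboot.h>

	int reboot_to_bootloader(void)
	{
		/* LINUX_REBOOT_CMD_RESTART2 passes the string to the kernel */
		return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1,
			       LINUX_REBOOT_MAGIC2,
			       LINUX_REBOOT_CMD_RESTART2, "bootloader");
	}
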
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index e9ed439a5b65..66386b42a1f4 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -2,5 +2,5 @@
# taken to initialize them in the correct order. Link order is the only way
# to ensure this currently.
obj-$(CONFIG_TEGRA_HOST1X) += host1x/
-obj-y += drm/ vga/
+obj-y += drm/ vga/ arm/
obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/
diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig
new file mode 100644
index 000000000000..255cc81c7d23
--- /dev/null
+++ b/drivers/gpu/arm/Kconfig
@@ -0,0 +1 @@
+source "drivers/gpu/arm/utgard/Kconfig"
diff --git a/drivers/gpu/arm/Makefile b/drivers/gpu/arm/Makefile
new file mode 100644
index 000000000000..e4dcf28c56f4
--- /dev/null
+++ b/drivers/gpu/arm/Makefile
@@ -0,0 +1 @@
+obj-y += utgard/
diff --git a/drivers/gpu/arm/utgard/Kbuild b/drivers/gpu/arm/utgard/Kbuild
new file mode 100644
index 000000000000..bf247bcd3e79
--- /dev/null
+++ b/drivers/gpu/arm/utgard/Kbuild
@@ -0,0 +1,207 @@
+#
+# Copyright (C) 2010-2011 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+
+# This file is called by the Linux build system.
+
+# set up defaults if not defined by the user
+TIMESTAMP ?= default
+OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB ?= 16
+USING_GPU_UTILIZATION ?= 0
+PROFILING_SKIP_PP_JOBS ?= 0
+PROFILING_SKIP_PP_AND_GP_JOBS ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP ?= 0
+MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED ?= 0
+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS ?= 0
+MALI_UPPER_HALF_SCHEDULING ?= 1
+MALI_ENABLE_CPU_CYCLES ?= 0
+
+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases:
+# The in-tree driver will only use the GPL releases.
+ccflags-y += -I$(src)/linux/license/gpl
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+ ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflicts with USING_DVFS; see the Integration Guide to choose the one you need)
+ endif
+endif
+
+mali-y += \
+ linux/mali_osk_atomics.o \
+ linux/mali_osk_irq.o \
+ linux/mali_osk_wq.o \
+ linux/mali_osk_locks.o \
+ linux/mali_osk_wait_queue.o \
+ linux/mali_osk_low_level_mem.o \
+ linux/mali_osk_math.o \
+ linux/mali_osk_memory.o \
+ linux/mali_osk_misc.o \
+ linux/mali_osk_mali.o \
+ linux/mali_osk_notification.o \
+ linux/mali_osk_time.o \
+ linux/mali_osk_timers.o \
+ linux/mali_osk_bitmap.o
+
+mali-y += linux/mali_memory.o linux/mali_memory_os_alloc.o
+mali-y += linux/mali_memory_external.o
+mali-y += linux/mali_memory_block_alloc.o
+mali-y += linux/mali_memory_swap_alloc.o
+
+mali-y += \
+ linux/mali_memory_manager.o \
+ linux/mali_memory_virtual.o \
+ linux/mali_memory_util.o \
+ linux/mali_memory_cow.o \
+ linux/mali_memory_defer_bind.o
+
+mali-y += \
+ linux/mali_ukk_mem.o \
+ linux/mali_ukk_gp.o \
+ linux/mali_ukk_pp.o \
+ linux/mali_ukk_core.o \
+ linux/mali_ukk_soft_job.o \
+ linux/mali_ukk_timeline.o
+
+# Source files which always are included in a build
+mali-y += \
+ common/mali_kernel_core.o \
+ linux/mali_kernel_linux.o \
+ common/mali_session.o \
+ linux/mali_device_pause_resume.o \
+ common/mali_kernel_vsync.o \
+ linux/mali_ukk_vsync.o \
+ linux/mali_kernel_sysfs.o \
+ common/mali_mmu.o \
+ common/mali_mmu_page_directory.o \
+ common/mali_mem_validation.o \
+ common/mali_hw_core.o \
+ common/mali_gp.o \
+ common/mali_pp.o \
+ common/mali_pp_job.o \
+ common/mali_gp_job.o \
+ common/mali_soft_job.o \
+ common/mali_scheduler.o \
+ common/mali_executor.o \
+ common/mali_group.o \
+ common/mali_dlbu.o \
+ common/mali_broadcast.o \
+ common/mali_pm.o \
+ common/mali_pmu.o \
+ common/mali_user_settings_db.o \
+ common/mali_kernel_utilization.o \
+ common/mali_control_timer.o \
+ common/mali_l2_cache.o \
+ common/mali_timeline.o \
+ common/mali_timeline_fence_wait.o \
+ common/mali_timeline_sync_fence.o \
+ common/mali_spinlock_reentrant.o \
+ common/mali_pm_domain.o \
+ linux/mali_osk_pm.o \
+ linux/mali_pmu_power_up_down.o \
+ __malidrv_build_info.o
+
+EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+mali-y += platform/hikey/mali_hikey.o
+
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_ukk_profiling.o
+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_osk_profiling.o
+
+mali-$(CONFIG_MALI400_INTERNAL_PROFILING) += linux/mali_profiling_internal.o timestamp-$(TIMESTAMP)/mali_timestamp.o
+ccflags-$(CONFIG_MALI400_INTERNAL_PROFILING) += -I$(src)/timestamp-$(TIMESTAMP)
+
+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_dma_buf.o
+mali-$(CONFIG_SYNC) += linux/mali_sync.o
+ccflags-$(CONFIG_SYNC) += -Idrivers/staging/android
+
+mali-$(CONFIG_MALI400_UMP) += linux/mali_memory_ump.o
+
+mali-$(CONFIG_MALI_DVFS) += common/mali_dvfs_policy.o
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI400) := mali.o
+
+ccflags-y += $(EXTRA_DEFINES)
+
+# Set up our defines, which will be passed to gcc
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP)
+ccflags-y += -DMALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED=$(MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED)
+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS)
+ccflags-y += -DMALI_STATE_TRACKING=1
+ccflags-y += -DMALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+ccflags-y += -DUSING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=$(MALI_ENABLE_CPU_CYCLES)
+
+ifeq ($(MALI_UPPER_HALF_SCHEDULING),1)
+ ccflags-y += -DMALI_UPPER_HALF_SCHEDULING
+endif
+
+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../../ump/include/ump
+ccflags-$(CONFIG_MALI400_DEBUG) += -DDEBUG
+
+# Use our defines when compiling
+ccflags-y += -I$(src) -I$(src)/include -I$(src)/common -I$(src)/linux -I$(src)/platform -Wno-date-time
+
+# Get subversion revision number, fall back to only ${MALI_RELEASE_NAME} if no svn info is available
+MALI_RELEASE_NAME=$(shell cat $(src)/.version 2> /dev/null)
+
+SVN_INFO = (cd $(src); svn info 2>/dev/null)
+
+ifneq ($(shell $(SVN_INFO) 2>/dev/null),)
+# SVN detected
+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null)
+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV)
+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-)
+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-)
+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-)
+
+else # SVN
+GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null)
+ifneq ($(GIT_REV),)
+# Git detected
+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV)
+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci")
+CHANGED_REVISION := $(GIT_REV)
+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null)
+
+else # Git
+# No Git or SVN detected
+DRIVER_REV := $(MALI_RELEASE_NAME)
+CHANGE_DATE := $(MALI_RELEASE_NAME)
+CHANGED_REVISION := $(MALI_RELEASE_NAME)
+endif
+endif
+
+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\"
+
+VERSION_STRINGS :=
+VERSION_STRINGS += API_VERSION=$(shell cd $(src); grep "\#define _MALI_API_VERSION" $(FILES_PREFIX)include/linux/mali/mali_utgard_uk_types.h | cut -d' ' -f 3 )
+VERSION_STRINGS += REPO_URL=$(REPO_URL)
+VERSION_STRINGS += REVISION=$(DRIVER_REV)
+VERSION_STRINGS += CHANGED_REVISION=$(CHANGED_REVISION)
+VERSION_STRINGS += CHANGE_DATE=$(CHANGE_DATE)
+VERSION_STRINGS += BUILD_DATE=$(shell date)
+ifdef CONFIG_MALI400_DEBUG
+VERSION_STRINGS += BUILD=debug
+else
+VERSION_STRINGS += BUILD=release
+endif
+VERSION_STRINGS += TARGET_PLATFORM=$(TARGET_PLATFORM)
+VERSION_STRINGS += MALI_PLATFORM=$(MALI_PLATFORM)
+VERSION_STRINGS += KDIR=$(KDIR)
+VERSION_STRINGS += OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB)
+VERSION_STRINGS += USING_UMP=$(CONFIG_MALI400_UMP)
+VERSION_STRINGS += USING_PROFILING=$(CONFIG_MALI400_PROFILING)
+VERSION_STRINGS += USING_INTERNAL_PROFILING=$(CONFIG_MALI400_INTERNAL_PROFILING)
+VERSION_STRINGS += USING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION)
+VERSION_STRINGS += USING_DVFS=$(CONFIG_MALI_DVFS)
+VERSION_STRINGS += MALI_UPPER_HALF_SCHEDULING=$(MALI_UPPER_HALF_SCHEDULING)
+
+# Create file with Mali driver configuration
+$(src)/__malidrv_build_info.c:
+ @echo 'const char *__malidrv_build_info(void) { return "malidrv: $(VERSION_STRINGS)";}' > $(src)/__malidrv_build_info.c
diff --git a/drivers/gpu/arm/utgard/Kconfig b/drivers/gpu/arm/utgard/Kconfig
new file mode 100644
index 000000000000..e174e572d182
--- /dev/null
+++ b/drivers/gpu/arm/utgard/Kconfig
@@ -0,0 +1,118 @@
+config MALI400
+ tristate "Mali-300/400/450 support"
+ depends on ARM || ARM64
+ select DMA_SHARED_BUFFER
+ ---help---
+ This enables support for the ARM Mali-300, Mali-400, and Mali-450
+ GPUs.
+
+ To compile this driver as a module, choose M here: the module will be
+ called mali.
+
+config MALI450
+ bool "Enable Mali-450 support"
+ depends on MALI400
+ ---help---
+ This enables support for Mali-450 specific features.
+
+config MALI470
+ bool "Enable Mali-470 support"
+ depends on MALI400
+ ---help---
+ This enables support for Mali-470 specific features.
+
+config MALI400_DEBUG
+ bool "Enable debug in Mali driver"
+ depends on MALI400
+ ---help---
+	  This enables extra debug checks and messages in the Mali driver.
+
+config MALI400_PROFILING
+ bool "Enable Mali profiling"
+ depends on MALI400
+ select TRACEPOINTS
+ default y
+ ---help---
+ This enables gator profiling of Mali GPU events.
+
+config MALI400_INTERNAL_PROFILING
+ bool "Enable internal Mali profiling API"
+ depends on MALI400_PROFILING
+ default n
+ ---help---
+ This enables the internal legacy Mali profiling API.
+
+config MALI400_UMP
+ bool "Enable UMP support"
+ depends on MALI400
+ ---help---
+ This enables support for the UMP memory sharing API in the Mali driver.
+
+config MALI_DVFS
+	bool "Enable Mali dynamic frequency scaling"
+	depends on MALI400
+	default y
+	---help---
+	  This enables support for dynamically changing the Mali frequency with the goal of lowering power consumption.
+
+config MALI_DMA_BUF_MAP_ON_ATTACH
+ bool "Map dma-buf attachments on attach"
+ depends on MALI400 && DMA_SHARED_BUFFER
+ default y
+ ---help---
+	  This makes the Mali driver map dma-buf attachments at attach
+	  time. If this is not set, dma-buf attachments will be mapped
+	  every time the GPU needs to access the buffer.
+
+	  Mapping on each access can reduce performance.
+
+config MALI_SHARED_INTERRUPTS
+ bool "Support for shared interrupts"
+ depends on MALI400
+ default n
+ ---help---
+ Adds functionality required to properly support shared interrupts. Without this support,
+ the device driver will fail during insmod if it detects shared interrupts. This also
+ works when the GPU is not using shared interrupts, but might have a slight performance
+ impact.
+
+config MALI_PMU_PARALLEL_POWER_UP
+ bool "Power up Mali PMU domains in parallel"
+ depends on MALI400
+ default n
+ ---help---
+ This makes the Mali driver power up all PMU power domains in parallel, instead of
+ powering up domains one by one, with a slight delay in between. Powering on all power
+ domains at the same time may cause peak currents higher than what some systems can handle.
+ These systems must not enable this option.
+
+config MALI_DT
+ bool "Using device tree to initialize module"
+ depends on MALI400 && OF
+ default n
+ ---help---
+	  This enables the Mali driver to use the device tree to obtain its
+	  platform resources, and disables the old config method. The Mali
+	  driver can run on platforms where the device tree is enabled in the
+	  kernel and the corresponding hardware description is properly
+	  implemented in the board's DTS file.
+
+config MALI_PLAT_SPECIFIC_DT
+ bool "Platform specific Device Tree is being used"
+ depends on MALI_DT
+ default n
+ ---help---
+	  This is a pragmatic approach for some platforms which use
+	  a device tree entry that does not strictly comply with what
+	  the standard Utgard driver expects to find, but have their
+	  platform data implemented the old way. Such platforms should
+	  be converted to using the device tree so that this
+	  configuration option can be removed.
+
+config MALI_QUIET
+ bool "Make Mali driver very quiet"
+ depends on MALI400 && !MALI400_DEBUG
+ default n
+ ---help---
+ This forces the Mali driver to never print any messages.
+
+ If unsure, say N.
diff --git a/drivers/gpu/arm/utgard/Makefile b/drivers/gpu/arm/utgard/Makefile
new file mode 100644
index 000000000000..44c7bb83981a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/Makefile
@@ -0,0 +1,190 @@
+#
+# Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained from Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+
+USE_UMPV2=0
+USING_PROFILING ?= 1
+USING_INTERNAL_PROFILING ?= 0
+USING_DVFS ?= 1
+MALI_HEATMAPS_ENABLED ?= 0
+MALI_DMA_BUF_MAP_ON_ATTACH ?= 1
+MALI_PMU_PARALLEL_POWER_UP ?= 0
+USING_DT ?= 0
+MALI_MEM_SWAP_TRACKING ?= 0
+
+# The Makefile sets up "arch" based on the CONFIG, creates the version info
+# string and the __malidrv_build_info.c file, and then call the Linux build
+# system to actually build the driver. After that point the Kbuild file takes
+# over.
+
+# set up defaults if not defined by the user
+ARCH ?= arm
+
+OSKOS=linux
+FILES_PREFIX=
+
+check_cc2 = \
+ $(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \
+ then \
+ echo "$(2)"; \
+ else \
+ echo "$(3)"; \
+ fi ;)
+
+# This conditional makefile exports the global definition ARM_INTERNAL_BUILD. Customer releases will not include arm_internal.mak
+-include ../../../arm_internal.mak
+
+# Give warning of old config parameters are used
+ifneq ($(CONFIG),)
+$(warning "You have specified the CONFIG variable which is no longer in use. Use TARGET_PLATFORM instead.")
+endif
+
+ifneq ($(CPU),)
+$(warning "You have specified the CPU variable which is no longer in use. Use TARGET_PLATFORM instead.")
+endif
+
+# Include the mapping between TARGET_PLATFORM and KDIR + MALI_PLATFORM
+-include MALI_CONFIGURATION
+export KDIR ?= $(KDIR-$(TARGET_PLATFORM))
+export MALI_PLATFORM ?= $(MALI_PLATFORM-$(TARGET_PLATFORM))
+
+ifneq ($(TARGET_PLATFORM),)
+ifeq ($(MALI_PLATFORM),)
+$(error "Invalid TARGET_PLATFORM: $(TARGET_PLATFORM)")
+endif
+endif
+
+# validate lookup result
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(TARGET_PLATFORM))
+endif
+
+ifeq ($(USING_GPU_UTILIZATION), 1)
+ ifeq ($(USING_DVFS), 1)
+        $(error USING_GPU_UTILIZATION conflicts with USING_DVFS; see the Integration Guide to choose the one you need)
+ endif
+endif
+
+ifeq ($(USING_UMP),1)
+export CONFIG_MALI400_UMP=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_UMP=1
+ifeq ($(USE_UMPV2),1)
+UMP_SYMVERS_FILE ?= ../umpv2/Module.symvers
+else
+UMP_SYMVERS_FILE ?= ../ump/Module.symvers
+endif
+KBUILD_EXTRA_SYMBOLS = $(realpath $(UMP_SYMVERS_FILE))
+$(warning $(KBUILD_EXTRA_SYMBOLS))
+endif
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+include $(KDIR)/.config
+
+ifeq ($(ARCH), arm)
+# when compiling for ARM we're cross compiling
+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-)
+endif
+
+# report detected/selected settings
+ifdef ARM_INTERNAL_BUILD
+$(warning TARGET_PLATFORM $(TARGET_PLATFORM))
+$(warning KDIR $(KDIR))
+$(warning MALI_PLATFORM $(MALI_PLATFORM))
+endif
+
+# Set up build config
+export CONFIG_MALI400=m
+export CONFIG_MALI450=y
+export CONFIG_MALI470=y
+
+export EXTRA_DEFINES += -DCONFIG_MALI400=1
+export EXTRA_DEFINES += -DCONFIG_MALI450=1
+export EXTRA_DEFINES += -DCONFIG_MALI470=1
+
+ifneq ($(MALI_PLATFORM),)
+export EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1
+export MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c)
+endif
+
+ifeq ($(USING_PROFILING),1)
+ifeq ($(CONFIG_TRACEPOINTS),)
+$(warning CONFIG_TRACEPOINTS required for profiling)
+else
+export CONFIG_MALI400_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_PROFILING=1
+ifeq ($(USING_INTERNAL_PROFILING),1)
+export CONFIG_MALI400_INTERNAL_PROFILING=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_INTERNAL_PROFILING=1
+endif
+ifeq ($(MALI_HEATMAPS_ENABLED),1)
+export MALI_HEATMAPS_ENABLED=y
+export EXTRA_DEFINES += -DCONFIG_MALI400_HEATMAPS_ENABLED
+endif
+endif
+endif
+
+ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),1)
+export CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH
+endif
+
+ifeq ($(MALI_SHARED_INTERRUPTS),1)
+export CONFIG_MALI_SHARED_INTERRUPTS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_SHARED_INTERRUPTS
+endif
+
+ifeq ($(USING_DVFS),1)
+export CONFIG_MALI_DVFS=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DVFS
+endif
+
+ifeq ($(MALI_PMU_PARALLEL_POWER_UP),1)
+export CONFIG_MALI_PMU_PARALLEL_POWER_UP=y
+export EXTRA_DEFINES += -DCONFIG_MALI_PMU_PARALLEL_POWER_UP
+endif
+
+ifdef CONFIG_OF
+ifeq ($(USING_DT),1)
+export CONFIG_MALI_DT=y
+export EXTRA_DEFINES += -DCONFIG_MALI_DT
+endif
+endif
+
+ifneq ($(BUILD),release)
+# Debug
+export CONFIG_MALI400_DEBUG=y
+else
+# Release
+ifeq ($(MALI_QUIET),1)
+export CONFIG_MALI_QUIET=y
+export EXTRA_DEFINES += -DCONFIG_MALI_QUIET
+endif
+endif
+
+ifeq ($(MALI_SKIP_JOBS),1)
+EXTRA_DEFINES += -DPROFILING_SKIP_PP_JOBS=1 -DPROFILING_SKIP_GP_JOBS=1
+endif
+
+ifeq ($(MALI_MEM_SWAP_TRACKING),1)
+EXTRA_DEFINES += -DMALI_MEM_SWAP_TRACKING=1
+endif
+
+all: $(UMP_SYMVERS_FILE)
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules
+ @rm $(FILES_PREFIX)__malidrv_build_info.c $(FILES_PREFIX)__malidrv_build_info.o
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+kernelrelease:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) kernelrelease
+
+export CONFIG KBUILD_EXTRA_SYMBOLS
diff --git a/drivers/gpu/arm/utgard/common/mali_broadcast.c b/drivers/gpu/arm/utgard/common/mali_broadcast.c
new file mode 100644
index 000000000000..136db61ace4a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_broadcast.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_broadcast.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#define MALI_BROADCAST_REGISTER_SIZE 0x1000
+#define MALI_BROADCAST_REG_BROADCAST_MASK 0x0
+#define MALI_BROADCAST_REG_INTERRUPT_MASK 0x4
+
+struct mali_bcast_unit {
+ struct mali_hw_core hw_core;
+ u32 current_mask;
+};
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource)
+{
+ struct mali_bcast_unit *bcast_unit = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(resource);
+ MALI_DEBUG_PRINT(2, ("Broadcast: Creating Mali Broadcast unit: %s\n",
+ resource->description));
+
+ bcast_unit = _mali_osk_malloc(sizeof(struct mali_bcast_unit));
+ if (NULL == bcast_unit) {
+ MALI_PRINT_ERROR(("Broadcast: Failed to allocate memory for Broadcast unit\n"));
+ return NULL;
+ }
+
+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&bcast_unit->hw_core,
+ resource, MALI_BROADCAST_REGISTER_SIZE)) {
+ bcast_unit->current_mask = 0;
+ mali_bcast_reset(bcast_unit);
+
+ return bcast_unit;
+ } else {
+ MALI_PRINT_ERROR(("Broadcast: Failed map broadcast unit\n"));
+ }
+
+ _mali_osk_free(bcast_unit);
+
+ return NULL;
+}
+
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit)
+{
+ MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+ mali_hw_core_delete(&bcast_unit->hw_core);
+ _mali_osk_free(bcast_unit);
+}
+
+/* Add @group's broadcast id to the bcast mask.
+ * Note: calling this repeatedly with the same @group has
+ * the same effect as calling it once.
+ */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit,
+ struct mali_group *group)
+{
+ u32 bcast_id;
+ u32 broadcast_mask;
+
+ MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+ broadcast_mask = bcast_unit->current_mask;
+
+ broadcast_mask |= (bcast_id); /* add PP core to broadcast */
+ broadcast_mask |= (bcast_id << 16); /* add MMU to broadcast */
+
+ /* store mask so we can restore on reset */
+ bcast_unit->current_mask = broadcast_mask;
+}
+
+/* Remove @group's broadcast id from the bcast mask.
+ * Note: calling this repeatedly with the same @group has
+ * the same effect as calling it once.
+ */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit,
+ struct mali_group *group)
+{
+ u32 bcast_id;
+ u32 broadcast_mask;
+
+ MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group));
+
+ broadcast_mask = bcast_unit->current_mask;
+
+ broadcast_mask &= ~((bcast_id << 16) | bcast_id);
+
+ /* store mask so we can restore on reset */
+ bcast_unit->current_mask = broadcast_mask;
+}
+
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit)
+{
+ MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+ MALI_DEBUG_PRINT(4,
+ ("Broadcast: setting mask 0x%08X + 0x%08X (reset)\n",
+ bcast_unit->current_mask,
+ bcast_unit->current_mask & 0xFF));
+
+ /* set broadcast mask */
+ mali_hw_core_register_write(&bcast_unit->hw_core,
+ MALI_BROADCAST_REG_BROADCAST_MASK,
+ bcast_unit->current_mask);
+
+ /* set IRQ override mask */
+ mali_hw_core_register_write(&bcast_unit->hw_core,
+ MALI_BROADCAST_REG_INTERRUPT_MASK,
+ bcast_unit->current_mask & 0xFF);
+}
+
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit)
+{
+ MALI_DEBUG_ASSERT_POINTER(bcast_unit);
+
+ MALI_DEBUG_PRINT(4, ("Broadcast: setting mask 0x0 + 0x0 (disable)\n"));
+
+ /* set broadcast mask */
+ mali_hw_core_register_write(&bcast_unit->hw_core,
+ MALI_BROADCAST_REG_BROADCAST_MASK,
+ 0x0);
+
+ /* set IRQ override mask */
+ mali_hw_core_register_write(&bcast_unit->hw_core,
+ MALI_BROADCAST_REG_INTERRUPT_MASK,
+ 0x0);
+}
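
As a worked example of the mask layout used by mali_bcast_add_group() and
mali_bcast_remove_group() (the helper name and value are illustrative): PP
cores occupy bits 15:0 and their MMUs the mirrored bits 31:16, so

	static inline u32 bcast_mask_for(u32 bcast_id)
	{
		return bcast_id | (bcast_id << 16);	/* PP | MMU */
	}

	/* bcast_mask_for(0x4) == 0x00040004 */
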
diff --git a/drivers/gpu/arm/utgard/common/mali_broadcast.h b/drivers/gpu/arm/utgard/common/mali_broadcast.h
new file mode 100644
index 000000000000..efce44142ee9
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_broadcast.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_BROADCAST_H__
+#define __MALI_BROADCAST_H__
+
+/*
+ * Interface for the broadcast unit on Mali-450.
+ *
+ * - Represents up to 8 × (MMU + PP) pairs.
+ * - Supports dynamically changing which (MMU + PP) pairs receive the broadcast by
+ * setting a mask.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_group.h"
+
+struct mali_bcast_unit;
+
+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource);
+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit);
+
+/* Add a group to the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Remove a group from the list of (MMU + PP) pairs broadcasts go out to. */
+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group);
+
+/* Re-set cached mask. This needs to be called after having been suspended. */
+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Disable broadcast unit
+ *
+ * mali_bcast_enable must be called to re-enable the unit. Cores may not be
+ * added or removed when the unit is disabled.
+ */
+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit);
+
+/**
+ * Re-enable broadcast unit
+ *
+ * This resets the masks to include the cores present when mali_bcast_disable was called.
+ */
+MALI_STATIC_INLINE void mali_bcast_enable(struct mali_bcast_unit *bcast_unit)
+{
+ mali_bcast_reset(bcast_unit);
+}
+
+#endif /* __MALI_BROADCAST_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_control_timer.c b/drivers/gpu/arm/utgard/common/mali_control_timer.c
new file mode 100644
index 000000000000..d0dd95ac1b39
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_control_timer.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+static u64 period_start_time = 0;
+
+static _mali_osk_timer_t *mali_control_timer = NULL;
+static mali_bool timer_running = MALI_FALSE;
+
+static u32 mali_control_timeout = 1000;
+
+void mali_control_timer_add(u32 timeout)
+{
+ _mali_osk_timer_add(mali_control_timer, _mali_osk_time_mstoticks(timeout));
+}
+
+static void mali_control_timer_callback(void *arg)
+{
+ if (mali_utilization_enabled()) {
+ struct mali_gpu_utilization_data *util_data = NULL;
+ u64 time_period = 0;
+ mali_bool need_add_timer = MALI_TRUE;
+
+ /* Calculate gpu utilization */
+ util_data = mali_utilization_calculate(&period_start_time, &time_period, &need_add_timer);
+
+ if (util_data) {
+#if defined(CONFIG_MALI_DVFS)
+ mali_dvfs_policy_realize(util_data, time_period);
+#else
+ mali_utilization_platform_realize(util_data);
+#endif
+
+ if (MALI_TRUE == need_add_timer) {
+ mali_control_timer_add(mali_control_timeout);
+ }
+ }
+ }
+}
+
+/* Init a timer (for now it is used for GPU utilization and dvfs) */
+_mali_osk_errcode_t mali_control_timer_init(void)
+{
+ _mali_osk_device_data data;
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+ /* Use device specific settings (if defined) */
+ if (0 != data.control_interval) {
+ mali_control_timeout = data.control_interval;
+ MALI_DEBUG_PRINT(2, ("Mali GPU Timer: %u\n", mali_control_timeout));
+ }
+ }
+
+ mali_control_timer = _mali_osk_timer_init();
+ if (NULL == mali_control_timer) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ _mali_osk_timer_setcallback(mali_control_timer, mali_control_timer_callback, NULL);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_control_timer_term(void)
+{
+ if (NULL != mali_control_timer) {
+ _mali_osk_timer_del(mali_control_timer);
+ timer_running = MALI_FALSE;
+ _mali_osk_timer_term(mali_control_timer);
+ mali_control_timer = NULL;
+ }
+}
+
+mali_bool mali_control_timer_resume(u64 time_now)
+{
+ mali_utilization_data_assert_locked();
+
+ if (timer_running != MALI_TRUE) {
+ timer_running = MALI_TRUE;
+
+ period_start_time = time_now;
+
+ mali_utilization_reset();
+
+ return MALI_TRUE;
+ }
+
+ return MALI_FALSE;
+}
+
+void mali_control_timer_pause(void)
+{
+ mali_utilization_data_assert_locked();
+ if (timer_running == MALI_TRUE) {
+ timer_running = MALI_FALSE;
+ }
+}
+
+void mali_control_timer_suspend(mali_bool suspend)
+{
+ mali_utilization_data_lock();
+
+ if (timer_running == MALI_TRUE) {
+ timer_running = MALI_FALSE;
+
+ mali_utilization_data_unlock();
+
+ if (suspend == MALI_TRUE) {
+ _mali_osk_timer_del(mali_control_timer);
+ mali_utilization_reset();
+ }
+ } else {
+ mali_utilization_data_unlock();
+ }
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_control_timer.h b/drivers/gpu/arm/utgard/common/mali_control_timer.h
new file mode 100644
index 000000000000..4f919ecfd70a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_control_timer.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_CONTROL_TIMER_H__
+#define __MALI_CONTROL_TIMER_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_control_timer_init(void);
+
+void mali_control_timer_term(void);
+
+mali_bool mali_control_timer_resume(u64 time_now);
+
+void mali_control_timer_suspend(mali_bool suspend);
+void mali_control_timer_pause(void);
+
+void mali_control_timer_add(u32 timeout);
+
+#endif /* __MALI_CONTROL_TIMER_H__ */
+
diff --git a/drivers/gpu/arm/utgard/common/mali_dlbu.c b/drivers/gpu/arm/utgard/common/mali_dlbu.c
new file mode 100644
index 000000000000..efe1ab3ab5cc
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_dlbu.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_dlbu.h"
+#include "mali_memory.h"
+#include "mali_pp.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+/**
+ * Size of DLBU registers in bytes
+ */
+#define MALI_DLBU_SIZE 0x400
+
+mali_dma_addr mali_dlbu_phys_addr = 0;
+static mali_io_address mali_dlbu_cpu_addr = NULL;
+
+/**
+ * DLBU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_dlbu_register {
+ MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR = 0x0000, /**< Master tile list physical base address;
+ 31:12 Physical address of the page used for the DLBU
+ 0 DLBU enable - setting this bit to 1 enables the AXI bus
+ between PPs and L2s; setting it to 0 disables the router and
+ no further transactions are sent to the DLBU */
+ MALI_DLBU_REGISTER_MASTER_TLLIST_VADDR = 0x0004, /**< Master tile list virtual base address;
+ 31:12 Virtual address to the page used for the DLBU */
+ MALI_DLBU_REGISTER_TLLIST_VBASEADDR = 0x0008, /**< Tile list virtual base address;
+ 31:12 Virtual address to the tile list. This address is used when
+ calculating the call address sent to PP.*/
+ MALI_DLBU_REGISTER_FB_DIM = 0x000C, /**< Framebuffer dimension;
+ 23:16 Number of tiles in Y direction-1
+ 7:0 Number of tiles in X direction-1 */
+ MALI_DLBU_REGISTER_TLLIST_CONF = 0x0010, /**< Tile list configuration;
+ 29:28 select the size of each allocated block: 0=128 bytes, 1=256, 2=512, 3=1024
+ 21:16 2^n number of tiles to be binned to one tile list in Y direction
+ 5:0 2^n number of tiles to be binned to one tile list in X direction */
+ MALI_DLBU_REGISTER_START_TILE_POS = 0x0014, /**< Start tile positions;
+ 31:24 start position in Y direction for group 1
+ 23:16 start position in X direction for group 1
+ 15:8 start position in Y direction for group 0
+ 7:0 start position in X direction for group 0 */
+ MALI_DLBU_REGISTER_PP_ENABLE_MASK = 0x0018, /**< PP enable mask;
+ 7 enable PP7 for load balancing
+ 6 enable PP6 for load balancing
+ 5 enable PP5 for load balancing
+ 4 enable PP4 for load balancing
+ 3 enable PP3 for load balancing
+ 2 enable PP2 for load balancing
+ 1 enable PP1 for load balancing
+ 0 enable PP0 for load balancing */
+} mali_dlbu_register;
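As a concrete reading of the FB_DIM layout above, a hypothetical helper (not part of the driver) that packs a framebuffer size into that register format, assuming the 16x16-pixel tile size of Mali-400 class hardware:

    /* bits 23:16 = tiles_in_y - 1, bits 7:0 = tiles_in_x - 1 */
    static inline u32 mali_dlbu_pack_fb_dim(u32 width_px, u32 height_px)
    {
        u32 tiles_x = (width_px + 15) / 16;   /* round up to whole tiles */
        u32 tiles_y = (height_px + 15) / 16;

        /* e.g. 1920x1080 -> 120x68 tiles -> 0x00430077 */
        return ((tiles_y - 1) << 16) | (tiles_x - 1);
    }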
+
+typedef enum {
+ PP0ENABLE = 0,
+ PP1ENABLE,
+ PP2ENABLE,
+ PP3ENABLE,
+ PP4ENABLE,
+ PP5ENABLE,
+ PP6ENABLE,
+ PP7ENABLE
+} mali_dlbu_pp_enable;
+
+struct mali_dlbu_core {
+ struct mali_hw_core hw_core; /**< Common for all HW cores */
+ u32 pp_cores_mask; /**< Mask of the PP cores whose operation will be controlled by the DLBU;
+ see the MALI_DLBU_REGISTER_PP_ENABLE_MASK register */
+};
+
+_mali_osk_errcode_t mali_dlbu_initialize(void)
+{
+ MALI_DEBUG_PRINT(2, ("Mali DLBU: Initializing\n"));
+
+ if (_MALI_OSK_ERR_OK ==
+ mali_mmu_get_table_page(&mali_dlbu_phys_addr,
+ &mali_dlbu_cpu_addr)) {
+ return _MALI_OSK_ERR_OK;
+ }
+
+ return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_dlbu_terminate(void)
+{
+ MALI_DEBUG_PRINT(3, ("Mali DLBU: terminating\n"));
+
+ if (0 != mali_dlbu_phys_addr && 0 != mali_dlbu_cpu_addr) {
+ mali_mmu_release_table_page(mali_dlbu_phys_addr,
+ mali_dlbu_cpu_addr);
+ mali_dlbu_phys_addr = 0;
+ mali_dlbu_cpu_addr = 0;
+ }
+}
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource)
+{
+ struct mali_dlbu_core *core = NULL;
+
+ MALI_DEBUG_PRINT(2, ("Mali DLBU: Creating Mali dynamic load balancing unit: %s\n", resource->description));
+
+ core = _mali_osk_malloc(sizeof(struct mali_dlbu_core));
+ if (NULL != core) {
+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI_DLBU_SIZE)) {
+ core->pp_cores_mask = 0;
+ if (_MALI_OSK_ERR_OK == mali_dlbu_reset(core)) {
+ return core;
+ }
+ MALI_PRINT_ERROR(("Failed to reset DLBU %s\n", core->hw_core.description));
+ mali_hw_core_delete(&core->hw_core);
+ }
+
+ _mali_osk_free(core);
+ } else {
+ MALI_PRINT_ERROR(("Mali DLBU: Failed to allocate memory for DLBU core\n"));
+ }
+
+ return NULL;
+}
+
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu)
+{
+ MALI_DEBUG_ASSERT_POINTER(dlbu);
+ mali_hw_core_delete(&dlbu->hw_core);
+ _mali_osk_free(dlbu);
+}
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu)
+{
+ u32 dlbu_registers[7];
+ _mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+ MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+ MALI_DEBUG_PRINT(4, ("Mali DLBU: mali_dlbu_reset: %s\n", dlbu->hw_core.description));
+
+ dlbu_registers[0] = mali_dlbu_phys_addr | 1; /* bit 0 enables the whole core */
+ dlbu_registers[1] = MALI_DLBU_VIRT_ADDR;
+ dlbu_registers[2] = 0;
+ dlbu_registers[3] = 0;
+ dlbu_registers[4] = 0;
+ dlbu_registers[5] = 0;
+ dlbu_registers[6] = dlbu->pp_cores_mask;
+
+ /* Write reset values to the 7 consecutive core registers (MASTER_TLLIST_PHYS_ADDR .. PP_ENABLE_MASK) */
+ mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR, dlbu_registers, 7);
+
+ err = _MALI_OSK_ERR_OK;
+
+ return err;
+}
+
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu)
+{
+ MALI_DEBUG_ASSERT_POINTER(dlbu);
+
+ mali_hw_core_register_write(&dlbu->hw_core, MALI_DLBU_REGISTER_PP_ENABLE_MASK, dlbu->pp_cores_mask);
+}
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+ struct mali_pp_core *pp_core;
+ u32 bcast_id;
+
+ MALI_DEBUG_ASSERT_POINTER(dlbu);
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ pp_core = mali_group_get_pp_core(group);
+ bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+ dlbu->pp_cores_mask |= bcast_id;
+ MALI_DEBUG_PRINT(3, ("Mali DLBU: Adding core[%d] New mask= 0x%02x\n", bcast_id , dlbu->pp_cores_mask));
+}
+
+/* Remove a group from the DLBU */
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group)
+{
+ struct mali_pp_core *pp_core;
+ u32 bcast_id;
+
+ MALI_DEBUG_ASSERT_POINTER(dlbu);
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ pp_core = mali_group_get_pp_core(group);
+ bcast_id = mali_pp_core_get_bcast_id(pp_core);
+
+ dlbu->pp_cores_mask &= ~bcast_id;
+ MALI_DEBUG_PRINT(3, ("Mali DLBU: Removing core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask));
+}
+
+/* Configure the DLBU for \a job. This needs to be done before the job is started on the groups in the DLBU. */
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job)
+{
+ u32 *registers;
+ MALI_DEBUG_ASSERT(job);
+ registers = mali_pp_job_get_dlbu_registers(job);
+ MALI_DEBUG_PRINT(4, ("Mali DLBU: Starting job\n"));
+
+ /* Write 4 registers (TLLIST_VBASEADDR, FB_DIM, TLLIST_CONF, START_TILE_POS):
+ * all DLBU registers except the first two (written once at DLBU initialisation / reset) and the PP_ENABLE_MASK register */
+ mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_TLLIST_VBASEADDR, registers, 4);
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_dlbu.h b/drivers/gpu/arm/utgard/common/mali_dlbu.h
new file mode 100644
index 000000000000..6b068884bd49
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_dlbu.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_DLBU_H__
+#define __MALI_DLBU_H__
+
+#define MALI_DLBU_VIRT_ADDR 0xFFF00000 /* master tile virtual address fixed at this value and mapped into every session */
+
+#include "mali_osk.h"
+
+struct mali_pp_job;
+struct mali_group;
+struct mali_dlbu_core;
+
+extern mali_dma_addr mali_dlbu_phys_addr;
+
+_mali_osk_errcode_t mali_dlbu_initialize(void);
+void mali_dlbu_terminate(void);
+
+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource);
+void mali_dlbu_delete(struct mali_dlbu_core *dlbu);
+
+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group);
+
+/** @brief Called to update HW after DLBU state changed
+ *
+ * This function must be called after \a mali_dlbu_add_group or \a
+ * mali_dlbu_remove_group to write the updated mask to hardware, unless the
+ * same is accomplished by calling \a mali_dlbu_reset.
+ */
+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu);
+
+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job);
+
+#endif /* __MALI_DLBU_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_dvfs_policy.c b/drivers/gpu/arm/utgard/common/mali_dvfs_policy.c
new file mode 100644
index 000000000000..12ba069ec13b
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_dvfs_policy.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_dvfs_policy.h"
+#include "mali_osk_mali.h"
+#include "mali_osk_profiling.h"
+
+#define CLOCK_TUNING_TIME_DEBUG 0
+
+#define MAX_PERFORMANCE_VALUE 256
+#define MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(percent) ((int) ((percent)*(MAX_PERFORMANCE_VALUE)/100.0 + 0.5))
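For example, MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90) evaluates to (int)(90 * 256 / 100.0 + 0.5) = 230, i.e. 90% on the 0..256 utilization scale reported in mali_gpu_utilization_data.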
+
+/** The max fps, the same as the display vsync rate (default 60); can be set as a module parameter */
+int mali_max_system_fps = 60;
+/** A lower limit on the desired FPS (default 58); can be set as a module parameter */
+int mali_desired_fps = 58;
+
+static int mali_fps_step1 = 0;
+static int mali_fps_step2 = 0;
+
+static int clock_step = -1;
+static int cur_clk_step = -1;
+static struct mali_gpu_clock *gpu_clk = NULL;
+
+/* Platform callbacks for GPU frequency control */
+static int (*mali_gpu_set_freq)(int) = NULL;
+static int (*mali_gpu_get_freq)(void) = NULL;
+
+static mali_bool mali_dvfs_enabled = MALI_FALSE;
+
+#define NUMBER_OF_NANOSECONDS_PER_SECOND 1000000000ULL
+static u32 calculate_window_render_fps(u64 time_period)
+{
+ u32 max_window_number;
+ u64 tmp;
+ u64 max = time_period;
+ u32 leading_zeroes;
+ u32 shift_val;
+ u32 time_period_shift;
+ u32 max_window_number_shift;
+ u32 ret_val;
+
+ max_window_number = mali_session_max_window_num();
+
+ /* To avoid floating-point division, scale the dividend to ns units */
+ tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND;
+ if (tmp > time_period) {
+ max = tmp;
+ }
+
+ /*
+ * Either the dividend or the divisor (or both) may be 64-bit values.
+ * To avoid depending on a 64-bit divider, we first shift both
+ * values down equally.
+ */
+ leading_zeroes = _mali_osk_clz((u32)(max >> 32));
+ shift_val = 32 - leading_zeroes;
+
+ time_period_shift = (u32)(time_period >> shift_val);
+ max_window_number_shift = (u32)(tmp >> shift_val);
+
+ ret_val = max_window_number_shift / time_period_shift;
+
+ return ret_val;
+}
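The clz-based normalization above avoids linking a 64-bit division helper on 32-bit builds. A standalone sketch of the same trick, with __builtin_clz() standing in for _mali_osk_clz() (an assumption; the caller must also keep the shifted divisor nonzero, which the ns-scale operands here typically ensure):

    #include <stdint.h>

    /* Approximate dividend / divisor without a 64-bit divide: shift
     * both operands right until the larger one fits in 32 bits. */
    static uint32_t div_u64_approx(uint64_t dividend, uint64_t divisor)
    {
        uint64_t max = (dividend > divisor) ? dividend : divisor;
        uint32_t hi = (uint32_t)(max >> 32);
        uint32_t shift = hi ? (32u - __builtin_clz(hi)) : 0u;

        return (uint32_t)(dividend >> shift) / (uint32_t)(divisor >> shift);
    }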
+
+static bool mali_pickup_closest_avail_clock(int target_clock_mhz, mali_bool pick_clock_up)
+{
+ int i = 0;
+ bool clock_changed = false;
+
+ /* Round up to the closest available frequency step for target_clock_mhz */
+ for (i = 0; i < gpu_clk->num_of_steps; i++) {
+ /* Find the first item > target_clock_mhz */
+ if (((int)(gpu_clk->item[i].clock) - target_clock_mhz) > 0) {
+ break;
+ }
+ }
+
+ /* If the target clock is greater than the maximum available clock, just pick the maximum one */
+ if (i == gpu_clk->num_of_steps) {
+ i = gpu_clk->num_of_steps - 1;
+ } else {
+ if ((!pick_clock_up) && (i > 0)) {
+ i = i - 1;
+ }
+ }
+
+ clock_step = i;
+ if (cur_clk_step != clock_step) {
+ clock_changed = true;
+ }
+
+ return clock_changed;
+}
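For example, with assumed clock steps {100, 200, 400, 533} MHz and target_clock_mhz = 250, the loop stops at i = 2 (400 MHz): pick_clock_up selects 400 MHz, while rounding down selects step 1 (200 MHz); any target above 533 MHz clamps to the last step.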
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period)
+{
+ int under_perform_boundary_value = 0;
+ int over_perform_boundary_value = 0;
+ int current_fps = 0;
+ int current_gpu_util = 0;
+ bool clock_changed = false;
+ u32 window_render_fps;
+#if CLOCK_TUNING_TIME_DEBUG
+ struct timeval start;
+ struct timeval stop;
+ unsigned int elapse_time;
+ do_gettimeofday(&start);
+#endif
+
+ if (NULL == gpu_clk) {
+ MALI_DEBUG_PRINT(2, ("Enable DVFS but patform doesn't Support freq change. \n"));
+ return;
+ }
+
+ window_render_fps = calculate_window_render_fps(time_period);
+
+ current_fps = window_render_fps;
+ current_gpu_util = data->utilization_gpu;
+
+ /* Get the specific under_perform_boundary_value and over_perform_boundary_value */
+ if ((mali_desired_fps <= current_fps) && (current_fps < mali_max_system_fps)) {
+ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90);
+ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+ } else if ((mali_fps_step1 <= current_fps) && (current_fps < mali_desired_fps)) {
+ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+ } else if ((mali_fps_step2 <= current_fps) && (current_fps < mali_fps_step1)) {
+ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70);
+ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(50);
+ } else {
+ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55);
+ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35);
+ }
+
+ MALI_DEBUG_PRINT(5, ("Using ARM power policy: gpu util = %d \n", current_gpu_util));
+ MALI_DEBUG_PRINT(5, ("Using ARM power policy: under_perform = %d, over_perform = %d \n", under_perform_boundary_value, over_perform_boundary_value));
+ MALI_DEBUG_PRINT(5, ("Using ARM power policy: render fps = %d, pressure render fps = %d \n", current_fps, window_render_fps));
+
+ /* Get current clock value */
+ cur_clk_step = mali_gpu_get_freq();
+
+ /* Consider offscreen */
+ if (0 == current_fps) {
+ /* GP or PP under perform, need to give full power */
+ if (current_gpu_util > over_perform_boundary_value) {
+ if (cur_clk_step != gpu_clk->num_of_steps - 1) {
+ clock_changed = true;
+ clock_step = gpu_clk->num_of_steps - 1;
+ }
+ }
+
+ /* If GPU is idle, use lowest power */
+ if (0 == current_gpu_util) {
+ if (cur_clk_step != 0) {
+ clock_changed = true;
+ clock_step = 0;
+ }
+ }
+
+ goto real_setting;
+ }
+
+ /* Calculate the target clock if the GPU clock can be tuned */
+ if (-1 != cur_clk_step) {
+ int target_clk_mhz = -1;
+ mali_bool pick_clock_up = MALI_TRUE;
+
+ if (current_gpu_util > under_perform_boundary_value) {
+ /* When under-performing, the fps term must be taken into account */
+ target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util * mali_desired_fps / under_perform_boundary_value / current_fps;
+ pick_clock_up = MALI_TRUE;
+ } else if (current_gpu_util < over_perform_boundary_value) {
+ /* When over-performing, fps need not be considered; the system doesn't need to reach the desired fps */
+ target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util / under_perform_boundary_value;
+ pick_clock_up = MALI_FALSE;
+ }
+
+ if (-1 != target_clk_mhz) {
+ clock_changed = mali_pickup_closest_avail_clock(target_clk_mhz, pick_clock_up);
+ }
+ }
+
+real_setting:
+ if (clock_changed) {
+ mali_gpu_set_freq(clock_step);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+ gpu_clk->item[clock_step].clock,
+ gpu_clk->item[clock_step].vol / 1000,
+ 0, 0, 0);
+ }
+
+#if CLOCK_TUNING_TIME_DEBUG
+ do_gettimeofday(&stop);
+
+ elapse_time = timeval_to_ns(&stop) - timeval_to_ns(&start);
+ MALI_DEBUG_PRINT(2, ("Using ARM power policy: eclapse time = %d\n", elapse_time));
+#endif
+}
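To make the under-perform branch concrete with assumed numbers: at a current step of 400 MHz, utilization 240/256 and current_fps = 58 (the 90%/70% band, so the under-perform boundary is 230), the target is 400 * 240 * 58 / 230 / 58 ≈ 417 MHz, which mali_pickup_closest_avail_clock() then rounds up to the next available step (533 MHz in the step table assumed above).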
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void)
+{
+ _mali_osk_device_data data;
+ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+ if ((NULL != data.get_clock_info) && (NULL != data.set_freq) && (NULL != data.get_freq)) {
+ MALI_DEBUG_PRINT(2, ("Mali DVFS init: using arm dvfs policy \n"));
+
+ mali_fps_step1 = mali_max_system_fps / 3;
+ mali_fps_step2 = mali_max_system_fps / 5;
+
+ data.get_clock_info(&gpu_clk);
+
+ if (gpu_clk != NULL) {
+#ifdef DEBUG
+ int i;
+ for (i = 0; i < gpu_clk->num_of_steps; i++) {
+ MALI_DEBUG_PRINT(5, ("mali gpu clock info: step%d clock(%d)Hz,vol(%d) \n",
+ i, gpu_clk->item[i].clock, gpu_clk->item[i].vol));
+ }
+#endif
+ } else {
+ MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform didn't define enough info for ddk to do DVFS \n"));
+ }
+
+ mali_gpu_get_freq = data.get_freq;
+ mali_gpu_set_freq = data.set_freq;
+
+ if ((NULL != gpu_clk) && (gpu_clk->num_of_steps > 0)
+ && (NULL != mali_gpu_get_freq) && (NULL != mali_gpu_set_freq)) {
+ mali_dvfs_enabled = MALI_TRUE;
+ }
+ } else {
+ MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+ }
+ } else {
+ err = _MALI_OSK_ERR_FAULT;
+ MALI_DEBUG_PRINT(2, ("Mali DVFS init: get platform data error .\n"));
+ }
+
+ return err;
+}
+
+/*
+ * For performance, always run at full power when a new period starts
+ * (only used when Mali DVFS is enabled).
+ */
+void mali_dvfs_policy_new_period(void)
+{
+ unsigned int cur_clk_step = 0;
+
+ cur_clk_step = mali_gpu_get_freq();
+
+ if (cur_clk_step != (gpu_clk->num_of_steps - 1)) {
+ mali_gpu_set_freq(gpu_clk->num_of_steps - 1);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, gpu_clk->item[gpu_clk->num_of_steps - 1].clock,
+ gpu_clk->item[gpu_clk->num_of_steps - 1].vol / 1000, 0, 0, 0);
+ }
+}
+
+mali_bool mali_dvfs_policy_enabled(void)
+{
+ return mali_dvfs_enabled;
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item)
+{
+ if (mali_platform_device != NULL) {
+
+ struct mali_gpu_device_data *device_data = NULL;
+ device_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+
+ if ((NULL != device_data->get_clock_info) && (NULL != device_data->get_freq)) {
+
+ int cur_clk_step = device_data->get_freq();
+ struct mali_gpu_clock *mali_gpu_clk = NULL;
+
+ device_data->get_clock_info(&mali_gpu_clk);
+ clk_item->clock = mali_gpu_clk->item[cur_clk_step].clock;
+ clk_item->vol = mali_gpu_clk->item[cur_clk_step].vol;
+ } else {
+ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: platform function callback incomplete, need check mali_gpu_device_data in platform .\n"));
+ }
+ }
+}
+#endif
+
diff --git a/drivers/gpu/arm/utgard/common/mali_dvfs_policy.h b/drivers/gpu/arm/utgard/common/mali_dvfs_policy.h
new file mode 100644
index 000000000000..55e4b354c1a9
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_dvfs_policy.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2012, 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_DVFS_POLICY_H__
+#define __MALI_DVFS_POLICY_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period);
+
+_mali_osk_errcode_t mali_dvfs_policy_init(void);
+
+void mali_dvfs_policy_new_period(void);
+
+mali_bool mali_dvfs_policy_enabled(void);
+
+#if defined(CONFIG_MALI400_PROFILING)
+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_DVFS_POLICY_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_executor.c b/drivers/gpu/arm/utgard/common/mali_executor.c
new file mode 100644
index 000000000000..3c2864386798
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_executor.c
@@ -0,0 +1,2642 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_executor.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_timeline.h"
+#include "mali_osk_profiling.h"
+#include "mali_session.h"
+
+/*
+ * If dma_buf with map-on-demand is used, we defer job deletion and job
+ * queueing while in atomic context, since both might sleep.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_DELETE 1
+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) */
+
+/*
+ * ---------- static type definitions (structs, enums, etc) ----------
+ */
+
+enum mali_executor_state_t {
+ EXEC_STATE_NOT_PRESENT, /* Virtual group on Mali-300/400 (do not use) */
+ EXEC_STATE_DISABLED, /* Disabled by core scaling (do not use) */
+ EXEC_STATE_EMPTY, /* No child groups for virtual group (do not use) */
+ EXEC_STATE_INACTIVE, /* Can be used, but must be activated first */
+ EXEC_STATE_IDLE, /* Active and ready to be used */
+ EXEC_STATE_WORKING, /* Executing a job */
+};
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock for this module (protecting all HW access except L2 caches) */
+_mali_osk_spinlock_irq_t *mali_executor_lock_obj = NULL;
+
+mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/*
+ * ---------- static variables ----------
+ */
+
+/* Used to defer job scheduling */
+static _mali_osk_wq_work_t *executor_wq_high_pri = NULL;
+
+/* Store version from GP and PP (user space wants to know this) */
+static u32 pp_version = 0;
+static u32 gp_version = 0;
+
+/* List of physical PP groups which are disabled by some external source */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_disabled);
+static u32 group_list_disabled_count = 0;
+
+/* List of groups which can be used, but must be activated first */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_inactive);
+static u32 group_list_inactive_count = 0;
+
+/* List of groups which are active and ready to be used */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_idle);
+static u32 group_list_idle_count = 0;
+
+/* List of groups which are executing a job */
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_working);
+static u32 group_list_working_count = 0;
+
+/* Virtual group (if any) */
+static struct mali_group *virtual_group = NULL;
+
+/* Virtual group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t virtual_group_state = EXEC_STATE_NOT_PRESENT;
+
+/* GP group */
+static struct mali_group *gp_group = NULL;
+
+/* GP group state is tracked with a state variable instead of 4 lists */
+static enum mali_executor_state_t gp_group_state = EXEC_STATE_NOT_PRESENT;
+
+static u32 gp_returned_cookie = 0;
+
+/* Total number of physical PP cores present */
+static u32 num_physical_pp_cores_total = 0;
+
+/* Number of physical cores which are enabled */
+static u32 num_physical_pp_cores_enabled = 0;
+
+/* Enable or disable core scaling */
+static mali_bool core_scaling_enabled = MALI_TRUE;
+
+/* Variables to allow safe pausing of the scheduler */
+static _mali_osk_wait_queue_t *executor_working_wait_queue = NULL;
+static u32 pause_count = 0;
+
+/* Per-domain masks of PP cores whose enabling is delayed because some PP cores haven't been disabled yet. */
+static int core_scaling_delay_up_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+
+/* Variables used to notify userspace of PP core changes once core scaling
+ * has finished in mali_executor_complete_group(). */
+static _mali_osk_wq_work_t *executor_wq_notify_core_change = NULL;
+static _mali_osk_wait_queue_t *executor_notify_core_change_wait_queue = NULL;
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+static mali_bool mali_executor_is_suspended(void *data);
+static mali_bool mali_executor_is_working(void);
+static void mali_executor_disable_empty_virtual(void);
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group);
+static mali_bool mali_executor_has_virtual_group(void);
+static mali_bool mali_executor_virtual_group_is_usable(void);
+static void mali_executor_schedule(void);
+static void mali_executor_wq_schedule(void *arg);
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job, u32 added_size);
+static void mali_executor_complete_group(struct mali_group *group,
+ mali_bool success,
+ struct mali_gp_job **gp_job_done,
+ struct mali_pp_job **pp_job_done);
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+ _mali_osk_list_t *old_list,
+ u32 *old_count,
+ _mali_osk_list_t *new_list,
+ u32 *new_count);
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+ enum mali_executor_state_t state);
+
+static void mali_executor_group_enable_internal(struct mali_group *group);
+static void mali_executor_group_disable_internal(struct mali_group *group);
+static void mali_executor_core_scale(unsigned int target_core_nr);
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group);
+static void mali_executor_notify_core_change(u32 num_cores);
+static void mali_executor_wq_notify_core_change(void *arg);
+static void mali_executor_change_group_status_disabled(struct mali_group *group);
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group);
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+ _mali_osk_list_t *new_list,
+ u32 *new_count);
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_executor_initialize(void)
+{
+ mali_executor_lock_obj = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_EXECUTOR);
+ if (NULL == mali_executor_lock_obj) {
+ mali_executor_terminate();
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ executor_wq_high_pri = _mali_osk_wq_create_work_high_pri(mali_executor_wq_schedule, NULL);
+ if (NULL == executor_wq_high_pri) {
+ mali_executor_terminate();
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ executor_working_wait_queue = _mali_osk_wait_queue_init();
+ if (NULL == executor_working_wait_queue) {
+ mali_executor_terminate();
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ executor_wq_notify_core_change = _mali_osk_wq_create_work(mali_executor_wq_notify_core_change, NULL);
+ if (NULL == executor_wq_notify_core_change) {
+ mali_executor_terminate();
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ executor_notify_core_change_wait_queue = _mali_osk_wait_queue_init();
+ if (NULL == executor_notify_core_change_wait_queue) {
+ mali_executor_terminate();
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_terminate(void)
+{
+ if (NULL != executor_notify_core_change_wait_queue) {
+ _mali_osk_wait_queue_term(executor_notify_core_change_wait_queue);
+ executor_notify_core_change_wait_queue = NULL;
+ }
+
+ if (NULL != executor_wq_notify_core_change) {
+ _mali_osk_wq_delete_work(executor_wq_notify_core_change);
+ executor_wq_notify_core_change = NULL;
+ }
+
+ if (NULL != executor_working_wait_queue) {
+ _mali_osk_wait_queue_term(executor_working_wait_queue);
+ executor_working_wait_queue = NULL;
+ }
+
+ if (NULL != executor_wq_high_pri) {
+ _mali_osk_wq_delete_work(executor_wq_high_pri);
+ executor_wq_high_pri = NULL;
+ }
+
+ if (NULL != mali_executor_lock_obj) {
+ _mali_osk_spinlock_irq_term(mali_executor_lock_obj);
+ mali_executor_lock_obj = NULL;
+ }
+}
+
+void mali_executor_populate(void)
+{
+ u32 num_groups;
+ u32 i;
+
+ num_groups = mali_group_get_glob_num_groups();
+
+ /* Do we have a virtual group? */
+ for (i = 0; i < num_groups; i++) {
+ struct mali_group *group = mali_group_get_glob_group(i);
+
+ if (mali_group_is_virtual(group)) {
+ virtual_group = group;
+ virtual_group_state = EXEC_STATE_INACTIVE;
+ break;
+ }
+ }
+
+ /* Find all the available physical GP and PP cores */
+ for (i = 0; i < num_groups; i++) {
+ struct mali_group *group = mali_group_get_glob_group(i);
+
+ if (NULL != group) {
+ struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+ struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+
+ if (!mali_group_is_virtual(group)) {
+ if (NULL != pp_core) {
+ if (0 == pp_version) {
+ /* Retrieve PP version from the first available PP core */
+ pp_version = mali_pp_core_get_version(pp_core);
+ }
+
+ if (NULL != virtual_group) {
+ mali_executor_lock();
+ mali_group_add_group(virtual_group, group);
+ mali_executor_unlock();
+ } else {
+ _mali_osk_list_add(&group->executor_list, &group_list_inactive);
+ group_list_inactive_count++;
+ }
+
+ num_physical_pp_cores_total++;
+ } else {
+ MALI_DEBUG_ASSERT_POINTER(gp_core);
+
+ if (0 == gp_version) {
+ /* Retrieve GP version */
+ gp_version = mali_gp_core_get_version(gp_core);
+ }
+
+ gp_group = group;
+ gp_group_state = EXEC_STATE_INACTIVE;
+ }
+
+ }
+ }
+ }
+
+ num_physical_pp_cores_enabled = num_physical_pp_cores_total;
+}
+
+void mali_executor_depopulate(void)
+{
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+
+ if (NULL != gp_group) {
+ mali_group_delete(gp_group);
+ gp_group = NULL;
+ }
+
+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+ if (NULL != virtual_group) {
+ mali_group_delete(virtual_group);
+ virtual_group = NULL;
+ }
+
+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&group_list_working));
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+ mali_group_delete(group);
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+ mali_group_delete(group);
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+ mali_group_delete(group);
+ }
+}
+
+void mali_executor_suspend(void)
+{
+ mali_executor_lock();
+
+ /* Increment the pause_count so that no more jobs will be scheduled */
+ pause_count++;
+
+ mali_executor_unlock();
+
+ _mali_osk_wait_queue_wait_event(executor_working_wait_queue,
+ mali_executor_is_suspended, NULL);
+
+ /*
+ * mali_executor_complete_XX() leaves jobs in idle state.
+ * The deactivate option is used when we are going to power down
+ * the entire GPU (OS suspend) and want a consistent SW vs. HW
+ * state.
+ */
+ mali_executor_lock();
+
+ mali_executor_deactivate_list_idle(MALI_TRUE);
+
+ /*
+ * The following steps deactivate all activated
+ * (MALI_GROUP_STATE_ACTIVE) and activating
+ * (MALI_GROUP_STATE_ACTIVATION_PENDING) groups, to make sure the
+ * variable pd_mask_wanted equals 0. */
+ if (MALI_GROUP_STATE_INACTIVE != mali_group_get_state(gp_group)) {
+ gp_group_state = EXEC_STATE_INACTIVE;
+ mali_group_deactivate(gp_group);
+ }
+
+ if (mali_executor_has_virtual_group()) {
+ if (MALI_GROUP_STATE_INACTIVE
+ != mali_group_get_state(virtual_group)) {
+ virtual_group_state = EXEC_STATE_INACTIVE;
+ mali_group_deactivate(virtual_group);
+ }
+ }
+
+ if (0 < group_list_inactive_count) {
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp,
+ &group_list_inactive,
+ struct mali_group, executor_list) {
+ if (MALI_GROUP_STATE_ACTIVATION_PENDING
+ == mali_group_get_state(group)) {
+ mali_group_deactivate(group);
+ }
+
+ /*
+ * On Mali-450, a physical group may sit in the inactive list in the
+ * MALI_GROUP_STATE_ACTIVATION_PENDING state; deactivating it alone
+ * is not enough, we must also add it back to the virtual group.
+ * At this point the virtual group must be in the INACTIVE state,
+ * so it is safe to add the physical group back to it.
+ */
+ if (NULL != virtual_group) {
+ _mali_osk_list_delinit(&group->executor_list);
+ group_list_inactive_count--;
+
+ mali_group_add_group(virtual_group, group);
+ }
+ }
+ }
+
+ mali_executor_unlock();
+}
+
+void mali_executor_resume(void)
+{
+ mali_executor_lock();
+
+ /* Decrement pause_count to allow scheduling again (if it reaches 0) */
+ pause_count--;
+ if (0 == pause_count) {
+ mali_executor_schedule();
+ }
+
+ mali_executor_unlock();
+}
+
+u32 mali_executor_get_num_cores_total(void)
+{
+ return num_physical_pp_cores_total;
+}
+
+u32 mali_executor_get_num_cores_enabled(void)
+{
+ return num_physical_pp_cores_enabled;
+}
+
+struct mali_pp_core *mali_executor_get_virtual_pp(void)
+{
+ MALI_DEBUG_ASSERT_POINTER(virtual_group);
+ MALI_DEBUG_ASSERT_POINTER(virtual_group->pp_core);
+ return virtual_group->pp_core;
+}
+
+struct mali_group *mali_executor_get_virtual_group(void)
+{
+ return virtual_group;
+}
+
+void mali_executor_zap_all_active(struct mali_session_data *session)
+{
+ struct mali_group *group;
+ struct mali_group *temp;
+ mali_bool ret;
+
+ mali_executor_lock();
+
+ /*
+ * This function is a bit complicated because
+ * mali_group_zap_session() can fail. This only happens when the
+ * group is in an unhandled page fault state.
+ * We need to make sure this page fault is handled before we return,
+ * so that we know every single outstanding MMU transaction has
+ * completed. This allows the caller to safely remove physical pages
+ * once we have returned.
+ */
+
+ MALI_DEBUG_ASSERT(NULL != gp_group);
+ ret = mali_group_zap_session(gp_group, session);
+ if (MALI_FALSE == ret) {
+ struct mali_gp_job *gp_job = NULL;
+
+ mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+ MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+ /* GP job completed, make sure it is freed */
+ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+ MALI_TRUE, MALI_TRUE);
+ }
+
+ if (mali_executor_has_virtual_group()) {
+ ret = mali_group_zap_session(virtual_group, session);
+ if (MALI_FALSE == ret) {
+ struct mali_pp_job *pp_job = NULL;
+
+ mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+ if (NULL != pp_job) {
+ /* PP job completed, make sure it is freed */
+ mali_scheduler_complete_pp_job(pp_job, 0,
+ MALI_FALSE, MALI_TRUE);
+ }
+ }
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working,
+ struct mali_group, executor_list) {
+ ret = mali_group_zap_session(group, session);
+ if (MALI_FALSE == ret) {
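+ /* Zap failed; retry once before completing the running job as failed */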
+ ret = mali_group_zap_session(group, session);
+ if (MALI_FALSE == ret) {
+ struct mali_pp_job *pp_job = NULL;
+
+ mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+ if (NULL != pp_job) {
+ /* PP job completed, free it */
+ mali_scheduler_complete_pp_job(pp_job,
+ 0, MALI_FALSE,
+ MALI_TRUE);
+ }
+ }
+ }
+ }
+
+ mali_executor_unlock();
+}
+
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule)
+{
+ if (MALI_SCHEDULER_MASK_EMPTY != mask) {
+ if (MALI_TRUE == deferred_schedule) {
+ _mali_osk_wq_schedule_work_high_pri(executor_wq_high_pri);
+ } else {
+ /* Schedule from this thread*/
+ mali_executor_lock();
+ mali_executor_schedule();
+ mali_executor_unlock();
+ }
+ }
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group,
+ mali_bool in_upper_half)
+{
+ enum mali_interrupt_result int_result;
+ mali_bool time_out = MALI_FALSE;
+
+ MALI_DEBUG_PRINT(4, ("Executor: GP interrupt from %s in %s half\n",
+ mali_group_core_description(group),
+ in_upper_half ? "upper" : "bottom"));
+
+ mali_executor_lock();
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+ if (mali_group_has_timed_out(group)) {
+ int_result = MALI_INTERRUPT_RESULT_ERROR;
+ time_out = MALI_TRUE;
+ MALI_PRINT(("Executor GP: Job %d Timeout on %s\n",
+ mali_gp_job_get_id(group->gp_running_job),
+ mali_group_core_description(group)));
+ } else {
+ int_result = mali_group_get_interrupt_result_gp(group);
+ if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+ /* No interrupts signalled, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#else
+ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+ mali_group_mask_all_interrupts_gp(group);
+
+ if (MALI_INTERRUPT_RESULT_SUCCESS_VS == int_result) {
+ if (mali_group_gp_is_active(group)) {
+ /* Only VS completed so far, while PLBU is still active */
+
+ /* Enable all but the current interrupt */
+ mali_group_enable_interrupts_gp(group, int_result);
+
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_OK;
+ }
+ } else if (MALI_INTERRUPT_RESULT_SUCCESS_PLBU == int_result) {
+ if (mali_group_gp_is_active(group)) {
+ /* Only PLBU completed so far, while VS is still active */
+
+ /* Enable all but the current interrupt */
+ mali_group_enable_interrupts_gp(group, int_result);
+
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_OK;
+ }
+ } else if (MALI_INTERRUPT_RESULT_OOM == int_result) {
+
+ mali_executor_unlock();
+
+ mali_group_schedule_oom_work_handler(group);
+
+ return _MALI_OSK_ERR_OK;
+ }
+
+ /* We should now have a real interrupt to handle */
+
+ MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+ mali_group_core_description(group),
+ (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+ "ERROR" : "success"));
+
+ if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+ /* Don't bother to do processing of errors in upper half */
+ mali_executor_unlock();
+
+ if (MALI_FALSE == time_out) {
+ mali_group_schedule_bottom_half_gp(group);
+ }
+ } else {
+ struct mali_gp_job *job;
+ mali_bool success;
+
+ if (MALI_TRUE == time_out) {
+ mali_group_dump_status(group);
+ }
+
+ success = (int_result != MALI_INTERRUPT_RESULT_ERROR) ?
+ MALI_TRUE : MALI_FALSE;
+
+ mali_executor_complete_group(group, success, &job, NULL);
+
+ mali_executor_unlock();
+
+ /* GP jobs always fully complete */
+ MALI_DEBUG_ASSERT(NULL != job);
+
+ /* This will notify user space and close the job object */
+ mali_scheduler_complete_gp_job(job, success,
+ MALI_TRUE, MALI_TRUE);
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group,
+ mali_bool in_upper_half)
+{
+ enum mali_interrupt_result int_result;
+ mali_bool time_out = MALI_FALSE;
+
+ MALI_DEBUG_PRINT(4, ("Executor: PP interrupt from %s in %s half\n",
+ mali_group_core_description(group),
+ in_upper_half ? "upper" : "bottom"));
+
+ mali_executor_lock();
+
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ if (in_upper_half) {
+ if (mali_group_is_in_virtual(group)) {
+ /* Child groups should never handle PP interrupts */
+ MALI_DEBUG_ASSERT(!mali_group_has_timed_out(group));
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(mali_group_is_working(group));
+ MALI_DEBUG_ASSERT(!mali_group_is_in_virtual(group));
+
+ if (mali_group_has_timed_out(group)) {
+ int_result = MALI_INTERRUPT_RESULT_ERROR;
+ time_out = MALI_TRUE;
+ MALI_PRINT(("Executor PP: Job %d Timeout on %s\n",
+ mali_pp_job_get_id(group->pp_running_job),
+ mali_group_core_description(group)));
+ } else {
+ int_result = mali_group_get_interrupt_result_pp(group);
+ if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+ /* No interrupts signalled, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ } else if (MALI_INTERRUPT_RESULT_SUCCESS == int_result) {
+ if (mali_group_is_virtual(group) && mali_group_pp_is_active(group)) {
+ /* Some child groups are still working, so nothing to do right now */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+#else
+ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result);
+#endif
+
+ /* We should now have a real interrupt to handle */
+
+ MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n",
+ mali_group_core_description(group),
+ (MALI_INTERRUPT_RESULT_ERROR == int_result) ?
+ "ERROR" : "success"));
+
+ if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) {
+ /* Don't bother to do processing of errors in upper half */
+ mali_group_mask_all_interrupts_pp(group);
+ mali_executor_unlock();
+
+ if (MALI_FALSE == time_out) {
+ mali_group_schedule_bottom_half_pp(group);
+ }
+ } else {
+ struct mali_pp_job *job = NULL;
+ mali_bool success;
+
+ if (MALI_TRUE == time_out) {
+ mali_group_dump_status(group);
+ }
+
+ success = (int_result == MALI_INTERRUPT_RESULT_SUCCESS) ?
+ MALI_TRUE : MALI_FALSE;
+
+ mali_executor_complete_group(group, success, NULL, &job);
+
+ mali_executor_unlock();
+
+ if (NULL != job) {
+ /* Notify user space and close the job object */
+ mali_scheduler_complete_pp_job(job,
+ num_physical_pp_cores_total,
+ MALI_TRUE, MALI_TRUE);
+ }
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group,
+ mali_bool in_upper_half)
+{
+ enum mali_interrupt_result int_result;
+
+ MALI_DEBUG_PRINT(4, ("Executor: MMU interrupt from %s in %s half\n",
+ mali_group_core_description(group),
+ in_upper_half ? "upper" : "bottom"));
+
+ mali_executor_lock();
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(mali_group_is_working(group));
+
+ int_result = mali_group_get_interrupt_result_mmu(group);
+ if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ if (MALI_INTERRUPT_RESULT_NONE == int_result) {
+ /* No interrupts signalled, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#else
+ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_ERROR == int_result);
+#endif
+
+ /* We should now have a real interrupt to handle */
+
+ if (in_upper_half) {
+ /* Don't bother to do processing of errors in upper half */
+
+ struct mali_group *parent = group->parent_group;
+
+ mali_mmu_mask_all_interrupts(group->mmu);
+
+ mali_executor_unlock();
+
+ if (NULL == parent) {
+ mali_group_schedule_bottom_half_mmu(group);
+ } else {
+ mali_group_schedule_bottom_half_mmu(parent);
+ }
+
+ } else {
+ struct mali_gp_job *gp_job = NULL;
+ struct mali_pp_job *pp_job = NULL;
+
+#ifdef DEBUG
+
+ u32 fault_address = mali_mmu_get_page_fault_addr(group->mmu);
+ u32 status = mali_mmu_get_status(group->mmu);
+ MALI_DEBUG_PRINT(2, ("Executor: Mali page fault detected at 0x%x from bus id %d of type %s on %s\n",
+ (void *)(uintptr_t)fault_address,
+ (status >> 6) & 0x1F,
+ (status & 32) ? "write" : "read",
+ group->mmu->hw_core.description));
+ MALI_DEBUG_PRINT(3, ("Executor: MMU rawstat = 0x%08X, MMU status = 0x%08X\n",
+ mali_mmu_get_rawstat(group->mmu), status));
+ mali_mmu_pagedir_diag(mali_session_get_page_directory(group->session), fault_address);
+#endif
+
+ mali_executor_complete_group(group, MALI_FALSE, &gp_job, &pp_job);
+
+ mali_executor_unlock();
+
+ if (NULL != gp_job) {
+ MALI_DEBUG_ASSERT(NULL == pp_job);
+
+ /* Notify user space and close the job object */
+ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+ MALI_TRUE, MALI_TRUE);
+ } else if (NULL != pp_job) {
+ MALI_DEBUG_ASSERT(NULL == gp_job);
+
+ /* Notify user space and close the job object */
+ mali_scheduler_complete_pp_job(pp_job,
+ num_physical_pp_cores_total,
+ MALI_TRUE, MALI_TRUE);
+ }
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_executor_group_oom(struct mali_group *group)
+{
+ struct mali_gp_job *job = NULL;
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+ mali_executor_lock();
+
+ job = mali_group_get_running_gp_job(group);
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ /* Give group a chance to generate a SUSPEND event */
+ mali_group_oom(group);
+#endif
+
+ mali_gp_job_set_current_heap_addr(job, mali_gp_read_plbu_alloc_start_addr(group->gp_core));
+
+ mali_executor_unlock();
+
+ if (_MALI_OSK_ERR_OK == mali_mem_add_mem_size(job->session, job->heap_base_addr, job->heap_grow_size)) {
+ _mali_osk_notification_t *new_notification = NULL;
+
+ new_notification = _mali_osk_notification_create(
+ _MALI_NOTIFICATION_GP_STALLED,
+ sizeof(_mali_uk_gp_job_suspended_s));
+
+ /* Resume the job with the new heap;
+ * this will also re-enable interrupts
+ */
+ mali_executor_lock();
+
+ mali_executor_send_gp_oom_to_user(job, job->heap_grow_size);
+
+ if (NULL != new_notification) {
+
+ mali_gp_job_set_oom_notification(job, new_notification);
+
+ mali_group_resume_gp_with_new_heap(group, mali_gp_job_get_id(job),
+ job->heap_current_addr,
+ job->heap_current_addr + job->heap_grow_size);
+ }
+ mali_executor_unlock();
+ } else {
+ mali_executor_lock();
+ mali_executor_send_gp_oom_to_user(job, 0);
+ mali_executor_unlock();
+ }
+
+}
+
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups)
+{
+ u32 i;
+ mali_bool child_groups_activated = MALI_FALSE;
+ mali_bool do_schedule = MALI_FALSE;
+#if defined(DEBUG)
+ u32 num_activated = 0;
+#endif
+
+ MALI_DEBUG_ASSERT_POINTER(groups);
+ MALI_DEBUG_ASSERT(0 < num_groups);
+
+ mali_executor_lock();
+
+ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups\n", num_groups));
+
+ for (i = 0; i < num_groups; i++) {
+ MALI_DEBUG_PRINT(3, ("Executor: powering up group %s\n",
+ mali_group_core_description(groups[i])));
+
+ mali_group_power_up(groups[i]);
+
+ if ((MALI_GROUP_STATE_ACTIVATION_PENDING != mali_group_get_state(groups[i]) ||
+ (MALI_TRUE != mali_executor_group_is_in_state(groups[i], EXEC_STATE_INACTIVE)))) {
+ /* nothing more to do for this group */
+ continue;
+ }
+
+ MALI_DEBUG_PRINT(3, ("Executor: activating group %s\n",
+ mali_group_core_description(groups[i])));
+
+#if defined(DEBUG)
+ num_activated++;
+#endif
+
+ if (mali_group_is_in_virtual(groups[i])) {
+ /*
+ * At least one child group of virtual group is powered on.
+ */
+ child_groups_activated = MALI_TRUE;
+ } else if (MALI_FALSE == mali_group_is_virtual(groups[i])) {
+ /* Set gp and pp not in virtual to active. */
+ mali_group_set_active(groups[i]);
+ }
+
+ /* Move group from inactive to idle list */
+ if (groups[i] == gp_group) {
+ MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+ gp_group_state);
+ gp_group_state = EXEC_STATE_IDLE;
+ } else if (MALI_FALSE == mali_group_is_in_virtual(groups[i])
+ && MALI_FALSE == mali_group_is_virtual(groups[i])) {
+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_group_is_in_state(groups[i],
+ EXEC_STATE_INACTIVE));
+
+ mali_executor_change_state_pp_physical(groups[i],
+ &group_list_inactive,
+ &group_list_inactive_count,
+ &group_list_idle,
+ &group_list_idle_count);
+ }
+
+ do_schedule = MALI_TRUE;
+ }
+
+ if (mali_executor_has_virtual_group() &&
+ MALI_TRUE == child_groups_activated &&
+ MALI_GROUP_STATE_ACTIVATION_PENDING ==
+ mali_group_get_state(virtual_group)) {
+ /*
+ * Try to activate the virtual group; this may not succeed every
+ * time, since not all child groups may be powered on at once while
+ * the virtual group is in the activation pending state.
+ */
+ if (mali_group_set_active(virtual_group)) {
+ /* Move group from inactive to idle */
+ MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE ==
+ virtual_group_state);
+ virtual_group_state = EXEC_STATE_IDLE;
+
+ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated, 1 virtual activated.\n", num_groups, num_activated));
+ } else {
+ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+ }
+ } else {
+ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated));
+ }
+
+ if (MALI_TRUE == do_schedule) {
+ /* Trigger a schedule */
+ mali_executor_schedule();
+ }
+
+ mali_executor_unlock();
+}
+
+void mali_executor_group_power_down(struct mali_group *groups[],
+ u32 num_groups)
+{
+ u32 i;
+
+ MALI_DEBUG_ASSERT_POINTER(groups);
+ MALI_DEBUG_ASSERT(0 < num_groups);
+
+ mali_executor_lock();
+
+ MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups\n", num_groups));
+
+ for (i = 0; i < num_groups; i++) {
+ /* Groups must be either disabled or inactive, while the virtual group
+ * may also be in the empty state: on pm_runtime_suspend the virtual
+ * group can be powered off, and because we must release
+ * mali_pm_state_lock before acquiring mali_executor_lock, a newly
+ * queued physical job may pull all physical groups out of the virtual
+ * group, so we can only power down an empty virtual group. Those
+ * physical groups will be powered up again in the following
+ * pm_runtime_resume callback.
+ */
+ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(groups[i],
+ EXEC_STATE_DISABLED) ||
+ mali_executor_group_is_in_state(groups[i],
+ EXEC_STATE_INACTIVE) ||
+ mali_executor_group_is_in_state(groups[i],
+ EXEC_STATE_EMPTY));
+
+ MALI_DEBUG_PRINT(3, ("Executor: powering down group %s\n",
+ mali_group_core_description(groups[i])));
+
+ mali_group_power_down(groups[i]);
+ }
+
+ MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups completed\n", num_groups));
+
+ mali_executor_unlock();
+}
+
+void mali_executor_abort_session(struct mali_session_data *session)
+{
+ struct mali_group *group;
+ struct mali_group *tmp_group;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT(session->is_aborting);
+
+ MALI_DEBUG_PRINT(3,
+ ("Executor: Aborting all jobs from session 0x%08X.\n",
+ session));
+
+ mali_executor_lock();
+
+ if (mali_group_get_session(gp_group) == session) {
+ if (EXEC_STATE_WORKING == gp_group_state) {
+ struct mali_gp_job *gp_job = NULL;
+
+ mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL);
+
+ MALI_DEBUG_ASSERT_POINTER(gp_job);
+
+ /* GP job completed, make sure it is freed */
+ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE,
+ MALI_FALSE, MALI_TRUE);
+ } else {
+ /* Same session, but not working, so just clear it */
+ mali_group_clear_session(gp_group);
+ }
+ }
+
+ if (mali_executor_has_virtual_group()) {
+ if (EXEC_STATE_WORKING == virtual_group_state
+ && mali_group_get_session(virtual_group) == session) {
+ struct mali_pp_job *pp_job = NULL;
+
+ mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job);
+
+ if (NULL != pp_job) {
+ /* PP job completed, make sure it is freed */
+ mali_scheduler_complete_pp_job(pp_job, 0,
+ MALI_FALSE, MALI_TRUE);
+ }
+ }
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working,
+ struct mali_group, executor_list) {
+ if (mali_group_get_session(group) == session) {
+ struct mali_pp_job *pp_job = NULL;
+
+ mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job);
+
+ if (NULL != pp_job) {
+ /* PP job completed, make sure it is freed */
+ mali_scheduler_complete_pp_job(pp_job, 0,
+ MALI_FALSE, MALI_TRUE);
+ }
+ }
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_idle, struct mali_group, executor_list) {
+ mali_group_clear_session(group);
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_inactive, struct mali_group, executor_list) {
+ mali_group_clear_session(group);
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_disabled, struct mali_group, executor_list) {
+ mali_group_clear_session(group);
+ }
+
+ mali_executor_unlock();
+}
+
+
+void mali_executor_core_scaling_enable(void)
+{
+ /* Core scaling is enabled by default */
+ core_scaling_enabled = MALI_TRUE;
+}
+
+void mali_executor_core_scaling_disable(void)
+{
+ core_scaling_enabled = MALI_FALSE;
+}
+
+mali_bool mali_executor_core_scaling_is_enabled(void)
+{
+ return core_scaling_enabled;
+}
+
+void mali_executor_group_enable(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ mali_executor_lock();
+
+ if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+ && (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+ mali_executor_group_enable_internal(group);
+ }
+
+ mali_executor_schedule();
+ mali_executor_unlock();
+
+ _mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+/*
+ * If a physical group is inactive or idle, disable it immediately; if the
+ * group is inside the virtual group and the virtual group is idle, disable
+ * the given physical group within it.
+ */
+void mali_executor_group_disable(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ mali_executor_lock();
+
+ if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group))
+ && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) {
+ mali_executor_group_disable_internal(group);
+ }
+
+ mali_executor_schedule();
+ mali_executor_unlock();
+
+ _mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+}
+
+mali_bool mali_executor_group_is_disabled(struct mali_group *group)
+{
+ /* NB: This function is not optimized for time critical usage */
+
+ mali_bool ret;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ mali_executor_lock();
+ ret = mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED);
+ mali_executor_unlock();
+
+ return ret;
+}
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override)
+{
+ if (target_core_nr == num_physical_pp_cores_enabled) return 0;
+ if (MALI_FALSE == core_scaling_enabled && MALI_FALSE == override) return -EPERM;
+ if (target_core_nr > num_physical_pp_cores_total) return -EINVAL;
+ if (0 == target_core_nr) return -EINVAL;
+
+ mali_executor_core_scale(target_core_nr);
+
+ _mali_osk_wq_schedule_work(executor_wq_notify_core_change);
+
+ return 0;
+}
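A short usage sketch for the API above (the thermal hook name is hypothetical); note that override bypasses only the core_scaling_enabled check, not the range checks:

    /* Hypothetical thermal hook: halve the enabled PP cores even if
     * userspace has disabled core scaling. */
    static void thermal_throttle_pp_cores(void)
    {
        u32 total = mali_executor_get_num_cores_total();

        /* Returns -EINVAL if total / 2 is 0 or exceeds the core count,
         * and 0 if the target is already in effect. */
        (void)mali_executor_set_perf_level(total / 2, MALI_TRUE);
    }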
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size)
+{
+ int n = 0;
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ mali_executor_lock();
+
+ switch (gp_group_state) {
+ case EXEC_STATE_INACTIVE:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "GP group is in state INACTIVE\n");
+ break;
+ case EXEC_STATE_IDLE:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "GP group is in state IDLE\n");
+ break;
+ case EXEC_STATE_WORKING:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "GP group is in state WORKING\n");
+ break;
+ default:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "GP group is in unknown/illegal state %u\n",
+ gp_group_state);
+ break;
+ }
+
+ n += mali_group_dump_state(gp_group, buf + n, size - n);
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Physical PP groups in WORKING state (count = %u):\n",
+ group_list_working_count);
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+ n += mali_group_dump_state(group, buf + n, size - n);
+ }
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Physical PP groups in IDLE state (count = %u):\n",
+ group_list_idle_count);
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+ n += mali_group_dump_state(group, buf + n, size - n);
+ }
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Physical PP groups in INACTIVE state (count = %u):\n",
+ group_list_inactive_count);
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+ n += mali_group_dump_state(group, buf + n, size - n);
+ }
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Physical PP groups in DISABLED state (count = %u):\n",
+ group_list_disabled_count);
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+ n += mali_group_dump_state(group, buf + n, size - n);
+ }
+
+ if (mali_executor_has_virtual_group()) {
+ switch (virtual_group_state) {
+ case EXEC_STATE_EMPTY:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Virtual PP group is in state EMPTY\n");
+ break;
+ case EXEC_STATE_INACTIVE:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Virtual PP group is in state INACTIVE\n");
+ break;
+ case EXEC_STATE_IDLE:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Virtual PP group is in state IDLE\n");
+ break;
+ case EXEC_STATE_WORKING:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Virtual PP group is in state WORKING\n");
+ break;
+ default:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Virtual PP group is in unknown/illegal state %u\n",
+ virtual_group_state);
+ break;
+ }
+
+ n += mali_group_dump_state(virtual_group, buf + n, size - n);
+ }
+
+ mali_executor_unlock();
+
+ n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+ return n;
+}
+#endif
+
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args)
+{
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+ args->number_of_total_cores = num_physical_pp_cores_total;
+ args->number_of_enabled_cores = num_physical_pp_cores_enabled;
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args)
+{
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+ args->version = pp_version;
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args)
+{
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+ args->number_of_cores = 1;
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args)
+{
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+ args->version = gp_version;
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args)
+{
+ struct mali_session_data *session;
+ struct mali_gp_job *job;
+
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+ session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+ if (_MALIGP_JOB_RESUME_WITH_NEW_HEAP == args->code) {
+ _mali_osk_notification_t *new_notification = NULL;
+
+ new_notification = _mali_osk_notification_create(
+ _MALI_NOTIFICATION_GP_STALLED,
+ sizeof(_mali_uk_gp_job_suspended_s));
+
+ if (NULL != new_notification) {
+ MALI_DEBUG_PRINT(3, ("Executor: Resuming job %u with new heap; 0x%08X - 0x%08X\n",
+ args->cookie, args->arguments[0], args->arguments[1]));
+
+ mali_executor_lock();
+
+ /* Resume the job in question if it is still running */
+ job = mali_group_get_running_gp_job(gp_group);
+ if (NULL != job &&
+ args->cookie == mali_gp_job_get_id(job) &&
+ session == mali_gp_job_get_session(job)) {
+ /*
+ * Correct job is running, resume with new heap
+ */
+
+ mali_gp_job_set_oom_notification(job,
+ new_notification);
+
+ /* This will also re-enable interrupts */
+ mali_group_resume_gp_with_new_heap(gp_group,
+ args->cookie,
+ args->arguments[0],
+ args->arguments[1]);
+
+ job->heap_base_addr = args->arguments[0];
+ job->heap_current_addr = args->arguments[0];
+
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_OK;
+ } else {
+ MALI_PRINT_ERROR(("Executor: Unable to resume, GP job no longer running.\n"));
+
+ _mali_osk_notification_delete(new_notification);
+
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+ } else {
+ MALI_PRINT_ERROR(("Executor: Failed to allocate notification object. Will abort GP job.\n"));
+ }
+ } else {
+ MALI_DEBUG_PRINT(2, ("Executor: Aborting job %u, no new heap provided\n", args->cookie));
+ }
+
+ mali_executor_lock();
+
+ /* Abort the job in question if it is still running */
+ job = mali_group_get_running_gp_job(gp_group);
+ if (NULL != job &&
+ args->cookie == mali_gp_job_get_id(job) &&
+ session == mali_gp_job_get_session(job)) {
+ /* Correct job is still running */
+ struct mali_gp_job *job_done = NULL;
+
+ mali_executor_complete_group(gp_group, MALI_FALSE, &job_done, NULL);
+
+ /* The same job should have completed */
+ MALI_DEBUG_ASSERT(job_done == job);
+
+ /* GP job completed, make sure it is freed */
+ mali_scheduler_complete_gp_job(job_done, MALI_FALSE,
+ MALI_TRUE, MALI_TRUE);
+ }
+
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+}
+
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+void mali_executor_lock(void)
+{
+ _mali_osk_spinlock_irq_lock(mali_executor_lock_obj);
+ MALI_DEBUG_PRINT(5, ("Executor: lock taken\n"));
+}
+
+void mali_executor_unlock(void)
+{
+ MALI_DEBUG_PRINT(5, ("Executor: Releasing lock\n"));
+ _mali_osk_spinlock_irq_unlock(mali_executor_lock_obj);
+}
+
+static mali_bool mali_executor_is_suspended(void *data)
+{
+ mali_bool ret;
+
+ /* This callback does not use the data pointer. */
+ MALI_IGNORE(data);
+
+ mali_executor_lock();
+
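+	/* Suspended means a pause has been requested and no group is working. */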
+ ret = pause_count > 0 && !mali_executor_is_working();
+
+ mali_executor_unlock();
+
+ return ret;
+}
+
+static mali_bool mali_executor_is_working(void)
+{
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ return (0 != group_list_working_count ||
+ EXEC_STATE_WORKING == gp_group_state ||
+ EXEC_STATE_WORKING == virtual_group_state);
+}
+
+static void mali_executor_disable_empty_virtual(void)
+{
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_EMPTY);
+ MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING);
+
+ if (mali_group_is_empty(virtual_group)) {
+ virtual_group_state = EXEC_STATE_EMPTY;
+ }
+}
+
+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group)
+{
+ mali_bool trigger_pm_update = MALI_FALSE;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ /* Only rejoining after job has completed (still active) */
+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+ mali_group_get_state(group));
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_has_virtual_group());
+ MALI_DEBUG_ASSERT(MALI_FALSE == mali_group_is_virtual(group));
+
+ /* Make sure group and virtual group have same status */
+
+ if (MALI_GROUP_STATE_INACTIVE == mali_group_get_state(virtual_group)) {
+ if (mali_group_deactivate(group)) {
+ trigger_pm_update = MALI_TRUE;
+ }
+
+ if (virtual_group_state == EXEC_STATE_EMPTY) {
+ virtual_group_state = EXEC_STATE_INACTIVE;
+ }
+ } else if (MALI_GROUP_STATE_ACTIVATION_PENDING ==
+ mali_group_get_state(virtual_group)) {
+ /*
+ * Activation is pending for virtual group, leave
+ * this child group as active.
+ */
+ if (virtual_group_state == EXEC_STATE_EMPTY) {
+ virtual_group_state = EXEC_STATE_INACTIVE;
+ }
+ } else {
+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE ==
+ mali_group_get_state(virtual_group));
+
+ if (virtual_group_state == EXEC_STATE_EMPTY) {
+ virtual_group_state = EXEC_STATE_IDLE;
+ }
+ }
+
+ /* Remove group from idle list */
+ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group,
+ EXEC_STATE_IDLE));
+ _mali_osk_list_delinit(&group->executor_list);
+ group_list_idle_count--;
+
+	/*
+	 * And finally rejoin the virtual group. The group will start working
+	 * on the same job as virtual_group, if virtual_group is working on one.
+	 */
+ mali_group_add_group(virtual_group, group);
+
+ return trigger_pm_update;
+}
+
+static mali_bool mali_executor_has_virtual_group(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+ return (NULL != virtual_group) ? MALI_TRUE : MALI_FALSE;
+#else
+ return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_virtual_group_is_usable(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return ((EXEC_STATE_INACTIVE == virtual_group_state ||
+ EXEC_STATE_IDLE == virtual_group_state) && (virtual_group->state != MALI_GROUP_STATE_ACTIVATION_PENDING)) ?
+ MALI_TRUE : MALI_FALSE;
+#else
+ return MALI_FALSE;
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+}
+
+static mali_bool mali_executor_tackle_gp_bound(void)
+{
+ struct mali_pp_job *job;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
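+	/* Peek at the next physical PP job; on Mali-400, report that we should
+	 * hold back when that job is large and unstarted while other PP groups
+	 * are still working. */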
+ job = mali_scheduler_job_pp_physical_peek();
+
+ if (NULL != job && MALI_TRUE == mali_is_mali400()) {
+ if (0 < group_list_working_count &&
+ mali_pp_job_is_large_and_unstarted(job)) {
+ return MALI_TRUE;
+ }
+ }
+
+ return MALI_FALSE;
+}
+
+/*
+ * This is where jobs are actually started.
+ */
+static void mali_executor_schedule(void)
+{
+ u32 i;
+ u32 num_physical_needed = 0;
+ u32 num_physical_to_process = 0;
+ mali_bool trigger_pm_update = MALI_FALSE;
+ mali_bool deactivate_idle_group = MALI_TRUE;
+
+ /* Physical groups + jobs to start in this function */
+ struct mali_group *groups_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+ struct mali_pp_job *jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+ u32 sub_jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS];
+ int num_jobs_to_start = 0;
+
+ /* Virtual job to start in this function */
+ struct mali_pp_job *virtual_job_to_start = NULL;
+
+ /* GP job to start in this function */
+ struct mali_gp_job *gp_job_to_start = NULL;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ if (pause_count > 0) {
+ /* Execution is suspended, don't schedule any jobs. */
+ return;
+ }
+
+ /* Lock needed in order to safely handle the job queues */
+ mali_scheduler_lock();
+
+	/* 1. Activate the GP group first if a GP job is queued. */
+ if (EXEC_STATE_INACTIVE == gp_group_state &&
+ 0 < mali_scheduler_job_gp_count()) {
+
+ enum mali_group_state state =
+ mali_group_activate(gp_group);
+ if (MALI_GROUP_STATE_ACTIVE == state) {
+ /* Set GP group state to idle */
+ gp_group_state = EXEC_STATE_IDLE;
+ } else {
+ trigger_pm_update = MALI_TRUE;
+ }
+ }
+
+ /* 2. Prepare as many physical groups as needed/possible */
+
+ num_physical_needed = mali_scheduler_job_physical_head_count();
+
+	/* On the Mali-450 platform we rarely need to enter this block. */
+ if (0 < num_physical_needed) {
+
+ if (num_physical_needed <= group_list_idle_count) {
+ /* We have enough groups on idle list already */
+ num_physical_to_process = num_physical_needed;
+ num_physical_needed = 0;
+ } else {
+ /* We need to get a hold of some more groups */
+ num_physical_to_process = group_list_idle_count;
+ num_physical_needed -= group_list_idle_count;
+ }
+
+ if (0 < num_physical_needed) {
+
+ /* 2.1. Activate groups which are inactive */
+
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive,
+ struct mali_group, executor_list) {
+ enum mali_group_state state =
+ mali_group_activate(group);
+ if (MALI_GROUP_STATE_ACTIVE == state) {
+ /* Move from inactive to idle */
+ mali_executor_change_state_pp_physical(group,
+ &group_list_inactive,
+ &group_list_inactive_count,
+ &group_list_idle,
+ &group_list_idle_count);
+ num_physical_to_process++;
+ } else {
+ trigger_pm_update = MALI_TRUE;
+ }
+
+ num_physical_needed--;
+ if (0 == num_physical_needed) {
+ /* We have activated all the groups we need */
+ break;
+ }
+ }
+ }
+
+ if (mali_executor_virtual_group_is_usable()) {
+
+ /*
+ * 2.2. And finally, steal and activate groups
+ * from virtual group if we need even more
+ */
+ while (0 < num_physical_needed) {
+ struct mali_group *group;
+
+ group = mali_group_acquire_group(virtual_group);
+ if (NULL != group) {
+ enum mali_group_state state;
+
+ mali_executor_disable_empty_virtual();
+
+ state = mali_group_activate(group);
+ if (MALI_GROUP_STATE_ACTIVE == state) {
+ /* Group is ready, add to idle list */
+ _mali_osk_list_add(
+ &group->executor_list,
+ &group_list_idle);
+ group_list_idle_count++;
+ num_physical_to_process++;
+ } else {
+ /*
+ * Group is not ready yet,
+ * add to inactive list
+ */
+ _mali_osk_list_add(
+ &group->executor_list,
+ &group_list_inactive);
+ group_list_inactive_count++;
+
+ trigger_pm_update = MALI_TRUE;
+ }
+ num_physical_needed--;
+ } else {
+ /*
+ * We could not get enough groups
+ * from the virtual group.
+ */
+ break;
+ }
+ }
+ }
+
+ /* 2.3. Assign physical jobs to groups */
+
+ if (0 < num_physical_to_process) {
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle,
+ struct mali_group, executor_list) {
+ struct mali_pp_job *job = NULL;
+ u32 sub_job = MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+
+ MALI_DEBUG_ASSERT(num_jobs_to_start <
+ MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+ MALI_DEBUG_ASSERT(0 <
+ mali_scheduler_job_physical_head_count());
+
+ if (mali_executor_hint_is_enabled(
+ MALI_EXECUTOR_HINT_GP_BOUND)) {
+ if (MALI_TRUE == mali_executor_tackle_gp_bound()) {
+ /*
+ * We're gp bound,
+ * don't start this right now.
+ */
+ deactivate_idle_group = MALI_FALSE;
+ num_physical_to_process = 0;
+ break;
+ }
+ }
+
+ job = mali_scheduler_job_pp_physical_get(
+ &sub_job);
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT(sub_job <= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS);
+
+ /* Put job + group on list of jobs to start later on */
+
+ groups_to_start[num_jobs_to_start] = group;
+ jobs_to_start[num_jobs_to_start] = job;
+ sub_jobs_to_start[num_jobs_to_start] = sub_job;
+ num_jobs_to_start++;
+
+ /* Move group from idle to working */
+ mali_executor_change_state_pp_physical(group,
+ &group_list_idle,
+ &group_list_idle_count,
+ &group_list_working,
+ &group_list_working_count);
+
+ num_physical_to_process--;
+ if (0 == num_physical_to_process) {
+ /* Got all we needed */
+ break;
+ }
+ }
+ }
+ }
+
+
+	/* 3. Deactivate idle PP groups. This must be done before activating the
+	 * virtual group, to cover the case where the normal queue at first only
+	 * holds a physical job while the group is inactive: the job start is
+	 * delayed until the group is activated and the scheduler runs again.
+	 * If a virtual job has arrived in the high-priority queue by then,
+	 * scheduling would otherwise do nothing because executor scheduling
+	 * has stopped.
+	 */
+
+ if (MALI_TRUE == mali_executor_deactivate_list_idle(deactivate_idle_group
+ && (!mali_timeline_has_physical_pp_job()))) {
+ trigger_pm_update = MALI_TRUE;
+ }
+
+ /* 4. Activate virtual group, if needed */
+
+ if (EXEC_STATE_INACTIVE == virtual_group_state &&
+ 0 < mali_scheduler_job_next_is_virtual()) {
+ enum mali_group_state state =
+ mali_group_activate(virtual_group);
+ if (MALI_GROUP_STATE_ACTIVE == state) {
+ /* Set virtual group state to idle */
+ virtual_group_state = EXEC_STATE_IDLE;
+ } else {
+ trigger_pm_update = MALI_TRUE;
+ }
+ }
+
+	/* 5. Trigger the PM update here so groups power up as soon as possible. */
+
+ if (MALI_TRUE == trigger_pm_update) {
+ trigger_pm_update = MALI_FALSE;
+ mali_pm_update_async();
+ }
+
+ /* 6. Assign jobs to idle virtual group (or deactivate if no job) */
+
+ if (EXEC_STATE_IDLE == virtual_group_state) {
+ if (0 < mali_scheduler_job_next_is_virtual()) {
+ virtual_job_to_start =
+ mali_scheduler_job_pp_virtual_get();
+ virtual_group_state = EXEC_STATE_WORKING;
+ } else if (!mali_timeline_has_virtual_pp_job()) {
+ virtual_group_state = EXEC_STATE_INACTIVE;
+
+ if (mali_group_deactivate(virtual_group)) {
+ trigger_pm_update = MALI_TRUE;
+ }
+ }
+ }
+
+ /* 7. Assign job to idle GP group (or deactivate if no job) */
+
+ if (EXEC_STATE_IDLE == gp_group_state) {
+ if (0 < mali_scheduler_job_gp_count()) {
+ gp_job_to_start = mali_scheduler_job_gp_get();
+ gp_group_state = EXEC_STATE_WORKING;
+ } else if (!mali_timeline_has_gp_job()) {
+ gp_group_state = EXEC_STATE_INACTIVE;
+ if (mali_group_deactivate(gp_group)) {
+ trigger_pm_update = MALI_TRUE;
+ }
+ }
+ }
+
+ /* 8. We no longer need the schedule/queue lock */
+
+ mali_scheduler_unlock();
+
+	/* 9. Start jobs */
+
+ if (NULL != virtual_job_to_start) {
+ MALI_DEBUG_ASSERT(!mali_group_pp_is_active(virtual_group));
+ mali_group_start_pp_job(virtual_group,
+ virtual_job_to_start, 0);
+ }
+
+ for (i = 0; i < num_jobs_to_start; i++) {
+ MALI_DEBUG_ASSERT(!mali_group_pp_is_active(
+ groups_to_start[i]));
+ mali_group_start_pp_job(groups_to_start[i],
+ jobs_to_start[i],
+ sub_jobs_to_start[i]);
+ }
+
+ MALI_DEBUG_ASSERT_POINTER(gp_group);
+
+ if (NULL != gp_job_to_start) {
+ MALI_DEBUG_ASSERT(!mali_group_gp_is_active(gp_group));
+ mali_group_start_gp_job(gp_group, gp_job_to_start);
+ }
+
+ /* 10. Trigger any pending PM updates */
+ if (MALI_TRUE == trigger_pm_update) {
+ mali_pm_update_async();
+ }
+}
+
+/* Handler for deferred schedule requests */
+static void mali_executor_wq_schedule(void *arg)
+{
+ MALI_IGNORE(arg);
+ mali_executor_lock();
+ mali_executor_schedule();
+ mali_executor_unlock();
+}
+
+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job, u32 added_size)
+{
+ _mali_uk_gp_job_suspended_s *jobres;
+ _mali_osk_notification_t *notification;
+
+ notification = mali_gp_job_get_oom_notification(job);
+
+ /*
+ * Remember the id we send to user space, so we have something to
+ * verify when we get a response
+ */
+ gp_returned_cookie = mali_gp_job_get_id(job);
+
+ jobres = (_mali_uk_gp_job_suspended_s *)notification->result_buffer;
+ jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+ jobres->cookie = gp_returned_cookie;
+ jobres->heap_added_size = added_size;
+ mali_session_send_notification(mali_gp_job_get_session(job),
+ notification);
+}
+
+static struct mali_gp_job *mali_executor_complete_gp(struct mali_group *group,
+ mali_bool success)
+{
+ struct mali_gp_job *job;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ /* Extracts the needed HW status from core and reset */
+ job = mali_group_complete_gp(group, success);
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ /* Core is now ready to go into idle list */
+ gp_group_state = EXEC_STATE_IDLE;
+
+ /* This will potentially queue more GP and PP jobs */
+ mali_timeline_tracker_release(&job->tracker);
+
+ /* Signal PP job */
+ mali_gp_job_signal_pp_tracker(job, success);
+
+ return job;
+}
+
+static struct mali_pp_job *mali_executor_complete_pp(struct mali_group *group,
+ mali_bool success)
+{
+ struct mali_pp_job *job;
+ u32 sub_job;
+ mali_bool job_is_done;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ /* Extracts the needed HW status from core and reset */
+ job = mali_group_complete_pp(group, success, &sub_job);
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ /* Core is now ready to go into idle list */
+ if (mali_group_is_virtual(group)) {
+ virtual_group_state = EXEC_STATE_IDLE;
+ } else {
+ /* Move from working to idle state */
+ mali_executor_change_state_pp_physical(group,
+ &group_list_working,
+ &group_list_working_count,
+ &group_list_idle,
+ &group_list_idle_count);
+ }
+
+	/* By this point it is the executor module that owns the job itself */
+ mali_pp_job_mark_sub_job_completed(job, success);
+ job_is_done = mali_pp_job_is_complete(job);
+
+ if (job_is_done) {
+ /* This will potentially queue more GP and PP jobs */
+ mali_timeline_tracker_release(&job->tracker);
+ }
+
+ return job;
+}
+
+static void mali_executor_complete_group(struct mali_group *group,
+ mali_bool success,
+ struct mali_gp_job **gp_job_done,
+ struct mali_pp_job **pp_job_done)
+{
+ struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+ struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+ struct mali_gp_job *gp_job = NULL;
+ struct mali_pp_job *pp_job = NULL;
+ mali_bool pp_job_is_done = MALI_TRUE;
+
+ if (NULL != gp_core) {
+ gp_job = mali_executor_complete_gp(group, success);
+ } else {
+ MALI_DEBUG_ASSERT_POINTER(pp_core);
+ MALI_IGNORE(pp_core);
+ pp_job = mali_executor_complete_pp(group, success);
+
+ pp_job_is_done = mali_pp_job_is_complete(pp_job);
+ }
+
+ if (pause_count > 0) {
+ /* Execution has been suspended */
+
+ if (!mali_executor_is_working()) {
+ /* Last job completed, wake up sleepers */
+ _mali_osk_wait_queue_wake_up(
+ executor_working_wait_queue);
+ }
+ } else if (MALI_TRUE == mali_group_disable_requested(group)) {
+ mali_executor_core_scale_in_group_complete(group);
+
+ mali_executor_schedule();
+ } else {
+ /* try to schedule new jobs */
+ mali_executor_schedule();
+ }
+
+ if (NULL != gp_job) {
+ MALI_DEBUG_ASSERT_POINTER(gp_job_done);
+ *gp_job_done = gp_job;
+ } else if (pp_job_is_done) {
+ MALI_DEBUG_ASSERT_POINTER(pp_job);
+ MALI_DEBUG_ASSERT_POINTER(pp_job_done);
+ *pp_job_done = pp_job;
+ }
+}
+
+static void mali_executor_change_state_pp_physical(struct mali_group *group,
+ _mali_osk_list_t *old_list,
+ u32 *old_count,
+ _mali_osk_list_t *new_list,
+ u32 *new_count)
+{
+ /*
+ * It's a bit more complicated to change the state for the physical PP
+ * groups since their state is determined by the list they are on.
+ */
+#if defined(DEBUG)
+ mali_bool found = MALI_FALSE;
+ struct mali_group *group_iter;
+ struct mali_group *temp;
+ u32 old_counted = 0;
+ u32 new_counted = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(old_list);
+ MALI_DEBUG_ASSERT_POINTER(old_count);
+ MALI_DEBUG_ASSERT_POINTER(new_list);
+ MALI_DEBUG_ASSERT_POINTER(new_count);
+
+ /*
+ * Verify that group is present on old list,
+ * and that the count is correct
+ */
+
+ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, old_list,
+ struct mali_group, executor_list) {
+ old_counted++;
+ if (group == group_iter) {
+ found = MALI_TRUE;
+ }
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, new_list,
+ struct mali_group, executor_list) {
+ new_counted++;
+ }
+
+ if (MALI_FALSE == found) {
+ if (old_list == &group_list_idle) {
+ MALI_DEBUG_PRINT(1, (" old Group list is idle,"));
+ } else if (old_list == &group_list_inactive) {
+ MALI_DEBUG_PRINT(1, (" old Group list is inactive,"));
+ } else if (old_list == &group_list_working) {
+ MALI_DEBUG_PRINT(1, (" old Group list is working,"));
+ } else if (old_list == &group_list_disabled) {
+			MALI_DEBUG_PRINT(1, (" old Group list is disabled,"));
+ }
+
+ if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_WORKING)) {
+ MALI_DEBUG_PRINT(1, (" group in working \n"));
+ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_INACTIVE)) {
+ MALI_DEBUG_PRINT(1, (" group in inactive \n"));
+ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_IDLE)) {
+ MALI_DEBUG_PRINT(1, (" group in idle \n"));
+ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) {
+ MALI_DEBUG_PRINT(1, (" but group in disabled \n"));
+ }
+ }
+
+ MALI_DEBUG_ASSERT(MALI_TRUE == found);
+ MALI_DEBUG_ASSERT(0 < (*old_count));
+ MALI_DEBUG_ASSERT((*old_count) == old_counted);
+ MALI_DEBUG_ASSERT((*new_count) == new_counted);
+#endif
+
+ _mali_osk_list_move(&group->executor_list, new_list);
+ (*old_count)--;
+ (*new_count)++;
+}
+
+static void mali_executor_set_state_pp_physical(struct mali_group *group,
+ _mali_osk_list_t *new_list,
+ u32 *new_count)
+{
+ _mali_osk_list_add(&group->executor_list, new_list);
+ (*new_count)++;
+}
+
+static mali_bool mali_executor_group_is_in_state(struct mali_group *group,
+ enum mali_executor_state_t state)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ if (gp_group == group) {
+ if (gp_group_state == state) {
+ return MALI_TRUE;
+ }
+ } else if (virtual_group == group || mali_group_is_in_virtual(group)) {
+ if (virtual_group_state == state) {
+ return MALI_TRUE;
+ }
+ } else {
+ /* Physical PP group */
+ struct mali_group *group_iter;
+ struct mali_group *temp;
+ _mali_osk_list_t *list;
+
+ if (EXEC_STATE_DISABLED == state) {
+ list = &group_list_disabled;
+ } else if (EXEC_STATE_INACTIVE == state) {
+ list = &group_list_inactive;
+ } else if (EXEC_STATE_IDLE == state) {
+ list = &group_list_idle;
+ } else {
+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING == state);
+ list = &group_list_working;
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, list,
+ struct mali_group, executor_list) {
+ if (group_iter == group) {
+ return MALI_TRUE;
+ }
+ }
+ }
+
+ /* group not in correct state */
+ return MALI_FALSE;
+}
+
+static void mali_executor_group_enable_internal(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+ /* Put into inactive state (== "lowest" enabled state) */
+ if (group == gp_group) {
+ MALI_DEBUG_ASSERT(EXEC_STATE_DISABLED == gp_group_state);
+ gp_group_state = EXEC_STATE_INACTIVE;
+ } else {
+ mali_executor_change_state_pp_physical(group,
+ &group_list_disabled,
+ &group_list_disabled_count,
+ &group_list_inactive,
+ &group_list_inactive_count);
+
+ ++num_physical_pp_cores_enabled;
+ MALI_DEBUG_PRINT(4, ("Enabling group id %d \n", group->pp_core->core_id));
+ }
+
+ if (MALI_GROUP_STATE_ACTIVE == mali_group_activate(group)) {
+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_power_is_on(group));
+
+ /* Move from inactive to idle */
+ if (group == gp_group) {
+ gp_group_state = EXEC_STATE_IDLE;
+ } else {
+ mali_executor_change_state_pp_physical(group,
+ &group_list_inactive,
+ &group_list_inactive_count,
+ &group_list_idle,
+ &group_list_idle_count);
+
+ if (mali_executor_has_virtual_group()) {
+ if (mali_executor_physical_rejoin_virtual(group)) {
+ mali_pm_update_async();
+ }
+ }
+ }
+ } else {
+ mali_pm_update_async();
+ }
+}
+
+static void mali_executor_group_disable_internal(struct mali_group *group)
+{
+ mali_bool working;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED));
+
+ working = mali_executor_group_is_in_state(group, EXEC_STATE_WORKING);
+ if (MALI_TRUE == working) {
+		/* Group will be disabled once it completes its current work;
+		 * when the virtual group completes, its child groups are also
+		 * checked for this flag. */
+ mali_group_set_disable_request(group, MALI_TRUE);
+ return;
+ }
+
+ /* Put into disabled state */
+ if (group == gp_group) {
+ /* GP group */
+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state);
+ gp_group_state = EXEC_STATE_DISABLED;
+ } else {
+ if (mali_group_is_in_virtual(group)) {
+		/* A child of the virtual group: remove this specific group from the virtual group. */
+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state);
+
+ mali_executor_set_state_pp_physical(group,
+ &group_list_disabled,
+ &group_list_disabled_count);
+
+ mali_group_remove_group(virtual_group, group);
+ mali_executor_disable_empty_virtual();
+ } else {
+ mali_executor_change_group_status_disabled(group);
+ }
+
+ --num_physical_pp_cores_enabled;
+ MALI_DEBUG_PRINT(4, ("Disabling group id %d \n", group->pp_core->core_id));
+ }
+
+ if (MALI_GROUP_STATE_INACTIVE != group->state) {
+ if (MALI_TRUE == mali_group_deactivate(group)) {
+ mali_pm_update_async();
+ }
+ }
+}
+
+static void mali_executor_notify_core_change(u32 num_cores)
+{
+ mali_bool done = MALI_FALSE;
+
+ if (mali_is_mali450() || mali_is_mali470()) {
+ return;
+ }
+
+ /*
+ * This function gets a bit complicated because we can't hold the session lock while
+ * allocating notification objects.
+ */
+ while (!done) {
+ u32 i;
+ u32 num_sessions_alloc;
+ u32 num_sessions_with_lock;
+ u32 used_notification_objects = 0;
+ _mali_osk_notification_t **notobjs;
+
+		/* Pre-allocate the number of notification objects we need right now (might change after the lock has been taken) */
+ num_sessions_alloc = mali_session_get_count();
+ if (0 == num_sessions_alloc) {
+ /* No sessions to report to */
+ return;
+ }
+
+ notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+ if (NULL == notobjs) {
+ MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n"));
+ /* there is probably no point in trying again, system must be really low on memory and probably unusable now anyway */
+ return;
+ }
+
+ for (i = 0; i < num_sessions_alloc; i++) {
+ notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE, sizeof(_mali_uk_pp_num_cores_changed_s));
+ if (NULL != notobjs[i]) {
+ _mali_uk_pp_num_cores_changed_s *data = notobjs[i]->result_buffer;
+ data->number_of_enabled_cores = num_cores;
+ } else {
+ MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure %u)\n", i));
+ }
+ }
+
+ mali_session_lock();
+
+ /* number of sessions will not change while we hold the lock */
+ num_sessions_with_lock = mali_session_get_count();
+
+ if (num_sessions_alloc >= num_sessions_with_lock) {
+			/* We have allocated enough notification objects for all current sessions */
+ struct mali_session_data *session, *tmp;
+ MALI_SESSION_FOREACH(session, tmp, link) {
+ MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+ if (NULL != notobjs[used_notification_objects]) {
+ mali_session_send_notification(session, notobjs[used_notification_objects]);
+ notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+ }
+ used_notification_objects++;
+ }
+ done = MALI_TRUE;
+ }
+
+ mali_session_unlock();
+
+ /* Delete any remaining/unused notification objects */
+ for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+ if (NULL != notobjs[used_notification_objects]) {
+ _mali_osk_notification_delete(notobjs[used_notification_objects]);
+ }
+ }
+
+ _mali_osk_free(notobjs);
+ }
+}
+
+static mali_bool mali_executor_core_scaling_is_done(void *data)
+{
+ u32 i;
+ u32 num_groups;
+ mali_bool ret = MALI_TRUE;
+
+ MALI_IGNORE(data);
+
+ mali_executor_lock();
+
+ num_groups = mali_group_get_glob_num_groups();
+
+ for (i = 0; i < num_groups; i++) {
+ struct mali_group *group = mali_group_get_glob_group(i);
+
+ if (NULL != group) {
+ if (MALI_TRUE == group->disable_requested && NULL != mali_group_get_pp_core(group)) {
+ ret = MALI_FALSE;
+ break;
+ }
+ }
+ }
+ mali_executor_unlock();
+
+ return ret;
+}
+
+static void mali_executor_wq_notify_core_change(void *arg)
+{
+ MALI_IGNORE(arg);
+
+ if (mali_is_mali450() || mali_is_mali470()) {
+ return;
+ }
+
+ _mali_osk_wait_queue_wait_event(executor_notify_core_change_wait_queue,
+ mali_executor_core_scaling_is_done, NULL);
+
+ mali_executor_notify_core_change(num_physical_pp_cores_enabled);
+}
+
+/**
+ * Clear all disable requests left over from the _last_ core scaling operation.
+ */
+static void mali_executor_core_scaling_reset(void)
+{
+ u32 i;
+ u32 num_groups;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ num_groups = mali_group_get_glob_num_groups();
+
+ for (i = 0; i < num_groups; i++) {
+ struct mali_group *group = mali_group_get_glob_group(i);
+
+ if (NULL != group) {
+ group->disable_requested = MALI_FALSE;
+ }
+ }
+
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ core_scaling_delay_up_mask[i] = 0;
+ }
+}
+
+static void mali_executor_core_scale(unsigned int target_core_nr)
+{
+ int current_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+ int target_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+ int i;
+
+ MALI_DEBUG_ASSERT(0 < target_core_nr);
+ MALI_DEBUG_ASSERT(num_physical_pp_cores_total >= target_core_nr);
+
+ mali_executor_lock();
+
+ if (target_core_nr < num_physical_pp_cores_enabled) {
+ MALI_DEBUG_PRINT(2, ("Requesting %d cores: disabling %d cores\n", target_core_nr, num_physical_pp_cores_enabled - target_core_nr));
+ } else {
+ MALI_DEBUG_PRINT(2, ("Requesting %d cores: enabling %d cores\n", target_core_nr, target_core_nr - num_physical_pp_cores_enabled));
+ }
+
+	/* When a new core scaling request comes in, drop the unfinished part of
+	 * the previous request. This is safe because everything is protected by
+	 * a single lock (the executor lock). */
+ mali_executor_core_scaling_reset();
+
+ mali_pm_get_best_power_cost_mask(num_physical_pp_cores_enabled, current_core_scaling_mask);
+ mali_pm_get_best_power_cost_mask(target_core_nr, target_core_scaling_mask);
+
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ target_core_scaling_mask[i] = target_core_scaling_mask[i] - current_core_scaling_mask[i];
+ MALI_DEBUG_PRINT(5, ("target_core_scaling_mask[%d] = %d\n", i, target_core_scaling_mask[i]));
+ }
+
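+	/* First pass: domains with a negative delta must give up cores; disable
+	 * enabled, non-virtual PP groups there until each delta reaches zero. */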
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ if (0 > target_core_scaling_mask[i]) {
+ struct mali_pm_domain *domain;
+
+ domain = mali_pm_domain_get_from_index(i);
+
+ /* Domain is valid and has pp cores */
+ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+ if (NULL != mali_group_get_pp_core(group) && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))
+ && (!mali_group_is_virtual(group))) {
+ mali_executor_group_disable_internal(group);
+ target_core_scaling_mask[i]++;
+ if ((0 == target_core_scaling_mask[i])) {
+ break;
+ }
+
+ }
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+		/*
+		 * If target_core_scaling_mask[i] is greater than 0, we need to
+		 * enable some PP cores in the domain with index i.
+		 */
+ if (0 < target_core_scaling_mask[i]) {
+ struct mali_pm_domain *domain;
+
+ if (num_physical_pp_cores_enabled >= target_core_nr) {
+ break;
+ }
+
+ domain = mali_pm_domain_get_from_index(i);
+
+ /* Domain is valid and has pp cores */
+ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+ if (NULL != mali_group_get_pp_core(group) && mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)
+ && (!mali_group_is_virtual(group))) {
+ mali_executor_group_enable_internal(group);
+ target_core_scaling_mask[i]--;
+
+ if ((0 == target_core_scaling_mask[i]) || num_physical_pp_cores_enabled == target_core_nr) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+	/*
+	 * Some PP cores may still not be enabled here, because the cores that
+	 * must be disabled first are still in the working state.
+	 */
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ if (0 < target_core_scaling_mask[i]) {
+ core_scaling_delay_up_mask[i] = target_core_scaling_mask[i];
+ }
+ }
+
+ mali_executor_schedule();
+ mali_executor_unlock();
+}
+
+static void mali_executor_core_scale_in_group_complete(struct mali_group *group)
+{
+ int num_pp_cores_disabled = 0;
+ int num_pp_cores_to_enable = 0;
+ int i;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_disable_requested(group));
+
+	/* Disable the child groups of the virtual group */
+ if (mali_group_is_virtual(group)) {
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ if (MALI_TRUE == mali_group_disable_requested(child)) {
+ mali_group_set_disable_request(child, MALI_FALSE);
+ mali_executor_group_disable_internal(child);
+ num_pp_cores_disabled++;
+ }
+ }
+ mali_group_set_disable_request(group, MALI_FALSE);
+ } else {
+ mali_executor_group_disable_internal(group);
+ mali_group_set_disable_request(group, MALI_FALSE);
+ if (NULL != mali_group_get_pp_core(group)) {
+ num_pp_cores_disabled++;
+ }
+ }
+
+ num_pp_cores_to_enable = num_pp_cores_disabled;
+
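+	/* Each core disabled above frees room to enable one of the cores whose
+	 * enabling was deferred in core_scaling_delay_up_mask. */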
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ if (0 < core_scaling_delay_up_mask[i]) {
+ struct mali_pm_domain *domain;
+
+ if (0 == num_pp_cores_to_enable) {
+ break;
+ }
+
+ domain = mali_pm_domain_get_from_index(i);
+
+ /* Domain is valid and has pp cores */
+ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) {
+ struct mali_group *disabled_group;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(disabled_group, temp, &domain->group_list, struct mali_group, pm_domain_list) {
+ if (NULL != mali_group_get_pp_core(disabled_group) && mali_executor_group_is_in_state(disabled_group, EXEC_STATE_DISABLED)) {
+ mali_executor_group_enable_internal(disabled_group);
+ core_scaling_delay_up_mask[i]--;
+ num_pp_cores_to_enable--;
+
+ if ((0 == core_scaling_delay_up_mask[i]) || 0 == num_pp_cores_to_enable) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ _mali_osk_wait_queue_wake_up(executor_notify_core_change_wait_queue);
+}
+
+static void mali_executor_change_group_status_disabled(struct mali_group *group)
+{
+ /* Physical PP group */
+ mali_bool idle;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ idle = mali_executor_group_is_in_state(group, EXEC_STATE_IDLE);
+ if (MALI_TRUE == idle) {
+ mali_executor_change_state_pp_physical(group,
+ &group_list_idle,
+ &group_list_idle_count,
+ &group_list_disabled,
+ &group_list_disabled_count);
+ } else {
+ mali_executor_change_state_pp_physical(group,
+ &group_list_inactive,
+ &group_list_inactive_count,
+ &group_list_disabled,
+ &group_list_disabled_count);
+ }
+}
+
+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group)
+{
+ mali_bool trigger_pm_update = MALI_FALSE;
+
+ if (group_list_idle_count > 0) {
+ if (mali_executor_has_virtual_group()) {
+
+ /* Rejoin virtual group on Mali-450 */
+
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp,
+ &group_list_idle,
+ struct mali_group, executor_list) {
+ if (mali_executor_physical_rejoin_virtual(
+ group)) {
+ trigger_pm_update = MALI_TRUE;
+ }
+ }
+ } else if (deactivate_idle_group) {
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ /* Deactivate group on Mali-300/400 */
+
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp,
+ &group_list_idle,
+ struct mali_group, executor_list) {
+ if (mali_group_deactivate(group)) {
+ trigger_pm_update = MALI_TRUE;
+ }
+
+ /* Move from idle to inactive */
+ mali_executor_change_state_pp_physical(group,
+ &group_list_idle,
+ &group_list_idle_count,
+ &group_list_inactive,
+ &group_list_inactive_count);
+ }
+ }
+ }
+
+ return trigger_pm_update;
+}
+
+void mali_executor_running_status_print(void)
+{
+ struct mali_group *group = NULL;
+ struct mali_group *temp = NULL;
+
+ MALI_PRINT(("GP running job: %p\n", gp_group->gp_running_job));
+ if ((gp_group->gp_core) && (gp_group->is_working)) {
+ mali_group_dump_status(gp_group);
+ }
+ MALI_PRINT(("Physical PP groups in WORKING state (count = %u):\n", group_list_working_count));
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) {
+ MALI_PRINT(("PP running job: %p, subjob %d \n", group->pp_running_job, group->pp_running_sub_job));
+ mali_group_dump_status(group);
+ }
+ MALI_PRINT(("Physical PP groups in INACTIVE state (count = %u):\n", group_list_inactive_count));
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) {
+ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+ }
+ MALI_PRINT(("Physical PP groups in IDLE state (count = %u):\n", group_list_idle_count));
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) {
+ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+ }
+ MALI_PRINT(("Physical PP groups in DISABLED state (count = %u):\n", group_list_disabled_count));
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) {
+ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off"));
+ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description));
+ }
+
+ if (mali_executor_has_virtual_group()) {
+ MALI_PRINT(("Virtual group running job: %p\n", virtual_group->pp_running_job));
+ MALI_PRINT(("Virtual group status: %d\n", virtual_group_state));
+ MALI_PRINT(("Virtual group->status: %d\n", virtual_group->state));
+ MALI_PRINT(("\tSW power: %s\n", virtual_group->power_is_on ? "On" : "Off"));
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &virtual_group->group_list,
+ struct mali_group, group_list) {
+ int i = 0;
+ MALI_PRINT(("\tchild group(%s) running job: %p\n", group->pp_core->hw_core.description, group->pp_running_job));
+ MALI_PRINT(("\tchild group(%s)->status: %d\n", group->pp_core->hw_core.description, group->state));
+ MALI_PRINT(("\tchild group(%s) SW power: %s\n", group->pp_core->hw_core.description, group->power_is_on ? "On" : "Off"));
+ if (group->pm_domain) {
+ MALI_PRINT(("\tPower domain: id %u\n", mali_pm_domain_get_id(group->pm_domain)));
+ MALI_PRINT(("\tMask:0x%04x \n", mali_pm_domain_get_mask(group->pm_domain)));
+ MALI_PRINT(("\tUse-count:%u \n", mali_pm_domain_get_use_count(group->pm_domain)));
+				MALI_PRINT(("\tCurrent power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain) & mali_pm_get_current_mask()) ? "On" : "Off"));
+				MALI_PRINT(("\tWanted power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain) & mali_pm_get_wanted_mask()) ? "On" : "Off"));
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (NULL != group->l2_cache_core[i]) {
+ struct mali_pm_domain *domain;
+ domain = mali_l2_cache_get_pm_domain(group->l2_cache_core[i]);
+ MALI_PRINT(("\t L2(index %d) group SW power: %s\n", i, group->l2_cache_core[i]->power_is_on ? "On" : "Off"));
+ if (domain) {
+ MALI_PRINT(("\tL2 Power domain: id %u\n", mali_pm_domain_get_id(domain)));
+ MALI_PRINT(("\tL2 Mask:0x%04x \n", mali_pm_domain_get_mask(domain)));
+ MALI_PRINT(("\tL2 Use-count:%u \n", mali_pm_domain_get_use_count(domain)));
+ MALI_PRINT(("\tL2 Current power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_current_mask()) ? "On" : "Off"));
+ MALI_PRINT(("\tL2 Wanted power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_wanted_mask()) ? "On" : "Off"));
+ }
+ }
+ }
+ }
+ if (EXEC_STATE_WORKING == virtual_group_state) {
+ mali_group_dump_status(virtual_group);
+ }
+ }
+}
+
+void mali_executor_status_dump(void)
+{
+ mali_executor_lock();
+ mali_scheduler_lock();
+
+ /* print schedule queue status */
+ mali_scheduler_gp_pp_job_queue_print();
+
+ mali_scheduler_unlock();
+ mali_executor_unlock();
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_executor.h b/drivers/gpu/arm/utgard/common/mali_executor.h
new file mode 100644
index 000000000000..a756d3f40c2a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_executor.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2012, 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_EXECUTOR_H__
+#define __MALI_EXECUTOR_H__
+
+#include "mali_osk.h"
+#include "mali_scheduler_types.h"
+#include "mali_kernel_common.h"
+
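+/* Scheduling hints honored by the executor; MALI_EXECUTOR_HINT_MAX must stay
+ * in sync with the number of entries in this enum. */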
+typedef enum {
+ MALI_EXECUTOR_HINT_GP_BOUND = 0
+#define MALI_EXECUTOR_HINT_MAX 1
+} mali_executor_hint;
+
+extern mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX];
+
+/* forward declare struct instead of using include */
+struct mali_session_data;
+struct mali_group;
+struct mali_pp_core;
+
+extern _mali_osk_spinlock_irq_t *mali_executor_lock_obj;
+
+#define MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+_mali_osk_errcode_t mali_executor_initialize(void);
+void mali_executor_terminate(void);
+
+void mali_executor_populate(void);
+void mali_executor_depopulate(void);
+
+void mali_executor_suspend(void);
+void mali_executor_resume(void);
+
+u32 mali_executor_get_num_cores_total(void);
+u32 mali_executor_get_num_cores_enabled(void);
+struct mali_pp_core *mali_executor_get_virtual_pp(void);
+struct mali_group *mali_executor_get_virtual_group(void);
+
+void mali_executor_zap_all_active(struct mali_session_data *session);
+
+/**
+ * Schedule GP and PP according to bitmask.
+ *
+ * @param mask A scheduling bitmask.
+ * @param deferred_schedule MALI_TRUE if schedule should be deferred, MALI_FALSE if not.
+ */
+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule);
+
+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, mali_bool in_upper_half);
+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, mali_bool in_upper_half);
+
+void mali_executor_group_oom(struct mali_group *group);
+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups);
+void mali_executor_group_power_down(struct mali_group *groups[], u32 num_groups);
+
+void mali_executor_abort_session(struct mali_session_data *session);
+
+void mali_executor_core_scaling_enable(void);
+void mali_executor_core_scaling_disable(void);
+mali_bool mali_executor_core_scaling_is_enabled(void);
+
+void mali_executor_group_enable(struct mali_group *group);
+void mali_executor_group_disable(struct mali_group *group);
+mali_bool mali_executor_group_is_disabled(struct mali_group *group);
+
+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override);
+
+#if MALI_STATE_TRACKING
+u32 mali_executor_dump_state(char *buf, u32 size);
+#endif
+
+MALI_STATIC_INLINE void mali_executor_hint_enable(mali_executor_hint hint)
+{
+ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+ mali_executor_hints[hint] = MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_executor_hint_disable(mali_executor_hint hint)
+{
+ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+ mali_executor_hints[hint] = MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_executor_hint_is_enabled(mali_executor_hint hint)
+{
+ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX);
+ return mali_executor_hints[hint];
+}
+
+void mali_executor_running_status_print(void);
+void mali_executor_status_dump(void);
+void mali_executor_lock(void);
+void mali_executor_unlock(void);
+#endif /* __MALI_EXECUTOR_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_gp.c b/drivers/gpu/arm/utgard/common/mali_gp.c
new file mode 100644
index 000000000000..a690781837e9
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_gp.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_gp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "regs/mali_gp_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+static struct mali_gp_core *mali_global_gp_core = NULL;
+
+/* Interrupt handlers */
+static void mali_gp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group)
+{
+ struct mali_gp_core *core = NULL;
+
+ MALI_DEBUG_ASSERT(NULL == mali_global_gp_core);
+ MALI_DEBUG_PRINT(2, ("Mali GP: Creating Mali GP core: %s\n", resource->description));
+
+ core = _mali_osk_malloc(sizeof(struct mali_gp_core));
+ if (NULL != core) {
+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALIGP2_REGISTER_ADDRESS_SPACE_SIZE)) {
+ _mali_osk_errcode_t ret;
+
+ ret = mali_gp_reset(core);
+
+ if (_MALI_OSK_ERR_OK == ret) {
+ ret = mali_group_add_gp_core(group, core);
+ if (_MALI_OSK_ERR_OK == ret) {
+ /* Setup IRQ handlers (which will do IRQ probing if needed) */
+ core->irq = _mali_osk_irq_init(resource->irq,
+ mali_group_upper_half_gp,
+ group,
+ mali_gp_irq_probe_trigger,
+ mali_gp_irq_probe_ack,
+ core,
+ resource->description);
+ if (NULL != core->irq) {
+ MALI_DEBUG_PRINT(4, ("Mali GP: set global gp core from 0x%08X to 0x%08X\n", mali_global_gp_core, core));
+ mali_global_gp_core = core;
+
+ return core;
+ } else {
+ MALI_PRINT_ERROR(("Mali GP: Failed to setup interrupt handlers for GP core %s\n", core->hw_core.description));
+ }
+ mali_group_remove_gp_core(group);
+ } else {
+ MALI_PRINT_ERROR(("Mali GP: Failed to add core %s to group\n", core->hw_core.description));
+ }
+ }
+ mali_hw_core_delete(&core->hw_core);
+ }
+
+ _mali_osk_free(core);
+ } else {
+ MALI_PRINT_ERROR(("Failed to allocate memory for GP core\n"));
+ }
+
+ return NULL;
+}
+
+void mali_gp_delete(struct mali_gp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ _mali_osk_irq_term(core->irq);
+ mali_hw_core_delete(&core->hw_core);
+ mali_global_gp_core = NULL;
+ _mali_osk_free(core);
+}
+
+void mali_gp_stop_bus(struct mali_gp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core)
+{
+ int i;
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ /* Send the stop bus command. */
+ mali_gp_stop_bus(core);
+
+ /* Wait for bus to be stopped */
+ for (i = 0; i < MALI_REG_POLL_COUNT_SLOW; i++) {
+ if (mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS) & MALIGP2_REG_VAL_STATUS_BUS_STOPPED) {
+ break;
+ }
+ }
+
+ if (MALI_REG_POLL_COUNT_SLOW == i) {
+ MALI_PRINT_ERROR(("Mali GP: Failed to stop bus on %s\n", core->hw_core.description));
+ return _MALI_OSK_ERR_FAULT;
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_gp_hard_reset(struct mali_gp_core *core)
+{
+ const u32 reset_wait_target_register = MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+ const u32 reset_invalid_value = 0xC0FFE000;
+ const u32 reset_check_value = 0xC01A0000;
+ const u32 reset_default_value = 0;
+ int i;
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+ MALI_DEBUG_PRINT(4, ("Mali GP: Hard reset of core %s\n", core->hw_core.description));
+
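+	/* Write a sentinel to a scratch register, issue the reset, then poll by
+	 * rewriting a check value until a readback matches, showing that the
+	 * core is accepting register writes again. */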
+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_RESET);
+
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+ if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+ break;
+ }
+ }
+
+ if (MALI_REG_POLL_COUNT_FAST == i) {
+ MALI_PRINT_ERROR(("Mali GP: The hard reset loop didn't work, unable to recover\n"));
+ }
+
+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_default_value); /* set it back to the default */
+ /* Re-enable interrupts */
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+}
+
+void mali_gp_reset_async(struct mali_gp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ MALI_DEBUG_PRINT(4, ("Mali GP: Reset of core %s\n", core->hw_core.description));
+
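+	/* Mask all IRQs and clear any stale reset-completed bit before issuing
+	 * the soft reset; mali_gp_reset_wait() polls for completion. */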
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALI400GP_REG_VAL_IRQ_RESET_COMPLETED);
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALI400GP_REG_VAL_CMD_SOFT_RESET);
+
+}
+
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core)
+{
+ int i;
+ u32 rawstat = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+ rawstat = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+ if (rawstat & MALI400GP_REG_VAL_IRQ_RESET_COMPLETED) {
+ break;
+ }
+ }
+
+ if (i == MALI_REG_POLL_COUNT_FAST) {
+ MALI_PRINT_ERROR(("Mali GP: Failed to reset core %s, rawstat: 0x%08x\n",
+ core->hw_core.description, rawstat));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Re-enable interrupts */
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL);
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core)
+{
+ mali_gp_reset_async(core);
+ return mali_gp_reset_wait(core);
+}
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+ u32 startcmd = 0;
+ u32 *frame_registers = mali_gp_job_get_frame_registers(job);
+ u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+ u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ if (mali_gp_job_has_vs_job(job)) {
+ startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_VS;
+ }
+
+ if (mali_gp_job_has_plbu_job(job)) {
+ startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_PLBU;
+ }
+
+ MALI_DEBUG_ASSERT(0 != startcmd);
+
+ mali_hw_core_register_write_array_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR, frame_registers, MALIGP2_NUM_REGS_FRAME);
+
+ if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+ }
+ if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE);
+ }
+
+ MALI_DEBUG_PRINT(3, ("Mali GP: Starting job (0x%08x) on core %s with command 0x%08X\n", job, core->hw_core.description, startcmd));
+
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+
+ /* Barrier to make sure the previous register write is finished */
+ _mali_osk_write_mem_barrier();
+
+ /* This is the command that starts the core.
+ *
+	 * Don't actually run the job if PROFILING_SKIP_GP_JOBS is set; just
+	 * force the core to assert the completion interrupt.
+ */
+#if !defined(PROFILING_SKIP_GP_JOBS)
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, startcmd);
+#else
+ {
+ u32 bits = 0;
+
+ if (mali_gp_job_has_vs_job(job))
+ bits = MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+ if (mali_gp_job_has_plbu_job(job))
+ bits |= MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+
+ mali_hw_core_register_write_relaxed(&core->hw_core,
+ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, bits);
+ }
+#endif
+
+ /* Barrier to make sure the previous register write is finished */
+ _mali_osk_write_mem_barrier();
+}
+
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr)
+{
+ u32 irq_readout;
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+
+ if (irq_readout & MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM) {
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | MALIGP2_REG_VAL_IRQ_HANG));
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); /* re-enable interrupts */
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR, start_addr);
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR, end_addr);
+
+ MALI_DEBUG_PRINT(3, ("Mali GP: Resuming job\n"));
+
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC);
+ _mali_osk_write_mem_barrier();
+ }
+ /*
+ * else: core has been reset between PLBU_OUT_OF_MEM interrupt and this new heap response.
+ * A timeout or a page fault on Mali-200 PP core can cause this behaviour.
+ */
+}
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+ return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void)
+{
+ return mali_global_gp_core;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_gp_irq_probe_trigger(void *data)
+{
+ struct mali_gp_core *core = (struct mali_gp_core *)data;
+
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED);
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+ _mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data)
+{
+ struct mali_gp_core *core = (struct mali_gp_core *)data;
+ u32 irq_readout;
+
+ irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT);
+ if (MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR & irq_readout) {
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR);
+ _mali_osk_mem_barrier();
+ return _MALI_OSK_ERR_OK;
+ }
+
+ return _MALI_OSK_ERR_FAULT;
+}
+
+/* ------ local helper functions below --------- */
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size)
+{
+ int n = 0;
+
+ n += _mali_osk_snprintf(buf + n, size - n, "\tGP: %s\n", core->hw_core.description);
+
+ return n;
+}
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job)
+{
+ u32 val0 = 0;
+ u32 val1 = 0;
+ u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job);
+ u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job);
+
+ if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+ val0 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+ mali_gp_job_set_perf_counter_value0(job, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ _mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C0, val0);
+ _mali_osk_profiling_record_global_counters(COUNTER_VP_0_C0, val0);
+#endif
+
+ }
+
+ if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+ val1 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+ mali_gp_job_set_perf_counter_value1(job, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ _mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C1, val1);
+ _mali_osk_profiling_record_global_counters(COUNTER_VP_0_C1, val1);
+#endif
+ }
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_gp.h b/drivers/gpu/arm/utgard/common/mali_gp.h
new file mode 100644
index 000000000000..8d5f69c23229
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_gp.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_H__
+#define __MALI_GP_H__
+
+#include "mali_osk.h"
+#include "mali_gp_job.h"
+#include "mali_hw_core.h"
+#include "regs/mali_gp_regs.h"
+
+struct mali_group;
+
+/**
+ * Definition of the GP core struct
+ * Used to track a GP core in the system.
+ */
+struct mali_gp_core {
+ struct mali_hw_core hw_core; /**< Common for all HW cores */
+ _mali_osk_irq_t *irq; /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_gp_initialize(void);
+void mali_gp_terminate(void);
+
+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group);
+void mali_gp_delete(struct mali_gp_core *core);
+
+void mali_gp_stop_bus(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core);
+void mali_gp_reset_async(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core);
+void mali_gp_hard_reset(struct mali_gp_core *core);
+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core);
+
+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job);
+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr);
+
+u32 mali_gp_core_get_version(struct mali_gp_core *core);
+
+struct mali_gp_core *mali_gp_get_global_gp_core(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size);
+#endif
+
+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job);
+
+MALI_STATIC_INLINE const char *mali_gp_core_description(struct mali_gp_core *core)
+{
+ return core->hw_core.description;
+}
+
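+/*
+ * Decode the masked interrupt status into a job result. The success
+ * cases below require an exact bit match, so a completion bit raised
+ * together with any error bit falls through; the OOM case tests only
+ * its single bit, so OOM is reported even if other bits (e.g. HANG)
+ * are raised alongside it.
+ */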
+MALI_STATIC_INLINE enum mali_interrupt_result mali_gp_get_interrupt_result(struct mali_gp_core *core)
+{
+ u32 stat_used = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT) &
+ MALIGP2_REG_VAL_IRQ_MASK_USED;
+
+ if (0 == stat_used) {
+ return MALI_INTERRUPT_RESULT_NONE;
+ } else if ((MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST |
+ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) == stat_used) {
+ return MALI_INTERRUPT_RESULT_SUCCESS;
+ } else if (MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST == stat_used) {
+ return MALI_INTERRUPT_RESULT_SUCCESS_VS;
+ } else if (MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST == stat_used) {
+ return MALI_INTERRUPT_RESULT_SUCCESS_PLBU;
+ } else if (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM & stat_used) {
+ return MALI_INTERRUPT_RESULT_OOM;
+ }
+
+ return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_get_rawstat(struct mali_gp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+ return mali_hw_core_register_read(&core->hw_core,
+ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_is_active(struct mali_gp_core *core)
+{
+ u32 status = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS);
+ return (status & MALIGP2_REG_VAL_STATUS_MASK_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_gp_mask_all_interrupts(struct mali_gp_core *core)
+{
+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_gp_enable_interrupts(struct mali_gp_core *core, enum mali_interrupt_result exceptions)
+{
+ /* Enable all interrupts, except those specified in exceptions */
+ u32 value;
+
+ if (MALI_INTERRUPT_RESULT_SUCCESS_VS == exceptions) {
+ /* Enable all used except VS complete */
+ value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+ ~MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST;
+ } else {
+ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_SUCCESS_PLBU ==
+ exceptions);
+ /* Enable all used except PLBU complete */
+ value = MALIGP2_REG_VAL_IRQ_MASK_USED &
+ ~MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST;
+ }
+
+ mali_hw_core_register_write(&core->hw_core,
+ MALIGP2_REG_ADDR_MGMT_INT_MASK,
+ value);
+}
+
+MALI_STATIC_INLINE u32 mali_gp_read_plbu_alloc_start_addr(struct mali_gp_core *core)
+{
+ return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR);
+}
+
+#endif /* __MALI_GP_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_gp_job.c b/drivers/gpu/arm/utgard/common/mali_gp_job.c
new file mode 100644
index 000000000000..adc30a3408a8
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_gp_job.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_gp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_defer_bind.h"
+
+static u32 gp_counter_src0 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 gp_counter_src1 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job);
+
+
+static int _mali_gp_add_varying_allocations(struct mali_session_data *session,
+ struct mali_gp_job *job,
+ u32 *alloc,
+ u32 num)
+{
+ int i = 0;
+ struct mali_gp_allocation_node *alloc_node;
+ mali_mem_allocation *mali_alloc = NULL;
+ struct mali_vma_node *mali_vma_node = NULL;
+
+ for (i = 0 ; i < num ; i++) {
+ MALI_DEBUG_ASSERT(alloc[i]);
+ alloc_node = _mali_osk_calloc(1, sizeof(struct mali_gp_allocation_node));
+ if (alloc_node) {
+ INIT_LIST_HEAD(&alloc_node->node);
+ /* find the mali allocation structure by virtual address */
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, alloc[i], 0);
+
+ if (likely(mali_vma_node)) {
+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+ MALI_DEBUG_ASSERT(alloc[i] == mali_vma_node->vm_node.start);
+ } else {
+ MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,can't find allocation %d by address =0x%x, num=%d\n", i, alloc[i], num));
+ MALI_DEBUG_ASSERT(0);
+ }
+ alloc_node->alloc = mali_alloc;
+ /* add to gp job varying alloc list*/
+ list_move(&alloc_node->node, &job->varying_alloc);
+ } else
+ goto fail;
+ }
+
+ return 0;
+fail:
+ MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,failed to alloc memory!\n"));
+ _mali_gp_del_varying_allocations(job);
+ return -1;
+}
+
+
+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job)
+{
+ struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+ list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+ list_del(&alloc_node->node);
+ kfree(alloc_node);
+ }
+ INIT_LIST_HEAD(&job->varying_alloc);
+}
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker)
+{
+ struct mali_gp_job *job;
+ u32 perf_counter_flag;
+ u32 __user *memory_list = NULL;
+ struct mali_gp_allocation_node *alloc_node, *tmp_node;
+
+ job = _mali_osk_calloc(1, sizeof(struct mali_gp_job));
+ if (NULL != job) {
+ job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_FINISHED, sizeof(_mali_uk_gp_job_finished_s));
+ if (NULL == job->finished_notification) {
+ goto fail3;
+ }
+
+ job->oom_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_STALLED, sizeof(_mali_uk_gp_job_suspended_s));
+ if (NULL == job->oom_notification) {
+ goto fail2;
+ }
+
+ if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) {
+ goto fail1;
+ }
+
+ perf_counter_flag = mali_gp_job_get_perf_counter_flag(job);
+
+ /* No counters came from user space, so pass the
+ * debugfs / DS-5 provided global ones to the job object */
+ if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+ (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+ mali_gp_job_set_perf_counter_src0(job, mali_gp_job_get_gp_counter_src0());
+ mali_gp_job_set_perf_counter_src1(job, mali_gp_job_get_gp_counter_src1());
+ }
+
+ _mali_osk_list_init(&job->list);
+ job->session = session;
+ job->id = id;
+ job->heap_base_addr = job->uargs.frame_registers[4];
+ job->heap_current_addr = job->uargs.frame_registers[4];
+ job->heap_grow_size = job->uargs.heap_grow_size;
+ job->perf_counter_value0 = 0;
+ job->perf_counter_value1 = 0;
+ job->pid = _mali_osk_get_pid();
+ job->tid = _mali_osk_get_tid();
+
+
+ INIT_LIST_HEAD(&job->varying_alloc);
+ INIT_LIST_HEAD(&job->vary_todo);
+ job->dmem = NULL;
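+ /*
+ * Deferred binding of varying memory happens in four steps:
+ * 1) copy the list of varying allocation addresses from user space,
+ * 2) resolve each address to its mali_mem_allocation,
+ * 3) prepare a backend bind entry per allocation (vary_todo),
+ * 4) pre-allocate pages and bind them (mali_mem_defer_bind) to hide
+ * memory latency at job start.
+ */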
+ /* add the varying allocation list */
+ if (uargs->varying_alloc_num) {
+ /* copy varying list from user space*/
+ job->varying_list = _mali_osk_calloc(1, sizeof(u32) * uargs->varying_alloc_num);
+ if (!job->varying_list) {
+ MALI_PRINT_ERROR(("Mali GP job: allocate varying_list failed varying_alloc_num = %d !\n", uargs->varying_alloc_num));
+ goto fail1;
+ }
+
+ memory_list = (u32 __user *)(uintptr_t)uargs->varying_alloc_list;
+
+ if (0 != _mali_osk_copy_from_user(job->varying_list, memory_list, sizeof(u32)*uargs->varying_alloc_num)) {
+ MALI_PRINT_ERROR(("Mali GP job: Failed to copy varying list from user space!\n"));
+ goto fail;
+ }
+
+ if (unlikely(_mali_gp_add_varying_allocations(session, job, job->varying_list,
+ uargs->varying_alloc_num))) {
+ MALI_PRINT_ERROR(("Mali GP job: _mali_gp_add_varying_allocations failed!\n"));
+ goto fail;
+ }
+
+ /* do preparation for each allocation */
+ list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) {
+ if (unlikely(_MALI_OSK_ERR_OK != mali_mem_defer_bind_allocation_prepare(alloc_node->alloc, &job->vary_todo))) {
+ MALI_PRINT_ERROR(("Mali GP job: mali_mem_defer_bind_allocation_prepare failed!\n"));
+ goto fail;
+ }
+ }
+
+ _mali_gp_del_varying_allocations(job);
+
+ /* bind varyings here to avoid memory latency issues */
+ {
+ struct mali_defer_mem_block dmem_block;
+
+ INIT_LIST_HEAD(&dmem_block.free_pages);
+ atomic_set(&dmem_block.num_free_pages, 0);
+
+ if (mali_mem_prepare_mem_for_job(job, &dmem_block)) {
+ MALI_PRINT_ERROR(("Mali GP job: mali_mem_prepare_mem_for_job failed!\n"));
+ goto fail;
+ }
+ if (_MALI_OSK_ERR_OK != mali_mem_defer_bind(job->uargs.varying_memsize / _MALI_OSK_MALI_PAGE_SIZE, job, &dmem_block)) {
+ MALI_PRINT_ERROR(("gp job create, mali_mem_defer_bind failed! GP %x fail!", job));
+ goto fail;
+ }
+ }
+
+ if (uargs->varying_memsize > MALI_UK_BIG_VARYING_SIZE) {
+ job->big_job = 1;
+ }
+ }
+ job->pp_tracker = pp_tracker;
+ if (NULL != job->pp_tracker) {
+ /* Take a reference on PP job's tracker that will be released when the GP
+ job is done. */
+ mali_timeline_system_tracker_get(session->timeline_system, pp_tracker);
+ }
+
+ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_GP, NULL, job);
+ mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+ return job;
+ } else {
+ MALI_PRINT_ERROR(("Mali GP job: _mali_osk_calloc failed!\n"));
+ return NULL;
+ }
+
+
+fail:
+ _mali_osk_free(job->varying_list);
+ /* Handle allocation failure: free all varying nodes */
+ {
+ struct mali_backend_bind_list *bkn, *bkn_tmp;
+ list_for_each_entry_safe(bkn, bkn_tmp, &job->vary_todo, node) {
+ list_del(&bkn->node);
+ _mali_osk_free(bkn);
+ }
+ }
+fail1:
+ _mali_osk_notification_delete(job->oom_notification);
+fail2:
+ _mali_osk_notification_delete(job->finished_notification);
+fail3:
+ _mali_osk_free(job);
+ return NULL;
+}
+
+void mali_gp_job_delete(struct mali_gp_job *job)
+{
+ struct mali_backend_bind_list *bkn, *bkn_tmp;
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT(NULL == job->pp_tracker);
+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+ _mali_osk_free(job->varying_list);
+
+ /* Free all remaining varying nodes */
+ list_for_each_entry_safe(bkn, bkn_tmp, &job->vary_todo, node) {
+ list_del(&bkn->node);
+ _mali_osk_free(bkn);
+ }
+
+ if (!list_empty(&job->vary_todo)) {
+ MALI_DEBUG_ASSERT(0);
+ }
+
+ mali_mem_defer_dmem_free(job);
+
+ /* de-allocate the pre-allocated notifications, if the job still owns them */
+ if (NULL != job->oom_notification) {
+ _mali_osk_notification_delete(job->oom_notification);
+ job->oom_notification = NULL;
+ }
+ if (NULL != job->finished_notification) {
+ _mali_osk_notification_delete(job->finished_notification);
+ job->finished_notification = NULL;
+ }
+
+ _mali_osk_free(job);
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list)
+{
+ struct mali_gp_job *iter;
+ struct mali_gp_job *tmp;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+ /* Find position in list/queue where job should be added. */
+ _MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+ struct mali_gp_job, list) {
+
+ /* A span is used to handle job ID wrapping. */
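+ /*
+ * Example (u32 arithmetic): if iter has ID 0xFFFFFFF0 and job has
+ * ID 0x10, the subtraction wraps to 0x20, which is below the span
+ * (assuming the span is much larger), so job is correctly ordered
+ * after iter.
+ */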
+ bool job_is_after = (mali_gp_job_get_id(job) -
+ mali_gp_job_get_id(iter)) <
+ MALI_SCHEDULER_JOB_ID_SPAN;
+
+ if (job_is_after) {
+ break;
+ }
+ }
+
+ _mali_osk_list_add(&job->list, &iter->list);
+}
+
+u32 mali_gp_job_get_gp_counter_src0(void)
+{
+ return gp_counter_src0;
+}
+
+void mali_gp_job_set_gp_counter_src0(u32 counter)
+{
+ gp_counter_src0 = counter;
+}
+
+u32 mali_gp_job_get_gp_counter_src1(void)
+{
+ return gp_counter_src1;
+}
+
+void mali_gp_job_set_gp_counter_src1(u32 counter)
+{
+ gp_counter_src1 = counter;
+}
+
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success)
+{
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ if (NULL != job->pp_tracker) {
+ schedule_mask |= mali_timeline_system_tracker_put(job->session->timeline_system, job->pp_tracker, MALI_FALSE == success);
+ job->pp_tracker = NULL;
+ }
+
+ return schedule_mask;
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_gp_job.h b/drivers/gpu/arm/utgard/common/mali_gp_job.h
new file mode 100644
index 000000000000..f249439c7155
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_gp_job.h
@@ -0,0 +1,325 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_GP_JOB_H__
+#define __MALI_GP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_timeline.h"
+#include "mali_scheduler_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#include "mali_timeline.h"
+
+struct mali_defer_mem;
+/**
+ * This structure represents a GP job
+ *
+ * The GP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these sub systems through-out
+ * its lifetime. Different part of the GP job struct is used by different
+ * subsystems. Accessor functions ensure that correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_gp_job {
+ /*
+ * These members are typically only set at creation,
+ * and only read later on.
+ * They do not require any lock protection.
+ */
+ _mali_uk_gp_start_job_s uargs; /**< Arguments from user space */
+ struct mali_session_data *session; /**< Session which submitted this job */
+ u32 pid; /**< Process ID of submitting process */
+ u32 tid; /**< Thread ID of submitting thread */
+ u32 id; /**< Identifier for this job in kernel space (sequential numbering) */
+ u32 cache_order; /**< Cache order used for L2 cache flushing (sequential numbering) */
+ struct mali_timeline_tracker tracker; /**< Timeline tracker for this job */
+ struct mali_timeline_tracker *pp_tracker; /**< Pointer to Timeline tracker for PP job that depends on this job. */
+ _mali_osk_notification_t *finished_notification; /**< Notification sent back to userspace on job complete */
+
+ /*
+ * These members are used by the scheduler,
+ * protected by scheduler lock
+ */
+ _mali_osk_list_t list; /**< Used to link jobs together in the scheduler queue */
+
+ /*
+ * These members are used by the executor and/or group,
+ * protected by executor lock
+ */
+ _mali_osk_notification_t *oom_notification; /**< Notification sent back to userspace on OOM */
+
+ /*
+ * Set by executor/group on job completion, read by scheduler when
+ * returning job to user. Hold executor lock when setting,
+ * no lock needed when reading
+ */
+ u32 heap_base_addr; /**< Holds the base Mali address of the memory handle used for the new heap */
+ u32 heap_current_addr; /**< Holds the current heap address when the job has completed */
+ u32 heap_grow_size; /**< Holds the heap grow size used on heap OOM */
+ u32 perf_counter_value0; /**< Value of performance counter 0 (to be returned to user space) */
+ u32 perf_counter_value1; /**< Value of performance counter 1 (to be returned to user space) */
+ struct mali_defer_mem *dmem; /**< Used by deferred binding to store dmem info */
+ struct list_head varying_alloc; /**< Holds the list of varying allocations */
+ u32 bind_flag; /**< Flag for deferred binding */
+ u32 *varying_list; /**< List of varying memory that needs deferred binding */
+ struct list_head vary_todo; /**< List of backend bind entries that need deferred binding */
+ u32 big_job; /**< Set if the GP job has a large varying output and may take a long time */
+};
+
+#define MALI_DEFER_BIND_MEMORY_PREPARED (0x1 << 0)
+#define MALI_DEFER_BIND_MEMORY_BINDED (0x1 << 2)
+
+struct mali_gp_allocation_node {
+ struct list_head node;
+ mali_mem_allocation *alloc;
+};
+
+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker);
+void mali_gp_job_delete(struct mali_gp_job *job);
+
+u32 mali_gp_job_get_gp_counter_src0(void);
+void mali_gp_job_set_gp_counter_src0(u32 counter);
+u32 mali_gp_job_get_gp_counter_src1(void);
+void mali_gp_job_set_gp_counter_src1(u32 counter);
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_id(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_cache_order(struct mali_gp_job *job,
+ u32 cache_order)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_cache_order(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_gp_job_get_user_id(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_frame_builder_id(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_flush_id(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_pid(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_tid(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_gp_job_get_frame_registers(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_gp_job_get_session(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_vs_job(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (job->uargs.frame_registers[0] != job->uargs.frame_registers[1]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_gp_job_has_plbu_job(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (job->uargs.frame_registers[2] != job->uargs.frame_registers[3]) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_current_heap_addr(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->heap_current_addr;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_current_heap_addr(struct mali_gp_job *job, u32 heap_addr)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ job->heap_current_addr = heap_addr;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_flag(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src0(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.perf_counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src1(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.perf_counter_src1;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value0(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->perf_counter_value0;
+}
+
+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value1(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->perf_counter_value1;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src0(struct mali_gp_job *job, u32 src)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ job->uargs.perf_counter_src0 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src1(struct mali_gp_job *job, u32 src)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ job->uargs.perf_counter_src1 = src;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value0(struct mali_gp_job *job, u32 value)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ job->perf_counter_value0 = value;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value1(struct mali_gp_job *job, u32 value)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ job->perf_counter_value1 = value;
+}
+
+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_gp_job_list_move(struct mali_gp_job *job,
+ _mali_osk_list_t *list)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+ _mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_gp_job_list_remove(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ _mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_gp_job_get_finished_notification(struct mali_gp_job *job)
+{
+ _mali_osk_notification_t *notification;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+ notification = job->finished_notification;
+ job->finished_notification = NULL;
+
+ return notification;
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *mali_gp_job_get_oom_notification(
+ struct mali_gp_job *job)
+{
+ _mali_osk_notification_t *notification;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT_POINTER(job->oom_notification);
+
+ notification = job->oom_notification;
+ job->oom_notification = NULL;
+
+ return notification;
+}
+
+MALI_STATIC_INLINE void mali_gp_job_set_oom_notification(
+ struct mali_gp_job *job,
+ _mali_osk_notification_t *notification)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(NULL == job->oom_notification);
+ job->oom_notification = notification;
+}
+
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_gp_job_get_tracker(
+ struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return &(job->tracker);
+}
+
+
+MALI_STATIC_INLINE u32 __user *mali_gp_job_get_timeline_point_ptr(
+ struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+/**
+ * Release reference on tracker for PP job that depends on this GP job.
+ *
+ * @note If GP job has a reference on tracker, this function MUST be called before the GP job is
+ * deleted.
+ *
+ * @param job GP job that is done.
+ * @param success MALI_TRUE if job completed successfully, MALI_FALSE if not.
+ * @return A scheduling bitmask indicating whether scheduling needs to be done.
+ */
+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success);
+
+#endif /* __MALI_GP_JOB_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_group.c b/drivers/gpu/arm/utgard/common/mali_group.c
new file mode 100644
index 000000000000..b4cd3a1fcd8c
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_group.c
@@ -0,0 +1,1816 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "mali_kernel_common.h"
+#include "mali_group.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_mmu.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_scheduler.h"
+#include "mali_osk_profiling.h"
+#include "mali_pm_domain.h"
+#include "mali_pm.h"
+#include "mali_executor.h"
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+
+#define MALI_MAX_NUM_DOMAIN_REFS (MALI_MAX_NUMBER_OF_GROUPS * 2)
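+/* Each group holds at most two domain references (one for the group
+ * itself and one for its L2 cache), hence the factor of two. */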
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num);
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+static struct mali_group *mali_global_groups[MALI_MAX_NUMBER_OF_GROUPS] = { NULL, };
+static u32 mali_global_num_groups = 0;
+
+/* SW timer for job execution */
+int mali_max_job_runtime = MALI_MAX_JOB_RUNTIME_DEFAULT;
+
+/* local helper functions */
+static void mali_group_bottom_half_mmu(void *data);
+static void mali_group_bottom_half_gp(void *data);
+static void mali_group_bottom_half_pp(void *data);
+static void mali_group_timeout(void *data);
+static void mali_group_out_of_memory(void *data);
+
+static void mali_group_reset_pp(struct mali_group *group);
+static void mali_group_reset_mmu(struct mali_group *group);
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session);
+static void mali_group_recovery_reset(struct mali_group *group);
+
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+ struct mali_dlbu_core *dlbu,
+ struct mali_bcast_unit *bcast,
+ u32 domain_index)
+{
+ struct mali_group *group = NULL;
+
+ if (mali_global_num_groups >= MALI_MAX_NUMBER_OF_GROUPS) {
+ MALI_PRINT_ERROR(("Mali group: Too many group objects created\n"));
+ return NULL;
+ }
+
+ group = _mali_osk_calloc(1, sizeof(struct mali_group));
+ if (NULL != group) {
+ group->timeout_timer = _mali_osk_timer_init();
+ if (NULL != group->timeout_timer) {
+ _mali_osk_timer_setcallback(group->timeout_timer, mali_group_timeout, (void *)group);
+
+ group->l2_cache_core[0] = core;
+ _mali_osk_list_init(&group->group_list);
+ _mali_osk_list_init(&group->executor_list);
+ _mali_osk_list_init(&group->pm_domain_list);
+ group->bcast_core = bcast;
+ group->dlbu_core = dlbu;
+
+ /* register this object as a part of the correct power domain */
+ if ((NULL != core) || (NULL != dlbu) || (NULL != bcast))
+ group->pm_domain = mali_pm_register_group(domain_index, group);
+
+ mali_global_groups[mali_global_num_groups] = group;
+ mali_global_num_groups++;
+
+ return group;
+ }
+ _mali_osk_free(group);
+ }
+
+ return NULL;
+}
+
+void mali_group_delete(struct mali_group *group)
+{
+ u32 i;
+
+ MALI_DEBUG_PRINT(4, ("Deleting group %s\n",
+ mali_group_core_description(group)));
+
+ MALI_DEBUG_ASSERT(NULL == group->parent_group);
+ MALI_DEBUG_ASSERT((MALI_GROUP_STATE_INACTIVE == group->state) || ((MALI_GROUP_STATE_ACTIVATION_PENDING == group->state)));
+
+ /* Delete the resources that this group owns */
+ if (NULL != group->gp_core) {
+ mali_gp_delete(group->gp_core);
+ }
+
+ if (NULL != group->pp_core) {
+ mali_pp_delete(group->pp_core);
+ }
+
+ if (NULL != group->mmu) {
+ mali_mmu_delete(group->mmu);
+ }
+
+ if (mali_group_is_virtual(group)) {
+ /* Remove all groups from virtual group */
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ child->parent_group = NULL;
+ mali_group_delete(child);
+ }
+
+ mali_dlbu_delete(group->dlbu_core);
+
+ if (NULL != group->bcast_core) {
+ mali_bcast_unit_delete(group->bcast_core);
+ }
+ }
+
+ for (i = 0; i < mali_global_num_groups; i++) {
+ if (mali_global_groups[i] == group) {
+ mali_global_groups[i] = NULL;
+ mali_global_num_groups--;
+
+ if (i != mali_global_num_groups) {
+ /* We removed a group from the middle of the array -- move the last
+ * group to the current position to close the gap */
+ mali_global_groups[i] = mali_global_groups[mali_global_num_groups];
+ mali_global_groups[mali_global_num_groups] = NULL;
+ }
+
+ break;
+ }
+ }
+
+ if (NULL != group->timeout_timer) {
+ _mali_osk_timer_del(group->timeout_timer);
+ _mali_osk_timer_term(group->timeout_timer);
+ }
+
+ if (NULL != group->bottom_half_work_mmu) {
+ _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+ }
+
+ if (NULL != group->bottom_half_work_gp) {
+ _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+ }
+
+ if (NULL != group->bottom_half_work_pp) {
+ _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+ }
+
+ _mali_osk_free(group);
+}
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, struct mali_mmu_core *mmu_core)
+{
+ /* This group object now owns the MMU core object */
+ group->mmu = mmu_core;
+ group->bottom_half_work_mmu = _mali_osk_wq_create_work(mali_group_bottom_half_mmu, group);
+ if (NULL == group->bottom_half_work_mmu) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_mmu_core(struct mali_group *group)
+{
+ /* This group object no longer owns the MMU core object */
+ group->mmu = NULL;
+ if (NULL != group->bottom_half_work_mmu) {
+ _mali_osk_wq_delete_work(group->bottom_half_work_mmu);
+ }
+}
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, struct mali_gp_core *gp_core)
+{
+ /* This group object now owns the GP core object */
+ group->gp_core = gp_core;
+ group->bottom_half_work_gp = _mali_osk_wq_create_work(mali_group_bottom_half_gp, group);
+ if (NULL == group->bottom_half_work_gp) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ group->oom_work_handler = _mali_osk_wq_create_work(mali_group_out_of_memory, group);
+ if (NULL == group->oom_work_handler) {
+ _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+ group->bottom_half_work_gp = NULL;
+ return _MALI_OSK_ERR_FAULT;
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_gp_core(struct mali_group *group)
+{
+ /* This group object no longer owns the GP core object */
+ group->gp_core = NULL;
+ if (NULL != group->bottom_half_work_gp) {
+ _mali_osk_wq_delete_work(group->bottom_half_work_gp);
+ }
+
+ if (NULL != group->oom_work_handler) {
+ _mali_osk_wq_delete_work(group->oom_work_handler);
+ }
+}
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, struct mali_pp_core *pp_core)
+{
+ /* This group object now owns the PP core object */
+ group->pp_core = pp_core;
+ group->bottom_half_work_pp = _mali_osk_wq_create_work(mali_group_bottom_half_pp, group);
+ if (NULL == group->bottom_half_work_pp) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_group_remove_pp_core(struct mali_group *group)
+{
+ /* This group object no longer owns the PP core object */
+ group->pp_core = NULL;
+ if (NULL != group->bottom_half_work_pp) {
+ _mali_osk_wq_delete_work(group->bottom_half_work_pp);
+ }
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ MALI_DEBUG_PRINT(4, ("Group: Activating group %s\n",
+ mali_group_core_description(group)));
+
+ if (MALI_GROUP_STATE_INACTIVE == group->state) {
+ /* Group is inactive, get PM refs in order to power up */
+
+ /*
+ * We'll take a maximum of 2 power domain references per group:
+ * one for the group itself, and one for its L2 cache.
+ */
+ struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+ struct mali_group *groups[MALI_MAX_NUM_DOMAIN_REFS];
+ u32 num_domains = 0;
+ mali_bool all_groups_on;
+
+ /* Deal with child groups first */
+ if (mali_group_is_virtual(group)) {
+ /*
+ * The virtual group might have 0, 1 or 2 L2s in
+ * its l2_cache_core array, but we ignore these and
+ * let the child groups take the needed L2 cache ref
+ * on behalf of the virtual group.
+ * In other words; The L2 refs are taken in pair with
+ * the physical group which the L2 is attached to.
+ */
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ /*
+ * Child group is inactive, get PM
+ * refs in order to power up.
+ */
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+ &group->group_list,
+ struct mali_group, group_list) {
+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE
+ == child->state);
+
+ child->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+ MALI_DEBUG_ASSERT_POINTER(
+ child->pm_domain);
+ domains[num_domains] = child->pm_domain;
+ groups[num_domains] = child;
+ num_domains++;
+
+ /*
+ * Take L2 domain ref for child group.
+ */
+ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS
+ > num_domains);
+ domains[num_domains] = mali_l2_cache_get_pm_domain(
+ child->l2_cache_core[0]);
+ groups[num_domains] = NULL;
+ MALI_DEBUG_ASSERT(NULL ==
+ child->l2_cache_core[1]);
+ num_domains++;
+ }
+ } else {
+ /* Take L2 domain ref for physical groups. */
+ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+ num_domains);
+
+ domains[num_domains] = mali_l2_cache_get_pm_domain(
+ group->l2_cache_core[0]);
+ groups[num_domains] = NULL;
+ MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+ num_domains++;
+ }
+
+ /* Do the group itself last (its dependencies first) */
+
+ group->state = MALI_GROUP_STATE_ACTIVATION_PENDING;
+
+ MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+ domains[num_domains] = group->pm_domain;
+ groups[num_domains] = group;
+ num_domains++;
+
+ all_groups_on = mali_pm_get_domain_refs(domains, groups,
+ num_domains);
+
+ /*
+ * Complete activation for the group, whether it is a
+ * virtual group or a physical group.
+ */
+ if (MALI_TRUE == all_groups_on) {
+
+ mali_group_set_active(group);
+ }
+ } else if (MALI_GROUP_STATE_ACTIVE == group->state) {
+ /* Already active */
+ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+ } else {
+ /*
+ * Activation already pending, group->power_is_on could
+ * be both true or false. We need to wait for power up
+ * notification anyway.
+ */
+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING
+ == group->state);
+ }
+
+ MALI_DEBUG_PRINT(4, ("Group: group %s activation result: %s\n",
+ mali_group_core_description(group),
+ MALI_GROUP_STATE_ACTIVE == group->state ?
+ "ACTIVE" : "PENDING"));
+
+ return group->state;
+}
+
+mali_bool mali_group_set_active(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING == group->state);
+ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+
+ MALI_DEBUG_PRINT(4, ("Group: Activation completed for %s\n",
+ mali_group_core_description(group)));
+
+ if (mali_group_is_virtual(group)) {
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+ struct mali_group, group_list) {
+ if (MALI_TRUE != child->power_is_on) {
+ return MALI_FALSE;
+ }
+
+ child->state = MALI_GROUP_STATE_ACTIVE;
+ }
+
+ mali_group_reset(group);
+ }
+
+ /* Go to ACTIVE state */
+ group->state = MALI_GROUP_STATE_ACTIVE;
+
+ return MALI_TRUE;
+}
+
+mali_bool mali_group_deactivate(struct mali_group *group)
+{
+ struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS];
+ u32 num_domains = 0;
+ mali_bool power_down = MALI_FALSE;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE != group->state);
+
+ MALI_DEBUG_PRINT(3, ("Group: Deactivating group %s\n",
+ mali_group_core_description(group)));
+
+ group->state = MALI_GROUP_STATE_INACTIVE;
+
+ MALI_DEBUG_ASSERT_POINTER(group->pm_domain);
+ domains[num_domains] = group->pm_domain;
+ num_domains++;
+
+ if (mali_group_is_virtual(group)) {
+ /* Release refs for all child groups */
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp,
+ &group->group_list,
+ struct mali_group, group_list) {
+ child->state = MALI_GROUP_STATE_INACTIVE;
+
+ MALI_DEBUG_ASSERT_POINTER(child->pm_domain);
+ domains[num_domains] = child->pm_domain;
+ num_domains++;
+
+ /* Release L2 cache domain for child groups */
+ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+ num_domains);
+ domains[num_domains] = mali_l2_cache_get_pm_domain(
+ child->l2_cache_core[0]);
+ MALI_DEBUG_ASSERT(NULL == child->l2_cache_core[1]);
+ num_domains++;
+ }
+
+ /*
+ * Must do mali_group_power_down() steps right here for
+ * virtual group, because virtual group itself is likely to
+ * stay powered on, however child groups are now very likely
+ * to be powered off (and thus lose their state).
+ */
+
+ mali_group_clear_session(group);
+ /*
+ * Disable the broadcast unit (clear its mask).
+ * This is needed in case the GPU isn't actually
+ * powered down at this point and groups are
+ * removed from an inactive virtual group.
+ * If not, then the broadcast unit will intercept
+ * their interrupts!
+ */
+ mali_bcast_disable(group->bcast_core);
+ } else {
+ /* Release L2 cache domain for physical groups */
+ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS >
+ num_domains);
+ domains[num_domains] = mali_l2_cache_get_pm_domain(
+ group->l2_cache_core[0]);
+ MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]);
+ num_domains++;
+ }
+
+ power_down = mali_pm_put_domain_refs(domains, num_domains);
+
+ return power_down;
+}
+
+void mali_group_power_up(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ MALI_DEBUG_PRINT(3, ("Group: Power up for %s\n",
+ mali_group_core_description(group)));
+
+ group->power_is_on = MALI_TRUE;
+
+ if (MALI_FALSE == mali_group_is_virtual(group)
+ && MALI_FALSE == mali_group_is_in_virtual(group)) {
+ mali_group_reset(group);
+ }
+
+ /*
+ * When we acquire only one physical group from the virtual group,
+ * we must remove the bcast & dlbu mask from the virtual group and
+ * reset the bcast and dlbu cores, even though some PP cores in the
+ * virtual group may not be powered on.
+ */
+ if (MALI_TRUE == mali_group_is_virtual(group)) {
+ mali_bcast_reset(group->bcast_core);
+ mali_dlbu_update_mask(group->dlbu_core);
+ }
+}
+
+void mali_group_power_down(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ MALI_DEBUG_PRINT(3, ("Group: Power down for %s\n",
+ mali_group_core_description(group)));
+
+ group->power_is_on = MALI_FALSE;
+
+ if (mali_group_is_virtual(group)) {
+ /*
+ * What we do for physical jobs in this function should
+ * already have been done in mali_group_deactivate()
+ * for virtual group.
+ */
+ MALI_DEBUG_ASSERT(NULL == group->session);
+ } else {
+ mali_group_clear_session(group);
+ }
+}
+
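+/* Debug-only helper: MALI_DEBUG_CODE is expected to compile its
+ * argument only into debug builds, so this printer does not exist
+ * in release builds. */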
+MALI_DEBUG_CODE(static void mali_group_print_virtual(struct mali_group *vgroup)
+{
+ u32 i;
+ struct mali_group *group;
+ struct mali_group *temp;
+
+ MALI_DEBUG_PRINT(4, ("Virtual group %s (%p)\n",
+ mali_group_core_description(vgroup),
+ vgroup));
+ MALI_DEBUG_PRINT(4, ("l2_cache_core[0] = %p, ref = %d\n", vgroup->l2_cache_core[0], vgroup->l2_cache_core_ref_count[0]));
+ MALI_DEBUG_PRINT(4, ("l2_cache_core[1] = %p, ref = %d\n", vgroup->l2_cache_core[1], vgroup->l2_cache_core_ref_count[1]));
+
+ i = 0;
+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &vgroup->group_list, struct mali_group, group_list) {
+ MALI_DEBUG_PRINT(4, ("[%d] %s (%p), l2_cache_core[0] = %p\n",
+ i, mali_group_core_description(group),
+ group, group->l2_cache_core[0]));
+ i++;
+ }
+})
+
+static void mali_group_dump_core_status(struct mali_group *group)
+{
+ u32 i;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT(NULL != group->gp_core || (NULL != group->pp_core && !mali_group_is_virtual(group)));
+
+ if (NULL != group->gp_core) {
+ MALI_PRINT(("Dump Group %s\n", group->gp_core->hw_core.description));
+
+ for (i = 0; i < 0xA8; i += 0x10) {
+ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->gp_core->hw_core, i),
+ mali_hw_core_register_read(&group->gp_core->hw_core, i + 4),
+ mali_hw_core_register_read(&group->gp_core->hw_core, i + 8),
+ mali_hw_core_register_read(&group->gp_core->hw_core, i + 12)));
+ }
+
+
+ } else {
+ MALI_PRINT(("Dump Group %s\n", group->pp_core->hw_core.description));
+
+ for (i = 0; i < 0x5c; i += 0x10) {
+ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+ }
+
+ /* Ignore some minor registers */
+ for (i = 0x1000; i < 0x1068; i += 0x10) {
+ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i),
+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 4),
+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 8),
+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 12)));
+ }
+ }
+
+ MALI_PRINT(("Dump Group MMU\n"));
+ for (i = 0; i < 0x24; i += 0x10) {
+ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->mmu->hw_core, i),
+ mali_hw_core_register_read(&group->mmu->hw_core, i + 4),
+ mali_hw_core_register_read(&group->mmu->hw_core, i + 8),
+ mali_hw_core_register_read(&group->mmu->hw_core, i + 12)));
+ }
+}
+
+
+/**
+ * @brief Dump group status
+ */
+void mali_group_dump_status(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ if (mali_group_is_virtual(group)) {
+ struct mali_group *group_c;
+ struct mali_group *temp;
+ _MALI_OSK_LIST_FOREACHENTRY(group_c, temp, &group->group_list, struct mali_group, group_list) {
+ mali_group_dump_core_status(group_c);
+ }
+ } else {
+ mali_group_dump_core_status(group);
+ }
+}
+
+/**
+ * @brief Add child group to virtual group parent
+ */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child)
+{
+ mali_bool found;
+ u32 i;
+
+ MALI_DEBUG_PRINT(3, ("Adding group %s to virtual group %s\n",
+ mali_group_core_description(child),
+ mali_group_core_description(parent)));
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+ MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+ MALI_DEBUG_ASSERT(NULL == child->parent_group);
+
+ _mali_osk_list_addtail(&child->group_list, &parent->group_list);
+
+ child->parent_group = parent;
+
+ MALI_DEBUG_ASSERT_POINTER(child->l2_cache_core[0]);
+
+ MALI_DEBUG_PRINT(4, ("parent->l2_cache_core: [0] = %p, [1] = %p\n", parent->l2_cache_core[0], parent->l2_cache_core[1]));
+ MALI_DEBUG_PRINT(4, ("child->l2_cache_core: [0] = %p, [1] = %p\n", child->l2_cache_core[0], child->l2_cache_core[1]));
+
+ /* Keep track of the L2 cache cores of child groups */
+ found = MALI_FALSE;
+ for (i = 0; i < 2; i++) {
+ if (parent->l2_cache_core[i] == child->l2_cache_core[0]) {
+ MALI_DEBUG_ASSERT(parent->l2_cache_core_ref_count[i] > 0);
+ parent->l2_cache_core_ref_count[i]++;
+ found = MALI_TRUE;
+ }
+ }
+
+ if (!found) {
+ /* First time we see this L2 cache, add it to our list */
+ i = (NULL == parent->l2_cache_core[0]) ? 0 : 1;
+
+ MALI_DEBUG_PRINT(4, ("First time we see l2_cache %p. Adding to [%d] = %p\n", child->l2_cache_core[0], i, parent->l2_cache_core[i]));
+
+ MALI_DEBUG_ASSERT(NULL == parent->l2_cache_core[i]);
+
+ parent->l2_cache_core[i] = child->l2_cache_core[0];
+ parent->l2_cache_core_ref_count[i]++;
+ }
+
+ /* Update Broadcast Unit and DLBU */
+ mali_bcast_add_group(parent->bcast_core, child);
+ mali_dlbu_add_group(parent->dlbu_core, child);
+
+ if (MALI_TRUE == parent->power_is_on) {
+ mali_bcast_reset(parent->bcast_core);
+ mali_dlbu_update_mask(parent->dlbu_core);
+ }
+
+ if (MALI_TRUE == child->power_is_on) {
+ if (NULL == parent->session) {
+ if (NULL != child->session) {
+ /*
+ * Parent has no session, so clear
+ * child session as well.
+ */
+ mali_mmu_activate_empty_page_directory(child->mmu);
+ }
+ } else {
+ if (parent->session == child->session) {
+ /* We already have same session as parent,
+ * so a simple zap should be enough.
+ */
+ mali_mmu_zap_tlb(child->mmu);
+ } else {
+ /*
+ * Parent has a different session, so we must
+ * switch to that sessions page table
+ */
+ mali_mmu_activate_page_directory(child->mmu, mali_session_get_page_directory(parent->session));
+ }
+
+ /* It is the parent which keeps the session from now on */
+ child->session = NULL;
+ }
+ } else {
+ /* should have been cleared when child was powered down */
+ MALI_DEBUG_ASSERT(NULL == child->session);
+ }
+
+ /* Start job on child when parent is active */
+ if (NULL != parent->pp_running_job) {
+ struct mali_pp_job *job = parent->pp_running_job;
+
+ MALI_DEBUG_PRINT(3, ("Group %x joining running job %d on virtual group %x\n",
+ child, mali_pp_job_get_id(job), parent));
+
+ /* Only allowed to add active child to an active parent */
+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == parent->state);
+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == child->state);
+
+ mali_pp_job_start(child->pp_core, job, mali_pp_core_get_id(child->pp_core), MALI_TRUE);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+ trace_gpu_sched_switch(
+ mali_pp_core_description(child->pp_core),
+ sched_clock(), mali_pp_job_get_tid(job),
+ 0, mali_pp_job_get_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+ }
+
+ MALI_DEBUG_CODE(mali_group_print_virtual(parent);)
+}
+
+/**
+ * @brief Remove child group from virtual group parent
+ */
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child)
+{
+ u32 i;
+
+ MALI_DEBUG_PRINT(3, ("Removing group %s from virtual group %s\n",
+ mali_group_core_description(child),
+ mali_group_core_description(parent)));
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+ MALI_DEBUG_ASSERT(!mali_group_is_virtual(child));
+ MALI_DEBUG_ASSERT(parent == child->parent_group);
+
+ /* Update Broadcast Unit and DLBU */
+ mali_bcast_remove_group(parent->bcast_core, child);
+ mali_dlbu_remove_group(parent->dlbu_core, child);
+
+ if (MALI_TRUE == parent->power_is_on) {
+ mali_bcast_reset(parent->bcast_core);
+ mali_dlbu_update_mask(parent->dlbu_core);
+ }
+
+ child->session = parent->session;
+ child->parent_group = NULL;
+
+ _mali_osk_list_delinit(&child->group_list);
+ if (_mali_osk_list_empty(&parent->group_list)) {
+ parent->session = NULL;
+ }
+
+ /* Keep track of the L2 cache cores of child groups */
+ i = (child->l2_cache_core[0] == parent->l2_cache_core[0]) ? 0 : 1;
+
+ MALI_DEBUG_ASSERT(child->l2_cache_core[0] == parent->l2_cache_core[i]);
+
+ parent->l2_cache_core_ref_count[i]--;
+ if (parent->l2_cache_core_ref_count[i] == 0) {
+ parent->l2_cache_core[i] = NULL;
+ }
+
+ MALI_DEBUG_CODE(mali_group_print_virtual(parent));
+}
+
+struct mali_group *mali_group_acquire_group(struct mali_group *parent)
+{
+ struct mali_group *child = NULL;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent));
+
+ if (!_mali_osk_list_empty(&parent->group_list)) {
+ child = _MALI_OSK_LIST_ENTRY(parent->group_list.prev, struct mali_group, group_list);
+ mali_group_remove_group(parent, child);
+ }
+
+ if (NULL != child) {
+ if (MALI_GROUP_STATE_ACTIVE != parent->state
+ && MALI_TRUE == child->power_is_on) {
+ mali_group_reset(child);
+ }
+ }
+
+ return child;
+}
+
+void mali_group_reset(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT(NULL == group->gp_running_job);
+ MALI_DEBUG_ASSERT(NULL == group->pp_running_job);
+ MALI_DEBUG_ASSERT(NULL == group->session);
+
+ MALI_DEBUG_PRINT(3, ("Group: reset of %s\n",
+ mali_group_core_description(group)));
+
+ if (NULL != group->dlbu_core) {
+ mali_dlbu_reset(group->dlbu_core);
+ }
+
+ if (NULL != group->bcast_core) {
+ mali_bcast_reset(group->bcast_core);
+ }
+
+ MALI_DEBUG_ASSERT(NULL != group->mmu);
+ mali_group_reset_mmu(group);
+
+ if (NULL != group->gp_core) {
+ MALI_DEBUG_ASSERT(NULL == group->pp_core);
+ mali_gp_reset(group->gp_core);
+ } else {
+ MALI_DEBUG_ASSERT(NULL != group->pp_core);
+ mali_group_reset_pp(group);
+ }
+}
+
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job)
+{
+ struct mali_session_data *session;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n",
+ job,
+ mali_group_core_description(group)));
+
+ session = mali_gp_job_get_session(job);
+
+ MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+ mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_gp_job_get_cache_order(job));
+
+ mali_group_activate_page_directory(group, session);
+
+ mali_gp_job_start(group->gp_core, job);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0) |
+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+ mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job), 0, 0, 0);
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+ mali_gp_job_get_pid(job), mali_gp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ trace_mali_core_active(mali_gp_job_get_pid(job), 1 /* active */, 1 /* GP */, 0 /* core */,
+ mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+ mali_group_report_l2_cache_counters_per_core(group, 0);
+ }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+ trace_gpu_sched_switch(mali_gp_core_description(group->gp_core),
+ sched_clock(), mali_gp_job_get_tid(job),
+ 0, mali_gp_job_get_id(job));
+#endif
+
+ group->gp_running_job = job;
+ group->is_working = MALI_TRUE;
+
+ /* Setup SW timer and record start time */
+ group->start_time = _mali_osk_time_tickcount();
+ _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+ MALI_DEBUG_PRINT(4, ("Group: Started GP job 0x%08X on group %s at %u\n",
+ job,
+ mali_group_core_description(group),
+ group->start_time));
+}
+
+/* Used to set all the registers except the frame renderer list address and the fragment shader stack address.
+ * The caller must therefore set these two registers properly before calling this function.
+ */
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job)
+{
+ struct mali_session_data *session;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ MALI_DEBUG_PRINT(3, ("Group: Starting PP job 0x%08X part %u/%u on group %s\n",
+ job, sub_job + 1,
+ mali_pp_job_get_sub_job_count(job),
+ mali_group_core_description(group)));
+
+ session = mali_pp_job_get_session(job);
+
+ if (NULL != group->l2_cache_core[0]) {
+ mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_pp_job_get_cache_order(job));
+ }
+
+ if (NULL != group->l2_cache_core[1]) {
+ mali_l2_cache_invalidate_conditional(group->l2_cache_core[1], mali_pp_job_get_cache_order(job));
+ }
+
+ mali_group_activate_page_directory(group, session);
+
+ if (mali_group_is_virtual(group)) {
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+ /* Configure DLBU for the job */
+ mali_dlbu_config_job(group->dlbu_core, job);
+
+ /* Write stack address for each child group */
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ mali_pp_write_addr_stack(child->pp_core, job);
+ }
+
+ mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+ } else {
+ mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE);
+ }
+
+ /* If the group is virtual, loop through the physical groups that belong
+ * to it and emit the profiling events for their cores as virtual */
+ if (MALI_TRUE == mali_group_is_virtual(group)) {
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+ }
+
+#if defined(CONFIG_MALI400_PROFILING)
+ if (0 != group->l2_cache_core_ref_count[0]) {
+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+ }
+ }
+ if (0 != group->l2_cache_core_ref_count[1]) {
+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+ }
+ }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+
+ } else { /* group is physical - call profiling events for physical cores */
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH,
+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job));
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+ }
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
+ }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+ trace_gpu_sched_switch(mali_pp_core_description(group->pp_core),
+ sched_clock(), mali_pp_job_get_tid(job),
+ 0, mali_pp_job_get_id(job));
+#endif
+
+ group->pp_running_job = job;
+ group->pp_running_sub_job = sub_job;
+ group->is_working = MALI_TRUE;
+
+ /* Setup SW timer and record start time */
+ group->start_time = _mali_osk_time_tickcount();
+ _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime));
+
+ MALI_DEBUG_PRINT(4, ("Group: Started PP job 0x%08X part %u/%u on group %s at %u\n",
+ job, sub_job + 1,
+ mali_pp_job_get_sub_job_count(job),
+ mali_group_core_description(group),
+ group->start_time));
+
+}
+
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr)
+{
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]);
+ mali_l2_cache_invalidate(group->l2_cache_core[0]);
+
+ mali_mmu_zap_tlb_without_stall(group->mmu);
+
+ mali_gp_resume_with_new_heap(group->gp_core, start_addr, end_addr);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+ 0, 0, 0, 0, 0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 1 /* active */, 1 /* GP */, 0 /* core */,
+ mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+}
+
+static void mali_group_reset_mmu(struct mali_group *group)
+{
+ struct mali_group *child;
+ struct mali_group *temp;
+ _mali_osk_errcode_t err;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ if (!mali_group_is_virtual(group)) {
+ /* This is a physical group -- simply reset the MMU and wait for
+ * the reset to complete. */
+ err = mali_mmu_reset(group->mmu);
+ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+ } else { /* virtual group */
+ /* Loop through all members of this virtual group and reset
+ * each of their MMUs in turn.
+ */
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ err = mali_mmu_reset(child->mmu);
+ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+ }
+ }
+}
+
+static void mali_group_reset_pp(struct mali_group *group)
+{
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ mali_pp_reset_async(group->pp_core);
+
+ if (!mali_group_is_virtual(group) || NULL == group->pp_running_job) {
+ /* This is a physical group or an idle virtual group -- simply wait for
+ * the reset to complete. */
+ mali_pp_reset_wait(group->pp_core);
+ } else {
+ /* Loop through all members of this virtual group and wait until they
+ * are done resetting.
+ */
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ mali_pp_reset_wait(child->pp_core);
+ }
+ }
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job)
+{
+ struct mali_pp_job *pp_job_to_return;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ MALI_DEBUG_ASSERT_POINTER(group->pp_running_job);
+ MALI_DEBUG_ASSERT_POINTER(sub_job);
+ MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+ /* Stop/clear the timeout timer. */
+ _mali_osk_timer_del_async(group->timeout_timer);
+
+ if (NULL != group->pp_running_job) {
+
+ /* Deal with HW counters and profiling */
+
+ if (MALI_TRUE == mali_group_is_virtual(group)) {
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ /* update performance counters from each physical pp core within this virtual group */
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ mali_pp_update_performance_counters(group->pp_core, child->pp_core, group->pp_running_job, mali_pp_core_get_id(child->pp_core));
+ }
+
+#if defined(CONFIG_MALI400_PROFILING)
+ /* send profiling data per physical core */
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) |
+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL,
+ mali_pp_job_get_perf_counter_value0(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+ mali_pp_job_get_perf_counter_value1(group->pp_running_job, mali_pp_core_get_id(child->pp_core)),
+ mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+ 0, 0);
+
+ trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+ 0 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core),
+ mali_pp_job_get_frame_builder_id(group->pp_running_job),
+ mali_pp_job_get_flush_id(group->pp_running_job));
+ }
+ if (0 != group->l2_cache_core_ref_count[0]) {
+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+ }
+ }
+ if (0 != group->l2_cache_core_ref_count[1]) {
+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) &&
+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) {
+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1]));
+ }
+ }
+
+#endif
+ } else {
+ /* update performance counters for a physical group's pp core */
+ mali_pp_update_performance_counters(group->pp_core, group->pp_core, group->pp_running_job, group->pp_running_sub_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) |
+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL,
+ mali_pp_job_get_perf_counter_value0(group->pp_running_job, group->pp_running_sub_job),
+ mali_pp_job_get_perf_counter_value1(group->pp_running_job, group->pp_running_sub_job),
+ mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8),
+ 0, 0);
+
+ trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job),
+ 0 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core),
+ mali_pp_job_get_frame_builder_id(group->pp_running_job),
+ mali_pp_job_get_flush_id(group->pp_running_job));
+
+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0]));
+ }
+#endif
+ }
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+ /* This is the PP completion path, so report the PP core (the GP
+ * core is NULL for a PP group). */
+ trace_gpu_sched_switch(
+ mali_pp_core_description(group->pp_core),
+ sched_clock(), 0, 0, 0);
+#endif
+
+ }
+
+ if (success) {
+ /* Only do soft reset for successful jobs, a full recovery
+ * reset will be done for failed jobs. */
+ mali_pp_reset_async(group->pp_core);
+ }
+
+ pp_job_to_return = group->pp_running_job;
+ group->pp_running_job = NULL;
+ group->is_working = MALI_FALSE;
+ *sub_job = group->pp_running_sub_job;
+
+ if (!success) {
+ MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+ mali_group_recovery_reset(group);
+ } else if (_MALI_OSK_ERR_OK != mali_pp_reset_wait(group->pp_core)) {
+ MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+ mali_group_recovery_reset(group);
+ }
+
+ return pp_job_to_return;
+}
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success)
+{
+ struct mali_gp_job *gp_job_to_return;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_running_job);
+ MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working);
+
+ /* Stop/clear the timeout timer. */
+ _mali_osk_timer_del_async(group->timeout_timer);
+
+ if (NULL != group->gp_running_job) {
+ mali_gp_update_performance_counters(group->gp_core, group->gp_running_job);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+ mali_gp_job_get_perf_counter_value0(group->gp_running_job),
+ mali_gp_job_get_perf_counter_value1(group->gp_running_job),
+ mali_gp_job_get_perf_counter_src0(group->gp_running_job) | (mali_gp_job_get_perf_counter_src1(group->gp_running_job) << 8),
+ 0, 0);
+
+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) &&
+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) {
+ mali_group_report_l2_cache_counters_per_core(group, 0);
+ }
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+ /* This is the GP completion path, so report the GP core (the PP
+ * core is NULL for a GP group). */
+ trace_gpu_sched_switch(
+ mali_gp_core_description(group->gp_core),
+ sched_clock(), 0, 0, 0);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+ trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 0 /* active */, 1 /* GP */, 0 /* core */,
+ mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job));
+#endif
+
+ mali_gp_job_set_current_heap_addr(group->gp_running_job,
+ mali_gp_read_plbu_alloc_start_addr(group->gp_core));
+ }
+
+ if (success) {
+ /* Only do soft reset for successful jobs, a full recovery
+ * reset will be done for failed jobs. */
+ mali_gp_reset_async(group->gp_core);
+ }
+
+ gp_job_to_return = group->gp_running_job;
+ group->gp_running_job = NULL;
+ group->is_working = MALI_FALSE;
+
+ if (!success) {
+ MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n"));
+ mali_group_recovery_reset(group);
+ } else if (_MALI_OSK_ERR_OK != mali_gp_reset_wait(group->gp_core)) {
+ MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n"));
+ mali_group_recovery_reset(group);
+ }
+
+ return gp_job_to_return;
+}
+
+struct mali_group *mali_group_get_glob_group(u32 index)
+{
+ if (mali_global_num_groups > index) {
+ return mali_global_groups[index];
+ }
+
+ return NULL;
+}
+
+u32 mali_group_get_glob_num_groups(void)
+{
+ return mali_global_num_groups;
+}
+
+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session)
+{
+ MALI_DEBUG_PRINT(5, ("Mali group: Activating page directory 0x%08X from session 0x%08X on group %s\n",
+ mali_session_get_page_directory(session), session,
+ mali_group_core_description(group)));
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ if (group->session != session) {
+ /* Different session than last time, so we need to do some work */
+ MALI_DEBUG_PRINT(5, ("Mali group: Activate session: %08x previous: %08x on group %s\n",
+ session, group->session,
+ mali_group_core_description(group)));
+ mali_mmu_activate_page_directory(group->mmu, mali_session_get_page_directory(session));
+ group->session = session;
+ } else {
+ /* Same session as last time, so no work required */
+ MALI_DEBUG_PRINT(4, ("Mali group: Activate existing page directory 0x%08X on group %s\n",
+ session->page_directory,
+ mali_group_core_description(group)));
+ mali_mmu_zap_tlb_without_stall(group->mmu);
+ }
+}
+
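+/* Full recovery of a group after a job failure or a failed soft reset: stop
+ * the core's bus, flush the MMU fault state, hard reset the core(s) (for a
+ * virtual group, each member core is reset with the broadcast unit
+ * disabled), then reset the MMU and drop the active session.
+ */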
+static void mali_group_recovery_reset(struct mali_group *group)
+{
+ _mali_osk_errcode_t err;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ /* Stop cores, bus stop */
+ if (NULL != group->pp_core) {
+ mali_pp_stop_bus(group->pp_core);
+ } else {
+ mali_gp_stop_bus(group->gp_core);
+ }
+
+ /* Flush MMU and clear page fault (if any) */
+ mali_mmu_activate_fault_flush_page_directory(group->mmu);
+ mali_mmu_page_fault_done(group->mmu);
+
+ /* Wait for cores to stop bus, then do a hard reset on them */
+ if (NULL != group->pp_core) {
+ if (mali_group_is_virtual(group)) {
+ struct mali_group *child, *temp;
+
+ /* Disable the broadcast unit while we do reset directly on the member cores. */
+ mali_bcast_disable(group->bcast_core);
+
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) {
+ mali_pp_stop_bus_wait(child->pp_core);
+ mali_pp_hard_reset(child->pp_core);
+ }
+
+ mali_bcast_enable(group->bcast_core);
+ } else {
+ mali_pp_stop_bus_wait(group->pp_core);
+ mali_pp_hard_reset(group->pp_core);
+ }
+ } else {
+ mali_gp_stop_bus_wait(group->gp_core);
+ mali_gp_hard_reset(group->gp_core);
+ }
+
+ /* Reset MMU */
+ err = mali_mmu_reset(group->mmu);
+ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err);
+ MALI_IGNORE(err);
+
+ group->session = NULL;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size)
+{
+ int n = 0;
+ int i;
+ struct mali_group *child;
+ struct mali_group *temp;
+
+ if (mali_group_is_virtual(group)) {
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Virtual PP Group: %p\n", group);
+ } else if (mali_group_is_in_virtual(group)) {
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Child PP Group: %p\n", group);
+ } else if (NULL != group->pp_core) {
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "Physical PP Group: %p\n", group);
+ } else {
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "GP Group: %p\n", group);
+ }
+
+ switch (group->state) {
+ case MALI_GROUP_STATE_INACTIVE:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tstate: INACTIVE\n");
+ break;
+ case MALI_GROUP_STATE_ACTIVATION_PENDING:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tstate: ACTIVATION_PENDING\n");
+ break;
+ case MALI_GROUP_STATE_ACTIVE:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tstate: ACTIVE\n");
+ break;
+ default:
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tstate: UNKNOWN (%d)\n", group->state);
+ MALI_DEBUG_ASSERT(0);
+ break;
+ }
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tSW power: %s\n",
+ group->power_is_on ? "On" : "Off");
+
+ n += mali_pm_dump_state_domain(group->pm_domain, buf + n, size - n);
+
+ for (i = 0; i < 2; i++) {
+ if (NULL != group->l2_cache_core[i]) {
+ struct mali_pm_domain *domain;
+ domain = mali_l2_cache_get_pm_domain(
+ group->l2_cache_core[i]);
+ n += mali_pm_dump_state_domain(domain,
+ buf + n, size - n);
+ }
+ }
+
+ if (group->gp_core) {
+ n += mali_gp_dump_state(group->gp_core, buf + n, size - n);
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tGP running job: %p\n", group->gp_running_job);
+ }
+
+ if (group->pp_core) {
+ n += mali_pp_dump_state(group->pp_core, buf + n, size - n);
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tPP running job: %p, subjob %d\n",
+ group->pp_running_job,
+ group->pp_running_sub_job);
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list,
+ struct mali_group, group_list) {
+ n += mali_group_dump_state(child, buf + n, size - n);
+ }
+
+ return n;
+}
+#endif
+
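+/* The interrupt handlers below come in upper half/bottom half pairs: the
+ * upper half runs in interrupt context (hence the profiling events carry no
+ * pid/tid) and hands the interrupt to the executor, which may defer further
+ * work to the matching bottom half on a workqueue via the
+ * mali_group_schedule_bottom_half_*() helpers. The MALI_TRUE/MALI_FALSE
+ * argument to mali_executor_interrupt_*() tells the executor which of the
+ * two contexts it is called from.
+ */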
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data)
+{
+ struct mali_group *group = (struct mali_group *)data;
+ _mali_osk_errcode_t ret;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_lock();
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+ if (NULL != group->gp_core) {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+ 0, 0, /* No pid and tid for interrupt handler */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+ mali_mmu_get_rawstat(group->mmu), 0);
+ } else {
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+ 0, 0, /* No pid and tid for interrupt handler */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+ mali_pp_core_get_id(group->pp_core)),
+ mali_mmu_get_rawstat(group->mmu), 0);
+ }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_unlock();
+#endif
+#endif
+
+ ret = mali_executor_interrupt_mmu(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_lock();
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+
+ if (NULL != group->gp_core) {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+ 0, 0, /* No pid and tid for interrupt handler */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+ mali_mmu_get_rawstat(group->mmu), 0);
+ } else {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+ 0, 0, /* No pid and tid for interrupt handler */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+ mali_pp_core_get_id(group->pp_core)),
+ mali_mmu_get_rawstat(group->mmu), 0);
+ }
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_unlock();
+#endif
+#endif
+
+ return ret;
+}
+
+static void mali_group_bottom_half_mmu(void *data)
+{
+ struct mali_group *group = (struct mali_group *)data;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+ if (NULL != group->gp_core) {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+ 0, _mali_osk_get_tid(), /* pid and tid */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+ mali_mmu_get_rawstat(group->mmu), 0);
+ } else {
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+ 0, _mali_osk_get_tid(), /* pid and tid */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+ mali_pp_core_get_id(group->pp_core)),
+ mali_mmu_get_rawstat(group->mmu), 0);
+ }
+
+ mali_executor_interrupt_mmu(group, MALI_FALSE);
+
+ if (NULL != group->gp_core) {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+ 0, _mali_osk_get_tid(), /* pid and tid */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0),
+ mali_mmu_get_rawstat(group->mmu), 0);
+ } else {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+ 0, _mali_osk_get_tid(), /* pid and tid */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(
+ mali_pp_core_get_id(group->pp_core)),
+ mali_mmu_get_rawstat(group->mmu), 0);
+ }
+}
+
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data)
+{
+ struct mali_group *group = (struct mali_group *)data;
+ _mali_osk_errcode_t ret;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_lock();
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+ 0, 0, /* No pid and tid for interrupt handler */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+ mali_gp_get_rawstat(group->gp_core), 0);
+
+ MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+ mali_gp_get_rawstat(group->gp_core),
+ mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_unlock();
+#endif
+#endif
+ ret = mali_executor_interrupt_gp(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_lock();
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+ 0, 0, /* No pid and tid for interrupt handler */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+ mali_gp_get_rawstat(group->gp_core), 0);
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_unlock();
+#endif
+#endif
+ return ret;
+}
+
+static void mali_group_bottom_half_gp(void *data)
+{
+ struct mali_group *group = (struct mali_group *)data;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+ 0, _mali_osk_get_tid(), /* pid and tid */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+ mali_gp_get_rawstat(group->gp_core), 0);
+
+ mali_executor_interrupt_gp(group, MALI_FALSE);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+ 0, _mali_osk_get_tid(), /* pid and tid */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0),
+ mali_gp_get_rawstat(group->gp_core), 0);
+}
+
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data)
+{
+ struct mali_group *group = (struct mali_group *)data;
+ _mali_osk_errcode_t ret;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_lock();
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+ 0, 0, /* No pid and tid for interrupt handler */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+ mali_pp_core_get_id(group->pp_core)),
+ mali_pp_get_rawstat(group->pp_core), 0);
+
+ MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n",
+ mali_pp_get_rawstat(group->pp_core),
+ mali_group_core_description(group)));
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_unlock();
+#endif
+#endif
+
+ ret = mali_executor_interrupt_pp(group, MALI_TRUE);
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_lock();
+ if (!mali_group_is_working(group)) {
+ /* Not working, so nothing to do */
+ mali_executor_unlock();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF,
+ 0, 0, /* No pid and tid for interrupt handler */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+ mali_pp_core_get_id(group->pp_core)),
+ mali_pp_get_rawstat(group->pp_core), 0);
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ mali_executor_unlock();
+#endif
+#endif
+ return ret;
+}
+
+static void mali_group_bottom_half_pp(void *data)
+{
+ struct mali_group *group = (struct mali_group *)data;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+ 0, _mali_osk_get_tid(), /* pid and tid */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+ mali_pp_core_get_id(group->pp_core)),
+ mali_pp_get_rawstat(group->pp_core), 0);
+
+ mali_executor_interrupt_pp(group, MALI_FALSE);
+
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF,
+ 0, _mali_osk_get_tid(), /* pid and tid */
+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(
+ mali_pp_core_get_id(group->pp_core)),
+ mali_pp_get_rawstat(group->pp_core), 0);
+}
+
+static void mali_group_timeout(void *data)
+{
+ struct mali_group *group = (struct mali_group *)data;
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ MALI_DEBUG_PRINT(2, ("Group: timeout handler for %s at %u\n",
+ mali_group_core_description(group),
+ _mali_osk_time_tickcount()));
+
+ if (NULL != group->gp_core) {
+ mali_group_schedule_bottom_half_gp(group);
+ } else {
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ mali_group_schedule_bottom_half_pp(group);
+ }
+}
+
+static void mali_group_out_of_memory(void *data)
+{
+ struct mali_group *group = (struct mali_group *)data;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+
+ mali_executor_group_oom(group);
+}
+
+mali_bool mali_group_zap_session(struct mali_group *group,
+ struct mali_session_data *session)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ if (group->session != session) {
+ /* not running from this session */
+ return MALI_TRUE; /* success */
+ }
+
+ if (group->is_working) {
+ /* The Zap also does the stall and disable_stall */
+ mali_bool zap_success = mali_mmu_zap_tlb(group->mmu);
+ return zap_success;
+ } else {
+ /* Just remove the session instead of zapping */
+ mali_group_clear_session(group);
+ return MALI_TRUE; /* success */
+ }
+}
+
+#if defined(CONFIG_MALI400_PROFILING)
+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num)
+{
+ u32 source0 = 0;
+ u32 value0 = 0;
+ u32 source1 = 0;
+ u32 value1 = 0;
+ u32 profiling_channel = 0;
+
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ switch (core_num) {
+ case 0:
+ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+ break;
+ case 1:
+ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS;
+ break;
+ case 2:
+ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS;
+ break;
+ default:
+ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS;
+ break;
+ }
+
+ if (0 == core_num) {
+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+ }
+ if (1 == core_num) {
+ if (1 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+ } else if (1 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+ }
+ }
+ if (2 == core_num) {
+ if (2 == mali_l2_cache_get_id(group->l2_cache_core[0])) {
+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1);
+ } else if (2 == mali_l2_cache_get_id(group->l2_cache_core[1])) {
+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1);
+ }
+ }
+
+ _mali_osk_profiling_add_event(profiling_channel, source1 << 8 | source0, value0, value1, 0, 0);
+}
+#endif /* #if defined(CONFIG_MALI400_PROFILING) */
diff --git a/drivers/gpu/arm/utgard/common/mali_group.h b/drivers/gpu/arm/utgard/common/mali_group.h
new file mode 100644
index 000000000000..705605ec1d9f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_group.h
@@ -0,0 +1,467 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_GROUP_H__
+#define __MALI_GROUP_H__
+
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_mmu.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_session.h"
+#include "mali_osk_profiling.h"
+
+/**
+ * @brief Default max runtime [ms] for a core job - used by timeout timers
+ */
+#define MALI_MAX_JOB_RUNTIME_DEFAULT 5000
+
+extern int mali_max_job_runtime;
+
+#define MALI_MAX_NUMBER_OF_GROUPS 10
+#define MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS 8
+
+enum mali_group_state {
+ MALI_GROUP_STATE_INACTIVE,
+ MALI_GROUP_STATE_ACTIVATION_PENDING,
+ MALI_GROUP_STATE_ACTIVE,
+};
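+
+/*
+ * Expected state flow (a summary of the transitions implied by the functions
+ * below): a group starts out INACTIVE, mali_group_activate() may move it to
+ * ACTIVATION_PENDING while power-up is in flight, and mali_group_set_active()
+ * completes the transition to ACTIVE; mali_group_deactivate() returns it to
+ * INACTIVE.
+ */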
+
+/**
+ * The structure represents a render group
+ * A render group is defined by all the cores that share the same Mali MMU
+ */
+
+struct mali_group {
+ struct mali_mmu_core *mmu;
+ struct mali_session_data *session;
+
+ enum mali_group_state state;
+ mali_bool power_is_on;
+
+ mali_bool is_working;
+ unsigned long start_time; /* in ticks */
+
+ struct mali_gp_core *gp_core;
+ struct mali_gp_job *gp_running_job;
+
+ struct mali_pp_core *pp_core;
+ struct mali_pp_job *pp_running_job;
+ u32 pp_running_sub_job;
+
+ struct mali_pm_domain *pm_domain;
+
+ struct mali_l2_cache_core *l2_cache_core[2];
+ u32 l2_cache_core_ref_count[2];
+
+ /* Parent virtual group (if any) */
+ struct mali_group *parent_group;
+
+ struct mali_dlbu_core *dlbu_core;
+ struct mali_bcast_unit *bcast_core;
+
+ /* Used for working groups which needs to be disabled */
+ mali_bool disable_requested;
+
+ /* Used by group to link child groups (for virtual group) */
+ _mali_osk_list_t group_list;
+
+ /* Used by executor module in order to link groups of same state */
+ _mali_osk_list_t executor_list;
+
+ /* Used by PM domains to link groups of same domain */
+ _mali_osk_list_t pm_domain_list;
+
+ _mali_osk_wq_work_t *bottom_half_work_mmu;
+ _mali_osk_wq_work_t *bottom_half_work_gp;
+ _mali_osk_wq_work_t *bottom_half_work_pp;
+
+ _mali_osk_wq_work_t *oom_work_handler;
+ _mali_osk_timer_t *timeout_timer;
+};
+
+/** @brief Create a new Mali group object
+ *
+ * @return A pointer to a new group object
+ */
+struct mali_group *mali_group_create(struct mali_l2_cache_core *core,
+ struct mali_dlbu_core *dlbu,
+ struct mali_bcast_unit *bcast,
+ u32 domain_index);
+
+void mali_group_dump_status(struct mali_group *group);
+
+void mali_group_delete(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group,
+ struct mali_mmu_core *mmu_core);
+void mali_group_remove_mmu_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group,
+ struct mali_gp_core *gp_core);
+void mali_group_remove_gp_core(struct mali_group *group);
+
+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group,
+ struct mali_pp_core *pp_core);
+void mali_group_remove_pp_core(struct mali_group *group);
+
+MALI_STATIC_INLINE const char *mali_group_core_description(
+ struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ if (NULL != group->pp_core) {
+ return mali_pp_core_description(group->pp_core);
+ } else {
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ return mali_gp_core_description(group->gp_core);
+ }
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_is_virtual(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+ return (NULL != group->dlbu_core);
+#else
+ return MALI_FALSE;
+#endif
+}
+
+/** @brief Check if a group is a part of a virtual group or not
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_in_virtual(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+ return (NULL != group->parent_group) ? MALI_TRUE : MALI_FALSE;
+#else
+ return MALI_FALSE;
+#endif
+}
+
+/** @brief Reset group
+ *
+ * This function will reset the entire group,
+ * including all the cores present in the group.
+ *
+ * @param group Pointer to the group to reset
+ */
+void mali_group_reset(struct mali_group *group);
+
+MALI_STATIC_INLINE struct mali_session_data *mali_group_get_session(
+ struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ return group->session;
+}
+
+MALI_STATIC_INLINE void mali_group_clear_session(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ if (NULL != group->session) {
+ mali_mmu_activate_empty_page_directory(group->mmu);
+ group->session = NULL;
+ }
+}
+
+enum mali_group_state mali_group_activate(struct mali_group *group);
+
+/*
+ * Change state from ACTIVATION_PENDING to ACTIVE
+ * For a virtual group, all children must be ACTIVE first
+ */
+mali_bool mali_group_set_active(struct mali_group *group);
+
+/*
+ * @return MALI_TRUE means one or more domains can now be powered off,
+ * and caller should call either mali_pm_update_async() or
+ * mali_pm_update_sync() in order to do so.
+ */
+mali_bool mali_group_deactivate(struct mali_group *group);
+
+MALI_STATIC_INLINE enum mali_group_state mali_group_get_state(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return group->state;
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_power_is_on(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ return group->power_is_on;
+}
+
+void mali_group_power_up(struct mali_group *group);
+void mali_group_power_down(struct mali_group *group);
+
+MALI_STATIC_INLINE void mali_group_set_disable_request(
+ struct mali_group *group, mali_bool disable)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ group->disable_requested = disable;
+
+ /**
+ * When the disable_requested flag of any child group is set to TRUE,
+ * the disable_requested flag of its parent group must also be set to
+ * TRUE. Conversely, the parent group's flag may only be set back to
+ * FALSE once the flags of all of its child groups are FALSE.
+ */
+ if (NULL != group->parent_group && MALI_TRUE == disable) {
+ group->parent_group->disable_requested = disable;
+ }
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_disable_requested(
+ struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return group->disable_requested;
+}
+
+/** @brief Virtual groups */
+void mali_group_add_group(struct mali_group *parent, struct mali_group *child);
+struct mali_group *mali_group_acquire_group(struct mali_group *parent);
+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child);
+
+/** @brief Checks if the group is working.
+ */
+MALI_STATIC_INLINE mali_bool mali_group_is_working(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ if (mali_group_is_in_virtual(group)) {
+ struct mali_group *tmp_group = mali_executor_get_virtual_group();
+ return tmp_group->is_working;
+ }
+ return group->is_working;
+}
+
+MALI_STATIC_INLINE struct mali_gp_job *mali_group_get_running_gp_job(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return group->gp_running_job;
+}
+
+/** @brief Zap MMU TLB on all groups
+ *
+ * Zap TLB on group if \a session is active.
+ */
+mali_bool mali_group_zap_session(struct mali_group *group,
+ struct mali_session_data *session);
+
+/** @brief Get pointer to GP core object
+ */
+MALI_STATIC_INLINE struct mali_gp_core *mali_group_get_gp_core(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ return group->gp_core;
+}
+
+/** @brief Get pointer to PP core object
+ */
+MALI_STATIC_INLINE struct mali_pp_core *mali_group_get_pp_core(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ return group->pp_core;
+}
+
+/** @brief Start GP job
+ */
+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job);
+
+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job);
+
+/** @brief Start a virtual PP job on a virtual group
+*/
+void mali_group_start_job_on_virtual(struct mali_group *group, struct mali_pp_job *job, u32 first_subjob, u32 last_subjob);
+
+
+/** @brief Start a particular sub job of a PP job on a specific physical PP group
+*/
+void mali_group_start_job_on_group(struct mali_group *group, struct mali_pp_job *job, u32 subjob);
+
+
+/** @brief Remove all unused groups from the tmp_unused group list, so that the virtual group is left in a consistent state.
+ */
+void mali_group_non_dlbu_job_done_virtual(struct mali_group *group);
+
+
+/** @brief Resume GP job that suspended waiting for more heap memory
+ */
+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_gp(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return mali_gp_get_interrupt_result(group->gp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_pp(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return mali_pp_get_interrupt_result(group->pp_core);
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_mmu(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return mali_mmu_get_interrupt_result(group->mmu);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_gp_is_active(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return mali_gp_is_active(group->gp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_pp_is_active(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return mali_pp_is_active(group->pp_core);
+}
+
+MALI_STATIC_INLINE mali_bool mali_group_has_timed_out(struct mali_group *group)
+{
+ unsigned long time_cost;
+ struct mali_group *tmp_group = group;
+
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+
+ /* If the group is a member of a virtual group, use the virtual
+ * group's start time */
+ if (mali_group_is_in_virtual(group)) {
+ tmp_group = mali_executor_get_virtual_group();
+ }
+
+ time_cost = _mali_osk_time_tickcount() - tmp_group->start_time;
+ if (_mali_osk_time_mstoticks(mali_max_job_runtime) <= time_cost) {
+ /*
+ * current tick is at or after timeout end time,
+ * so this is a valid timeout
+ */
+ return MALI_TRUE;
+ } else {
+ /*
+ * Not a valid timeout. A HW interrupt probably beat
+ * us to it, and the timer wasn't properly deleted
+ * (async deletion used due to atomic context).
+ */
+ return MALI_FALSE;
+ }
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_gp(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return mali_gp_mask_all_interrupts(group->gp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_pp(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return mali_pp_mask_all_interrupts(group->pp_core);
+}
+
+MALI_STATIC_INLINE void mali_group_enable_interrupts_gp(
+ struct mali_group *group,
+ enum mali_interrupt_result exceptions)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ mali_gp_enable_interrupts(group->gp_core, exceptions);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_gp(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ _mali_osk_wq_schedule_work(group->bottom_half_work_gp);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_oom_work_handler(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->gp_core);
+ _mali_osk_wq_schedule_work(group->oom_work_handler);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_pp(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->pp_core);
+ _mali_osk_wq_schedule_work(group->bottom_half_work_pp);
+}
+
+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_mmu(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT_POINTER(group->mmu);
+ _mali_osk_wq_schedule_work(group->bottom_half_work_mmu);
+}
+
+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job);
+
+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success);
+
+#if defined(CONFIG_MALI400_PROFILING)
+MALI_STATIC_INLINE void mali_group_oom(struct mali_group *group)
+{
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0),
+ 0, 0, 0, 0, 0);
+}
+#endif
+
+struct mali_group *mali_group_get_glob_group(u32 index);
+u32 mali_group_get_glob_num_groups(void);
+
+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size);
+
+
+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data);
+_mali_osk_errcode_t mali_group_upper_half_gp(void *data);
+_mali_osk_errcode_t mali_group_upper_half_pp(void *data);
+
+MALI_STATIC_INLINE mali_bool mali_group_is_empty(struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(group);
+ MALI_DEBUG_ASSERT(mali_group_is_virtual(group));
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ return _mali_osk_list_empty(&group->group_list);
+}
+
+#endif /* __MALI_GROUP_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_hw_core.c b/drivers/gpu/arm/utgard/common/mali_hw_core.c
new file mode 100644
index 000000000000..c90cf38d8516
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_hw_core.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_hw_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_osk_mali.h"
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size)
+{
+ core->phys_addr = resource->base;
+ core->phys_offset = resource->base - _mali_osk_resource_base_address();
+ core->description = resource->description;
+ core->size = reg_size;
+
+ MALI_DEBUG_ASSERT(core->phys_offset < core->phys_addr);
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_mem_reqregion(core->phys_addr, core->size, core->description)) {
+ core->mapped_registers = _mali_osk_mem_mapioregion(core->phys_addr, core->size, core->description);
+ if (NULL != core->mapped_registers) {
+ return _MALI_OSK_ERR_OK;
+ } else {
+ MALI_PRINT_ERROR(("Failed to map memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+ }
+ _mali_osk_mem_unreqregion(core->phys_addr, core->size);
+ } else {
+ MALI_PRINT_ERROR(("Failed to request memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr));
+ }
+
+ return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_hw_core_delete(struct mali_hw_core *core)
+{
+ if (NULL != core->mapped_registers) {
+ _mali_osk_mem_unmapioregion(core->phys_addr, core->size, core->mapped_registers);
+ core->mapped_registers = NULL;
+ }
+ _mali_osk_mem_unreqregion(core->phys_addr, core->size);
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_hw_core.h b/drivers/gpu/arm/utgard/common/mali_hw_core.h
new file mode 100644
index 000000000000..ac2ffbedf308
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_hw_core.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_HW_CORE_H__
+#define __MALI_HW_CORE_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * The common parts for all Mali HW cores (GP, PP, MMU, L2 and PMU)
+ * This struct is embedded inside all core specific structs.
+ */
+struct mali_hw_core {
+ uintptr_t phys_addr; /**< Physical address of the registers */
+ u32 phys_offset; /**< Offset from start of Mali to registers */
+ u32 size; /**< Size of registers */
+ mali_io_address mapped_registers; /**< Virtual mapping of the registers */
+ const char *description; /**< Name of unit (as specified in device configuration) */
+};
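+
+/* A minimal sketch of the intended embedding (struct name hypothetical; the
+ * real core structs carry additional state): placing the mali_hw_core first
+ * lets the register accessors below operate on any core type.
+ *
+ *   struct mali_example_core {
+ *       struct mali_hw_core hw_core; // common register mapping
+ *       // ... core specific state ...
+ *   };
+ */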
+
+#define MALI_REG_POLL_COUNT_FAST 1000000
+#define MALI_REG_POLL_COUNT_SLOW 1000000
+
+/*
+ * GP and PP core translate their int_stat/rawstat into one of these
+ */
+enum mali_interrupt_result {
+ MALI_INTERRUPT_RESULT_NONE,
+ MALI_INTERRUPT_RESULT_SUCCESS,
+ MALI_INTERRUPT_RESULT_SUCCESS_VS,
+ MALI_INTERRUPT_RESULT_SUCCESS_PLBU,
+ MALI_INTERRUPT_RESULT_OOM,
+ MALI_INTERRUPT_RESULT_ERROR
+};
+
+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size);
+void mali_hw_core_delete(struct mali_hw_core *core);
+
+MALI_STATIC_INLINE u32 mali_hw_core_register_read(struct mali_hw_core *core, u32 relative_address)
+{
+ u32 read_val;
+ read_val = _mali_osk_mem_ioread32(core->mapped_registers, relative_address);
+ MALI_DEBUG_PRINT(6, ("register_read for core %s, relative addr=0x%04X, val=0x%08X\n",
+ core->description, relative_address, read_val));
+ return read_val;
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+ MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n",
+ core->description, relative_address, new_val));
+ _mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+}
+
+/* Conditionally write a register.
+ * The register is only written if the new value differs from old_val.
+ * Since old_val is passed by value, the caller is responsible for keeping
+ * its shadow copy of the register in sync after the call. */
+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 new_val, const u32 old_val)
+{
+ MALI_DEBUG_PRINT(6, ("register_write_relaxed_conditional for core %s, relative addr=0x%04X, val=0x%08X\n",
+ core->description, relative_address, new_val));
+ if (old_val != new_val) {
+ _mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val);
+ }
+}
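+
+/* Usage sketch (names hypothetical): a caller keeps a shadow copy of the
+ * value it last programmed and lets the conditional write skip the bus
+ * access when nothing has changed:
+ *
+ *   mali_hw_core_register_write_relaxed_conditional(&core->hw_core,
+ *                                                   REG_OFFSET,
+ *                                                   new_val, shadow);
+ *   shadow = new_val;
+ */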
+
+MALI_STATIC_INLINE void mali_hw_core_register_write(struct mali_hw_core *core, u32 relative_address, u32 new_val)
+{
+ MALI_DEBUG_PRINT(6, ("register_write for core %s, relative addr=0x%04X, val=0x%08X\n",
+ core->description, relative_address, new_val));
+ _mali_osk_mem_iowrite32(core->mapped_registers, relative_address, new_val);
+}
+
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs)
+{
+ u32 i;
+ MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+ core->description, relative_address, nr_of_regs));
+
+ /* Do not use burst writes against the registers */
+ for (i = 0; i < nr_of_regs; i++) {
+ mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+ }
+}
+
+/* Conditionally write a set of registers.
+ * Each register is only written if its new value differs from the
+ * corresponding value in old_array; the caller is responsible for keeping
+ * old_array in sync with what was written. */
+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs, const u32 *old_array)
+{
+ u32 i;
+ MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n",
+ core->description, relative_address, nr_of_regs));
+
+ /* Do not use burst writes against the registers */
+ for (i = 0; i < nr_of_regs; i++) {
+ if (old_array[i] != write_array[i]) {
+ mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]);
+ }
+ }
+}
+
+#endif /* __MALI_HW_CORE_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_kernel_common.h b/drivers/gpu/arm/utgard/common/mali_kernel_common.h
new file mode 100644
index 000000000000..990cf3ac78e0
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_kernel_common.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_COMMON_H__
+#define __MALI_KERNEL_COMMON_H__
+
+#include "mali_osk.h"
+
+/* Make sure debug is defined when it should be */
+#ifndef DEBUG
+#if defined(_DEBUG)
+#define DEBUG
+#endif
+#endif
+
+/* This file includes several useful macros for error checking, debugging and printing.
+ * - MALI_PRINTF(...)                   Do not use this function: it is included in Release builds.
+ * - MALI_DEBUG_PRINT(nr, (X))          Prints the second argument if nr <= MALI_DEBUG_LEVEL.
+ * - MALI_DEBUG_PRINT_ERROR((X))        Prints an error text, a source trace, and the given error message.
+ * - MALI_DEBUG_ASSERT(condition)       If the asserted condition is false, a debugger breakpoint is triggered.
+ * - MALI_DEBUG_ASSERT_POINTER(pointer) Triggers if the pointer is a NULL pointer.
+ * - MALI_DEBUG_CODE(X)                 The code inside the macro is only compiled in Debug builds.
+ *
+ * The (X) means that you must add an extra parenthesis around the argument list.
+ *
+ * The printf function MALI_PRINTF(...) is routed to _mali_osk_dbgmsg
+ *
+ * Suggested range for the DEBUG-LEVEL is [1:6] where
+ * [1:2] are messages with the highest priority, indicating possible errors.
+ * [3:4] are messages with medium priority, outputting important variables.
+ * [5:6] are messages with low priority, used during extensive debugging.
+ */
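+
+/* For example (taken from the group code earlier in this series), a
+ * medium-priority trace that is only emitted on DEBUG builds when
+ * mali_debug_level >= 3:
+ *
+ *   MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n",
+ *                        job, mali_group_core_description(group)));
+ */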
+
+/**
+* Fundamental error macro. Reports an error code. This is abstracted to allow us to
+* easily switch to a different error reporting method if we want, and also to allow
+* us to search for error returns easily.
+*
+* Note no closing semicolon - this is supplied in typical usage:
+*
+* MALI_ERROR(MALI_ERROR_OUT_OF_MEMORY);
+*/
+#define MALI_ERROR(error_code) return (error_code)
+
+/**
+ * Basic error macro, to indicate success.
+ * Note no closing semicolon - this is supplied in typical usage:
+ *
+ * MALI_SUCCESS;
+ */
+#define MALI_SUCCESS MALI_ERROR(_MALI_OSK_ERR_OK)
+
+/**
+ * Basic error macro. This checks whether the given condition is true, and if not returns
+ * from this function with the supplied error code. This is a macro so that we can override it
+ * for stress testing.
+ *
+ * Note that this uses the do-while-0 wrapping to ensure that we don't get problems with dangling
+ * else clauses. Note also no closing semicolon - this is supplied in typical usage:
+ *
+ * MALI_CHECK((p!=NULL), ERROR_NO_OBJECT);
+ */
+#define MALI_CHECK(condition, error_code) do { if(!(condition)) MALI_ERROR(error_code); } while(0)
+
+/**
+ * Error propagation macro. If the expression given is anything other than
+ * _MALI_OSK_NO_ERROR, then the value is returned from the enclosing function
+ * as an error code. This effectively acts as a guard clause, and propagates
+ * error values up the call stack. This uses a temporary value to ensure that
+ * the error expression is not evaluated twice.
+ * If the counter for forcing a failure has been set using _mali_force_error,
+ * this error will be returned without evaluating the expression in
+ * MALI_CHECK_NO_ERROR
+ */
+#define MALI_CHECK_NO_ERROR(expression) \
+ do { _mali_osk_errcode_t _check_no_error_result=(expression); \
+ if(_check_no_error_result != _MALI_OSK_ERR_OK) \
+ MALI_ERROR(_check_no_error_result); \
+ } while(0)
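+
+/* For illustration (editor's sketch, hypothetical function names): a typical
+ * guard-clause chain in which the first failing sub-initializer is returned
+ * to the caller unchanged:
+ *
+ *     MALI_CHECK_NO_ERROR(example_init_clocks());
+ *     MALI_CHECK_NO_ERROR(example_init_irqs());
+ */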
+
+/**
+ * Pointer check macro. Checks non-null pointer.
+ */
+#define MALI_CHECK_NON_NULL(pointer, error_code) MALI_CHECK( ((pointer)!=NULL), (error_code) )
+
+/**
+ * Error macro with goto. This checks whether the given condition is true, and if not jumps
+ * to the specified label using a goto. The label must therefore be local to the function in
+ * which this macro appears. This is most usually used to execute some clean-up code before
+ * exiting with a call to ERROR.
+ *
+ * Like the other macros, this is a macro to allow us to override the condition if we wish,
+ * e.g. to force an error during stress testing.
+ */
+#define MALI_CHECK_GOTO(condition, label) do { if(!(condition)) goto label; } while(0)
+
+/**
+ * Explicitly ignore a parameter passed into a function, to suppress compiler warnings.
+ * Should only be used with parameter names.
+ */
+#define MALI_IGNORE(x) x=x
+
+#if defined(CONFIG_MALI_QUIET)
+#define MALI_PRINTF(args)
+#else
+#define MALI_PRINTF(args) _mali_osk_dbgmsg args;
+#endif
+
+#define MALI_PRINT_ERROR(args) do{ \
+ MALI_PRINTF(("Mali: ERR: %s\n" ,__FILE__)); \
+ MALI_PRINTF((" %s()%4d\n ", __FUNCTION__, __LINE__)) ; \
+ MALI_PRINTF(args); \
+ MALI_PRINTF(("\n")); \
+ } while(0)
+
+#define MALI_PRINT(args) do{ \
+ MALI_PRINTF(("Mali: ")); \
+ MALI_PRINTF(args); \
+ } while (0)
+
+#ifdef DEBUG
+#ifndef mali_debug_level
+extern int mali_debug_level;
+#endif
+
+#define MALI_DEBUG_CODE(code) code
+#define MALI_DEBUG_PRINT(level, args) do { \
+ if((level) <= mali_debug_level)\
+ {MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } \
+ } while (0)
+
+#define MALI_DEBUG_PRINT_ERROR(args) MALI_PRINT_ERROR(args)
+
+#define MALI_DEBUG_PRINT_IF(level,condition,args) \
+ if((condition)&&((level) <= mali_debug_level))\
+ {MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+#define MALI_DEBUG_PRINT_ELSE(level, args)\
+ else if((level) <= mali_debug_level)\
+ { MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); }
+
+/**
+ * @note these variants of DEBUG ASSERTS will cause a debugger breakpoint
+ * to be entered (see _mali_osk_break() ). An alternative would be to call
+ * _mali_osk_abort(), on OSs that support it.
+ */
+#define MALI_DEBUG_PRINT_ASSERT(condition, args) do {if( !(condition)) { MALI_PRINT_ERROR(args); _mali_osk_break(); } } while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {if( (pointer)== NULL) {MALI_PRINT_ERROR(("NULL pointer " #pointer)); _mali_osk_break();} } while(0)
+#define MALI_DEBUG_ASSERT(condition) do {if( !(condition)) {MALI_PRINT_ERROR(("ASSERT failed: " #condition )); _mali_osk_break();} } while(0)
+
+#else /* DEBUG */
+
+#define MALI_DEBUG_CODE(code)
+#define MALI_DEBUG_PRINT(level, args) do {} while(0)
+#define MALI_DEBUG_PRINT_ERROR(args) do {} while(0)
+#define MALI_DEBUG_PRINT_IF(level,condition,args) do {} while(0)
+#define MALI_DEBUG_PRINT_ELSE(level, args) do {} while(0)
+#define MALI_DEBUG_PRINT_ASSERT(condition,args) do {} while(0)
+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {} while(0)
+#define MALI_DEBUG_ASSERT(condition) do {} while(0)
+
+#endif /* DEBUG */
+
+/**
+ * Pointers passed in from user space cannot be dereferenced directly in
+ * kernel space; tagging them with __user lets static checkers such as
+ * sparse warn about misuse. Toolchains that do not support the annotation
+ * may not define it, so we define it here as an empty macro in that case.
+ */
+#ifndef __user
+#define __user
+#endif
+
+#endif /* __MALI_KERNEL_COMMON_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_kernel_core.c b/drivers/gpu/arm/utgard/common/mali_kernel_core.c
new file mode 100644
index 000000000000..b9a6b7a0a277
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_kernel_core.c
@@ -0,0 +1,1314 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_kernel_core.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_mmu.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_dlbu.h"
+#include "mali_broadcast.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_scheduler.h"
+#include "mali_kernel_utilization.h"
+#include "mali_l2_cache.h"
+#include "mali_timeline.h"
+#include "mali_soft_job.h"
+#include "mali_pm_domain.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#include "mali_control_timer.h"
+#include "mali_dvfs_policy.h"
+#include <linux/sched.h>
+
+#define MALI_SHARED_MEMORY_DEFAULT_SIZE 0xffffffff
+
+/* Mali GPU memory. Real values come from module parameter or from device specific data */
+unsigned int mali_dedicated_mem_start = 0;
+unsigned int mali_dedicated_mem_size = 0;
+
+/* Default shared memory size is 0xFFFFFFFF, i.e. just under 4 GB. */
+unsigned int mali_shared_mem_size = MALI_SHARED_MEMORY_DEFAULT_SIZE;
+
+/* Frame buffer memory to be accessible by Mali GPU */
+int mali_fb_start = 0;
+int mali_fb_size = 0;
+
+/* Mali max job runtime */
+extern int mali_max_job_runtime;
+
+/** Start profiling from module load? */
+int mali_boot_profiling = 0;
+
+/** Limits for the number of PP cores behind each L2 cache. */
+int mali_max_pp_cores_group_1 = 0xFF;
+int mali_max_pp_cores_group_2 = 0xFF;
+
+int mali_inited_pp_cores_group_1 = 0;
+int mali_inited_pp_cores_group_2 = 0;
+
+static _mali_product_id_t global_product_id = _MALI_PRODUCT_ID_UNKNOWN;
+static uintptr_t global_gpu_base_address = 0;
+static u32 global_gpu_major_version = 0;
+static u32 global_gpu_minor_version = 0;
+
+mali_bool mali_gpu_class_is_mali450 = MALI_FALSE;
+mali_bool mali_gpu_class_is_mali470 = MALI_FALSE;
+
+static _mali_osk_errcode_t mali_set_global_gpu_base_address(void)
+{
+ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+ global_gpu_base_address = _mali_osk_resource_base_address();
+ if (0 == global_gpu_base_address) {
+ err = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ }
+
+ return err;
+}
+
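+/* Editor's note: each PP core owns one bit in the broadcast mask
+ * (PP0 -> 0x01 ... PP7 -> 0x80). Cores sit at 0x2000-byte strides, and
+ * PP0-PP3 additionally have aliased register offsets (0x08000... and
+ * 0x20000...) that map to the same ID. An unrecognized offset yields 0,
+ * i.e. no broadcast bit. */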
+static u32 mali_get_bcast_id(_mali_osk_resource_t *resource_pp)
+{
+ switch (resource_pp->base - global_gpu_base_address) {
+ case 0x08000:
+ case 0x20000: /* fall-through for aliased mapping */
+ return 0x01;
+ case 0x0A000:
+ case 0x22000: /* fall-through for aliased mapping */
+ return 0x02;
+ case 0x0C000:
+ case 0x24000: /* fall-through for aliased mapping */
+ return 0x04;
+ case 0x0E000:
+ case 0x26000: /* fall-through for aliased mapping */
+ return 0x08;
+ case 0x28000:
+ return 0x10;
+ case 0x2A000:
+ return 0x20;
+ case 0x2C000:
+ return 0x40;
+ case 0x2E000:
+ return 0x80;
+ default:
+ return 0;
+ }
+}
+
+static _mali_osk_errcode_t mali_parse_product_info(void)
+{
+ _mali_osk_resource_t first_pp_resource;
+
+ /* Find the first PP core resource (again) */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PP0, &first_pp_resource)) {
+ /* Create a dummy PP object for this core so that we can read the version register */
+ struct mali_group *group = mali_group_create(NULL, NULL, NULL, MALI_DOMAIN_INDEX_PP0);
+ if (NULL != group) {
+ struct mali_pp_core *pp_core = mali_pp_create(&first_pp_resource, group, MALI_FALSE, mali_get_bcast_id(&first_pp_resource));
+ if (NULL != pp_core) {
+ u32 pp_version;
+
+ pp_version = mali_pp_core_get_version(pp_core);
+
+ mali_group_delete(group);
+
+ global_gpu_major_version = (pp_version >> 8) & 0xFF;
+ global_gpu_minor_version = pp_version & 0xFF;
+
+ switch (pp_version >> 16) {
+ case MALI200_PP_PRODUCT_ID:
+ global_product_id = _MALI_PRODUCT_ID_MALI200;
+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-200 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+ MALI_PRINT_ERROR(("Mali-200 is not supported by this driver.\n"));
+ _mali_osk_abort();
+ break;
+ case MALI300_PP_PRODUCT_ID:
+ global_product_id = _MALI_PRODUCT_ID_MALI300;
+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-300 r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+ break;
+ case MALI400_PP_PRODUCT_ID:
+ global_product_id = _MALI_PRODUCT_ID_MALI400;
+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-400 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+ break;
+ case MALI450_PP_PRODUCT_ID:
+ global_product_id = _MALI_PRODUCT_ID_MALI450;
+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-450 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+ break;
+ case MALI470_PP_PRODUCT_ID:
+ global_product_id = _MALI_PRODUCT_ID_MALI470;
+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-470 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version));
+ break;
+ default:
+ MALI_DEBUG_PRINT(2, ("Found unknown Mali GPU (r%up%u)\n", global_gpu_major_version, global_gpu_minor_version));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ return _MALI_OSK_ERR_OK;
+ } else {
+ MALI_PRINT_ERROR(("Failed to create initial PP object\n"));
+ }
+ } else {
+ MALI_PRINT_ERROR(("Failed to create initial group object\n"));
+ }
+ } else {
+ MALI_PRINT_ERROR(("First PP core not specified in config file\n"));
+ }
+
+ return _MALI_OSK_ERR_FAULT;
+}
+
+static void mali_delete_groups(void)
+{
+ struct mali_group *group;
+
+ group = mali_group_get_glob_group(0);
+ while (NULL != group) {
+ mali_group_delete(group);
+ group = mali_group_get_glob_group(0);
+ }
+
+ MALI_DEBUG_ASSERT(0 == mali_group_get_glob_num_groups());
+}
+
+static void mali_delete_l2_cache_cores(void)
+{
+ struct mali_l2_cache_core *l2;
+
+ l2 = mali_l2_cache_core_get_glob_l2_core(0);
+ while (NULL != l2) {
+ mali_l2_cache_delete(l2);
+ l2 = mali_l2_cache_core_get_glob_l2_core(0);
+ }
+
+ MALI_DEBUG_ASSERT(0 == mali_l2_cache_core_get_glob_num_l2_cores());
+}
+
+static struct mali_l2_cache_core *mali_create_l2_cache_core(_mali_osk_resource_t *resource, u32 domain_index)
+{
+ struct mali_l2_cache_core *l2_cache = NULL;
+
+ if (NULL != resource) {
+
+ MALI_DEBUG_PRINT(3, ("Found L2 cache %s\n", resource->description));
+
+ l2_cache = mali_l2_cache_create(resource, domain_index);
+ if (NULL == l2_cache) {
+ MALI_PRINT_ERROR(("Failed to create L2 cache object\n"));
+ return NULL;
+ }
+ MALI_DEBUG_PRINT(3, ("Created L2 cache core object\n"));
+ }
+
+ return l2_cache;
+}
+
+static _mali_osk_errcode_t mali_parse_config_l2_cache(void)
+{
+ struct mali_l2_cache_core *l2_cache = NULL;
+
+ if (mali_is_mali400()) {
+ _mali_osk_resource_t l2_resource;
+ if (_MALI_OSK_ERR_OK != _mali_osk_resource_find(MALI400_OFFSET_L2_CACHE0, &l2_resource)) {
+ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache in config file\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ l2_cache = mali_create_l2_cache_core(&l2_resource, MALI_DOMAIN_INDEX_L20);
+ if (NULL == l2_cache) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ } else if (mali_is_mali450()) {
+ /*
+ * L2 for GP at 0x10000
+ * L2 for PP0-3 at 0x01000
+ * L2 for PP4-7 at 0x11000 (optional)
+ */
+
+ _mali_osk_resource_t l2_gp_resource;
+ _mali_osk_resource_t l2_pp_grp0_resource;
+ _mali_osk_resource_t l2_pp_grp1_resource;
+
+ /* Make cluster for GP's L2 */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE0, &l2_gp_resource)) {
+ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for GP\n"));
+ l2_cache = mali_create_l2_cache_core(&l2_gp_resource, MALI_DOMAIN_INDEX_L20);
+ if (NULL == l2_cache) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ } else {
+ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for GP in config file\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Find corresponding l2 domain */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE1, &l2_pp_grp0_resource)) {
+ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 0\n"));
+ l2_cache = mali_create_l2_cache_core(&l2_pp_grp0_resource, MALI_DOMAIN_INDEX_L21);
+ if (NULL == l2_cache) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ } else {
+ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for PP group 0 in config file\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Second PP core group is optional, don't fail if we don't find it */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE2, &l2_pp_grp1_resource)) {
+ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 1\n"));
+ l2_cache = mali_create_l2_cache_core(&l2_pp_grp1_resource, MALI_DOMAIN_INDEX_L22);
+ if (NULL == l2_cache) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+ } else if (mali_is_mali470()) {
+ _mali_osk_resource_t l2c1_resource;
+
+ /* Make cluster for L2C1 */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI470_OFFSET_L2_CACHE1, &l2c1_resource)) {
+ MALI_DEBUG_PRINT(3, ("Creating Mali-470 L2 cache 1\n"));
+ l2_cache = mali_create_l2_cache_core(&l2c1_resource, MALI_DOMAIN_INDEX_L21);
+ if (NULL == l2_cache) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ } else {
+ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for L2C1\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+static struct mali_group *mali_create_group(struct mali_l2_cache_core *cache,
+ _mali_osk_resource_t *resource_mmu,
+ _mali_osk_resource_t *resource_gp,
+ _mali_osk_resource_t *resource_pp,
+ u32 domain_index)
+{
+ struct mali_mmu_core *mmu;
+ struct mali_group *group;
+
+ MALI_DEBUG_PRINT(3, ("Starting new group for MMU %s\n", resource_mmu->description));
+
+ /* Create the group object */
+ group = mali_group_create(cache, NULL, NULL, domain_index);
+ if (NULL == group) {
+ MALI_PRINT_ERROR(("Failed to create group object for MMU %s\n", resource_mmu->description));
+ return NULL;
+ }
+
+ /* Create the MMU object inside group */
+ mmu = mali_mmu_create(resource_mmu, group, MALI_FALSE);
+ if (NULL == mmu) {
+ MALI_PRINT_ERROR(("Failed to create MMU object\n"));
+ mali_group_delete(group);
+ return NULL;
+ }
+
+ if (NULL != resource_gp) {
+ /* Create the GP core object inside this group */
+ struct mali_gp_core *gp_core = mali_gp_create(resource_gp, group);
+ if (NULL == gp_core) {
+ /* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+ MALI_PRINT_ERROR(("Failed to create GP object\n"));
+ mali_group_delete(group);
+ return NULL;
+ }
+ }
+
+ if (NULL != resource_pp) {
+ struct mali_pp_core *pp_core;
+
+ /* Create the PP core object inside this group */
+ pp_core = mali_pp_create(resource_pp, group, MALI_FALSE, mali_get_bcast_id(resource_pp));
+ if (NULL == pp_core) {
+ /* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+ MALI_PRINT_ERROR(("Failed to create PP object\n"));
+ mali_group_delete(group);
+ return NULL;
+ }
+ }
+
+ return group;
+}
+
+static _mali_osk_errcode_t mali_create_virtual_group(_mali_osk_resource_t *resource_mmu_pp_bcast,
+ _mali_osk_resource_t *resource_pp_bcast,
+ _mali_osk_resource_t *resource_dlbu,
+ _mali_osk_resource_t *resource_bcast)
+{
+ struct mali_mmu_core *mmu_pp_bcast_core;
+ struct mali_pp_core *pp_bcast_core;
+ struct mali_dlbu_core *dlbu_core;
+ struct mali_bcast_unit *bcast_core;
+ struct mali_group *group;
+
+ MALI_DEBUG_PRINT(2, ("Starting new virtual group for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+
+ /* Create the DLBU core object */
+ dlbu_core = mali_dlbu_create(resource_dlbu);
+ if (NULL == dlbu_core) {
+ MALI_PRINT_ERROR(("Failed to create DLBU object \n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Create the Broadcast unit core */
+ bcast_core = mali_bcast_unit_create(resource_bcast);
+ if (NULL == bcast_core) {
+ MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+ mali_dlbu_delete(dlbu_core);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Create the group object */
+#if defined(DEBUG)
+ /* Get a physical PP group to temporarily add to broadcast unit. IRQ
+ * verification needs a physical group in the broadcast unit to test
+ * the broadcast unit interrupt line. */
+ {
+ struct mali_group *phys_group = NULL;
+ int i;
+ for (i = 0; i < mali_group_get_glob_num_groups(); i++) {
+ phys_group = mali_group_get_glob_group(i);
+ if (NULL != mali_group_get_pp_core(phys_group)) break;
+ }
+ MALI_DEBUG_ASSERT(NULL != mali_group_get_pp_core(phys_group));
+
+ /* Add the group temporarily to the broadcast, and update the
+ * broadcast HW. Since the HW is not updated when removing the
+ * group the IRQ check will work when the virtual PP is created
+ * later.
+ *
+ * When the virtual group gets populated, the actually used
+ * groups will be added to the broadcast unit and the HW will
+ * be updated.
+ */
+ mali_bcast_add_group(bcast_core, phys_group);
+ mali_bcast_reset(bcast_core);
+ mali_bcast_remove_group(bcast_core, phys_group);
+ }
+#endif /* DEBUG */
+ group = mali_group_create(NULL, dlbu_core, bcast_core, MALI_DOMAIN_INDEX_DUMMY);
+ if (NULL == group) {
+ MALI_PRINT_ERROR(("Failed to create group object for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description));
+ mali_bcast_unit_delete(bcast_core);
+ mali_dlbu_delete(dlbu_core);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Create the MMU object inside group */
+ mmu_pp_bcast_core = mali_mmu_create(resource_mmu_pp_bcast, group, MALI_TRUE);
+ if (NULL == mmu_pp_bcast_core) {
+ MALI_PRINT_ERROR(("Failed to create MMU PP broadcast object\n"));
+ mali_group_delete(group);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Create the PP core object inside this group */
+ pp_bcast_core = mali_pp_create(resource_pp_bcast, group, MALI_TRUE, 0);
+ if (NULL == pp_bcast_core) {
+ /* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */
+ MALI_PRINT_ERROR(("Failed to create PP object\n"));
+ mali_group_delete(group);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_groups(void)
+{
+ struct mali_group *group;
+ int cluster_id_gp = 0;
+ int cluster_id_pp_grp0 = 0;
+ int cluster_id_pp_grp1 = 0;
+ int i;
+
+ _mali_osk_resource_t resource_gp;
+ _mali_osk_resource_t resource_gp_mmu;
+ _mali_osk_resource_t resource_pp[8];
+ _mali_osk_resource_t resource_pp_mmu[8];
+ _mali_osk_resource_t resource_pp_mmu_bcast;
+ _mali_osk_resource_t resource_pp_bcast;
+ _mali_osk_resource_t resource_dlbu;
+ _mali_osk_resource_t resource_bcast;
+ _mali_osk_errcode_t resource_gp_found;
+ _mali_osk_errcode_t resource_gp_mmu_found;
+ _mali_osk_errcode_t resource_pp_found[8];
+ _mali_osk_errcode_t resource_pp_mmu_found[8];
+ _mali_osk_errcode_t resource_pp_mmu_bcast_found;
+ _mali_osk_errcode_t resource_pp_bcast_found;
+ _mali_osk_errcode_t resource_dlbu_found;
+ _mali_osk_errcode_t resource_bcast_found;
+
+ if (!(mali_is_mali400() || mali_is_mali450() || mali_is_mali470())) {
+ /* No known HW core */
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ if (MALI_MAX_JOB_RUNTIME_DEFAULT == mali_max_job_runtime) {
+ /* Max job runtime is not overridden by a module parameter, so use the device setting */
+ _mali_osk_device_data data = { 0, };
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+ /* Use device specific settings (if defined) */
+ if (0 != data.max_job_runtime) {
+ mali_max_job_runtime = data.max_job_runtime;
+ }
+ }
+ }
+
+ if (mali_is_mali450()) {
+ /* Mali-450 has separate L2 caches for the GP and for the PP core group(s) */
+ cluster_id_pp_grp0 = 1;
+ cluster_id_pp_grp1 = 2;
+ }
+
+ resource_gp_found = _mali_osk_resource_find(MALI_OFFSET_GP, &resource_gp);
+ resource_gp_mmu_found = _mali_osk_resource_find(MALI_OFFSET_GP_MMU, &resource_gp_mmu);
+ resource_pp_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0, &(resource_pp[0]));
+ resource_pp_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1, &(resource_pp[1]));
+ resource_pp_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2, &(resource_pp[2]));
+ resource_pp_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3, &(resource_pp[3]));
+ resource_pp_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4, &(resource_pp[4]));
+ resource_pp_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5, &(resource_pp[5]));
+ resource_pp_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6, &(resource_pp[6]));
+ resource_pp_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7, &(resource_pp[7]));
+ resource_pp_mmu_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0_MMU, &(resource_pp_mmu[0]));
+ resource_pp_mmu_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1_MMU, &(resource_pp_mmu[1]));
+ resource_pp_mmu_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2_MMU, &(resource_pp_mmu[2]));
+ resource_pp_mmu_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3_MMU, &(resource_pp_mmu[3]));
+ resource_pp_mmu_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4_MMU, &(resource_pp_mmu[4]));
+ resource_pp_mmu_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5_MMU, &(resource_pp_mmu[5]));
+ resource_pp_mmu_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6_MMU, &(resource_pp_mmu[6]));
+ resource_pp_mmu_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7_MMU, &(resource_pp_mmu[7]));
+
+
+ if (mali_is_mali450() || mali_is_mali470()) {
+ resource_bcast_found = _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast);
+ resource_dlbu_found = _mali_osk_resource_find(MALI_OFFSET_DLBU, &resource_dlbu);
+ resource_pp_mmu_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST_MMU, &resource_pp_mmu_bcast);
+ resource_pp_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST, &resource_pp_bcast);
+
+ if (_MALI_OSK_ERR_OK != resource_bcast_found ||
+ _MALI_OSK_ERR_OK != resource_dlbu_found ||
+ _MALI_OSK_ERR_OK != resource_pp_mmu_bcast_found ||
+ _MALI_OSK_ERR_OK != resource_pp_bcast_found) {
+ /* Missing mandatory core(s) for Mali-450 or Mali-470 */
+ MALI_DEBUG_PRINT(2, ("Missing mandatory resources, Mali-450 needs DLBU, Broadcast unit, virtual PP core and virtual MMU\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+
+ if (_MALI_OSK_ERR_OK != resource_gp_found ||
+ _MALI_OSK_ERR_OK != resource_gp_mmu_found ||
+ _MALI_OSK_ERR_OK != resource_pp_found[0] ||
+ _MALI_OSK_ERR_OK != resource_pp_mmu_found[0]) {
+ /* Missing mandatory core(s) */
+ MALI_DEBUG_PRINT(2, ("Missing mandatory resource, need at least one GP and one PP, both with a separate MMU\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ MALI_DEBUG_ASSERT(1 <= mali_l2_cache_core_get_glob_num_l2_cores());
+ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_gp), &resource_gp_mmu, &resource_gp, NULL, MALI_DOMAIN_INDEX_GP);
+ if (NULL == group) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Create group for first (and mandatory) PP core */
+ MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= (cluster_id_pp_grp0 + 1)); /* >= 1 on Mali-300 and Mali-400, >= 2 on Mali-450 */
+ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[0], NULL, &resource_pp[0], MALI_DOMAIN_INDEX_PP0);
+ if (NULL == group) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ mali_inited_pp_cores_group_1++;
+
+ /* Create groups for rest of the cores in the first PP core group */
+ for (i = 1; i < 4; i++) { /* First half of the PP cores belong to first core group */
+ if (mali_inited_pp_cores_group_1 < mali_max_pp_cores_group_1) {
+ if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+ if (NULL == group) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ mali_inited_pp_cores_group_1++;
+ }
+ }
+ }
+
+ /* Create groups for cores in the second PP core group */
+ for (i = 4; i < 8; i++) { /* Second half of the PP cores belong to second core group */
+ if (mali_inited_pp_cores_group_2 < mali_max_pp_cores_group_2) {
+ if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) {
+ MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 2); /* Only Mali-450 has a second core group */
+ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp1), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i);
+ if (NULL == group) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ mali_inited_pp_cores_group_2++;
+ }
+ }
+ }
+
+ if (mali_is_mali450() || mali_is_mali470()) {
+ _mali_osk_errcode_t err = mali_create_virtual_group(&resource_pp_mmu_bcast, &resource_pp_bcast, &resource_dlbu, &resource_bcast);
+ if (_MALI_OSK_ERR_OK != err) {
+ return err;
+ }
+ }
+
+ mali_max_pp_cores_group_1 = mali_inited_pp_cores_group_1;
+ mali_max_pp_cores_group_2 = mali_inited_pp_cores_group_2;
+ MALI_DEBUG_PRINT(2, ("%d+%d PP cores initialized\n", mali_inited_pp_cores_group_1, mali_inited_pp_cores_group_2));
+
+ return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_check_shared_interrupts(void)
+{
+#if !defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ if (MALI_TRUE == _mali_osk_shared_interrupts()) {
+ MALI_PRINT_ERROR(("Shared interrupts detected, but driver support is not enabled\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif /* !defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+ /* It is OK to compile support for shared interrupts even if Mali is not using it. */
+ return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_pmu(void)
+{
+ _mali_osk_resource_t resource_pmu;
+
+ MALI_DEBUG_ASSERT(0 != global_gpu_base_address);
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PMU, &resource_pmu)) {
+ struct mali_pmu_core *pmu;
+
+ pmu = mali_pmu_create(&resource_pmu);
+ if (NULL == pmu) {
+ MALI_PRINT_ERROR(("Failed to create PMU\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+
+ /* It's ok if the PMU doesn't exist */
+ return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_parse_config_memory(void)
+{
+ _mali_osk_device_data data = { 0, };
+ _mali_osk_errcode_t ret;
+
+ /* The values of mali_shared_mem_size, mali_dedicated_mem_start
+ * and mali_dedicated_mem_size are taken with the following priority:
+ * 1. module parameter;
+ * 2. platform data;
+ * 3. default value.
+ */
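+ /* Editor's note: assuming these globals are exported as module
+ * parameters of the same name, they can be overridden at load time,
+ * e.g.: insmod mali.ko mali_dedicated_mem_start=0x80000000
+ * mali_dedicated_mem_size=0x10000000
+ */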
+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+ /* Memory settings are not overridden by module parameters, so use device settings */
+ if (0 == mali_dedicated_mem_start && 0 == mali_dedicated_mem_size) {
+ /* Use device specific settings (if defined) */
+ mali_dedicated_mem_start = data.dedicated_mem_start;
+ mali_dedicated_mem_size = data.dedicated_mem_size;
+ }
+
+ if (MALI_SHARED_MEMORY_DEFAULT_SIZE == mali_shared_mem_size &&
+ 0 != data.shared_mem_size) {
+ mali_shared_mem_size = data.shared_mem_size;
+ }
+ }
+
+ if (0 < mali_dedicated_mem_size && 0 != mali_dedicated_mem_start) {
+ MALI_DEBUG_PRINT(2, ("Mali memory settings (dedicated: 0x%08X@0x%08X)\n",
+ mali_dedicated_mem_size, mali_dedicated_mem_start));
+
+ /* Dedicated memory */
+ ret = mali_memory_core_resource_dedicated_memory(mali_dedicated_mem_start, mali_dedicated_mem_size);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_PRINT_ERROR(("Failed to register dedicated memory\n"));
+ mali_memory_terminate();
+ return ret;
+ }
+ }
+
+ if (0 < mali_shared_mem_size) {
+ MALI_DEBUG_PRINT(2, ("Mali memory settings (shared: 0x%08X)\n", mali_shared_mem_size));
+
+ /* Shared OS memory */
+ ret = mali_memory_core_resource_os_memory(mali_shared_mem_size);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_PRINT_ERROR(("Failed to register shared OS memory\n"));
+ mali_memory_terminate();
+ return ret;
+ }
+ }
+
+ if (0 == mali_fb_start && 0 == mali_fb_size) {
+ /* Frame buffer settings are not overridden by module parameters, so use device settings */
+ _mali_osk_device_data data = { 0, };
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+ /* Use device specific settings (if defined) */
+ mali_fb_start = data.fb_start;
+ mali_fb_size = data.fb_size;
+ }
+
+ MALI_DEBUG_PRINT(2, ("Using device defined frame buffer settings (0x%08X@0x%08X)\n",
+ mali_fb_size, mali_fb_start));
+ } else {
+ MALI_DEBUG_PRINT(2, ("Using module defined frame buffer settings (0x%08X@0x%08X)\n",
+ mali_fb_size, mali_fb_start));
+ }
+
+ if (0 != mali_fb_size) {
+ /* Register frame buffer */
+ ret = mali_mem_validation_add_range(mali_fb_start, mali_fb_size);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_PRINT_ERROR(("Failed to register frame buffer memory region\n"));
+ mali_memory_terminate();
+ return ret;
+ }
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+static void mali_detect_gpu_class(void)
+{
+ if (_mali_osk_identify_gpu_resource() == 0x450)
+ mali_gpu_class_is_mali450 = MALI_TRUE;
+
+ if (_mali_osk_identify_gpu_resource() == 0x470)
+ mali_gpu_class_is_mali470 = MALI_TRUE;
+}
+
+static _mali_osk_errcode_t mali_init_hw_reset(void)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+ _mali_osk_resource_t resource_bcast;
+
+ /* Ensure broadcast unit is in a good state before we start creating
+ * groups and cores.
+ */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast)) {
+ struct mali_bcast_unit *bcast_core;
+
+ bcast_core = mali_bcast_unit_create(&resource_bcast);
+ if (NULL == bcast_core) {
+ MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+ mali_bcast_unit_delete(bcast_core);
+ }
+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */
+
+ return _MALI_OSK_ERR_OK;
+}
+
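+/* Editor's summary: the bring-up order below is significant. Software-only
+ * subsystems (PP jobs, timeline, sessions, memory, executor, scheduler) come
+ * first; then, with the GPU held powered between mali_pm_init_begin() and
+ * mali_pm_init_end(), the hardware is probed and configured (product info,
+ * MMU, DLBU on Mali-450/470, L2 caches, groups, control timer, utilization,
+ * DVFS). Any failure unwinds through mali_terminate_subsystems(). */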
+_mali_osk_errcode_t mali_initialize_subsystems(void)
+{
+ _mali_osk_errcode_t err;
+
+#if defined(CONFIG_MALI_DT) && !defined(CONFIG_MALI_PLAT_SPECIFIC_DT)
+ err = _mali_osk_resource_initialize();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+#endif
+
+ mali_pp_job_initialize();
+
+ mali_timeline_initialize();
+
+ err = mali_session_initialize();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+#if defined(CONFIG_MALI400_PROFILING)
+ err = _mali_osk_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+ if (_MALI_OSK_ERR_OK != err) {
+ /* No biggie if we weren't able to initialize the profiling */
+ MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+ }
+#endif
+
+ err = mali_memory_initialize();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ err = mali_executor_initialize();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ err = mali_scheduler_initialize();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* Configure memory early, needed by mali_mmu_initialize. */
+ err = mali_parse_config_memory();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ err = mali_set_global_gpu_base_address();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* Detect GPU class (uses L2 cache count) */
+ mali_detect_gpu_class();
+
+ err = mali_check_shared_interrupts();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* Initialize the MALI PMU (will not touch HW!) */
+ err = mali_parse_config_pmu();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* Initialize the power management module */
+ err = mali_pm_initialize();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* Make sure the entire GPU stays on for the rest of this function */
+ mali_pm_init_begin();
+
+ /* Ensure HW is in a good state before starting to access cores. */
+ err = mali_init_hw_reset();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* Detect which Mali GPU we are dealing with */
+ err = mali_parse_product_info();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_init_end();
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* The global_product_id is now populated with the correct Mali GPU */
+
+ /* Start configuring the actual Mali hardware. */
+
+ err = mali_mmu_initialize();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_init_end();
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ if (mali_is_mali450() || mali_is_mali470()) {
+ err = mali_dlbu_initialize();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_init_end();
+ mali_terminate_subsystems();
+ return err;
+ }
+ }
+
+ err = mali_parse_config_l2_cache();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_init_end();
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ err = mali_parse_config_groups();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_init_end();
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* Move groups into executor */
+ mali_executor_populate();
+
+ /* Must be called after all groups have been assigned a domain */
+ mali_pm_power_cost_setup();
+
+ /* Initialize the GPU timer */
+ err = mali_control_timer_init();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_init_end();
+ mali_terminate_subsystems();
+ return err;
+ }
+
+ /* Initialize the GPU utilization tracking */
+ err = mali_utilization_init();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_init_end();
+ mali_terminate_subsystems();
+ return err;
+ }
+
+#if defined(CONFIG_MALI_DVFS)
+ err = mali_dvfs_policy_init();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_init_end();
+ mali_terminate_subsystems();
+ return err;
+ }
+#endif
+
+ /* Allow the GPU to be powered off again */
+ mali_pm_init_end();
+
+ return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+void mali_terminate_subsystems(void)
+{
+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+ MALI_DEBUG_PRINT(2, ("terminate_subsystems() called\n"));
+
+ mali_utilization_term();
+ mali_control_timer_term();
+
+ mali_executor_depopulate();
+ mali_delete_groups(); /* Delete groups not added to executor */
+ mali_executor_terminate();
+
+ mali_scheduler_terminate();
+ mali_pp_job_terminate();
+ mali_delete_l2_cache_cores();
+ mali_mmu_terminate();
+
+ if (mali_is_mali450() || mali_is_mali470()) {
+ mali_dlbu_terminate();
+ }
+
+ mali_pm_terminate();
+
+ if (NULL != pmu) {
+ mali_pmu_delete(pmu);
+ }
+
+#if defined(CONFIG_MALI400_PROFILING)
+ _mali_osk_profiling_term();
+#endif
+
+ mali_memory_terminate();
+
+ mali_session_terminate();
+
+ mali_timeline_terminate();
+
+ global_gpu_base_address = 0;
+}
+
+_mali_product_id_t mali_kernel_core_get_product_id(void)
+{
+ return global_product_id;
+}
+
+u32 mali_kernel_core_get_gpu_major_version(void)
+{
+ return global_gpu_major_version;
+}
+
+u32 mali_kernel_core_get_gpu_minor_version(void)
+{
+ return global_gpu_minor_version;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args)
+{
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+ /* check compatibility */
+ if (args->version == _MALI_UK_API_VERSION) {
+ args->compatible = 1;
+ } else {
+ args->compatible = 0;
+ }
+
+ args->version = _MALI_UK_API_VERSION; /* report our version */
+
+ /* success regardless of being compatible or not */
+ MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args)
+{
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+ /* check compatibility */
+ if (args->version == _MALI_UK_API_VERSION) {
+ args->compatible = 1;
+ } else {
+ args->compatible = 0;
+ }
+
+ args->version = _MALI_UK_API_VERSION; /* report our version */
+
+ /* success regardless of being compatible or not */
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args)
+{
+ _mali_osk_errcode_t err;
+ _mali_osk_notification_t *notification;
+ _mali_osk_notification_queue_t *queue;
+ struct mali_session_data *session;
+
+ /* check input */
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+ session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ queue = session->ioctl_queue;
+
+ /* if the queue does not exist we're currently shutting down */
+ if (NULL == queue) {
+ MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+ args->type = _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS;
+ return _MALI_OSK_ERR_OK;
+ }
+
+ /* receive a notification, might sleep */
+ err = _mali_osk_notification_queue_receive(queue, &notification);
+ if (_MALI_OSK_ERR_OK != err) {
+ MALI_ERROR(err); /* errcode returned, pass on to caller */
+ }
+
+ /* copy the buffer to the user */
+ args->type = (_mali_uk_notification_type)notification->notification_type;
+ _mali_osk_memcpy(&args->data, notification->result_buffer, notification->result_buffer_size);
+
+ /* finished with the notification */
+ _mali_osk_notification_delete(notification);
+
+ return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args)
+{
+ _mali_osk_notification_t *notification;
+ _mali_osk_notification_queue_t *queue;
+ struct mali_session_data *session;
+
+ /* check input */
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+ session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ queue = session->ioctl_queue;
+
+ /* if the queue does not exist we're currently shutting down */
+ if (NULL == queue) {
+ MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n"));
+ return _MALI_OSK_ERR_OK;
+ }
+
+ notification = _mali_osk_notification_create(args->type, 0);
+ if (NULL == notification) {
+ MALI_PRINT_ERROR(("Failed to create notification object\n"));
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ _mali_osk_notification_queue_send(queue, notification);
+
+ return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args)
+{
+ wait_queue_head_t *queue;
+
+ /* check input */
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+ queue = mali_session_get_wait_queue();
+
+ /* Check the number of pending big jobs; might sleep if it exceeds the maximum allowed */
+ if (wait_event_interruptible(*queue, MALI_MAX_PENDING_BIG_JOB > mali_scheduler_job_gp_big_job_count())) {
+ return _MALI_OSK_ERR_RESTARTSYSCALL;
+ }
+
+ return _MALI_OSK_ERR_OK; /* all ok */
+}
+
+
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args)
+{
+ struct mali_session_data *session;
+
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+ session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+ if (!session->use_high_priority_job_queue) {
+ session->use_high_priority_job_queue = MALI_TRUE;
+ MALI_DEBUG_PRINT(2, ("Session 0x%08X with pid %d was granted higher priority.\n", session, _mali_osk_get_pid()));
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_open(void **context)
+{
+ u32 i;
+ struct mali_session_data *session;
+
+ /* allocate a struct to track this session */
+ session = (struct mali_session_data *)_mali_osk_calloc(1, sizeof(struct mali_session_data));
+ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_NOMEM);
+
+ MALI_DEBUG_PRINT(3, ("Session starting\n"));
+
+ /* create a response queue for this session */
+ session->ioctl_queue = _mali_osk_notification_queue_init();
+ if (NULL == session->ioctl_queue) {
+ goto err;
+ }
+
+ session->page_directory = mali_mmu_pagedir_alloc();
+ if (NULL == session->page_directory) {
+ goto err_mmu;
+ }
+
+ if (_MALI_OSK_ERR_OK != mali_mmu_pagedir_map(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE)) {
+ MALI_PRINT_ERROR(("Failed to map DLBU page into session\n"));
+ goto err_mmu;
+ }
+
+ if (0 != mali_dlbu_phys_addr) {
+ mali_mmu_pagedir_update(session->page_directory, MALI_DLBU_VIRT_ADDR, mali_dlbu_phys_addr,
+ _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+ }
+
+ if (_MALI_OSK_ERR_OK != mali_memory_session_begin(session)) {
+ goto err_session;
+ }
+
+ /* Create soft system. */
+ session->soft_job_system = mali_soft_job_system_create(session);
+ if (NULL == session->soft_job_system) {
+ goto err_soft;
+ }
+
+ /* Create timeline system. */
+ session->timeline_system = mali_timeline_system_create(session);
+ if (NULL == session->timeline_system) {
+ goto err_time_line;
+ }
+
+#if defined(CONFIG_MALI_DVFS)
+ _mali_osk_atomic_init(&session->number_of_window_jobs, 0);
+#endif
+
+ session->use_high_priority_job_queue = MALI_FALSE;
+
+ /* Initialize list of PP jobs on this session. */
+ _MALI_OSK_INIT_LIST_HEAD(&session->pp_job_list);
+
+ /* Initialize the pp_job_fb_lookup_list array used to quickly lookup jobs from a given frame builder */
+ for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i) {
+ _MALI_OSK_INIT_LIST_HEAD(&session->pp_job_fb_lookup_list[i]);
+ }
+
+ session->pid = _mali_osk_get_pid();
+ session->comm = _mali_osk_get_comm();
+ session->max_mali_mem_allocated_size = 0;
+ for (i = 0; i < MALI_MEM_TYPE_MAX; i ++) {
+ atomic_set(&session->mali_mem_array[i], 0);
+ }
+ atomic_set(&session->mali_mem_allocated_pages, 0);
+ *context = (void *)session;
+
+ /* Add session to the list of all sessions. */
+ mali_session_add(session);
+
+ MALI_DEBUG_PRINT(3, ("Session started\n"));
+ return _MALI_OSK_ERR_OK;
+
+err_time_line:
+ mali_soft_job_system_destroy(session->soft_job_system);
+err_soft:
+ mali_memory_session_end(session);
+err_session:
+ mali_mmu_pagedir_free(session->page_directory);
+err_mmu:
+ _mali_osk_notification_queue_term(session->ioctl_queue);
+err:
+ _mali_osk_free(session);
+ MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+}
+
+#if defined(DEBUG)
+/* parameter used for debug */
+extern u32 num_pm_runtime_resume;
+extern u32 num_pm_updates;
+extern u32 num_pm_updates_up;
+extern u32 num_pm_updates_down;
+#endif
+
+_mali_osk_errcode_t _mali_ukk_close(void **context)
+{
+ struct mali_session_data *session;
+ MALI_CHECK_NON_NULL(context, _MALI_OSK_ERR_INVALID_ARGS);
+ session = (struct mali_session_data *)*context;
+
+ MALI_DEBUG_PRINT(3, ("Session ending\n"));
+
+ MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+ MALI_DEBUG_ASSERT_POINTER(session->timeline_system);
+
+ /* Remove session from list of all sessions. */
+ mali_session_remove(session);
+
+ /* This flag is used to prevent queueing of jobs due to activation. */
+ session->is_aborting = MALI_TRUE;
+
+ /* Stop the soft job timer. */
+ mali_timeline_system_stop_timer(session->timeline_system);
+
+ /* Abort queued jobs */
+ mali_scheduler_abort_session(session);
+
+ /* Abort executing jobs */
+ mali_executor_abort_session(session);
+
+ /* Abort the soft job system. */
+ mali_soft_job_system_abort(session->soft_job_system);
+
+ /* Force execution of all pending bottom half processing for GP and PP. */
+ _mali_osk_wq_flush();
+
+ /* The session PP list should now be empty. */
+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_list));
+
+ /* At this point the GP and PP schedulers no longer have any jobs queued or running from this
+ * session, and all soft jobs in the soft job system have been destroyed. */
+
+ /* Any trackers left in the timeline system are directly or indirectly waiting on external
+ * sync fences. Cancel all sync fence waiters to trigger activation of all remaining
+ * trackers. This call will sleep until all timelines are empty. */
+ mali_timeline_system_abort(session->timeline_system);
+
+ /* Flush pending work.
+ * Needed to make sure all bottom half processing related to this
+ * session has been completed, before we free internal data structures.
+ */
+ _mali_osk_wq_flush();
+
+ /* Destroy timeline system. */
+ mali_timeline_system_destroy(session->timeline_system);
+ session->timeline_system = NULL;
+
+ /* Destroy soft system. */
+ mali_soft_job_system_destroy(session->soft_job_system);
+ session->soft_job_system = NULL;
+
+ MALI_DEBUG_CODE({
+ /* Check that the pp_job_fb_lookup_list array is empty. */
+ u32 i;
+ for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i)
+ {
+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_fb_lookup_list[i]));
+ }
+ });
+
+ /* Free remaining memory allocated to this session */
+ mali_memory_session_end(session);
+
+#if defined(CONFIG_MALI_DVFS)
+ _mali_osk_atomic_term(&session->number_of_window_jobs);
+#endif
+
+#if defined(CONFIG_MALI400_PROFILING)
+ _mali_osk_profiling_stop_sampling(session->pid);
+#endif
+
+ /* Free session data structures */
+ mali_mmu_pagedir_unmap(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE);
+ mali_mmu_pagedir_free(session->page_directory);
+ _mali_osk_notification_queue_term(session->ioctl_queue);
+ _mali_osk_free(session);
+
+ *context = NULL;
+
+ MALI_DEBUG_PRINT(3, ("Session has ended\n"));
+
+#if defined(DEBUG)
+ MALI_DEBUG_PRINT(3, ("Stats: # runtime resumes: %u\n", num_pm_runtime_resume));
+ MALI_DEBUG_PRINT(3, (" # PM updates: .... %u (up %u, down %u)\n", num_pm_updates, num_pm_updates_up, num_pm_updates_down));
+
+ num_pm_runtime_resume = 0;
+ num_pm_updates = 0;
+ num_pm_updates_up = 0;
+ num_pm_updates_down = 0;
+#endif
+
+ return _MALI_OSK_ERR_OK;
+}
+
+#if MALI_STATE_TRACKING
+u32 _mali_kernel_core_dump_state(char *buf, u32 size)
+{
+ int n = 0; /* Number of bytes written to buf */
+
+ n += mali_scheduler_dump_state(buf + n, size - n);
+ n += mali_executor_dump_state(buf + n, size - n);
+
+ return n;
+}
+#endif
diff --git a/drivers/gpu/arm/utgard/common/mali_kernel_core.h b/drivers/gpu/arm/utgard/common/mali_kernel_core.h
new file mode 100644
index 000000000000..8cdbc5af3205
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_kernel_core.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_CORE_H__
+#define __MALI_KERNEL_CORE_H__
+
+#include "mali_osk.h"
+
+typedef enum {
+ _MALI_PRODUCT_ID_UNKNOWN,
+ _MALI_PRODUCT_ID_MALI200,
+ _MALI_PRODUCT_ID_MALI300,
+ _MALI_PRODUCT_ID_MALI400,
+ _MALI_PRODUCT_ID_MALI450,
+ _MALI_PRODUCT_ID_MALI470,
+} _mali_product_id_t;
+
+extern mali_bool mali_gpu_class_is_mali450;
+extern mali_bool mali_gpu_class_is_mali470;
+
+_mali_osk_errcode_t mali_initialize_subsystems(void);
+
+void mali_terminate_subsystems(void);
+
+_mali_product_id_t mali_kernel_core_get_product_id(void);
+
+u32 mali_kernel_core_get_gpu_major_version(void);
+
+u32 mali_kernel_core_get_gpu_minor_version(void);
+
+u32 _mali_kernel_core_dump_state(char *buf, u32 size);
+
+MALI_STATIC_INLINE mali_bool mali_is_mali470(void)
+{
+ return mali_gpu_class_is_mali470;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali450(void)
+{
+ return mali_gpu_class_is_mali450;
+}
+
+MALI_STATIC_INLINE mali_bool mali_is_mali400(void)
+{
+ if (mali_gpu_class_is_mali450 || mali_gpu_class_is_mali470)
+ return MALI_FALSE;
+
+ return MALI_TRUE;
+}
+#endif /* __MALI_KERNEL_CORE_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_kernel_utilization.c b/drivers/gpu/arm/utgard/common/mali_kernel_utilization.c
new file mode 100644
index 000000000000..63b941742249
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_kernel_utilization.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_utilization.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_scheduler.h"
+
+#include "mali_executor.h"
+#include "mali_dvfs_policy.h"
+#include "mali_control_timer.h"
+
+/* Thresholds for GP bound detection. */
+#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
+#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
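+/* Editor's note: utilization values are expressed in parts of 256, so the
+ * thresholds above correspond to roughly 94% GP load and 98% PP load. The
+ * GP-bound hint is raised when GP utilization is above the first threshold
+ * while PP utilization is below the second. */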
+
+static _mali_osk_spinlock_irq_t *utilization_data_lock;
+
+static u32 num_running_gp_cores = 0;
+static u32 num_running_pp_cores = 0;
+
+static u64 work_start_time_gpu = 0;
+static u64 work_start_time_gp = 0;
+static u64 work_start_time_pp = 0;
+static u64 accumulated_work_time_gpu = 0;
+static u64 accumulated_work_time_gp = 0;
+static u64 accumulated_work_time_pp = 0;
+
+static u32 last_utilization_gpu = 0;
+static u32 last_utilization_gp = 0;
+static u32 last_utilization_pp = 0;
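+
+/* Editor's note on the bookkeeping invariant: a work_start_time_* of 0 means
+ * the unit is idle; a non-zero value is the timestamp (in ns) since which it
+ * has been continuously busy. accumulated_work_time_* collects completed
+ * busy time within the current measurement period. */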
+
+void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
+
+/* First timeout of the control timer, in milliseconds */
+static u32 mali_control_first_timeout = 100;
+static struct mali_gpu_utilization_data mali_util_data = {0, };
+
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
+{
+ u64 time_now;
+ u32 leading_zeroes;
+ u32 shift_val;
+ u32 work_normalized_gpu;
+ u32 work_normalized_gp;
+ u32 work_normalized_pp;
+ u32 period_normalized;
+ u32 utilization_gpu;
+ u32 utilization_gp;
+ u32 utilization_pp;
+
+ mali_utilization_data_lock();
+
+ time_now = _mali_osk_time_get_ns();
+
+ *time_period = time_now - *start_time;
+
+ if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
+ mali_control_timer_pause();
+ /*
+ * No work done for this period
+ * - No need to reschedule timer
+ * - Report zero usage
+ */
+ last_utilization_gpu = 0;
+ last_utilization_gp = 0;
+ last_utilization_pp = 0;
+
+ mali_util_data.utilization_gpu = last_utilization_gpu;
+ mali_util_data.utilization_gp = last_utilization_gp;
+ mali_util_data.utilization_pp = last_utilization_pp;
+
+ mali_utilization_data_unlock();
+
+ *need_add_timer = MALI_FALSE;
+
+ mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+
+ MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+ MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+ MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+ return &mali_util_data;
+ }
+
+ /* If we are currently busy, update working period up to now */
+ if (work_start_time_gpu != 0) {
+ accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+ work_start_time_gpu = time_now;
+
+ /* GP and/or PP will also be busy if the GPU is busy at this point */
+
+ if (work_start_time_gp != 0) {
+ accumulated_work_time_gp += (time_now - work_start_time_gp);
+ work_start_time_gp = time_now;
+ }
+
+ if (work_start_time_pp != 0) {
+ accumulated_work_time_pp += (time_now - work_start_time_pp);
+ work_start_time_pp = time_now;
+ }
+ }
+
+ /*
+ * We have two 64-bit values, a dividend and a divisor.
+ * To avoid depending on a 64-bit divider, we first shift both values
+ * down equally so they fit in 32-bit integers. We then shift the
+ * dividend up, or the divisor down, so the result ends up expressed
+ * in parts of 256.
+ */
+
+ /* Shift the 64-bit values down so they fit inside a 32-bit integer */
+ leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
+ shift_val = 32 - leading_zeroes;
+ work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
+ work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
+ work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
+ period_normalized = (u32)(*time_period >> shift_val);
+
+ /*
+ * Now, we should report the usage in parts of 256
+ * this means we must shift up the dividend or down the divisor by 8
+ * (we could do a combination, but we just use one for simplicity,
+ * but the end result should be good enough anyway)
+ */
+ if (period_normalized > 0x00FFFFFF) {
+ /* The divisor is so big that it is safe to shift it down */
+ period_normalized >>= 8;
+ } else {
+ /*
+ * The divisor is so small that we can shift up the dividend without losing any data.
+ * (The dividend is always smaller than the divisor.)
+ */
+ work_normalized_gpu <<= 8;
+ work_normalized_gp <<= 8;
+ work_normalized_pp <<= 8;
+ }
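+ /*
+ * Worked example (editor's illustration, assuming _mali_osk_clz(0)
+ * returns 32): for a 100 ms period (1e8 ns < 2^32) the high word is
+ * zero, so shift_val is 0 and nothing is discarded. period_normalized
+ * is 1e8 > 0x00FFFFFF, so the divisor is shifted down by 8 to 390625.
+ * If the GPU was busy for 50 ms, 50000000 / 390625 = 128, i.e.
+ * 128/256 = 50% utilization.
+ */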
+
+ utilization_gpu = work_normalized_gpu / period_normalized;
+ utilization_gp = work_normalized_gp / period_normalized;
+ utilization_pp = work_normalized_pp / period_normalized;
+
+ last_utilization_gpu = utilization_gpu;
+ last_utilization_gp = utilization_gp;
+ last_utilization_pp = utilization_pp;
+
+ if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
+ (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
+ mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
+ } else {
+ mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
+ }
+
+ /* starting a new period */
+ accumulated_work_time_gpu = 0;
+ accumulated_work_time_gp = 0;
+ accumulated_work_time_pp = 0;
+
+ *start_time = time_now;
+
+ mali_util_data.utilization_gp = last_utilization_gp;
+ mali_util_data.utilization_gpu = last_utilization_gpu;
+ mali_util_data.utilization_pp = last_utilization_pp;
+
+ mali_utilization_data_unlock();
+
+ *need_add_timer = MALI_TRUE;
+
+ MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
+ MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
+ MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));
+
+ return &mali_util_data;
+}
+
+_mali_osk_errcode_t mali_utilization_init(void)
+{
+#if USING_GPU_UTILIZATION
+ _mali_osk_device_data data;
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+ if (NULL != data.utilization_callback) {
+ mali_utilization_callback = data.utilization_callback;
+ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n"));
+ }
+ }
+#endif /* USING_GPU_UTILIZATION */
+
+ if (NULL == mali_utilization_callback) {
+ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
+ }
+
+ utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
+ if (NULL == utilization_data_lock) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ num_running_gp_cores = 0;
+ num_running_pp_cores = 0;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_utilization_term(void)
+{
+ if (NULL != utilization_data_lock) {
+ _mali_osk_spinlock_irq_term(utilization_data_lock);
+ }
+}
+
+void mali_utilization_gp_start(void)
+{
+ mali_utilization_data_lock();
+
+ ++num_running_gp_cores;
+ if (1 == num_running_gp_cores) {
+ u64 time_now = _mali_osk_time_get_ns();
+
+ /* First GP core started, consider GP busy from now and onwards */
+ work_start_time_gp = time_now;
+
+ if (0 == num_running_pp_cores) {
+ mali_bool is_resume = MALI_FALSE;
+ /*
+ * There are no PP cores running, so this is also the point
+ * at which we consider the GPU to be busy as well.
+ */
+ work_start_time_gpu = time_now;
+
+ is_resume = mali_control_timer_resume(time_now);
+
+ mali_utilization_data_unlock();
+
+ if (is_resume) {
+ /* Do some policy in new period for performance consideration */
+#if defined(CONFIG_MALI_DVFS)
+ /* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+ mali_session_max_window_num();
+ if (0 == last_utilization_gpu) {
+ /*
+ * mali_dev_pause() is invoked when the clock is set, and every
+ * clock change moves the clock to the highest step, even when
+ * clocking down. That is unnecessary, so we only start a new
+ * DVFS period when the last measured utilization was 0, i.e.
+ * the case where the timer was stopped and the GPU is now
+ * starting again.
+ */
+ mali_dvfs_policy_new_period();
+ }
+#endif
+ /*
+ * Use a short interval for the first timeout to save power:
+ * we give full power in the new period, but if the job load
+ * is light and finishes within 10 ms, staying at the high
+ * frequency for the rest of a long period would waste energy.
+ */
+ mali_control_timer_add(mali_control_first_timeout);
+ }
+ } else {
+ mali_utilization_data_unlock();
+ }
+
+ } else {
+ /* Nothing to do */
+ mali_utilization_data_unlock();
+ }
+}
+
+void mali_utilization_pp_start(void)
+{
+ mali_utilization_data_lock();
+
+ ++num_running_pp_cores;
+ if (1 == num_running_pp_cores) {
+ u64 time_now = _mali_osk_time_get_ns();
+
+ /* First PP core started, consider PP busy from now on */
+ work_start_time_pp = time_now;
+
+ if (0 == num_running_gp_cores) {
+ mali_bool is_resume = MALI_FALSE;
+ /*
+ * There are no GP cores running, so this is also the point
+ * at which we consider the GPU to be busy as well.
+ */
+ work_start_time_gpu = time_now;
+
+ /* Start a new period if the control timer was stopped */
+ is_resume = mali_control_timer_resume(time_now);
+
+ mali_utilization_data_unlock();
+
+ if (is_resume) {
+#if defined(CONFIG_MALI_DVFS)
+ /* Clear session->number_of_window_jobs, prepare parameter for dvfs */
+ mali_session_max_window_num();
+ if (0 == last_utilization_gpu) {
+ /*
+ * mali_dev_pause() is called whenever the clock is changed,
+ * and every clock change first sets the clock to the highest
+ * step, even when clocking down. That is unnecessary, so we
+ * only start a new DVFS period when the last utilization was
+ * 0, i.e. when the timer was stopped and the GPU is being
+ * started again.
+ */
+ mali_dvfs_policy_new_period();
+ }
+#endif
+
+ /*
+ * Use a short interval for the first timeout to save power:
+ * we give full power in the new period, but if the job load
+ * is light and finishes within 10 ms, staying at the high
+ * frequency for the rest of a long period would waste energy.
+ */
+ mali_control_timer_add(mali_control_first_timeout);
+ }
+ } else {
+ mali_utilization_data_unlock();
+ }
+ } else {
+ /* Nothing to do */
+ mali_utilization_data_unlock();
+ }
+}
+
+void mali_utilization_gp_end(void)
+{
+ mali_utilization_data_lock();
+
+ --num_running_gp_cores;
+ if (0 == num_running_gp_cores) {
+ u64 time_now = _mali_osk_time_get_ns();
+
+ /* Last GP core ended, consider GP idle from now on */
+ accumulated_work_time_gp += (time_now - work_start_time_gp);
+ work_start_time_gp = 0;
+
+ if (0 == num_running_pp_cores) {
+ /*
+ * There are no PP cores running, so this is also the point
+ * at which we consider the GPU to be idle as well.
+ */
+ accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+ work_start_time_gpu = 0;
+ }
+ }
+
+ mali_utilization_data_unlock();
+}
+
+void mali_utilization_pp_end(void)
+{
+ mali_utilization_data_lock();
+
+ --num_running_pp_cores;
+ if (0 == num_running_pp_cores) {
+ u64 time_now = _mali_osk_time_get_ns();
+
+ /* Last PP core ended, consider PP idle from now on */
+ accumulated_work_time_pp += (time_now - work_start_time_pp);
+ work_start_time_pp = 0;
+
+ if (0 == num_running_gp_cores) {
+ /*
+ * There are no GP cores running, so this is also the point
+ * at which we consider the GPU to be idle as well.
+ */
+ accumulated_work_time_gpu += (time_now - work_start_time_gpu);
+ work_start_time_gpu = 0;
+ }
+ }
+
+ mali_utilization_data_unlock();
+}
+
+mali_bool mali_utilization_enabled(void)
+{
+#if defined(CONFIG_MALI_DVFS)
+ return mali_dvfs_policy_enabled();
+#else
+ return (NULL != mali_utilization_callback);
+#endif /* defined(CONFIG_MALI_DVFS) */
+}
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
+{
+ MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
+
+ mali_utilization_callback(util_data);
+}
+
+void mali_utilization_reset(void)
+{
+ accumulated_work_time_gpu = 0;
+ accumulated_work_time_gp = 0;
+ accumulated_work_time_pp = 0;
+
+ last_utilization_gpu = 0;
+ last_utilization_gp = 0;
+ last_utilization_pp = 0;
+}
+
+void mali_utilization_data_lock(void)
+{
+ _mali_osk_spinlock_irq_lock(utilization_data_lock);
+}
+
+void mali_utilization_data_unlock(void)
+{
+ _mali_osk_spinlock_irq_unlock(utilization_data_lock);
+}
+
+void mali_utilization_data_assert_locked(void)
+{
+ MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
+}
+
+u32 _mali_ukk_utilization_gp_pp(void)
+{
+ return last_utilization_gpu;
+}
+
+u32 _mali_ukk_utilization_gp(void)
+{
+ return last_utilization_gp;
+}
+
+u32 _mali_ukk_utilization_pp(void)
+{
+ return last_utilization_pp;
+}
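+
+/*
+ * Illustrative sketch (hypothetical platform hook, not part of this file):
+ * a platform integration receives these 0..256 fixed-point values through
+ * the callback installed in mali_utilization_init(), for example:
+ *
+ * static void my_platform_utilization(struct mali_gpu_utilization_data *d)
+ * {
+ *         // d->utilization_gpu is 0..256, where 256 means 100% busy
+ *         pr_debug("mali utilization: %u/256\n", d->utilization_gpu);
+ * }
+ */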
diff --git a/drivers/gpu/arm/utgard/common/mali_kernel_utilization.h b/drivers/gpu/arm/utgard/common/mali_kernel_utilization.h
new file mode 100644
index 000000000000..3c20b1983762
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_kernel_utilization.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_UTILIZATION_H__
+#define __MALI_KERNEL_UTILIZATION_H__
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_osk.h"
+
+/**
+ * Initialize/start the Mali GPU utilization metrics reporting.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_utilization_init(void);
+
+/**
+ * Terminate the Mali GPU utilization metrics reporting
+ */
+void mali_utilization_term(void);
+
+/**
+ * Check if Mali utilization is enabled
+ */
+mali_bool mali_utilization_enabled(void);
+
+/**
+ * Should be called when a GP job is about to start executing
+ */
+void mali_utilization_gp_start(void);
+
+/**
+ * Should be called when a GP job has completed execution
+ */
+void mali_utilization_gp_end(void);
+
+/**
+ * Should be called when a PP job is about to start executing
+ */
+void mali_utilization_pp_start(void);
+
+/**
+ * Should be called when a PP job has completed execution
+ */
+void mali_utilization_pp_end(void);
+
+/**
+ * Should be called to calculate the GPU utilization
+ */
+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer);
+
+_mali_osk_spinlock_irq_t *mali_utilization_get_lock(void);
+
+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data);
+
+void mali_utilization_data_lock(void);
+
+void mali_utilization_data_unlock(void);
+
+void mali_utilization_data_assert_locked(void);
+
+void mali_utilization_reset(void);
+
+
+#endif /* __MALI_KERNEL_UTILIZATION_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_kernel_vsync.c b/drivers/gpu/arm/utgard/common/mali_kernel_vsync.c
new file mode 100644
index 000000000000..2eed4c88cf56
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_kernel_vsync.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+#include "mali_osk_profiling.h"
+
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args)
+{
+ _mali_uk_vsync_event event = (_mali_uk_vsync_event)args->event;
+ MALI_IGNORE(event); /* event is not used in release builds, and that is OK */
+
+ /*
+ * Manually generate user space events in kernel space.
+ * This saves user space from calling kernel space twice in this case.
+ * We just need to remember to add pid and tid manually.
+ */
+ if (event == _MALI_UK_VSYNC_EVENT_BEGIN_WAIT) {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+ _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+ }
+
+ if (event == _MALI_UK_VSYNC_EVENT_END_WAIT) {
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC,
+ _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0);
+ }
+
+
+ MALI_DEBUG_PRINT(4, ("Received VSYNC event: %d\n", event));
+ MALI_SUCCESS;
+}
+
diff --git a/drivers/gpu/arm/utgard/common/mali_l2_cache.c b/drivers/gpu/arm/utgard/common/mali_l2_cache.c
new file mode 100644
index 000000000000..494ba789cd08
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_l2_cache.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_scheduler.h"
+#include "mali_pm.h"
+#include "mali_pm_domain.h"
+
+/**
+ * Size of the Mali L2 cache registers in bytes
+ */
+#define MALI400_L2_CACHE_REGISTERS_SIZE 0x30
+
+/**
+ * Mali L2 cache register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_l2_cache_register {
+ MALI400_L2_CACHE_REGISTER_SIZE = 0x0004,
+ MALI400_L2_CACHE_REGISTER_STATUS = 0x0008,
+ /*unused = 0x000C */
+ MALI400_L2_CACHE_REGISTER_COMMAND = 0x0010,
+ MALI400_L2_CACHE_REGISTER_CLEAR_PAGE = 0x0014,
+ MALI400_L2_CACHE_REGISTER_MAX_READS = 0x0018,
+ MALI400_L2_CACHE_REGISTER_ENABLE = 0x001C,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0 = 0x0020,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0 = 0x0024,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1 = 0x0028,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1 = 0x002C,
+} mali_l2_cache_register;
+
+/**
+ * Mali L2 cache commands
+ * These are the commands that can be sent to the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_command {
+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL = 0x01,
+} mali_l2_cache_command;
+
+/**
+ * Mali L2 cache enable flags
+ * These are the flags that can be used to enable the Mali L2 cache unit
+ */
+typedef enum mali_l2_cache_enable {
+ MALI400_L2_CACHE_ENABLE_DEFAULT = 0x0, /* Default */
+ MALI400_L2_CACHE_ENABLE_ACCESS = 0x01,
+ MALI400_L2_CACHE_ENABLE_READ_ALLOCATE = 0x02,
+} mali_l2_cache_enable;
+
+/**
+ * Mali L2 cache status bits
+ */
+typedef enum mali_l2_cache_status {
+ MALI400_L2_CACHE_STATUS_COMMAND_BUSY = 0x01,
+ MALI400_L2_CACHE_STATUS_DATA_BUSY = 0x02,
+} mali_l2_cache_status;
+
+#define MALI400_L2_MAX_READS_NOT_SET -1
+
+static struct mali_l2_cache_core *
+ mali_global_l2s[MALI_MAX_NUMBER_OF_L2_CACHE_CORES] = { NULL, };
+static u32 mali_global_num_l2s = 0;
+
+int mali_l2_max_reads = MALI400_L2_MAX_READS_NOT_SET;
+
+
+/* Local helper functions */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache);
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+ struct mali_l2_cache_core *cache, u32 reg, u32 val);
+
+static void mali_l2_cache_lock(struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ _mali_osk_spinlock_irq_lock(cache->lock);
+}
+
+static void mali_l2_cache_unlock(struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ _mali_osk_spinlock_irq_unlock(cache->lock);
+}
+
+/* Implementation of the L2 cache interface */
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+ _mali_osk_resource_t *resource, u32 domain_index)
+{
+ struct mali_l2_cache_core *cache = NULL;
+#if defined(DEBUG)
+ u32 cache_size;
+#endif
+
+ MALI_DEBUG_PRINT(4, ("Mali L2 cache: Creating Mali L2 cache: %s\n",
+ resource->description));
+
+ if (mali_global_num_l2s >= MALI_MAX_NUMBER_OF_L2_CACHE_CORES) {
+ MALI_PRINT_ERROR(("Mali L2 cache: Too many L2 caches\n"));
+ return NULL;
+ }
+
+ cache = _mali_osk_malloc(sizeof(struct mali_l2_cache_core));
+ if (NULL == cache) {
+ MALI_PRINT_ERROR(("Mali L2 cache: Failed to allocate memory for L2 cache core\n"));
+ return NULL;
+ }
+
+ cache->core_id = mali_global_num_l2s;
+ cache->counter_src0 = MALI_HW_CORE_NO_COUNTER;
+ cache->counter_src1 = MALI_HW_CORE_NO_COUNTER;
+ cache->counter_value0_base = 0;
+ cache->counter_value1_base = 0;
+ cache->pm_domain = NULL;
+ cache->power_is_on = MALI_FALSE;
+ cache->last_invalidated_id = 0;
+
+ if (_MALI_OSK_ERR_OK != mali_hw_core_create(&cache->hw_core,
+ resource, MALI400_L2_CACHE_REGISTERS_SIZE)) {
+ _mali_osk_free(cache);
+ return NULL;
+ }
+
+#if defined(DEBUG)
+ cache_size = mali_hw_core_register_read(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_SIZE);
+ MALI_DEBUG_PRINT(2, ("Mali L2 cache: Created %s: % 3uK, %u-way, % 2ubyte cache line, % 3ubit external bus\n",
+ resource->description,
+ 1 << (((cache_size >> 16) & 0xff) - 10),
+ 1 << ((cache_size >> 8) & 0xff),
+ 1 << (cache_size & 0xff),
+ 1 << ((cache_size >> 24) & 0xff)));
+#endif
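+
+ /*
+ * Illustrative note (field layout inferred from the decode above):
+ * the SIZE register packs four log2-encoded fields:
+ *
+ * bits 7:0 -> log2(cache line size in bytes)
+ * bits 15:8 -> log2(associativity)
+ * bits 23:16 -> log2(total size in bytes), printed in KiB via "- 10"
+ * bits 31:24 -> log2(external bus width in bits)
+ *
+ * e.g. cache_size == 0x060F0205 decodes to a 32 KiB, 4-way cache
+ * with 32-byte lines on a 64-bit external bus.
+ */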
+
+ cache->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+ _MALI_OSK_LOCK_ORDER_L2);
+ if (NULL == cache->lock) {
+ MALI_PRINT_ERROR(("Mali L2 cache: Failed to create counter lock for L2 cache core %s\n",
+ cache->hw_core.description));
+ mali_hw_core_delete(&cache->hw_core);
+ _mali_osk_free(cache);
+ return NULL;
+ }
+
+ /* register with correct power domain */
+ cache->pm_domain = mali_pm_register_l2_cache(
+ domain_index, cache);
+
+ mali_global_l2s[mali_global_num_l2s] = cache;
+ mali_global_num_l2s++;
+
+ return cache;
+}
+
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache)
+{
+ u32 i;
+ for (i = 0; i < mali_global_num_l2s; i++) {
+ if (mali_global_l2s[i] != cache) {
+ continue;
+ }
+
+ mali_global_l2s[i] = NULL;
+ mali_global_num_l2s--;
+
+ if (i == mali_global_num_l2s) {
+ /* Removed last element, nothing more to do */
+ break;
+ }
+
+ /*
+ * We removed an L2 cache from the middle of the array,
+ * so move the last L2 cache to the current position
+ */
+ mali_global_l2s[i] = mali_global_l2s[mali_global_num_l2s];
+ mali_global_l2s[mali_global_num_l2s] = NULL;
+
+ /* All good */
+ break;
+ }
+
+ _mali_osk_spinlock_irq_term(cache->lock);
+ mali_hw_core_delete(&cache->hw_core);
+ _mali_osk_free(cache);
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+
+ mali_l2_cache_lock(cache);
+
+ mali_l2_cache_reset(cache);
+
+ if ((1 << MALI_DOMAIN_INDEX_DUMMY) != cache->pm_domain->pmu_mask)
+ MALI_DEBUG_ASSERT(MALI_FALSE == cache->power_is_on);
+ cache->power_is_on = MALI_TRUE;
+
+ mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+
+ mali_l2_cache_lock(cache);
+
+ MALI_DEBUG_ASSERT(MALI_TRUE == cache->power_is_on);
+
+ /*
+ * The HW counters will start from zero again when we resume,
+ * but we should report counters as always increasing.
+ * Take a copy of the HW values now so they can be added to
+ * the values we report after being powered up again.
+ *
+ * The physical power-off of the L2 cache might be outside our
+ * own control (e.g. runtime PM). That is why we must manually
+ * set the counter value to zero as well.
+ */
+
+ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+ cache->counter_value0_base += mali_hw_core_register_read(
+ &cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+ mali_hw_core_register_write(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0, 0);
+ }
+
+ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+ cache->counter_value1_base += mali_hw_core_register_read(
+ &cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+ mali_hw_core_register_write(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1, 0);
+ }
+
+
+ cache->power_is_on = MALI_FALSE;
+
+ mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_set_counter_src(
+ struct mali_l2_cache_core *cache, u32 source_id, u32 counter)
+{
+ u32 reg_offset_src;
+ u32 reg_offset_val;
+
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ MALI_DEBUG_ASSERT(source_id >= 0 && source_id <= 1);
+
+ mali_l2_cache_lock(cache);
+
+ if (0 == source_id) {
+ /* start counting from 0 */
+ cache->counter_value0_base = 0;
+ cache->counter_src0 = counter;
+ reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0;
+ reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0;
+ } else {
+ /* start counting from 0 */
+ cache->counter_value1_base = 0;
+ cache->counter_src1 = counter;
+ reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1;
+ reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1;
+ }
+
+ if (cache->power_is_on) {
+ u32 hw_src;
+
+ if (MALI_HW_CORE_NO_COUNTER != counter) {
+ hw_src = counter;
+ } else {
+ hw_src = 0; /* disable value for HW */
+ }
+
+ /* Set counter src */
+ mali_hw_core_register_write(&cache->hw_core,
+ reg_offset_src, hw_src);
+
+ /* Make sure the HW starts counting from 0 again */
+ mali_hw_core_register_write(&cache->hw_core,
+ reg_offset_val, 0);
+ }
+
+ mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_core_get_counter_values(
+ struct mali_l2_cache_core *cache,
+ u32 *src0, u32 *value0, u32 *src1, u32 *value1)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ MALI_DEBUG_ASSERT(NULL != src0);
+ MALI_DEBUG_ASSERT(NULL != value0);
+ MALI_DEBUG_ASSERT(NULL != src1);
+ MALI_DEBUG_ASSERT(NULL != value1);
+
+ mali_l2_cache_lock(cache);
+
+ *src0 = cache->counter_src0;
+ *src1 = cache->counter_src1;
+
+ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+ if (MALI_TRUE == cache->power_is_on) {
+ *value0 = mali_hw_core_register_read(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0);
+ } else {
+ *value0 = 0;
+ }
+
+ /* Add base offset value (in case we have been powered off) */
+ *value0 += cache->counter_value0_base;
+ }
+
+ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+ if (MALI_TRUE == cache->power_is_on) {
+ *value1 = mali_hw_core_register_read(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1);
+ } else {
+ *value1 = 0;
+ }
+
+ /* Add base offset value (in case we have been powered off) */
+ *value1 += cache->counter_value1_base;
+ }
+
+ mali_l2_cache_unlock(cache);
+}
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index)
+{
+ if (mali_global_num_l2s > index) {
+ return mali_global_l2s[index];
+ }
+
+ return NULL;
+}
+
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void)
+{
+ return mali_global_num_l2s;
+}
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+
+ if (NULL == cache) {
+ return;
+ }
+
+ mali_l2_cache_lock(cache);
+
+ cache->last_invalidated_id = mali_scheduler_get_new_cache_order();
+ mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+ mali_l2_cache_unlock(cache);
+}
+
+void mali_l2_cache_invalidate_conditional(
+ struct mali_l2_cache_core *cache, u32 id)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+
+ if (NULL == cache) {
+ return;
+ }
+
+ /*
+ * If the last cache invalidation was done by a job with a higher id we
+ * don't have to flush. Since user space sets up jobs and their
+ * corresponding memory in sequence (first job #0, then job #1, ...),
+ * we don't have to flush for job n-1 if job n has already invalidated
+ * the cache since we know for sure that job n-1's memory was already
+ * written when job n was started.
+ */
+
+ mali_l2_cache_lock(cache);
+
+ if (((s32)id) > ((s32)cache->last_invalidated_id)) {
+ /* Set latest invalidated id to current "point in time" */
+ cache->last_invalidated_id =
+ mali_scheduler_get_new_cache_order();
+ mali_l2_cache_send_command(cache,
+ MALI400_L2_CACHE_REGISTER_COMMAND,
+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+ }
+
+ mali_l2_cache_unlock(cache);
+}
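+
+/*
+ * Illustrative note (hypothetical ids, not part of the original driver):
+ * suppose job #5 already invalidated the cache (last_invalidated_id == 5)
+ * and job #4 retires afterwards, calling in with id == 4. Since 4 > 5 is
+ * false, no extra CLEAR_ALL is issued: job #4's memory was written before
+ * job #5 started, so the invalidation done for job #5 already covers it.
+ */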
+
+void mali_l2_cache_invalidate_all(void)
+{
+ u32 i;
+ for (i = 0; i < mali_global_num_l2s; i++) {
+ struct mali_l2_cache_core *cache = mali_global_l2s[i];
+ _mali_osk_errcode_t ret;
+
+ MALI_DEBUG_ASSERT_POINTER(cache);
+
+ mali_l2_cache_lock(cache);
+
+ if (MALI_TRUE != cache->power_is_on) {
+ mali_l2_cache_unlock(cache);
+ continue;
+ }
+
+ cache->last_invalidated_id =
+ mali_scheduler_get_new_cache_order();
+
+ ret = mali_l2_cache_send_command(cache,
+ MALI400_L2_CACHE_REGISTER_COMMAND,
+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_PRINT_ERROR(("Failed to invalidate cache\n"));
+ }
+
+ mali_l2_cache_unlock(cache);
+ }
+}
+
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages)
+{
+ u32 i;
+ for (i = 0; i < mali_global_num_l2s; i++) {
+ struct mali_l2_cache_core *cache = mali_global_l2s[i];
+ u32 j;
+
+ MALI_DEBUG_ASSERT_POINTER(cache);
+
+ mali_l2_cache_lock(cache);
+
+ if (MALI_TRUE != cache->power_is_on) {
+ mali_l2_cache_unlock(cache);
+ continue;
+ }
+
+ for (j = 0; j < num_pages; j++) {
+ _mali_osk_errcode_t ret;
+
+ ret = mali_l2_cache_send_command(cache,
+ MALI400_L2_CACHE_REGISTER_CLEAR_PAGE,
+ pages[j]);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_PRINT_ERROR(("Failed to invalidate cache (page)\n"));
+ }
+ }
+
+ mali_l2_cache_unlock(cache);
+ }
+}
+
+/* -------- local helper functions below -------- */
+
+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+ /* Invalidate cache (just to keep it in a known state at startup) */
+ mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND,
+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL);
+
+ /* Enable cache */
+ mali_hw_core_register_write(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_ENABLE,
+ (u32)MALI400_L2_CACHE_ENABLE_ACCESS |
+ (u32)MALI400_L2_CACHE_ENABLE_READ_ALLOCATE);
+
+ if (MALI400_L2_MAX_READS_NOT_SET != mali_l2_max_reads) {
+ mali_hw_core_register_write(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_MAX_READS,
+ (u32)mali_l2_max_reads);
+ }
+
+ /* Restart any performance counters (if enabled) */
+ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) {
+
+ mali_hw_core_register_write(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0,
+ cache->counter_src0);
+ }
+
+ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) {
+ mali_hw_core_register_write(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1,
+ cache->counter_src1);
+ }
+}
+
+static _mali_osk_errcode_t mali_l2_cache_send_command(
+ struct mali_l2_cache_core *cache, u32 reg, u32 val)
+{
+ int i = 0;
+ const int loop_count = 100000;
+
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock);
+
+ /*
+ * First, wait for L2 cache command handler to go idle.
+ * (Commands received while processing another command will be ignored)
+ */
+ for (i = 0; i < loop_count; i++) {
+ if (!(mali_hw_core_register_read(&cache->hw_core,
+ MALI400_L2_CACHE_REGISTER_STATUS) &
+ (u32)MALI400_L2_CACHE_STATUS_COMMAND_BUSY)) {
+ break;
+ }
+ }
+
+ if (i == loop_count) {
+ MALI_DEBUG_PRINT(1, ("Mali L2 cache: aborting wait for command interface to go idle\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* then issue the command */
+ mali_hw_core_register_write(&cache->hw_core, reg, val);
+
+ return _MALI_OSK_ERR_OK;
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_l2_cache.h b/drivers/gpu/arm/utgard/common/mali_l2_cache.h
new file mode 100644
index 000000000000..6dc8ec22d6de
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_l2_cache.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_L2_CACHE_H__
+#define __MALI_KERNEL_L2_CACHE_H__
+
+#include "mali_osk.h"
+#include "mali_hw_core.h"
+
+#define MALI_MAX_NUMBER_OF_L2_CACHE_CORES 3
+/* Maximum 1 GP and 4 PP for an L2 cache core (Mali-400 MP4) */
+#define MALI_MAX_NUMBER_OF_GROUPS_PER_L2_CACHE 5
+
+/**
+ * Definition of the L2 cache core struct
+ * Used to track an L2 cache unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_l2_cache_core {
+ /* Common HW core functionality */
+ struct mali_hw_core hw_core;
+
+ /* Synchronize L2 cache access */
+ _mali_osk_spinlock_irq_t *lock;
+
+ /* Unique core ID */
+ u32 core_id;
+
+ /* The power domain this L2 cache belongs to */
+ struct mali_pm_domain *pm_domain;
+
+ /* MALI_TRUE if power is on for this L2 cache */
+ mali_bool power_is_on;
+
+ /* A "timestamp" to avoid unnecessary flushes */
+ u32 last_invalidated_id;
+
+ /* Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+ u32 counter_src0;
+
+ /* Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+ u32 counter_src1;
+
+ /*
+ * Performance counter 0 value base/offset
+ * (allows accumulative reporting even after power off)
+ */
+ u32 counter_value0_base;
+
+ /*
+ * Performance counter 1 value base/offset
+ * (allows accumulative reporting even after power off)
+ */
+ u32 counter_value1_base;
+
+ /* Used by PM domains to link L2 caches of same domain */
+ _mali_osk_list_t pm_domain_list;
+};
+
+_mali_osk_errcode_t mali_l2_cache_initialize(void);
+void mali_l2_cache_terminate(void);
+
+struct mali_l2_cache_core *mali_l2_cache_create(
+ _mali_osk_resource_t *resource, u32 domain_index);
+void mali_l2_cache_delete(struct mali_l2_cache_core *cache);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_get_id(struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ return cache->core_id;
+}
+
+MALI_STATIC_INLINE struct mali_pm_domain *mali_l2_cache_get_pm_domain(
+ struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ return cache->pm_domain;
+}
+
+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache);
+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache);
+
+void mali_l2_cache_core_set_counter_src(
+ struct mali_l2_cache_core *cache, u32 source_id, u32 counter);
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src0(
+ struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ return cache->counter_src0;
+}
+
+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src1(
+ struct mali_l2_cache_core *cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(cache);
+ return cache->counter_src1;
+}
+
+void mali_l2_cache_core_get_counter_values(
+ struct mali_l2_cache_core *cache,
+ u32 *src0, u32 *value0, u32 *src1, u32 *value1);
+
+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index);
+u32 mali_l2_cache_core_get_glob_num_l2_cores(void);
+
+struct mali_group *mali_l2_cache_get_group(
+ struct mali_l2_cache_core *cache, u32 index);
+
+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache);
+void mali_l2_cache_invalidate_conditional(
+ struct mali_l2_cache_core *cache, u32 id);
+
+void mali_l2_cache_invalidate_all(void);
+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages);
+
+#endif /* __MALI_KERNEL_L2_CACHE_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_mem_validation.c b/drivers/gpu/arm/utgard/common/mali_mem_validation.c
new file mode 100644
index 000000000000..e2b5b2a7f739
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_mem_validation.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_mem_validation.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#define MALI_INVALID_MEM_ADDR 0xFFFFFFFF
+
+typedef struct {
+ u32 phys_base; /**< Mali physical base of the memory, page aligned */
+ u32 size; /**< size in bytes of the memory, multiple of page size */
+} _mali_mem_validation_t;
+
+static _mali_mem_validation_t mali_mem_validator = { MALI_INVALID_MEM_ADDR, MALI_INVALID_MEM_ADDR };
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size)
+{
+ /* Check that no other MEM_VALIDATION resources exist */
+ if (MALI_INVALID_MEM_ADDR != mali_mem_validator.phys_base) {
+ MALI_PRINT_ERROR(("Failed to add frame buffer memory; another range is already specified\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Check restrictions on page alignment */
+ if ((0 != (start & (~_MALI_OSK_CPU_PAGE_MASK))) ||
+ (0 != (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+ MALI_PRINT_ERROR(("Failed to add frame buffer memory; incorrect alignment\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ mali_mem_validator.phys_base = start;
+ mali_mem_validator.size = size;
+ MALI_DEBUG_PRINT(2, ("Memory Validator installed for Mali physical address base=0x%08X, size=0x%08X\n",
+ mali_mem_validator.phys_base, mali_mem_validator.size));
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size)
+{
+ if (phys_addr < (phys_addr + size)) { /* Don't allow overflow (or zero size) */
+ if ((0 == (phys_addr & (~_MALI_OSK_CPU_PAGE_MASK))) &&
+ (0 == (size & (~_MALI_OSK_CPU_PAGE_MASK)))) {
+ if ((phys_addr >= mali_mem_validator.phys_base) &&
+ ((phys_addr + (size - 1)) >= mali_mem_validator.phys_base) &&
+ (phys_addr <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1))) &&
+ ((phys_addr + (size - 1)) <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1)))) {
+ MALI_DEBUG_PRINT(3, ("Accepted range 0x%08X + size 0x%08X (= 0x%08X)\n", phys_addr, size, (phys_addr + size - 1)));
+ return _MALI_OSK_ERR_OK;
+ }
+ }
+ }
+
+ MALI_PRINT_ERROR(("MALI PHYSICAL RANGE VALIDATION ERROR: The range supplied was: phys_base=0x%08X, size=0x%08X\n", phys_addr, size));
+
+ return _MALI_OSK_ERR_FAULT;
+}
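+
+/*
+ * Illustrative sketch (hypothetical values): with a validator range of
+ * phys_base == 0x80000000 and size == 0x00100000 (1 MiB), a request of
+ * phys_addr == 0x80001000 with size == 0x2000 passes every test above:
+ * both values are page aligned, phys_addr + size does not overflow, and
+ * the last byte (0x80002FFF) still lies within base + size - 1
+ * (0x800FFFFF).
+ */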
diff --git a/drivers/gpu/arm/utgard/common/mali_mem_validation.h b/drivers/gpu/arm/utgard/common/mali_mem_validation.h
new file mode 100644
index 000000000000..267720625d87
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_mem_validation.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2011-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEM_VALIDATION_H__
+#define __MALI_MEM_VALIDATION_H__
+
+#include "mali_osk.h"
+
+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size);
+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size);
+
+#endif /* __MALI_MEM_VALIDATION_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_mmu.c b/drivers/gpu/arm/utgard/common/mali_mmu.c
new file mode 100644
index 000000000000..b975c1468d67
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_mmu.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_ukk.h"
+
+#include "mali_mmu.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "mali_mmu_page_directory.h"
+
+/**
+ * Size of the MMU registers in bytes
+ */
+#define MALI_MMU_REGISTERS_SIZE 0x24
+
+/**
+ * MMU commands
+ * These are the commands that can be sent
+ * to the MMU unit.
+ */
+typedef enum mali_mmu_command {
+ MALI_MMU_COMMAND_ENABLE_PAGING = 0x00, /**< Enable paging (memory translation) */
+ MALI_MMU_COMMAND_DISABLE_PAGING = 0x01, /**< Disable paging (memory translation) */
+ MALI_MMU_COMMAND_ENABLE_STALL = 0x02, /**< Enable stall on page fault */
+ MALI_MMU_COMMAND_DISABLE_STALL = 0x03, /**< Disable stall on page fault */
+ MALI_MMU_COMMAND_ZAP_CACHE = 0x04, /**< Zap the entire page table cache */
+ MALI_MMU_COMMAND_PAGE_FAULT_DONE = 0x05, /**< Page fault processed */
+ MALI_MMU_COMMAND_HARD_RESET = 0x06 /**< Reset the MMU back to power-on settings */
+} mali_mmu_command;
+
+static void mali_mmu_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data);
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu);
+
+/* Page fault queue flush helper pages.
+ * Note that the mapping pointers are currently unused outside of the initialization functions. */
+static mali_dma_addr mali_page_fault_flush_page_directory = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_directory_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_page_table = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_page_table_mapping = NULL;
+static mali_dma_addr mali_page_fault_flush_data_page = MALI_INVALID_PAGE;
+static mali_io_address mali_page_fault_flush_data_page_mapping = NULL;
+
+/* an empty page directory (no address valid) which is active on any MMU not currently marked as in use */
+static mali_dma_addr mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+static mali_io_address mali_empty_page_directory_virt = NULL;
+
+
+_mali_osk_errcode_t mali_mmu_initialize(void)
+{
+ /* allocate the helper pages */
+ mali_empty_page_directory_phys = mali_allocate_empty_page(&mali_empty_page_directory_virt);
+ if (0 == mali_empty_page_directory_phys) {
+ MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate empty page directory.\n"));
+ mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ if (_MALI_OSK_ERR_OK != mali_create_fault_flush_pages(&mali_page_fault_flush_page_directory,
+ &mali_page_fault_flush_page_directory_mapping,
+ &mali_page_fault_flush_page_table,
+ &mali_page_fault_flush_page_table_mapping,
+ &mali_page_fault_flush_data_page,
+ &mali_page_fault_flush_data_page_mapping)) {
+ MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate fault flush pages\n"));
+ mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+ mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+ mali_empty_page_directory_virt = NULL;
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_mmu_terminate(void)
+{
+ MALI_DEBUG_PRINT(3, ("Mali MMU: terminating\n"));
+
+ /* Free global helper pages */
+ mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt);
+ mali_empty_page_directory_phys = MALI_INVALID_PAGE;
+ mali_empty_page_directory_virt = NULL;
+
+ /* Free the page fault flush pages */
+ mali_destroy_fault_flush_pages(&mali_page_fault_flush_page_directory,
+ &mali_page_fault_flush_page_directory_mapping,
+ &mali_page_fault_flush_page_table,
+ &mali_page_fault_flush_page_table_mapping,
+ &mali_page_fault_flush_data_page,
+ &mali_page_fault_flush_data_page_mapping);
+}
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual)
+{
+ struct mali_mmu_core *mmu = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(resource);
+
+ MALI_DEBUG_PRINT(2, ("Mali MMU: Creating Mali MMU: %s\n", resource->description));
+
+ mmu = _mali_osk_calloc(1, sizeof(struct mali_mmu_core));
+ if (NULL != mmu) {
+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&mmu->hw_core, resource, MALI_MMU_REGISTERS_SIZE)) {
+ if (_MALI_OSK_ERR_OK == mali_group_add_mmu_core(group, mmu)) {
+ if (is_virtual) {
+ /* Skip reset and IRQ setup for virtual MMU */
+ return mmu;
+ }
+
+ if (_MALI_OSK_ERR_OK == mali_mmu_reset(mmu)) {
+ /* Setup IRQ handlers (which will do IRQ probing if needed) */
+ mmu->irq = _mali_osk_irq_init(resource->irq,
+ mali_group_upper_half_mmu,
+ group,
+ mali_mmu_probe_trigger,
+ mali_mmu_probe_ack,
+ mmu,
+ resource->description);
+ if (NULL != mmu->irq) {
+ return mmu;
+ } else {
+ MALI_PRINT_ERROR(("Mali MMU: Failed to setup interrupt handlers for MMU %s\n", mmu->hw_core.description));
+ }
+ }
+ mali_group_remove_mmu_core(group);
+ } else {
+ MALI_PRINT_ERROR(("Mali MMU: Failed to add core %s to group\n", mmu->hw_core.description));
+ }
+ mali_hw_core_delete(&mmu->hw_core);
+ }
+
+ _mali_osk_free(mmu);
+ } else {
+ MALI_PRINT_ERROR(("Failed to allocate memory for MMU\n"));
+ }
+
+ return NULL;
+}
+
+void mali_mmu_delete(struct mali_mmu_core *mmu)
+{
+ if (NULL != mmu->irq) {
+ _mali_osk_irq_term(mmu->irq);
+ }
+
+ mali_hw_core_delete(&mmu->hw_core);
+ _mali_osk_free(mmu);
+}
+
+static void mali_mmu_enable_paging(struct mali_mmu_core *mmu)
+{
+ int i;
+
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_PAGING);
+
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+ if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) & MALI_MMU_STATUS_BIT_PAGING_ENABLED) {
+ break;
+ }
+ }
+ if (MALI_REG_POLL_COUNT_FAST == i) {
+ MALI_PRINT_ERROR(("Enable paging request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+ }
+}
+
+/**
+ * Issues the enable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to enable the stall on
+ * @return MALI_TRUE if the HW stall was successfully engaged, otherwise MALI_FALSE (request timed out)
+ */
+static mali_bool mali_mmu_enable_stall(struct mali_mmu_core *mmu)
+{
+ int i;
+ u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+ MALI_DEBUG_PRINT(4, ("MMU stall is implicit when Paging is not enabled.\n"));
+ return MALI_TRUE;
+ }
+
+ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+ MALI_DEBUG_PRINT(3, ("Aborting MMU stall request since it is in pagefault state.\n"));
+ return MALI_FALSE;
+ }
+
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_STALL);
+
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+ mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+ break;
+ }
+ if ((mmu_status & MALI_MMU_STATUS_BIT_STALL_ACTIVE) && (0 == (mmu_status & MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE))) {
+ break;
+ }
+ if (0 == (mmu_status & (MALI_MMU_STATUS_BIT_PAGING_ENABLED))) {
+ break;
+ }
+ }
+ if (MALI_REG_POLL_COUNT_FAST == i) {
+ MALI_DEBUG_PRINT(2, ("Enable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+ return MALI_FALSE;
+ }
+
+ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+ MALI_DEBUG_PRINT(2, ("Aborting MMU stall request since it has a pagefault.\n"));
+ return MALI_FALSE;
+ }
+
+ return MALI_TRUE;
+}
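+
+/*
+ * Illustrative note (restating the poll above): the loop exits on any of
+ * three conditions - a page fault became active (the stall request fails),
+ * the stall engaged (STALL_ACTIVE set while STALL_NOT_ACTIVE is clear), or
+ * paging was switched off, in which case the stall is implicit.
+ */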
+
+/**
+ * Issues the disable stall command to the MMU and waits for HW to complete the request
+ * @param mmu The MMU to disable the stall on
+ */
+static void mali_mmu_disable_stall(struct mali_mmu_core *mmu)
+{
+ int i;
+ u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+
+ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+ MALI_DEBUG_PRINT(3, ("MMU disable skipped since it was not enabled.\n"));
+ return;
+ }
+ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+ MALI_DEBUG_PRINT(2, ("Aborting MMU disable stall request since it is in pagefault state.\n"));
+ return;
+ }
+
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_DISABLE_STALL);
+
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+ u32 status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+ if (0 == (status & MALI_MMU_STATUS_BIT_STALL_ACTIVE)) {
+ break;
+ }
+ if (status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) {
+ break;
+ }
+ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) {
+ break;
+ }
+ }
+ if (MALI_REG_POLL_COUNT_FAST == i) MALI_DEBUG_PRINT(1, ("Disable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu)
+{
+ MALI_DEBUG_PRINT(4, ("Mali MMU: %s: Leaving page fault mode\n", mmu->hw_core.description));
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_PAGE_FAULT_DONE);
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu)
+{
+ int i;
+
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, 0xCAFEBABE);
+ MALI_DEBUG_ASSERT(0xCAFEB000 == mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR));
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_HARD_RESET);
+
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) {
+ if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0) {
+ break;
+ }
+ }
+ if (MALI_REG_POLL_COUNT_FAST == i) {
+ MALI_PRINT_ERROR(("Reset request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
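+
+/*
+ * Illustrative note (inferred from the assert above): DTE_ADDR holds a
+ * page-aligned page directory address, so its low 12 bits read back as
+ * zero: writing the marker 0xCAFEBABE and reading back 0xCAFEB000 is a
+ * cheap sanity check that the register file responds before HARD_RESET
+ * is issued, which must then clear DTE_ADDR back to 0.
+ */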
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu)
+{
+ _mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT;
+ mali_bool stall_success;
+ MALI_DEBUG_ASSERT_POINTER(mmu);
+
+ stall_success = mali_mmu_enable_stall(mmu);
+ if (!stall_success) {
+ err = _MALI_OSK_ERR_BUSY;
+ }
+
+ MALI_DEBUG_PRINT(3, ("Mali MMU: mali_kernel_mmu_reset: %s\n", mmu->hw_core.description));
+
+ if (_MALI_OSK_ERR_OK == mali_mmu_raw_reset(mmu)) {
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+ /* no session is active, so just activate the empty page directory */
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, mali_empty_page_directory_phys);
+ mali_mmu_enable_paging(mmu);
+ err = _MALI_OSK_ERR_OK;
+ }
+ mali_mmu_disable_stall(mmu);
+
+ return err;
+}
+
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu)
+{
+ mali_bool stall_success = mali_mmu_enable_stall(mmu);
+
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+ if (MALI_FALSE == stall_success) {
+ /* MALI_FALSE means the MMU is in page fault state; the stall cannot be disabled then */
+ return MALI_FALSE;
+ }
+
+ mali_mmu_disable_stall(mmu);
+ return MALI_TRUE;
+}
+
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu)
+{
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+}
+
+
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address)
+{
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_ZAP_ONE_LINE, MALI_MMU_PDE_ENTRY(mali_address));
+}
+
+static void mali_mmu_activate_address_space(struct mali_mmu_core *mmu, u32 page_directory)
+{
+ /* The MMU must be in stalled or page fault mode for this write to work */
+ MALI_DEBUG_ASSERT(0 != (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)
+ & (MALI_MMU_STATUS_BIT_STALL_ACTIVE | MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE)));
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, page_directory);
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE);
+
+}
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir)
+{
+ mali_bool stall_success;
+ MALI_DEBUG_ASSERT_POINTER(mmu);
+
+ MALI_DEBUG_PRINT(5, ("Asked to activate page directory 0x%x on MMU %s\n", pagedir, mmu->hw_core.description));
+
+ stall_success = mali_mmu_enable_stall(mmu);
+ MALI_DEBUG_ASSERT(stall_success);
+ MALI_IGNORE(stall_success);
+ mali_mmu_activate_address_space(mmu, pagedir->page_directory);
+ mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu)
+{
+ mali_bool stall_success;
+
+ MALI_DEBUG_ASSERT_POINTER(mmu);
+ MALI_DEBUG_PRINT(3, ("Activating the empty page directory on MMU %s\n", mmu->hw_core.description));
+
+ stall_success = mali_mmu_enable_stall(mmu);
+
+ /* This function can only be called when the core is idle, so the stall cannot fail. */
+ MALI_DEBUG_ASSERT(stall_success);
+ MALI_IGNORE(stall_success);
+
+ mali_mmu_activate_address_space(mmu, mali_empty_page_directory_phys);
+ mali_mmu_disable_stall(mmu);
+}
+
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu)
+{
+ mali_bool stall_success;
+ MALI_DEBUG_ASSERT_POINTER(mmu);
+
+ MALI_DEBUG_PRINT(3, ("Activating the page fault flush page directory on MMU %s\n", mmu->hw_core.description));
+ stall_success = mali_mmu_enable_stall(mmu);
+ /* This function is expected to fail to stall, since the MMU might be in page fault mode when it is called */
+ mali_mmu_activate_address_space(mmu, mali_page_fault_flush_page_directory);
+ if (MALI_TRUE == stall_success) mali_mmu_disable_stall(mmu);
+}
+
+/* Called during IRQ probing when we want the MMU to raise an interrupt */
+static void mali_mmu_probe_trigger(void *data)
+{
+ struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+}
+
+/* Called when the IRQ probe wants the MMU to acknowledge an interrupt from the HW */
+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data)
+{
+ struct mali_mmu_core *mmu = (struct mali_mmu_core *)data;
+ u32 int_stat;
+
+ int_stat = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+
+ MALI_DEBUG_PRINT(2, ("mali_mmu_probe_irq_acknowledge: intstat 0x%x\n", int_stat));
+ if (int_stat & MALI_MMU_INTERRUPT_PAGE_FAULT) {
+ MALI_DEBUG_PRINT(2, ("Probe: Page fault detect: PASSED\n"));
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_PAGE_FAULT);
+ } else {
+ MALI_DEBUG_PRINT(1, ("Probe: Page fault detect: FAILED\n"));
+ }
+
+ if (int_stat & MALI_MMU_INTERRUPT_READ_BUS_ERROR) {
+ MALI_DEBUG_PRINT(2, ("Probe: Bus read error detect: PASSED\n"));
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_READ_BUS_ERROR);
+ } else {
+ MALI_DEBUG_PRINT(1, ("Probe: Bus read error detect: FAILED\n"));
+ }
+
+ if ((int_stat & (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) ==
+ (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) {
+ return _MALI_OSK_ERR_OK;
+ }
+
+ return _MALI_OSK_ERR_FAULT;
+}
+
+#if 0
+void mali_mmu_print_state(struct mali_mmu_core *mmu)
+{
+ MALI_DEBUG_PRINT(2, ("MMU: State of %s is 0x%08x\n", mmu->hw_core.description, mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS)));
+}
+#endif
diff --git a/drivers/gpu/arm/utgard/common/mali_mmu.h b/drivers/gpu/arm/utgard/common/mali_mmu.h
new file mode 100644
index 000000000000..101c968bd45d
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_mmu.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_H__
+#define __MALI_MMU_H__
+
+#include "mali_osk.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_hw_core.h"
+
+/* Forward declaration from mali_group.h */
+struct mali_group;
+
+/**
+ * MMU register numbers
+ * Used in the register read/write routines.
+ * See the hardware documentation for more information about each register
+ */
+typedef enum mali_mmu_register {
+ MALI_MMU_REGISTER_DTE_ADDR = 0x0000, /**< Current Page Directory Pointer */
+ MALI_MMU_REGISTER_STATUS = 0x0004, /**< Status of the MMU */
+ MALI_MMU_REGISTER_COMMAND = 0x0008, /**< Command register, used to control the MMU */
+ MALI_MMU_REGISTER_PAGE_FAULT_ADDR = 0x000C, /**< Logical address of the last page fault */
+ MALI_MMU_REGISTER_ZAP_ONE_LINE = 0x010, /**< Used to invalidate the mapping of a single page from the MMU */
+ MALI_MMU_REGISTER_INT_RAWSTAT = 0x0014, /**< Raw interrupt status, all interrupts visible */
+ MALI_MMU_REGISTER_INT_CLEAR = 0x0018, /**< Indicate to the MMU that the interrupt has been received */
+ MALI_MMU_REGISTER_INT_MASK = 0x001C, /**< Enable/disable types of interrupts */
+ MALI_MMU_REGISTER_INT_STATUS = 0x0020 /**< Interrupt status based on the mask */
+} mali_mmu_register;
+
+/**
+ * MMU interrupt register bits
+ * Each cause of the interrupt is reported
+ * through the (raw) interrupt status registers.
+ * Multiple interrupts can be pending, so multiple bits
+ * can be set at once.
+ */
+typedef enum mali_mmu_interrupt {
+ MALI_MMU_INTERRUPT_PAGE_FAULT = 0x01, /**< A page fault occurred */
+ MALI_MMU_INTERRUPT_READ_BUS_ERROR = 0x02 /**< A bus read error occurred */
+} mali_mmu_interrupt;
+
+typedef enum mali_mmu_status_bits {
+ MALI_MMU_STATUS_BIT_PAGING_ENABLED = 1 << 0,
+ MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE = 1 << 1,
+ MALI_MMU_STATUS_BIT_STALL_ACTIVE = 1 << 2,
+ MALI_MMU_STATUS_BIT_IDLE = 1 << 3,
+ MALI_MMU_STATUS_BIT_REPLAY_BUFFER_EMPTY = 1 << 4,
+ MALI_MMU_STATUS_BIT_PAGE_FAULT_IS_WRITE = 1 << 5,
+ MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE = 1 << 31,
+} mali_mmu_status_bits;
+
+/**
+ * Definition of the MMU struct
+ * Used to track an MMU unit in the system.
+ * Contains information about the mapping of the registers
+ */
+struct mali_mmu_core {
+ struct mali_hw_core hw_core; /**< Common for all HW cores */
+ _mali_osk_irq_t *irq; /**< IRQ handler */
+};
+
+_mali_osk_errcode_t mali_mmu_initialize(void);
+
+void mali_mmu_terminate(void);
+
+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual);
+void mali_mmu_delete(struct mali_mmu_core *mmu);
+
+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu);
+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu);
+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu);
+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address);
+
+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir);
+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu);
+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu);
+
+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu);
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_mmu_get_interrupt_result(struct mali_mmu_core *mmu)
+{
+ u32 rawstat_used = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+ if (0 == rawstat_used) {
+ return MALI_INTERRUPT_RESULT_NONE;
+ }
+ return MALI_INTERRUPT_RESULT_ERROR;
+}
+
+
+MALI_STATIC_INLINE u32 mali_mmu_get_int_status(struct mali_mmu_core *mmu)
+{
+ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_rawstat(struct mali_mmu_core *mmu)
+{
+ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT);
+}
+
+MALI_STATIC_INLINE void mali_mmu_mask_all_interrupts(struct mali_mmu_core *mmu)
+{
+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, 0);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_status(struct mali_mmu_core *mmu)
+{
+ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS);
+}
+
+MALI_STATIC_INLINE u32 mali_mmu_get_page_fault_addr(struct mali_mmu_core *mmu)
+{
+ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_PAGE_FAULT_ADDR);
+}
+
+#endif /* __MALI_MMU_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_mmu_page_directory.c b/drivers/gpu/arm/utgard/common/mali_mmu_page_directory.c
new file mode 100644
index 000000000000..126fd77ec9c9
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_mmu_page_directory.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_mmu_page_directory.h"
+#include "mali_memory.h"
+#include "mali_l2_cache.h"
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data);
+
+u32 mali_allocate_empty_page(mali_io_address *virt_addr)
+{
+ _mali_osk_errcode_t err;
+ mali_io_address mapping;
+ mali_dma_addr address;
+
+ if (_MALI_OSK_ERR_OK != mali_mmu_get_table_page(&address, &mapping)) {
+ /* Allocation failed */
+ MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to get table page for empty pgdir\n"));
+ return 0;
+ }
+
+ MALI_DEBUG_ASSERT_POINTER(mapping);
+
+ err = fill_page(mapping, 0);
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_mmu_release_table_page(address, mapping);
+ MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to zero page\n"));
+ return 0;
+ }
+
+ *virt_addr = mapping;
+ return address;
+}
+
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr)
+{
+ if (MALI_INVALID_PAGE != address) {
+ mali_mmu_release_table_page(address, virt_addr);
+ }
+}
+
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+ mali_io_address *page_directory_mapping,
+ mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+ mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+ _mali_osk_errcode_t err;
+
+ err = mali_mmu_get_table_page(data_page, data_page_mapping);
+ if (_MALI_OSK_ERR_OK == err) {
+ err = mali_mmu_get_table_page(page_table, page_table_mapping);
+ if (_MALI_OSK_ERR_OK == err) {
+ err = mali_mmu_get_table_page(page_directory, page_directory_mapping);
+ if (_MALI_OSK_ERR_OK == err) {
+ fill_page(*data_page_mapping, 0);
+ fill_page(*page_table_mapping, *data_page | MALI_MMU_FLAGS_DEFAULT);
+ fill_page(*page_directory_mapping, *page_table | MALI_MMU_FLAGS_PRESENT);
+ MALI_SUCCESS;
+ }
+ mali_mmu_release_table_page(*page_table, *page_table_mapping);
+ *page_table = MALI_INVALID_PAGE;
+ }
+ mali_mmu_release_table_page(*data_page, *data_page_mapping);
+ *data_page = MALI_INVALID_PAGE;
+ }
+ return err;
+}
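+
+/*
+ * Illustrative note (inferred from the fill_page() calls above): the three
+ * pages form a minimal translation chain in which every directory entry
+ * points at the single page table and every table entry points at the
+ * single data page, so any GPU address resolves somewhere harmless while
+ * a page-faulted job is being flushed out.
+ */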
+
+void mali_destroy_fault_flush_pages(
+ mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+ mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+ mali_dma_addr *data_page, mali_io_address *data_page_mapping)
+{
+ if (MALI_INVALID_PAGE != *page_directory) {
+ mali_mmu_release_table_page(*page_directory, *page_directory_mapping);
+ *page_directory = MALI_INVALID_PAGE;
+ *page_directory_mapping = NULL;
+ }
+
+ if (MALI_INVALID_PAGE != *page_table) {
+ mali_mmu_release_table_page(*page_table, *page_table_mapping);
+ *page_table = MALI_INVALID_PAGE;
+ *page_table_mapping = NULL;
+ }
+
+ if (MALI_INVALID_PAGE != *data_page) {
+ mali_mmu_release_table_page(*data_page, *data_page_mapping);
+ *data_page = MALI_INVALID_PAGE;
+ *data_page_mapping = NULL;
+ }
+}
+
+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data)
+{
+ int i;
+ MALI_DEBUG_ASSERT_POINTER(mapping);
+
+ for (i = 0; i < MALI_MMU_PAGE_SIZE / 4; i++) {
+ _mali_osk_mem_iowrite32_relaxed(mapping, i * sizeof(u32), data);
+ }
+ _mali_osk_mem_barrier();
+ MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+ const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+ const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+ _mali_osk_errcode_t err;
+ mali_io_address pde_mapping;
+ mali_dma_addr pde_phys;
+ int i, page_count;
+ u32 start_address;
+ if (last_pde < first_pde)
+ return _MALI_OSK_ERR_INVALID_ARGS;
+
+ for (i = first_pde; i <= last_pde; i++) {
+ if (0 == (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+ i * sizeof(u32)) & MALI_MMU_FLAGS_PRESENT)) {
+ /* Page table not present */
+ MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+ MALI_DEBUG_ASSERT(NULL == pagedir->page_entries_mapped[i]);
+
+ err = mali_mmu_get_table_page(&pde_phys, &pde_mapping);
+ if (_MALI_OSK_ERR_OK != err) {
+ MALI_PRINT_ERROR(("Failed to allocate page table page.\n"));
+ return err;
+ }
+ pagedir->page_entries_mapped[i] = pde_mapping;
+
+ /* Update PDE, mark as present */
+ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32),
+ pde_phys | MALI_MMU_FLAGS_PRESENT);
+
+ MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]);
+ }
+
+ if (first_pde == last_pde) {
+ pagedir->page_entries_usage_count[i] += size / MALI_MMU_PAGE_SIZE;
+ } else if (i == first_pde) {
+ start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+ page_count = (start_address + MALI_MMU_VIRTUAL_PAGE_SIZE - mali_address) / MALI_MMU_PAGE_SIZE;
+ pagedir->page_entries_usage_count[i] += page_count;
+ } else if (i == last_pde) {
+ start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE;
+ page_count = (mali_address + size - start_address) / MALI_MMU_PAGE_SIZE;
+ pagedir->page_entries_usage_count[i] += page_count;
+ } else {
+ pagedir->page_entries_usage_count[i] = 1024;
+ }
+ }
+ _mali_osk_write_mem_barrier();
+
+ return _MALI_OSK_ERR_OK;
+}
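+
+/*
+ * Worked example of the usage-count arithmetic above (illustrative values):
+ * with MALI_MMU_PAGE_SIZE = 0x1000 and MALI_MMU_VIRTUAL_PAGE_SIZE = 0x400000,
+ * mapping mali_address = 0x003FE000 with size = 0x4000 gives first_pde = 0
+ * and last_pde = 1. PDE 0 is credited with (0x400000 - 0x3FE000) / 0x1000 = 2
+ * pages, PDE 1 with (0x402000 - 0x400000) / 0x1000 = 2 pages. A PDE fully
+ * covered by the range is credited with all 1024 of its pages.
+ */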
+
+MALI_STATIC_INLINE void mali_mmu_zero_pte(mali_io_address page_table, u32 mali_address, u32 size)
+{
+ int i;
+ const int first_pte = MALI_MMU_PTE_ENTRY(mali_address);
+ const int last_pte = MALI_MMU_PTE_ENTRY(mali_address + size - 1);
+
+ for (i = first_pte; i <= last_pte; i++) {
+ _mali_osk_mem_iowrite32_relaxed(page_table, i * sizeof(u32), 0);
+ }
+}
+
+static u32 mali_page_directory_get_phys_address(struct mali_page_directory *pagedir, u32 index)
+{
+ return (_mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+ index * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK);
+}
+
+
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size)
+{
+ const int first_pde = MALI_MMU_PDE_ENTRY(mali_address);
+ const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1);
+ u32 left = size;
+ int i;
+ mali_bool pd_changed = MALI_FALSE;
+ u32 pages_to_invalidate[3]; /* hard-coded to 3: max two pages from the PT level plus max one page from PD level */
+ u32 num_pages_inv = 0;
+ mali_bool invalidate_all = MALI_FALSE; /* safety mechanism in case page_entries_usage_count is unreliable */
+
+ /* For all page directory entries in range. */
+ for (i = first_pde; i <= last_pde; i++) {
+ u32 size_in_pde, offset;
+
+ MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[i]);
+ MALI_DEBUG_ASSERT(0 != pagedir->page_entries_usage_count[i]);
+
+ /* Offset into page table, 0 if mali_address is 4MiB aligned */
+ offset = (mali_address & (MALI_MMU_VIRTUAL_PAGE_SIZE - 1));
+ if (left < MALI_MMU_VIRTUAL_PAGE_SIZE - offset) {
+ size_in_pde = left;
+ } else {
+ size_in_pde = MALI_MMU_VIRTUAL_PAGE_SIZE - offset;
+ }
+
+ pagedir->page_entries_usage_count[i] -= size_in_pde / MALI_MMU_PAGE_SIZE;
+
+ /* If entire page table is unused, free it */
+ if (0 == pagedir->page_entries_usage_count[i]) {
+ u32 page_phys;
+ void *page_virt;
+ MALI_DEBUG_PRINT(4, ("Releasing page table as this is the last reference\n"));
+ /* last reference removed, no need to zero out each PTE */
+
+ page_phys = MALI_MMU_ENTRY_ADDRESS(_mali_osk_mem_ioread32(pagedir->page_directory_mapped, i * sizeof(u32)));
+ page_virt = pagedir->page_entries_mapped[i];
+ pagedir->page_entries_mapped[i] = NULL;
+ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+
+ mali_mmu_release_table_page(page_phys, page_virt);
+ pd_changed = MALI_TRUE;
+ } else {
+ MALI_DEBUG_ASSERT(num_pages_inv < 2);
+ if (num_pages_inv < 2) {
+ pages_to_invalidate[num_pages_inv] = mali_page_directory_get_phys_address(pagedir, i);
+ num_pages_inv++;
+ } else {
+ invalidate_all = MALI_TRUE;
+ }
+
+ /* If part of the page table is still in use, zero the relevant PTEs */
+ mali_mmu_zero_pte(pagedir->page_entries_mapped[i], mali_address, size_in_pde);
+ }
+
+ left -= size_in_pde;
+ mali_address += size_in_pde;
+ }
+ _mali_osk_write_mem_barrier();
+
+ /* L2 pages invalidation */
+ if (MALI_TRUE == pd_changed) {
+ MALI_DEBUG_ASSERT(num_pages_inv < 3);
+ if (num_pages_inv < 3) {
+ pages_to_invalidate[num_pages_inv] = pagedir->page_directory;
+ num_pages_inv++;
+ } else {
+ invalidate_all = MALI_TRUE;
+ }
+ }
+
+ if (invalidate_all) {
+ mali_l2_cache_invalidate_all();
+ } else {
+ mali_l2_cache_invalidate_all_pages(pages_to_invalidate, num_pages_inv);
+ }
+
+ MALI_SUCCESS;
+}
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void)
+{
+ struct mali_page_directory *pagedir;
+ _mali_osk_errcode_t err;
+ mali_dma_addr phys;
+
+ pagedir = _mali_osk_calloc(1, sizeof(struct mali_page_directory));
+ if (NULL == pagedir) {
+ return NULL;
+ }
+
+ err = mali_mmu_get_table_page(&phys, &pagedir->page_directory_mapped);
+ if (_MALI_OSK_ERR_OK != err) {
+ _mali_osk_free(pagedir);
+ return NULL;
+ }
+
+ pagedir->page_directory = (u32)phys;
+
+ /* Zero page directory */
+ fill_page(pagedir->page_directory_mapped, 0);
+
+ return pagedir;
+}
+
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir)
+{
+ const int num_page_table_entries = sizeof(pagedir->page_entries_mapped) / sizeof(pagedir->page_entries_mapped[0]);
+ int i;
+
+ /* Free referenced page tables and zero PDEs. */
+ for (i = 0; i < num_page_table_entries; i++) {
+ if (pagedir->page_directory_mapped && (_mali_osk_mem_ioread32(
+ pagedir->page_directory_mapped,
+ sizeof(u32)*i) & MALI_MMU_FLAGS_PRESENT)) {
+ mali_dma_addr phys = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+ i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK;
+ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0);
+ mali_mmu_release_table_page(phys, pagedir->page_entries_mapped[i]);
+ }
+ }
+ _mali_osk_write_mem_barrier();
+
+ /* Free the page directory page. */
+ mali_mmu_release_table_page(pagedir->page_directory, pagedir->page_directory_mapped);
+
+ _mali_osk_free(pagedir);
+}
+
+
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+ mali_dma_addr phys_address, u32 size, u32 permission_bits)
+{
+ u32 end_address = mali_address + size;
+ u32 mali_phys = (u32)phys_address;
+
+ /* Map physical pages into MMU page tables */
+ for (; mali_address < end_address; mali_address += MALI_MMU_PAGE_SIZE, mali_phys += MALI_MMU_PAGE_SIZE) {
+ MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)]);
+ _mali_osk_mem_iowrite32_relaxed(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)],
+ MALI_MMU_PTE_ENTRY(mali_address) * sizeof(u32),
+ mali_phys | permission_bits);
+ }
+}
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr)
+{
+#if defined(DEBUG)
+ u32 pde_index, pte_index;
+ u32 pde, pte;
+
+ pde_index = MALI_MMU_PDE_ENTRY(fault_addr);
+ pte_index = MALI_MMU_PTE_ENTRY(fault_addr);
+
+
+ pde = _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+ pde_index * sizeof(u32));
+
+
+ if (pde & MALI_MMU_FLAGS_PRESENT) {
+ u32 pte_addr = MALI_MMU_ENTRY_ADDRESS(pde);
+
+ pte = _mali_osk_mem_ioread32(pagedir->page_entries_mapped[pde_index],
+ pte_index * sizeof(u32));
+
+ MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table present: %08x\n"
+ "\t\tPTE: %08x, page %08x is %s\n",
+ fault_addr, pte_addr, pte,
+ MALI_MMU_ENTRY_ADDRESS(pte),
+ pte & MALI_MMU_FLAGS_DEFAULT ? "rw" : "not present"));
+ } else {
+ MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table not present: %08x\n",
+ fault_addr, pde));
+ }
+#else
+ MALI_IGNORE(pagedir);
+ MALI_IGNORE(fault_addr);
+#endif
+}
+
+/* For instrumented builds: support for dumping MMU registers and page tables */
+struct dump_info {
+ u32 buffer_left;
+ u32 register_writes_size;
+ u32 page_table_dump_size;
+ u32 *buffer;
+};
+
+static _mali_osk_errcode_t writereg(u32 where, u32 what, const char *comment, struct dump_info *info)
+{
+ if (NULL != info) {
+ info->register_writes_size += sizeof(u32) * 2; /* two 32-bit words */
+
+ if (NULL != info->buffer) {
+ /* check that we have enough space */
+ if (info->buffer_left < sizeof(u32) * 2) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+ *info->buffer = where;
+ info->buffer++;
+
+ *info->buffer = what;
+ info->buffer++;
+
+ info->buffer_left -= sizeof(u32) * 2;
+ }
+ }
+
+ MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t mali_mmu_dump_page(mali_io_address page, u32 phys_addr, struct dump_info *info)
+{
+ if (NULL != info) {
+ /* 4096 for the page and 4 bytes for the address */
+ const u32 page_size_in_elements = MALI_MMU_PAGE_SIZE / 4;
+ const u32 page_size_in_bytes = MALI_MMU_PAGE_SIZE;
+ const u32 dump_size_in_bytes = MALI_MMU_PAGE_SIZE + 4;
+
+ info->page_table_dump_size += dump_size_in_bytes;
+
+ if (NULL != info->buffer) {
+ if (info->buffer_left < dump_size_in_bytes) MALI_ERROR(_MALI_OSK_ERR_NOMEM);
+
+ *info->buffer = phys_addr;
+ info->buffer++;
+
+ _mali_osk_memcpy(info->buffer, page, page_size_in_bytes);
+ info->buffer += page_size_in_elements;
+
+ info->buffer_left -= dump_size_in_bytes;
+ }
+ }
+
+ MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_page_table(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+ MALI_DEBUG_ASSERT_POINTER(pagedir);
+ MALI_DEBUG_ASSERT_POINTER(info);
+
+ if (NULL != pagedir->page_directory_mapped) {
+ int i;
+
+ MALI_CHECK_NO_ERROR(
+ mali_mmu_dump_page(pagedir->page_directory_mapped, pagedir->page_directory, info)
+ );
+
+ for (i = 0; i < 1024; i++) {
+ if (NULL != pagedir->page_entries_mapped[i]) {
+ MALI_CHECK_NO_ERROR(
+ mali_mmu_dump_page(pagedir->page_entries_mapped[i],
+ _mali_osk_mem_ioread32(pagedir->page_directory_mapped,
+ i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK, info)
+ );
+ }
+ }
+ }
+
+ MALI_SUCCESS;
+}
+
+static _mali_osk_errcode_t dump_mmu_registers(struct mali_page_directory *pagedir, struct dump_info *info)
+{
+ MALI_CHECK_NO_ERROR(writereg(0x00000000, pagedir->page_directory,
+ "set the page directory address", info));
+ MALI_CHECK_NO_ERROR(writereg(0x00000008, 4, "zap???", info));
+ MALI_CHECK_NO_ERROR(writereg(0x00000008, 0, "enable paging", info));
+ MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args)
+{
+ struct dump_info info = { 0, 0, 0, NULL };
+ struct mali_session_data *session_data;
+
+ MALI_DEBUG_ASSERT_POINTER(args);
+
+ session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+ MALI_DEBUG_ASSERT_POINTER(session_data);
+
+ MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+ MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+ args->size = info.register_writes_size + info.page_table_dump_size;
+ MALI_SUCCESS;
+}
+
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args)
+{
+ struct dump_info info = { 0, 0, 0, NULL };
+ struct mali_session_data *session_data;
+
+ MALI_DEBUG_ASSERT_POINTER(args);
+
+ session_data = (struct mali_session_data *)(uintptr_t)(args->ctx);
+ MALI_DEBUG_ASSERT_POINTER(session_data);
+
+ info.buffer_left = args->size;
+ info.buffer = (u32 *)(uintptr_t)args->buffer;
+
+ args->register_writes = (uintptr_t)info.buffer;
+ MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info));
+
+ args->page_table_dump = (uintptr_t)info.buffer;
+ MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info));
+
+ args->register_writes_size = info.register_writes_size;
+ args->page_table_dump_size = info.page_table_dump_size;
+
+ MALI_SUCCESS;
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_mmu_page_directory.h b/drivers/gpu/arm/utgard/common/mali_mmu_page_directory.h
new file mode 100644
index 000000000000..561fb60b9803
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_mmu_page_directory.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MMU_PAGE_DIRECTORY_H__
+#define __MALI_MMU_PAGE_DIRECTORY_H__
+
+#include "mali_osk.h"
+
+/**
+ * Size of an MMU page in bytes
+ */
+#define MALI_MMU_PAGE_SIZE 0x1000
+
+/*
+ * Size of the address space referenced by a page table page
+ */
+#define MALI_MMU_VIRTUAL_PAGE_SIZE 0x400000 /* 4 MiB */
+
+/**
+ * Page directory index from address
+ * Calculates the page directory index from the given address
+ */
+#define MALI_MMU_PDE_ENTRY(address) (((address)>>22) & 0x03FF)
+
+/**
+ * Page table index from address
+ * Calculates the page table index from the given address
+ */
+#define MALI_MMU_PTE_ENTRY(address) (((address)>>12) & 0x03FF)
+
+/**
+ * Extract the memory address from an PDE/PTE entry
+ */
+#define MALI_MMU_ENTRY_ADDRESS(value) ((value) & 0xFFFFFC00)
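+
+/*
+ * Illustrative decomposition (example address, not from this driver): for a
+ * Mali virtual address 0x12345678, MALI_MMU_PDE_ENTRY() yields 0x048,
+ * MALI_MMU_PTE_ENTRY() yields 0x345, and the remaining low 12 bits (0x678)
+ * are the offset within the 4 KiB page. For a PDE/PTE value of 0x12345003,
+ * MALI_MMU_ENTRY_ADDRESS() recovers 0x12345000 by masking off the flag bits.
+ */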
+
+#define MALI_INVALID_PAGE ((u32)(~0))
+
+/**
+ * Flags used in the PDE/PTE entries of the Mali MMU page tables.
+ */
+typedef enum mali_mmu_entry_flags {
+ MALI_MMU_FLAGS_PRESENT = 0x01,
+ MALI_MMU_FLAGS_READ_PERMISSION = 0x02,
+ MALI_MMU_FLAGS_WRITE_PERMISSION = 0x04,
+ MALI_MMU_FLAGS_OVERRIDE_CACHE = 0x8,
+ MALI_MMU_FLAGS_WRITE_CACHEABLE = 0x10,
+ MALI_MMU_FLAGS_WRITE_ALLOCATE = 0x20,
+ MALI_MMU_FLAGS_WRITE_BUFFERABLE = 0x40,
+ MALI_MMU_FLAGS_READ_CACHEABLE = 0x80,
+ MALI_MMU_FLAGS_READ_ALLOCATE = 0x100,
+ MALI_MMU_FLAGS_MASK = 0x1FF,
+} mali_mmu_entry_flags;
+
+
+#define MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE ( \
+ MALI_MMU_FLAGS_PRESENT | \
+ MALI_MMU_FLAGS_READ_PERMISSION | \
+ MALI_MMU_FLAGS_WRITE_PERMISSION | \
+ MALI_MMU_FLAGS_OVERRIDE_CACHE | \
+ MALI_MMU_FLAGS_WRITE_CACHEABLE | \
+ MALI_MMU_FLAGS_WRITE_BUFFERABLE | \
+ MALI_MMU_FLAGS_READ_CACHEABLE | \
+ MALI_MMU_FLAGS_READ_ALLOCATE )
+
+#define MALI_MMU_FLAGS_DEFAULT ( \
+ MALI_MMU_FLAGS_PRESENT | \
+ MALI_MMU_FLAGS_READ_PERMISSION | \
+ MALI_MMU_FLAGS_WRITE_PERMISSION )
+
+
+struct mali_page_directory {
+ u32 page_directory; /**< Physical address of the memory session's page directory */
+ mali_io_address page_directory_mapped; /**< Pointer to the mapped version of the page directory into the kernel's address space */
+
+ mali_io_address page_entries_mapped[1024]; /**< Pointers to the page tables which exists in the page directory mapped into the kernel's address space */
+ u32 page_entries_usage_count[1024]; /**< Tracks usage count of the page table pages, so they can be released when the last reference is removed */
+};
+
+/* Map Mali virtual address space (i.e. ensure page tables exist for the virtual range) */
+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size);
+
+/* Back virtual address space with actual pages. Assumes input is contiguous and 4k aligned. */
+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address,
+ mali_dma_addr phys_address, u32 size, u32 permission_bits);
+
+u32 mali_allocate_empty_page(mali_io_address *virt_addr);
+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr);
+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory,
+ mali_io_address *page_directory_mapping,
+ mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+ mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+void mali_destroy_fault_flush_pages(
+ mali_dma_addr *page_directory, mali_io_address *page_directory_mapping,
+ mali_dma_addr *page_table, mali_io_address *page_table_mapping,
+ mali_dma_addr *data_page, mali_io_address *data_page_mapping);
+
+struct mali_page_directory *mali_mmu_pagedir_alloc(void);
+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir);
+
+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr);
+
+#endif /* __MALI_MMU_PAGE_DIRECTORY_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_osk.h b/drivers/gpu/arm/utgard/common/mali_osk.h
new file mode 100644
index 000000000000..4c9e57cba18f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_osk.h
@@ -0,0 +1,1397 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk.h
+ * Defines the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_H__
+#define __MALI_OSK_H__
+
+#include "mali_osk_types.h"
+#include "mali_osk_specific.h" /* include any per-os specifics */
+#include "mali_osk_locks.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @addtogroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+#ifdef DEBUG
+/** @brief Macro for asserting that the current thread holds a given lock
+ */
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) MALI_DEBUG_ASSERT(_mali_osk_lock_get_owner((_mali_osk_lock_debug_t *)l) == _mali_osk_get_tid())
+
+/** @brief _mali_osk_lock_get_owner() returns a lock's owner (thread id) when
+ * debugging is enabled
+ */
+#else
+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) do {} while(0)
+#endif
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Find the containing structure of another structure
+ *
+ * This is the reverse of the operation 'offsetof'. This means that the
+ * following condition is satisfied:
+ *
+ * ptr == _MALI_OSK_CONTAINER_OF( &ptr->member, type, member )
+ *
+ * when \a ptr is of type 'type *'.
+ *
+ * Its purpose is to recover a larger structure that has wrapped a smaller one.
+ *
+ * @note no type or memory checking occurs to ensure that a wrapper structure
+ * does in fact exist, and that it is being recovered with respect to the
+ * correct member.
+ *
+ * @param ptr the pointer to the member that is contained within the larger
+ * structure
+ * @param type the type of the structure that contains the member
+ * @param member the name of the member in the structure that ptr points to.
+ * @return a pointer to a \a type object which contains \a member, as pointed
+ * to by \a ptr.
+ */
+#define _MALI_OSK_CONTAINER_OF(ptr, type, member) \
+ ((type *)( ((char *)ptr) - offsetof(type,member) ))
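+
+/*
+ * Illustrative usage (hypothetical type, not part of this driver):
+ *
+ * struct wrapper {
+ * u32 other_data;
+ * u32 member;
+ * };
+ *
+ * struct wrapper w;
+ * u32 *ptr = &w.member;
+ * struct wrapper *recovered =
+ * _MALI_OSK_CONTAINER_OF(ptr, struct wrapper, member);
+ * // recovered == &w
+ */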
+
+/** @addtogroup _mali_osk_wq
+ * @{ */
+
+/** @brief Initialize work queues (for deferred work)
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_wq_init(void);
+
+/** @brief Terminate work queues (for deferred work)
+ */
+void _mali_osk_wq_term(void);
+
+/** @brief Create work in the work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, \a handler will be called with \a data as the argument.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delete_work()
+ * when no longer needed.
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data);
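+
+/*
+ * Lifecycle sketch (hypothetical handler and context; error handling elided):
+ *
+ * static void my_handler(void *data)
+ * {
+ * // deferred work runs here, in process context
+ * }
+ *
+ * _mali_osk_wq_work_t *work = _mali_osk_wq_create_work(my_handler, my_ctx);
+ * _mali_osk_wq_schedule_work(work); // my_handler(my_ctx) runs later
+ * _mali_osk_wq_delete_work(work); // flushes the queue, then frees
+ */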
+
+/** @brief A high priority version of \a _mali_osk_wq_create_work()
+ *
+ * Creates a work object which can be scheduled in the high priority work queue.
+ *
+ * This is unfortunately needed to get low latency scheduling of the Mali cores. Normally we would
+ * schedule the next job in hw_irq or tasklet, but often we can't since we need to synchronously map
+ * and unmap shared memory when a job is connected to external fences (timelines). And this requires
+ * taking a mutex.
+ *
+ * We also signal a lot of other (low priority) work as part of the job being finished, and if we
+ * don't set this Mali scheduling thread as high priority, we see that the CPU scheduler often runs
+ * random things instead of starting the next GPU job when the GPU is idle. So setting the GPU
+ * scheduler thread to high priority does give a visibly more responsive system.
+ *
+ * Start the high priority work with: \a _mali_osk_wq_schedule_work_high_pri()
+ */
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will flush the work queue to ensure that the work handler will not
+ * be called after deletion.
+ */
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the work handler
+ *
+ * _mali_osk_wq_schedule_work provides a mechanism for enqueuing deferred calls
+ * to the work handler. After calling \ref _mali_osk_wq_schedule_work(), the
+ * work handler will be scheduled to run at some point in the future.
+ *
+ * Typically this is called by the IRQ upper-half to defer further processing of
+ * IRQ-related work to the IRQ bottom-half handler. This is necessary for work
+ * that cannot be done in an IRQ context by the IRQ upper-half handler. Timer
+ * callbacks also use this mechanism, because they are treated as though they
+ * operate in an IRQ context. Refer to \ref _mali_osk_timer_t for more
+ * information.
+ *
+ * Code that operates in a kernel-process context (with no IRQ context
+ * restrictions) may also enqueue deferred calls to the IRQ bottom-half. The
+ * advantage over direct calling is that deferred calling allows the caller and
+ * IRQ bottom half to hold the same mutex, with a guarantee that they will not
+ * deadlock just by using this mechanism.
+ *
+ * _mali_osk_wq_schedule_work() places deferred call requests on a queue, to
+ * allow for more than one thread to make a deferred call. Therefore, if it is
+ * called 'K' times, then the IRQ bottom-half will be scheduled 'K' times too.
+ * 'K' is a number that is implementation-specific.
+ *
+ * _mali_osk_wq_schedule_work() is guaranteed to not block on:
+ * - enqueuing a deferred call request.
+ * - the completion of the work handler.
+ *
+ * This is to prevent deadlock. For example, if _mali_osk_wq_schedule_work()
+ * blocked, then it would cause a deadlock when the following two conditions
+ * hold:
+ * - The work handler callback (of type _mali_osk_wq_work_handler_t) locks
+ * a mutex
+ * - And, at the same time, the caller of _mali_osk_wq_schedule_work() also
+ * holds the same mutex
+ *
+ * @note care must be taken to not overflow the queue that
+ * _mali_osk_wq_schedule_work() operates on. Code must be structured to
+ * ensure that the number of requests made to the queue is bounded. Otherwise,
+ * work will be lost.
+ *
+ * The queue that _mali_osk_wq_schedule_work implements is a FIFO of N-writer,
+ * 1-reader type. The writers are the callers of _mali_osk_wq_schedule_work
+ * (all OSK-registered IRQ upper-half handlers in the system, watchdog timers,
+ * callers from a Kernel-process context). The reader is a single thread that
+ * handles all OSK-registered work.
+ *
+ * @param work a pointer to the _mali_osk_wq_work_t object corresponding to the
+ * work to begin processing.
+ */
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work);
+
+/** @brief Cause a queued, deferred call of the high priority work handler
+ *
+ * This function is the same as \a _mali_osk_wq_schedule_work(), the only
+ * difference being that the work runs at high (real-time) priority on the system.
+ *
+ * It should only be used as a substitute for doing the same work in interrupts.
+ *
+ * The work is allowed to sleep, but it should be small since it will block
+ * all other applications.
+*/
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work);
+
+/** @brief Flush the work queue
+ *
+ * This will flush the OSK work queue, ensuring all work in the queue has
+ * completed before returning.
+ *
+ * Since this blocks on the completion of work in the work-queue, the
+ * caller of this function \b must \b not hold any mutexes that are taken by
+ * any registered work handler. To do so may cause a deadlock.
+ *
+ */
+void _mali_osk_wq_flush(void);
+
+/** @brief Create work in the delayed work queue
+ *
+ * Creates a work object which can be scheduled in the work queue. When
+ * scheduled, a timer will be started, and \a handler will be called with
+ * \a data as the argument when the timer expires.
+ *
+ * Refer to \ref _mali_osk_wq_delayed_schedule_work() for details on how work
+ * is scheduled in the queue.
+ *
+ * The returned pointer must be freed with \ref _mali_osk_wq_delayed_delete_work_nonflush()
+ * when no longer needed.
+ */
+_mali_osk_wq_delayed_work_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data);
+
+/** @brief Delete a work object
+ *
+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will
+ * not be called after deletion.
+ */
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work without waiting for it to finish
+ *
+ * Note that the \a work callback function may still be running on return from
+ * _mali_osk_wq_delayed_cancel_work_async().
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Cancel a delayed work and wait for it to finish
+ *
+ * When this function returns, the \a work was either cancelled or it finished running.
+ *
+ * @param work The delayed work to be cancelled
+ */
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work);
+
+/** @brief Put \a work task in global workqueue after delay
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue.
+ *
+ * If \a work was already on a queue, this function will return without doing anything.
+ *
+ * @param work job to be done
+ * @param delay number of jiffies to wait or 0 for immediate execution
+ */
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay);
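+
+/*
+ * Delayed-work sketch (hypothetical names; assumes a handler as for the
+ * regular work queue and the Linux msecs_to_jiffies() helper):
+ *
+ * _mali_osk_wq_delayed_work_t *dwork =
+ * _mali_osk_wq_delayed_create_work(my_handler, my_ctx);
+ * _mali_osk_wq_delayed_schedule_work(dwork, msecs_to_jiffies(100));
+ * ...
+ * _mali_osk_wq_delayed_cancel_work_sync(dwork); // handler no longer running
+ * _mali_osk_wq_delayed_delete_work_nonflush(dwork);
+ */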
+
+/** @} */ /* end group _mali_osk_wq */
+
+
+/** @addtogroup _mali_osk_irq
+ * @{ */
+
+/** @brief Initialize IRQ handling for a resource
+ *
+ * Registers an interrupt handler \a uhandler for the given IRQ number \a irqnum.
+ * \a data will be passed as argument to the handler when an interrupt occurs.
+ *
+ * If \a irqnum is -1, _mali_osk_irq_init will probe for the IRQ number using
+ * the supplied \a trigger_func and \a ack_func. These functions will also
+ * receive \a data as their argument.
+ *
+ * @param irqnum The IRQ number that the resource uses, as seen by the CPU.
+ * The value -1 has a special meaning which indicates the use of probing, and
+ * trigger_func and ack_func must be non-NULL.
+ * @param uhandler The interrupt handler, corresponding to an ISR handler for
+ * the resource
+ * @param int_data resource specific data, which will be passed to uhandler
+ * @param trigger_func Optional: a function to trigger the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param ack_func Optional: a function to acknowledge the resource's irq, to
+ * probe for the interrupt. Use NULL if irqnum != -1.
+ * @param probe_data resource-specific data, which will be passed to
+ * (if present) trigger_func and ack_func
+ * @param description textual description of the IRQ resource.
+ * @return on success, a pointer to a _mali_osk_irq_t object, which represents
+ * the IRQ handling on this resource. NULL on failure.
+ */
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description);
+
+/** @brief Terminate IRQ handling on a resource.
+ *
+ * This disables the interrupt from the device, and then waits for any
+ * currently executing IRQ handlers to complete.
+ *
+ * @note If work is deferred to an IRQ bottom-half handler through
+ * \ref _mali_osk_wq_schedule_work(), be sure to flush any remaining work
+ * with \ref _mali_osk_wq_flush() or (implicitly) with \ref _mali_osk_wq_delete_work()
+ *
+ * @param irq a pointer to the _mali_osk_irq_t object corresponding to the
+ * resource whose IRQ handling is to be terminated.
+ */
+void _mali_osk_irq_term(_mali_osk_irq_t *irq);
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @addtogroup _mali_osk_atomic
+ * @{ */
+
+/** @brief Decrement an atomic counter
+ *
+ * @note It is an error to decrement the counter beyond -(1<<23)
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom);
+
+/** @brief Decrement an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter
+ * @return The new value, after decrement */
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter
+ *
+ * @note It is an error to increment the counter beyond (1<<23)-1
+ *
+ * @param atom pointer to an atomic counter */
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom);
+
+/** @brief Increment an atomic counter, return new value
+ *
+ * @param atom pointer to an atomic counter */
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom);
+
+/** @brief Initialize an atomic counter
+ *
+ * @note the parameter required is a u32, and so signed integers should be
+ * cast to u32.
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the value to initialize the atomic counter.
+ */
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val);
+
+/** @brief Read a value from an atomic counter
+ *
+ * This can only be safely used to determine the value of the counter when it
+ * is guaranteed that other threads will not be modifying the counter. This
+ * makes its usefulness limited.
+ *
+ * @param atom pointer to an atomic counter
+ */
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom);
+
+/** @brief Terminate an atomic counter
+ *
+ * @param atom pointer to an atomic counter
+ */
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom);
+
+/** @brief Assign a new value to the atomic counter, and return the old value
+ *
+ * @param atom pointer to an atomic counter
+ * @param val the new value assign to the atomic counter
+ * @return the old value of the atomic counter
+ */
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val);
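+
+/*
+ * Reference-counting sketch using the atomic API above (hypothetical usage;
+ * the release step is whatever cleanup the object needs):
+ *
+ * _mali_osk_atomic_t refcount;
+ * _mali_osk_atomic_init(&refcount, 1); // object starts with one reference
+ * _mali_osk_atomic_inc(&refcount); // another user takes a reference
+ *
+ * // each user drops its reference like this:
+ * if (0 == _mali_osk_atomic_dec_return(&refcount)) {
+ * _mali_osk_atomic_term(&refcount); // last reference: release the object
+ * }
+ */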
+/** @} */ /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_memory OSK Memory Allocation
+ * @{ */
+
+/** @brief Allocate zero-initialized memory.
+ *
+ * Returns a buffer capable of containing at least \a n elements of \a size
+ * bytes each. The buffer is initialized to zero.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider using _mali_osk_valloc() instead, as this function might
+ * map down to an OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * @param n Number of elements to allocate
+ * @param size Size of each element
+ * @return On success, the zero-initialized buffer allocated. NULL on failure
+ */
+void *_mali_osk_calloc(u32 n, u32 size);
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * If there is a need for a bigger block of memory (16KB or bigger), then
+ * consider using _mali_osk_valloc() instead, as this function might
+ * map down to an OS function with size limitations.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_malloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_malloc() and _mali_osk_calloc()
+ * must be freed before the application exits. Otherwise,
+ * a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_free(void *ptr);
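+
+/*
+ * Allocation sketch (hypothetical struct; see the size guidance above):
+ *
+ * struct my_state *state = _mali_osk_calloc(1, sizeof(struct my_state));
+ * if (NULL == state) {
+ * return _MALI_OSK_ERR_NOMEM;
+ * }
+ * ... use state (zero-initialized) ...
+ * _mali_osk_free(state);
+ */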
+
+/** @brief Allocate memory.
+ *
+ * Returns a buffer capable of containing at least \a size bytes. The
+ * contents of the buffer are undefined.
+ *
+ * This function is potentially slower than _mali_osk_malloc() and _mali_osk_calloc(),
+ * but does support bigger sizes.
+ *
+ * The buffer is suitably aligned for storage and subsequent access of every
+ * type that the compiler supports. Therefore, the pointer to the start of the
+ * buffer may be cast into any pointer type, and be subsequently accessed from
+ * such a pointer, without loss of information.
+ *
+ * When the buffer is no longer in use, it must be freed with _mali_osk_free().
+ * Failure to do so will cause a memory leak.
+ *
+ * @note Most toolchains supply memory allocation functions that meet the
+ * compiler's alignment requirements.
+ *
+ * Remember to free memory using _mali_osk_free().
+ * @param size Number of bytes to allocate
+ * @return On success, the buffer allocated. NULL on failure.
+ */
+void *_mali_osk_valloc(u32 size);
+
+/** @brief Free memory.
+ *
+ * Reclaims the buffer pointed to by the parameter \a ptr for the system.
+ * All memory returned from _mali_osk_valloc() must be freed before the
+ * application exits. Otherwise a memory leak will occur.
+ *
+ * Memory must be freed once. It is an error to free the same non-NULL pointer
+ * more than once.
+ *
+ * It is legal to free the NULL pointer.
+ *
+ * @param ptr Pointer to buffer to free
+ */
+void _mali_osk_vfree(void *ptr);
+
+/** @brief Copies memory.
+ *
+ * Copies \a len bytes from the buffer pointed to by the parameter \a src
+ * directly to the buffer pointed to by \a dst.
+ *
+ * It is an error for \a src to overlap \a dst anywhere in \a len bytes.
+ *
+ * @param dst Pointer to the destination array where the content is to be
+ * copied.
+ * @param src Pointer to the source of data to be copied.
+ * @param len Number of bytes to copy.
+ * @return \a dst is always passed through unmodified.
+ */
+void *_mali_osk_memcpy(void *dst, const void *src, u32 len);
+
+/** @brief Fills memory.
+ *
+ * Sets the first \a n bytes of the block of memory pointed to by \a s to
+ * the specified value.
+ * @param s Pointer to the block of memory to fill.
+ * @param c Value to be set, passed as u32. Only the 8 Least Significant Bits (LSB)
+ * are used.
+ * @param n Number of bytes to be set to the value.
+ * @return \a s is always passed through unmodified
+ */
+void *_mali_osk_memset(void *s, u32 c, u32 n);
+/** @} */ /* end group _mali_osk_memory */
+
+
+/** @brief Checks the amount of memory allocated
+ *
+ * Checks that not more than \a max_allocated bytes are allocated.
+ *
+ * Some OSs bring up an interactive out-of-memory dialogue when the
+ * system runs out of memory. This can stall non-interactive
+ * apps (e.g. automated test runs). This function can be used to
+ * avoid triggering the OOM dialogue by keeping allocations
+ * within a certain limit.
+ *
+ * @return MALI_TRUE when \a max_allocated bytes are not in use yet. MALI_FALSE
+ * when at least \a max_allocated bytes are in use.
+ */
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated);
+
+
+/** @addtogroup _mali_osk_low_level_memory
+ * @{ */
+
+/** @brief Issue a memory barrier
+ *
+ * This defines an arbitrary memory barrier operation, which forces an ordering constraint
+ * on memory read and write operations.
+ */
+void _mali_osk_mem_barrier(void);
+
+/** @brief Issue a write memory barrier
+ *
+ * This defines a write memory barrier operation, which forces an ordering constraint
+ * on memory write operations.
+ */
+void _mali_osk_write_mem_barrier(void);
+
+/** @brief Map a physically contiguous region into kernel space
+ *
+ * This is primarily used for mapping in registers from resources, and Mali-MMU
+ * page tables. The mapping is only visible from kernel-space.
+ *
+ * Access has to go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @param phys CPU-physical base address of the memory to map in. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * map in
+ * @param description A textual description of the memory being mapped in.
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure.
+ */
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Unmap a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_mapioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt an unmap twice
+ * - unmap only part of a range obtained through _mali_osk_mem_mapioregion
+ * - unmap more than the range obtained through _mali_osk_mem_mapioregion
+ * - unmap an address range that was not successfully mapped using
+ * _mali_osk_mem_mapioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in. This must be aligned to the system's page size, which is assumed
+ * to be 4K
+ * @param size The number of bytes that were originally mapped in.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address mapping);
+
+/** @brief Allocate and Map a physically contiguous region into kernel space
+ *
+ * This is used for allocating physically contiguous regions (such as Mali-MMU
+ * page tables) and mapping them into kernel space. The mapping is only
+ * visible from kernel-space.
+ *
+ * The alignment of the returned memory is guaranteed to be at least
+ * _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * Access must go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32
+ *
+ * @note This function is primarily to provide support for OSs that are
+ * incapable of separating the tasks 'allocate physically contiguous memory'
+ * and 'map it into kernel space'
+ *
+ * @param[out] phys CPU-physical base address of memory that was allocated.
+ * (*phys) will be guaranteed to be aligned to at least
+ * _MALI_OSK_CPU_PAGE_SIZE on success.
+ *
+ * @param[in] size the number of bytes of physically contiguous memory to
+ * allocate. This must be a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ *
+ * @return On success, a Mali IO address through which the mapped-in
+ * memory/registers can be accessed. NULL on failure, and (*phys) is unmodified.
+ */
+mali_io_address _mali_osk_mem_allocioregion(u32 *phys, u32 size);
+
+/** @brief Free a physically contiguous address range from kernel space.
+ *
+ * The address range should be one previously mapped in through
+ * _mali_osk_mem_allocioregion.
+ *
+ * It is a programming error to do (but not limited to) the following:
+ * - attempt a free twice on the same ioregion
+ * - free only part of a range obtained through _mali_osk_mem_allocioregion
+ * - free more than the range obtained through _mali_osk_mem_allocioregion
+ * - free an address range that was not successfully mapped using
+ * _mali_osk_mem_allocioregion
+ * - provide a mapping that does not map to phys.
+ *
+ * @param phys CPU-physical base address of the memory that was originally
+ * mapped in, which was aligned to _MALI_OSK_CPU_PAGE_SIZE.
+ * @param size The number of bytes that were originally mapped in, which was
+ * a multiple of _MALI_OSK_CPU_PAGE_SIZE.
+ * @param mapping The Mali IO address through which the mapping is
+ * accessed.
+ */
+void _mali_osk_mem_freeioregion(u32 phys, u32 size, mali_io_address mapping);
+
+/** @brief Request a region of physically contiguous memory
+ *
+ * This is used to ensure exclusive access to a region of physically contiguous
+ * memory.
+ *
+ * It is acceptable to implement this as a stub. However, it is then the job
+ * of the System Integrator to ensure that no other device driver will be using
+ * the physical address ranges used by Mali, while the Mali device driver is
+ * loaded.
+ *
+ * @param phys CPU-physical base address of the memory to request. This must
+ * be aligned to the system's page size, which is assumed to be 4K.
+ * @param size the number of bytes of physically contiguous address space to
+ * request.
+ * @param description A textual description of the memory being requested.
+ * @return _MALI_OSK_ERR_OK on success. Otherwise, a suitable
+ * _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description);
+
+/** @brief Un-request a region of physically contiguous memory
+ *
+ * This is used to release a region of physically contiguous memory previously
+ * requested through _mali_osk_mem_reqregion, so that other device drivers may
+ * use it. This will be called at time of Mali device driver termination.
+ *
+ * It is a programming error to attempt to:
+ * - unrequest a region twice
+ * - unrequest only part of a range obtained through _mali_osk_mem_reqregion
+ * - unrequest more than the range obtained through _mali_osk_mem_reqregion
+ * - unrequest an address range that was not successfully requested using
+ * _mali_osk_mem_reqregion
+ *
+ * @param phys CPU-physical base address of the memory to un-request. This must
+ * be aligned to the system's page size, which is assumed to be 4K
+ * @param size the number of bytes of physically contiguous address space to
+ * un-request.
+ */
+void _mali_osk_mem_unreqregion(uintptr_t phys, u32 size);
+
+/** @brief Read from a location currently mapped in through
+ * _mali_osk_mem_mapioregion
+ *
+ * This reads a 32-bit word from a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to read from memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to read from
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @return the 32-bit word from the specified location.
+ */
+u32 _mali_osk_mem_ioread32(volatile mali_io_address mapping, u32 offset);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion without memory barriers
+ *
+ * This writes a 32-bit word to a 32-bit aligned location without using a memory barrier.
+ * It is a programming error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address mapping, u32 offset, u32 val);
+
+/** @brief Write to a location currently mapped in through
+ * _mali_osk_mem_mapioregion with write memory barrier
+ *
+ * This writes a 32-bit word to a 32-bit aligned location. It is a programming
+ * error to provide unaligned locations, or to write to memory that is not
+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or
+ * _mali_osk_mem_allocioregion().
+ *
+ * @param mapping Mali IO address to write to
+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4
+ * @param val the 32-bit word to write.
+ */
+void _mali_osk_mem_iowrite32(volatile mali_io_address mapping, u32 offset, u32 val);
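+
+/*
+ * Register-access sketch (hypothetical base address, size and offsets):
+ *
+ * mali_io_address regs =
+ * _mali_osk_mem_mapioregion(0xC0000000, 0x1000, "example regs");
+ * if (NULL != regs) {
+ * u32 status = _mali_osk_mem_ioread32(regs, 0x0);
+ * _mali_osk_mem_iowrite32(regs, 0x4, 0x1); // write with barrier
+ * _mali_osk_mem_unmapioregion(0xC0000000, 0x1000, regs);
+ * }
+ */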
+
+/** @brief Flush all CPU caches
+ *
+ * This should only be implemented if flushing of the cache is required for
+ * memory mapped in through _mali_osk_mem_mapregion.
+ */
+void _mali_osk_cache_flushall(void);
+
+/** @brief Flush any caches necessary for the CPU and MALI to have the same view of a range of uncached mapped memory
+ *
+ * This should only be implemented if your OS doesn't do a full cache flush (inner & outer)
+ * after allocating uncached mapped memory.
+ *
+ * Some OSs do not perform a full cache flush (including all outer caches) for uncached mapped memory.
+ * They zero the memory through a cached mapping, then flush the inner caches but not the outer caches.
+ * This is required for MALI to have the correct view of the memory.
+ */
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size);
+
+/** @brief Safely copy as much data as possible from src to dest
+ *
+ * Do not crash if src or dest isn't available.
+ *
+ * @param dest Destination buffer (limited to user space mapped Mali memory)
+ * @param src Source buffer
+ * @param size Number of bytes to copy
+ * @return Number of bytes actually copied
+ */
+u32 _mali_osk_mem_write_safe(void *dest, const void *src, u32 size);
+
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+
+/** @addtogroup _mali_osk_notification
+ *
+ * User space notification framework
+ *
+ * Communication with user space of asynchronous events is performed through a
+ * synchronous call to the \ref u_k_api.
+ *
+ * Since the events are asynchronous, the events have to be queued until a
+ * synchronous U/K API call can be made by user-space. A U/K API call might also
+ * be received before any event has happened. Therefore the notifications the
+ * different subsystems want to send to user space have to be queued for later
+ * reception, or a U/K API call has to be blocked until an event has occurred.
+ *
+ * Typical uses of notifications are after running of jobs on the hardware or
+ * when changes to the system are detected that need to be relayed to user
+ * space.
+ *
+ * After an event has occurred, user space has to be notified using some kind of
+ * message. The notification framework supports sending messages to waiting
+ * threads or queueing of messages until a U/K API call is made.
+ *
+ * The notification queue is a FIFO. There are no restrictions on the numbers
+ * of readers or writers in the queue.
+ *
+ * A message contains what user space needs to identify how to handle an
+ * event. This includes a type field and a possible type specific payload.
+ *
+ * A notification to user space is represented by a
+ * \ref _mali_osk_notification_t object. A sender gets hold of such an object
+ * using _mali_osk_notification_create(). The buffer given by the
+ * _mali_osk_notification_t::result_buffer field in the object is used to store
+ * any type specific data. The other fields are internal to the queue system
+ * and should not be touched.
+ *
+ * @{ */
+
+/** @brief Create a notification object
+ *
+ * Returns a notification object which can be added to the queue of
+ * notifications pending for user space transfer.
+ *
+ * The implementation will initialize all members of the
+ * \ref _mali_osk_notification_t object. In particular, the
+ * _mali_osk_notification_t::result_buffer member will be initialized to point
+ * to \a size bytes of storage, and that storage will be suitably aligned for
+ * storage of any structure. That is, the created buffer meets the same
+ * requirements as _mali_osk_malloc().
+ *
+ * The notification object must be deleted when not in use. Use
+ * _mali_osk_notification_delete() for deleting it.
+ *
+ * @note You \b must \b not call _mali_osk_free() on a \ref _mali_osk_notification_t
+ * object, or on a _mali_osk_notification_t::result_buffer. You must only use
+ * _mali_osk_notification_delete() to free the resources associated with a
+ * \ref _mali_osk_notification_t object.
+ *
+ * @param type The notification type
+ * @param size The size of the type specific buffer to send
+ * @return Pointer to a notification object with a suitable buffer, or NULL on error.
+ */
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size);
+
+/** @brief Delete a notification object
+ *
+ * This must be called to reclaim the resources of a notification object. This
+ * includes:
+ * - The _mali_osk_notification_t::result_buffer
+ * - The \ref _mali_osk_notification_t itself.
+ *
+ * A notification object \b must \b not be used after it has been deleted by
+ * _mali_osk_notification_delete().
+ *
+ * In addition, the notification object may not be deleted while it is in a
+ * queue. That is, if it has been placed on a queue with
+ * _mali_osk_notification_queue_send(), then it must not be deleted until
+ * it has been received by a call to _mali_osk_notification_queue_receive().
+ * Otherwise, the queue may be corrupted.
+ *
+ * @param object the notification object to delete.
+ */
+void _mali_osk_notification_delete(_mali_osk_notification_t *object);
+
+/** @brief Create a notification queue
+ *
+ * Creates a notification queue, which can be used to queue messages for
+ * delivery to user space and to retrieve queued messages from.
+ *
+ * The queue is a FIFO, and has no restrictions on the numbers of readers or
+ * writers.
+ *
+ * When the queue is no longer in use, it must be terminated with
+ * \ref _mali_osk_notification_queue_term(). Failure to do so will result in a
+ * memory leak.
+ *
+ * @return Pointer to a new notification queue or NULL on error.
+ */
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void);
+
+/** @brief Destroy a notification queue
+ *
+ * Destroys a notification queue and frees associated resources from the queue.
+ *
+ * A notification queue \b must \b not be destroyed in the following cases:
+ * - while there are \ref _mali_osk_notification_t objects in the queue.
+ * - while there are writers currently acting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_send() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_send() on the queue in the future.
+ * - while there are readers currently waiting upon the queue. That is, while
+ * a thread is currently calling \ref _mali_osk_notification_queue_receive() on
+ * the queue, or while a thread may call
+ * \ref _mali_osk_notification_queue_receive() on the queue in the future.
+ *
+ * Therefore, all \ref _mali_osk_notification_t objects must be flushed and
+ * deleted by the code that makes use of the notification queues, since only
+ * they know the structure of the _mali_osk_notification_t::result_buffer
+ * (even if it may only be a flat structure).
+ *
+ * @note Since the queue is a FIFO, the code using notification queues may
+ * create its own 'flush' type of notification, to assist in flushing the
+ * queue.
+ *
+ * Once the queue has been destroyed, it must not be used again.
+ *
+ * @param queue The queue to destroy
+ */
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue);
+
+/** @brief Schedule notification for delivery
+ *
+ * When a \ref _mali_osk_notification_t object has been created successfully
+ * and set up, it may be added to the queue of objects waiting for user space
+ * transfer.
+ *
+ * The sending will not block if the queue is full.
+ *
+ * A \ref _mali_osk_notification_t object \b must \b not be put on two different
+ * queues at the same time, or enqueued twice onto a single queue before
+ * reception. However, it is acceptable for it to be requeued \em after reception
+ * from a call to _mali_osk_notification_queue_receive(), even onto the same queue.
+ *
+ * Again, requeuing must also not enqueue onto two different queues at the same
+ * time, or enqueue onto the same queue twice before reception.
+ *
+ * @param queue The notification queue to add this notification to
+ * @param object The entry to add
+ */
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object);
+
+/** @brief Receive a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifications are ready, the thread will sleep until one becomes ready.
+ * Therefore, notifications may not be received into an
+ * IRQ or 'atomic' context (that is, a context where sleeping is disallowed).
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success. _MALI_OSK_ERR_RESTARTSYSCALL if the sleep was interrupted.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
+
+/** @brief Dequeues a notification from a queue
+ *
+ * Receives a single notification from the given queue.
+ *
+ * If no notifications are ready, the function call will return an error code.
+ *
+ * @param queue The queue to receive from
+ * @param result Pointer to storage of a pointer of type
+ * \ref _mali_osk_notification_t*. \a result will be written to such that the
+ * expression \a (*result) will evaluate to a pointer to a valid
+ * \ref _mali_osk_notification_t object, or NULL if none were received.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if queue was empty.
+ */
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result);
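+
+/*
+ * Producer/consumer sketch (hypothetical type id, payload struct and queue;
+ * error handling elided):
+ *
+ * // producer side
+ * _mali_osk_notification_t *n =
+ * _mali_osk_notification_create(MY_EVENT_TYPE, sizeof(struct my_payload));
+ * ((struct my_payload *)n->result_buffer)->code = 42;
+ * _mali_osk_notification_queue_send(queue, n);
+ *
+ * // consumer side (blocks until a notification arrives)
+ * _mali_osk_notification_t *received = NULL;
+ * if (_MALI_OSK_ERR_OK ==
+ * _mali_osk_notification_queue_receive(queue, &received)) {
+ * ... handle received->result_buffer ...
+ * _mali_osk_notification_delete(received);
+ * }
+ */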
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @addtogroup _mali_osk_timer
+ *
+ * Timers use the OS's representation of time, which is 'ticks'. This is to
+ * prevent aliasing problems between the internal timer time, and the time
+ * asked for.
+ *
+ * @{ */
+
+/** @brief Initialize a timer
+ *
+ * Allocates resources for a new timer, and initializes them. This does not
+ * start the timer.
+ *
+ * @return a pointer to the allocated timer object, or NULL on failure.
+ */
+_mali_osk_timer_t *_mali_osk_timer_init(void);
+
+/** @brief Start a timer
+ *
+ * It is an error to start a timer without setting the callback via
+ * _mali_osk_timer_setcallback().
+ *
+ * It is an error to use this to start an already started timer.
+ *
+ * The timer will expire in \a ticks_to_expire ticks, at which point, the
+ * callback function will be invoked with the callback-specific data,
+ * as registered by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to start
+ * @param ticks_to_expire the amount of time in ticks for the timer to run
+ * before triggering.
+ */
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Modify a timer
+ *
+ * Set the relative time at which a timer will expire, and start it if it is
+ * stopped. If \a ticks_to_expire is 0, the timer fires immediately.
+ *
+ * It is an error to modify a timer without setting the callback via
+ * _mali_osk_timer_setcallback().
+ *
+ * The timer will expire at \a ticks_to_expire from the time of the call, at
+ * which point, the callback function will be invoked with the
+ * callback-specific data, as set by _mali_osk_timer_setcallback().
+ *
+ * @param tim the timer to modify, and start if necessary
+ * @param ticks_to_expire the \em relative time in ticks from the time of the
+ * call at which this timer should trigger.
+ *
+ */
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire);
+
+/** @brief Stop a timer, and block on its completion.
+ *
+ * Stop the timer. When the function returns, it is guaranteed that the timer's
+ * callback will not be running on any CPU core.
+ *
+ * Since stopping the timer blocks on completion of the callback, the callback
+ * may not obtain any mutexes that the caller holds. Otherwise, a deadlock will
+ * occur.
+ *
+ * @note While the callback itself is guaranteed to not be running, work
+ * enqueued on the work-queue by the timer (with
+ * \ref _mali_osk_wq_schedule_work()) may still run. The timer callback and
+ * work handler must take this into account.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ *
+ */
+void _mali_osk_timer_del(_mali_osk_timer_t *tim);
+
+/** @brief Stop a timer.
+ *
+ * Stop the timer. When the function returns, the timer's callback may still be
+ * running on any CPU core.
+ *
+ * It is legal to stop an already stopped timer.
+ *
+ * @param tim the timer to stop.
+ */
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim);
+
+/** @brief Check if timer is pending.
+ *
+ * Check if timer is active.
+ *
+ * @param tim the timer to check
+ * @return MALI_TRUE if the timer is active, MALI_FALSE if it is not active
+ */
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim);
+
+/** @brief Set a timer's callback parameters.
+ *
+ * This must be called at least once before a timer is started/modified.
+ *
+ * After a timer has been stopped or expires, the callback remains set. This
+ * means that restarting the timer will call the same function with the same
+ * parameters on expiry.
+ *
+ * @param tim the timer to set callback on.
+ * @param callback Function to call when timer expires
+ * @param data Function-specific data to supply to the function on expiry.
+ */
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data);
+
+/** @brief Terminate a timer, and deallocate resources.
+ *
+ * The timer must first be stopped by calling _mali_osk_timer_del().
+ *
+ * It is a programming error for _mali_osk_timer_term() to be called on:
+ * - a timer that is currently running
+ * - a timer that is currently executing its callback.
+ *
+ * @param tim the timer to deallocate.
+ */
+void _mali_osk_timer_term(_mali_osk_timer_t *tim);
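+
+/* Usage sketch (illustrative): the full timer lifecycle, assuming a caller
+ * defined callback my_timeout_handler() and callback data my_data. The
+ * callback must be set before the timer is first started:
+ *
+ *     _mali_osk_timer_t *t = _mali_osk_timer_init();
+ *     _mali_osk_timer_setcallback(t, my_timeout_handler, my_data);
+ *     _mali_osk_timer_add(t, _mali_osk_time_mstoticks(100));
+ *     (later)
+ *     _mali_osk_timer_del(t);   (blocks until any running callback finishes)
+ *     _mali_osk_timer_term(t);
+ */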
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @defgroup _mali_osk_time OSK Time functions
+ *
+ * The \ref _mali_osk_time functions use the OS's representation of time, which
+ * is 'ticks'. This is to prevent aliasing problems between the internal timer
+ * time, and the time asked for.
+ *
+ * OS tick time is measured as a u32. The time stored in a u32 may either be
+ * an absolute time, or a time delta between two events. Whilst it is valid to
+ * use math operators to \em change the tick value represented as a u32, it
+ * is often only meaningful to do such operations on time deltas, rather than
+ * on absolute time. However, it is meaningful to add/subtract time deltas to
+ * absolute times.
+ *
+ * Conversions between tick time and milliseconds (ms) may not be loss-less,
+ * and are \em implementation \em dependent.
+ *
+ * Code using OS time must take this into account, since:
+ * - a small OS time may (or may not) be rounded
+ * - a large time may (or may not) overflow
+ *
+ * @{ */
+
+/** @brief Return whether ticka occurs after or at the same time as tickb
+ *
+ * Implementations must correctly handle systems where the tick count can wrap.
+ *
+ * @param ticka ticka
+ * @param tickb tickb
+ * @return MALI_TRUE if ticka represents a time that occurs at or after tickb.
+ */
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb);
+
+/** @brief Convert milliseconds to OS 'ticks'
+ *
+ * @param ms time interval in milliseconds
+ * @return the corresponding time interval in OS ticks.
+ */
+unsigned long _mali_osk_time_mstoticks(u32 ms);
+
+/** @brief Convert OS 'ticks' to milliseconds
+ *
+ * @param ticks time interval in OS ticks.
+ * @return the corresponding time interval in milliseconds
+ */
+u32 _mali_osk_time_tickstoms(unsigned long ticks);
+
+
+/** @brief Get the current time in OS 'ticks'.
+ * @return the current time in OS 'ticks'.
+ */
+unsigned long _mali_osk_time_tickcount(void);
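+
+/* Example (illustrative): implementing a timeout by adding a tick delta to an
+ * absolute tick count, then comparing with _mali_osk_time_after_eq():
+ *
+ *     unsigned long deadline = _mali_osk_time_tickcount()
+ *                              + _mali_osk_time_mstoticks(50);
+ *
+ *     while (MALI_FALSE == done) {
+ *         if (_mali_osk_time_after_eq(_mali_osk_time_tickcount(), deadline)) {
+ *             return _MALI_OSK_ERR_TIMEOUT;
+ *         }
+ *     }
+ */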
+
+/** @brief Cause a microsecond delay
+ *
+ * The delay will have microsecond resolution, and is necessary for correct
+ * operation of the driver. The delay will be \b at least \a usecs
+ * microseconds, and so may be (significantly) more.
+ *
+ * This function may be implemented as a busy-wait, which is the most sensible
+ * implementation. On OSs where there are situations in which a thread must
+ * not sleep, it must be implemented as a busy-wait.
+ *
+ * @param usecs the number of microseconds to wait for.
+ */
+void _mali_osk_time_ubusydelay(u32 usecs);
+
+/** @brief Return the time in nanoseconds, measured from an arbitrary reference point.
+ *
+ * @return Time in nanoseconds
+ */
+u64 _mali_osk_time_get_ns(void);
+
+/** @brief Return the time in nanoseconds since boot.
+ *
+ * @return Time in nanoseconds
+ */
+u64 _mali_osk_boot_time_get_ns(void);
+
+/** @} */ /* end group _mali_osk_time */
+
+/** @defgroup _mali_osk_math OSK Math
+ * @{ */
+
+/** @brief Count Leading Zeros (Little-endian)
+ *
+ * @note This function must be implemented to support the reference
+ * implementation of _mali_osk_find_first_zero_bit, as defined in
+ * mali_osk_bitops.h.
+ *
+ * @param val 32-bit word to count leading zeros in
+ * @return the number of leading zeros.
+ */
+u32 _mali_osk_clz(u32 val);
+
+/** @brief Find the last (most-significant) bit set
+ *
+ * @param val 32-bit word to find the last set bit in
+ * @return the position of the last set bit.
+ */
+u32 _mali_osk_fls(u32 val);
+
+/** @} */ /* end group _mali_osk_math */
+
+/** @addtogroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+
+/** @brief Initialize an empty Wait Queue */
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ *
+ * Put the thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true.
+ */
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data);
+
+/** @brief Sleep if condition is false
+ *
+ * @param queue the queue to use
+ * @param condition function pointer to a boolean function
+ * @param data data parameter for condition function
+ * @param timeout timeout in ms
+ *
+ * Put the thread to sleep if the given \a condition function returns false. When
+ * being asked to wake up again, the condition will be re-checked and the
+ * thread only woken up if the condition is now true. The call also returns
+ * once the timeout expires, even if the condition is still false.
+ */
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout);
+
+/** @brief Wake up all threads in wait queue if their respective conditions are
+ * true
+ *
+ * @param queue the queue whose threads should be woken up
+ *
+ * Wake up all threads in wait queue \a queue whose condition is now true.
+ */
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue);
+
+/** @brief terminate a wait queue
+ *
+ * @param queue the queue to terminate.
+ */
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue);
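+
+/* Usage sketch (illustrative): the condition function inspects caller-owned
+ * state (struct my_state is hypothetical here), and the waker updates that
+ * state before calling _mali_osk_wait_queue_wake_up():
+ *
+ *     static mali_bool job_done(void *data)
+ *     {
+ *         return ((struct my_state *)data)->done;
+ *     }
+ *
+ *     (sleeper)  _mali_osk_wait_queue_wait_event(queue, job_done, &state);
+ *     (waker)    state.done = MALI_TRUE;
+ *                _mali_osk_wait_queue_wake_up(queue);
+ */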
+/** @} */ /* end group _mali_osk_wait_queue */
+
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Output a device driver debug message.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ */
+void _mali_osk_dbgmsg(const char *fmt, ...);
+
+/** @brief Print fmt into buf.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param buf a pointer to the result buffer
+ * @param size the total number of bytes allowed to write to \a buf
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ * @return The number of bytes written to \a buf
+ */
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...);
+
+/** @brief Print fmt into print_ctx.
+ *
+ * The interpretation of \a fmt is the same as the \c format parameter in
+ * _mali_osu_vsnprintf().
+ *
+ * @param print_ctx a pointer to the result file buffer
+ * @param fmt a _mali_osu_vsnprintf() style format string
+ * @param ... a variable-number of parameters suitable for \a fmt
+ */
+void _mali_osk_ctxprintf(_mali_osk_print_ctx *print_ctx, const char *fmt, ...);
+
+/** @brief Abnormal process abort.
+ *
+ * Terminates the calling process.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h.
+ *
+ * This function will never return, because continuing from a Debug assert
+ * could cause even more problems and hinder debugging of the initial problem.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_abort(void);
+
+/** @brief Sets breakpoint at point where function is called.
+ *
+ * This function will be called from Debug assert-macros in mali_kernel_common.h,
+ * to assist in debugging. If debugging at this level is not required, then this
+ * function may be implemented as a stub.
+ *
+ * This function is only used in Debug builds, and is not used in Release builds.
+ */
+void _mali_osk_break(void);
+
+/** @brief Return an identifier for the calling process.
+ *
+ * @return Identifier for the calling process.
+ */
+u32 _mali_osk_get_pid(void);
+
+/** @brief Return the name of the calling process.
+ *
+ * @return Name of the calling process.
+ */
+char *_mali_osk_get_comm(void);
+
+/** @brief Return an identifier for the calling thread.
+ *
+ * @return Identifier for the calling thread.
+ */
+u32 _mali_osk_get_tid(void);
+
+
+/** @brief Take a reference to the power manager system for the Mali device (synchronously).
+ *
+ * When function returns successfully, Mali is ON.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void);
+
+/** @brief Take a reference to the external power manager system for the Mali device (asynchronously).
+ *
+ * Mali might not yet be powered on when this function has returned.
+ * Please use \a _mali_osk_pm_dev_barrier() or \a _mali_osk_pm_dev_ref_get_sync()
+ * to wait for Mali to be powered on.
+ *
+ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference.
+ */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void);
+
+/** @brief Release the reference to the external power manager system for the Mali device.
+ *
+ * When the reference count reaches zero, the cores may be powered off.
+ *
+ * @note This must be used to release references taken with
+ * \a _mali_osk_pm_dev_ref_get_sync() or \a _mali_osk_pm_dev_ref_get_async().
+ */
+void _mali_osk_pm_dev_ref_put(void);
+
+/** @brief Block until pending PM operations are done
+ */
+void _mali_osk_pm_dev_barrier(void);
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_bitmap OSK Bitmap
+ * @{ */
+
+/** @brief Allocate a unique number from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @return A unique number allocated from the bitmap object.
+ */
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Free an integer back to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj A number previously allocated from the bitmap object.
+ */
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj);
+
+/** @brief Allocate a contiguous block of numbers from the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param cnt The size of the contiguous block to allocate.
+ * @return The start number of the contiguous block.
+ */
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt);
+
+/** @brief Free a contiguous block of numbers back to the bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ * @param obj The start number of the block.
+ * @param cnt The size of the contiguous block.
+ */
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt);
+
+/** @brief Return the number of values still available for allocation in the
+ * given bitmap object.
+ */
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap);
+
+/** @brief Initialize a bitmap object.
+ *
+ * @param bitmap A pointer to an uninitialized bitmap object.
+ * @param num Size of the bitmap object; this determines how much memory is allocated.
+ * @param reserve The lowest number that may be allocated; values below it are reserved.
+ */
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve);
+
+/** @brief Free the given bitmap object.
+ *
+ * @param bitmap Initialized bitmap object.
+ */
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap);
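+
+/* Usage sketch (illustrative, assuming a zero return from
+ * _mali_osk_bitmap_init() indicates success): allocate IDs from a bitmap of
+ * 128 values, with values below 1 reserved:
+ *
+ *     struct _mali_osk_bitmap map;
+ *
+ *     if (0 == _mali_osk_bitmap_init(&map, 128, 1)) {
+ *         u32 id = _mali_osk_bitmap_alloc(&map);
+ *         (use id)
+ *         _mali_osk_bitmap_free(&map, id);
+ *         _mali_osk_bitmap_term(&map);
+ *     }
+ */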
+/** @} */ /* end group _mali_osk_bitmap */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Check standard inlines */
+#ifndef MALI_STATIC_INLINE
+#error MALI_STATIC_INLINE not defined on your OS
+#endif
+
+#ifndef MALI_NON_STATIC_INLINE
+#error MALI_NON_STATIC_INLINE not defined on your OS
+#endif
+
+#endif /* __MALI_OSK_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_osk_bitops.h b/drivers/gpu/arm/utgard/common/mali_osk_bitops.h
new file mode 100644
index 000000000000..c1709f94c883
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_osk_bitops.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitops.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_BITOPS_H__
+#define __MALI_OSK_BITOPS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void _mali_internal_clear_bit(u32 bit, u32 *addr)
+{
+ MALI_DEBUG_ASSERT(bit < 32);
+ MALI_DEBUG_ASSERT(NULL != addr);
+
+ (*addr) &= ~(1 << bit);
+}
+
+MALI_STATIC_INLINE void _mali_internal_set_bit(u32 bit, u32 *addr)
+{
+ MALI_DEBUG_ASSERT(bit < 32);
+ MALI_DEBUG_ASSERT(NULL != addr);
+
+ (*addr) |= (1 << bit);
+}
+
+MALI_STATIC_INLINE u32 _mali_internal_test_bit(u32 bit, u32 value)
+{
+ MALI_DEBUG_ASSERT(bit < 32);
+ return value & (1 << bit);
+}
+
+MALI_STATIC_INLINE int _mali_internal_find_first_zero_bit(u32 value)
+{
+ u32 inverted;
+ u32 negated;
+ u32 isolated;
+ u32 leading_zeros;
+
+ /* Begin with xxx...x0yyy...y, where ys are 1, number of ys is in range 0..31 */
+ inverted = ~value; /* zzz...z1000...0 */
+ /* Using count_trailing_zeros on inverted value -
+ * See ARM System Developers Guide for details of count_trailing_zeros */
+
+ /* Isolate the zero: it is preceded by a run of 1s, so add 1 to it */
+ negated = (u32) - inverted; /* -a == ~a + 1 (mod 2^n) for n-bit numbers */
+ /* negated = xxx...x1000...0 */
+
+ isolated = negated & inverted; /* xxx...x1000...0 & zzz...z1000...0, zs are ~xs */
+ /* And so the first zero bit is in the same position as the 1 == number of 1s that preceded it
+ * Note that the output is zero if value was all 1s */
+
+ leading_zeros = _mali_osk_clz(isolated);
+
+ return 31 - leading_zeros;
+}
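+
+/* Worked example (illustrative) of the trick above, for value = 0x000000D7
+ * (binary ...11010111, whose first zero is bit 3):
+ *   inverted = ~value             = 0xFFFFFF28
+ *   negated  = -inverted          = 0x000000D8
+ *   isolated = negated & inverted = 0x00000008   (only bit 3 remains set)
+ *   _mali_osk_clz(0x00000008) = 28, and 31 - 28 = 3, the first zero bit.
+ */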
+
+
+/** @defgroup _mali_osk_bitops OSK Non-atomic Bit-operations
+ * @{ */
+
+/**
+ * These bit-operations do not work atomically, and so locks must be used if
+ * atomicity is required.
+ *
+ * Reference implementations for Little Endian are provided, and so it should
+ * not normally be necessary to re-implement these. Efficient bit-twiddling
+ * techniques are used where possible, implemented in portable C.
+ *
+ * Note that these reference implementations rely on _mali_osk_clz() being
+ * implemented.
+ */
+
+/** @brief Clear a bit in a sequence of 32-bit words
+ * @param nr bit number to clear, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_clear_nonatomic_bit(u32 nr, u32 *addr)
+{
+ addr += nr >> 5; /* find the correct word */
+ nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+ _mali_internal_clear_bit(nr, addr);
+}
+
+/** @brief Set a bit in a sequence of 32-bit words
+ * @param nr bit number to set, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ */
+MALI_STATIC_INLINE void _mali_osk_set_nonatomic_bit(u32 nr, u32 *addr)
+{
+ addr += nr >> 5; /* find the correct word */
+ nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+ _mali_internal_set_bit(nr, addr);
+}
+
+/** @brief Test a bit in a sequence of 32-bit words
+ * @param nr bit number to test, starting from the (Little-endian) least
+ * significant bit
+ * @param addr starting point for counting.
+ * @return zero if bit was clear, non-zero if set. Do not rely on the return
+ * value being related to the actual word under test.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_test_bit(u32 nr, u32 *addr)
+{
+ addr += nr >> 5; /* find the correct word */
+ nr = nr & ((1 << 5) - 1); /* The bit number within the word */
+
+ return _mali_internal_test_bit(nr, *addr);
+}
+
+/* Return maxbit if not found */
+/** @brief Find the first zero bit in a sequence of 32-bit words
+ * @param addr starting point for search.
+ * @param maxbit the maximum number of bits to search
+ * @return the number of the first zero bit found, or maxbit if none were found
+ * in the specified range.
+ */
+MALI_STATIC_INLINE u32 _mali_osk_find_first_zero_bit(const u32 *addr, u32 maxbit)
+{
+ u32 total;
+
+ for (total = 0; total < maxbit; total += 32, ++addr) {
+ int result;
+ result = _mali_internal_find_first_zero_bit(*addr);
+
+ /* non-negative signifies the bit was found */
+ if (result >= 0) {
+ total += (u32)result;
+ break;
+ }
+ }
+
+ /* Now check if we reached maxbit or above */
+ if (total >= maxbit) {
+ total = maxbit;
+ }
+
+ return total; /* either the found bit nr, or maxbit if not found */
+}
+/** @} */ /* end group _mali_osk_bitops */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_BITOPS_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_osk_list.h b/drivers/gpu/arm/utgard/common/mali_osk_list.h
new file mode 100644
index 000000000000..22d22446d7a0
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_osk_list.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_list.h
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#ifndef __MALI_OSK_LIST_H__
+#define __MALI_OSK_LIST_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+MALI_STATIC_INLINE void __mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+ next->prev = new_entry;
+ new_entry->next = next;
+ new_entry->prev = prev;
+ prev->next = new_entry;
+}
+
+MALI_STATIC_INLINE void __mali_osk_list_del(_mali_osk_list_t *prev, _mali_osk_list_t *next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** Reference implementations of Doubly-linked Circular Lists are provided.
+ * There is often no need to re-implement these.
+ *
+ * @note The implementation may differ subtly from any lists the OS provides.
+ * For this reason, these lists should not be mixed with OS-specific lists
+ * inside the OSK/UKK implementation. */
+
+/** @brief Initialize a list to be a head of an empty list
+ * @param exp the list to initialize. */
+#define _MALI_OSK_INIT_LIST_HEAD(exp) _mali_osk_list_init(exp)
+
+/** @brief Define a list variable, which is uninitialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD(exp) _mali_osk_list_t exp
+
+/** @brief Define a list variable, which is initialized.
+ * @param exp the name of the variable that the list will be defined as. */
+#define _MALI_OSK_LIST_HEAD_STATIC_INIT(exp) _mali_osk_list_t exp = { &exp, &exp }
+
+/** @brief Initialize a list element.
+ *
+ * All list elements must be initialized before use.
+ *
+ * Do not use on any list element that is present in a list without using
+ * _mali_osk_list_del first, otherwise this will break the list.
+ *
+ * @param list the list element to initialize
+ */
+MALI_STATIC_INLINE void _mali_osk_list_init(_mali_osk_list_t *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+/** @brief Insert a single list element after an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the first element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ * @param new_entry the list element to insert
+ * @param list the list in which to insert. The new element will be the next
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+ __mali_osk_list_add(new_entry, list, list->next);
+}
+
+/** @brief Insert a single list element before an entry in a list
+ *
+ * As an example, if this is inserted to the head of a list, then this becomes
+ * the last element of the list.
+ *
+ * Do not use to move list elements from one list to another, as it will break
+ * the originating list.
+ *
+ * @param new_entry the list element to insert
+ * @param list the list in which to insert. The new element will be the previous
+ * entry in this list
+ */
+MALI_STATIC_INLINE void _mali_osk_list_addtail(_mali_osk_list_t *new_entry, _mali_osk_list_t *list)
+{
+ __mali_osk_list_add(new_entry, list->prev, list);
+}
+
+/** @brief Remove a single element from a list
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will be uninitialized, and so should not be traversed. It must be
+ * initialized before further use.
+ *
+ * @param list the list element to remove.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_del(_mali_osk_list_t *list)
+{
+ __mali_osk_list_del(list->prev, list->next);
+}
+
+/** @brief Remove a single element from a list, and re-initialize it
+ *
+ * The element will no longer be present in the list. The removed list element
+ * will be re-initialized, and so can be used as normal.
+ *
+ * @param list the list element to remove and initialize.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_delinit(_mali_osk_list_t *list)
+{
+ __mali_osk_list_del(list->prev, list->next);
+ _mali_osk_list_init(list);
+}
+
+/** @brief Determine whether a list is empty.
+ *
+ * An empty list is one where the list head's next pointer points back to itself.
+ *
+ * @param list the list to check.
+ * @return non-zero if the list is empty, and zero otherwise.
+ */
+MALI_STATIC_INLINE mali_bool _mali_osk_list_empty(_mali_osk_list_t *list)
+{
+ return list->next == list;
+}
+
+/** @brief Move a list element from one list to another.
+ *
+ * The list element must be initialized.
+ *
+ * As an example, moving a list item to the head of a new list causes this item
+ * to be the first element in the new list.
+ *
+ * @param move_entry the list element to move
+ * @param list the new list into which the element will be inserted, as the next
+ * element in the list.
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move(_mali_osk_list_t *move_entry, _mali_osk_list_t *list)
+{
+ __mali_osk_list_del(move_entry->prev, move_entry->next);
+ _mali_osk_list_add(move_entry, list);
+}
+
+/** @brief Move an entire list
+ *
+ * The list element must be initialized.
+ *
+ * Allows you to move a list from one list head to another list head
+ *
+ * @param old_list The existing list head
+ * @param new_list The new list head (must be an empty list)
+ */
+MALI_STATIC_INLINE void _mali_osk_list_move_list(_mali_osk_list_t *old_list, _mali_osk_list_t *new_list)
+{
+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(new_list));
+ if (!_mali_osk_list_empty(old_list)) {
+ new_list->next = old_list->next;
+ new_list->prev = old_list->prev;
+ new_list->next->prev = new_list;
+ new_list->prev->next = new_list;
+ old_list->next = old_list;
+ old_list->prev = old_list;
+ }
+}
+
+/** @brief Find the containing structure of a list
+ *
+ * When traversing a list, this is used to recover the containing structure,
+ * given that it contains a _mali_osk_list_t member.
+ *
+ * Each list must be of structures of one type, and must link the same members
+ * together, otherwise it will not be possible to correctly recover the
+ * structures that the lists link.
+ *
+ * @note no type or memory checking occurs to ensure that a structure does in
+ * fact exist for the list entry, and that it is being recovered with respect
+ * to the correct list member.
+ *
+ * @param ptr the pointer to the _mali_osk_list_t member in this structure
+ * @param type the type of the structure that contains the member
+ * @param member the member of the structure that ptr points to.
+ * @return a pointer to a \a type object which contains the _mali_osk_list_t
+ * \a member, as pointed to by the _mali_osk_list_t \a *ptr.
+ */
+#define _MALI_OSK_LIST_ENTRY(ptr, type, member) \
+ _MALI_OSK_CONTAINER_OF(ptr, type, member)
+
+/** @brief Enumerate a list safely
+ *
+ * With this macro, lists can be enumerated in a 'safe' manner. That is,
+ * entries can be deleted from the list without causing an error during
+ * enumeration. To achieve this, a 'temporary' pointer is required, which must
+ * be provided to the macro.
+ *
+ * Use it like a 'for()', 'while()' or 'do()' construct, and so it must be
+ * followed by a statement or compound-statement which will be executed for
+ * each list entry.
+ *
+ * Upon loop completion, providing that an early out was not taken in the
+ * loop body, then it is guaranteed that &ptr->member == list, even if the loop
+ * body never executed.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY(ptr, tmp, list, type, member) \
+ for (ptr = _MALI_OSK_LIST_ENTRY((list)->next, type, member), \
+ tmp = _MALI_OSK_LIST_ENTRY(ptr->member.next, type, member); \
+ &ptr->member != (list); \
+ ptr = tmp, \
+ tmp = _MALI_OSK_LIST_ENTRY(tmp->member.next, type, member))
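+
+/* Usage sketch (illustrative): safe removal during enumeration, for a
+ * hypothetical container type embedding a _mali_osk_list_t member 'link':
+ *
+ *     struct my_job { u32 id; _mali_osk_list_t link; };
+ *     struct my_job *job, *tmp;
+ *
+ *     _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &job_list, struct my_job, link) {
+ *         if (job->id == target_id) {
+ *             _mali_osk_list_delinit(&job->link);
+ *         }
+ *     }
+ */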
+
+/** @brief Enumerate a list in reverse order safely
+ *
+ * This macro is identical to @ref _MALI_OSK_LIST_FOREACHENTRY, except that
+ * entries are enumerated in reverse order.
+ *
+ * @param ptr a pointer to an object of type 'type', which points to the
+ * structure that contains the currently enumerated list entry.
+ * @param tmp a pointer to an object of type 'type', which must not be used
+ * inside the list-execution statement.
+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will
+ * begin
+ * @param type the type of the structure that contains the _mali_osk_list_t
+ * member that is part of the list to be enumerated.
+ * @param member the _mali_osk_list_t member of the structure that is part of
+ * the list to be enumerated.
+ */
+#define _MALI_OSK_LIST_FOREACHENTRY_REVERSE(ptr, tmp, list, type, member) \
+ for (ptr = _MALI_OSK_LIST_ENTRY((list)->prev, type, member), \
+ tmp = _MALI_OSK_LIST_ENTRY(ptr->member.prev, type, member); \
+ &ptr->member != (list); \
+ ptr = tmp, \
+ tmp = _MALI_OSK_LIST_ENTRY(tmp->member.prev, type, member))
+
+/** @} */ /* end group _mali_osk_list */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_LIST_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_osk_mali.h b/drivers/gpu/arm/utgard/common/mali_osk_mali.h
new file mode 100644
index 000000000000..b27fb7dd36ed
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_osk_mali.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.h
+ * Defines the OS abstraction layer which is specific for the Mali kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_MALI_H__
+#define __MALI_OSK_MALI_H__
+
+#include <linux/mali/mali_utgard.h>
+#include <mali_osk.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief Struct with device specific configuration data
+ */
+typedef struct mali_gpu_device_data _mali_osk_device_data;
+
+#if defined(CONFIG_MALI_DT) && !defined(CONFIG_MALI_PLAT_SPECIFIC_DT)
+/** @brief Initialize those device resources when we use device tree
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_resource_initialize(void);
+#endif
+
+/** @brief Find Mali GPU HW resource
+ *
+ * @param addr Address of Mali GPU resource to find
+ * @param res Storage for resource information if resource is found.
+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if resource is not found
+ */
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res);
+
+
+/** @brief Find Mali GPU HW base address
+ *
+ * @return The lowest base address among the Mali GPU components, or 0 if no resources are found.
+ */
+uintptr_t _mali_osk_resource_base_address(void);
+
+/** @brief Identify the specific GPU resource.
+ *
+ * @return 0x400 if a Mali-400 specific GPU resource is identified,
+ * 0x450 if a Mali-450 specific GPU resource is identified, or
+ * 0x470 if a Mali-470 specific GPU resource is identified.
+ */
+u32 _mali_osk_identify_gpu_resource(void);
+
+/** @brief Retrieve the Mali GPU specific data
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data);
+
+/** @brief Find the pmu domain config from device data.
+ *
+ * @param domain_config_array Storage for the PMU domain configuration found in the device data.
+ * @param array_size The size of the domain_config_array array.
+ */
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size);
+
+/** @brief Get Mali PMU switch delay
+ *
+ * @return the PMU switch delay if it is configured
+ */
+u32 _mali_osk_get_pmu_switch_delay(void);
+
+/** @brief Determines if Mali GPU has been configured with shared interrupts.
+ *
+ * @return MALI_TRUE if shared interrupts, MALI_FALSE if not.
+ */
+mali_bool _mali_osk_shared_interrupts(void);
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_MALI_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_osk_profiling.h b/drivers/gpu/arm/utgard/common/mali_osk_profiling.h
new file mode 100644
index 000000000000..10f4dc552b03
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_osk_profiling.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_OSK_PROFILING_H__
+#define __MALI_OSK_PROFILING_H__
+
+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS)
+
+#include "mali_linux_trace.h"
+#include "mali_profiling_events.h"
+#include "mali_profiling_gator_api.h"
+
+#define MALI_PROFILING_MAX_BUFFER_ENTRIES 1048576
+
+#define MALI_PROFILING_NO_HW_COUNTER ((u32)-1)
+
+/** @defgroup _mali_osk_profiling External profiling connectivity
+ * @{ */
+
+/**
+ * Initialize the profiling module.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start);
+
+/**
+ * Terminate the profiling module.
+ */
+void _mali_osk_profiling_term(void);
+
+/**
+ * Stop the profile sampling operation.
+ */
+void _mali_osk_profiling_stop_sampling(u32 pid);
+
+/**
+ * Start recording profiling data
+ *
+ * The specified limit will determine how large the capture buffer is.
+ * MALI_PROFILING_MAX_BUFFER_ENTRIES determines the maximum size allowed by the device driver.
+ *
+ * @param limit The desired maximum number of events to record on input, the actual maximum on output.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_start(u32 *limit);
+
+/**
+ * Add a profiling event
+ *
+ * @param event_id The event identifier.
+ * @param data0 First data parameter, depending on event_id specified.
+ * @param data1 Second data parameter, depending on event_id specified.
+ * @param data2 Third data parameter, depending on event_id specified.
+ * @param data3 Fourth data parameter, depending on event_id specified.
+ * @param data4 Fifth data parameter, depending on event_id specified.
+ */
+void _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+/**
+ * Report a hardware counter event.
+ *
+ * @param counter_id The ID of the counter.
+ * @param value The value of the counter.
+ */
+
+/* Call Linux tracepoint directly */
+#define _mali_osk_profiling_report_hw_counter(counter_id, value) trace_mali_hw_counter(counter_id, value)
+
+/**
+ * Report SW counters
+ *
+ * @param counters array of counter values
+ */
+void _mali_osk_profiling_report_sw_counters(u32 *counters);
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value);
+
+/**
+ * Stop recording profiling data
+ *
+ * @param count Returns the number of recorded events.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_stop(u32 *count);
+
+/**
+ * Retrieves the number of events that can be retrieved
+ *
+ * @return The number of recorded events that can be retrieved.
+ */
+u32 _mali_osk_profiling_get_count(void);
+
+/**
+ * Retrieve an event
+ *
+ * @param index Event index (start with 0 and continue until this function fails to retrieve all events)
+ * @param timestamp The timestamp for the retrieved event will be stored here.
+ * @param event_id The event ID for the retrieved event will be stored here.
+ * @param data The 5 data values for the retrieved event will be stored here.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+
+/**
+ * Clear the recorded buffer.
+ *
+ * This is needed in order to start another recording.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t _mali_osk_profiling_clear(void);
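+
+/* Usage sketch (illustrative): one full capture cycle, assuming the module
+ * was initialized with _mali_osk_profiling_init():
+ *
+ *     u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES;
+ *     u32 count, i, id, data[5];
+ *     u64 ts;
+ *
+ *     _mali_osk_profiling_start(&limit);
+ *     (workload runs; events are recorded)
+ *     _mali_osk_profiling_stop(&count);
+ *
+ *     for (i = 0; i < count; i++) {
+ *         _mali_osk_profiling_get_event(i, &ts, &id, data);
+ *     }
+ *     _mali_osk_profiling_clear();   (required before starting a new capture)
+ */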
+
+/**
+ * Checks if a recording of profiling data is in progress
+ *
+ * @return MALI_TRUE if recording of profiling data is in progress, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_is_recording(void);
+
+/**
+ * Checks if profiling data is available for retrieval
+ *
+ * @return MALI_TRUE if profiling data is available, MALI_FALSE if not
+ */
+mali_bool _mali_osk_profiling_have_recording(void);
+
+/** @} */ /* end group _mali_osk_profiling */
+
+#else /* defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS) */
+
+/* Dummy add_event, for when profiling is disabled. */
+
+#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4)
+
+#endif /* defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS) */
+
+#endif /* __MALI_OSK_PROFILING_H__ */
+
+
diff --git a/drivers/gpu/arm/utgard/common/mali_osk_types.h b/drivers/gpu/arm/utgard/common/mali_osk_types.h
new file mode 100644
index 000000000000..b65ad29e16c0
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_osk_types.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_types.h
+ * Defines types of the OS abstraction layer for the kernel device driver (OSK)
+ */
+
+#ifndef __MALI_OSK_TYPES_H__
+#define __MALI_OSK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_osk_miscellaneous OSK Miscellaneous functions, constants and types
+ * @{ */
+
+/* Define integer types used by OSK. Note: these currently clash with Linux so we only define them if not defined already */
+#ifndef __KERNEL__
+typedef unsigned char u8;
+typedef signed char s8;
+typedef unsigned short u16;
+typedef signed short s16;
+typedef unsigned int u32;
+typedef signed int s32;
+typedef unsigned long long u64;
+#define BITS_PER_LONG (sizeof(long)*8)
+#else
+/* Ensure Linux types u32, etc. are defined */
+#include <linux/types.h>
+#endif
+
+/** @brief Mali Boolean type which uses MALI_TRUE and MALI_FALSE
+ */
+typedef unsigned long mali_bool;
+
+#ifndef MALI_TRUE
+#define MALI_TRUE ((mali_bool)1)
+#endif
+
+#ifndef MALI_FALSE
+#define MALI_FALSE ((mali_bool)0)
+#endif
+
+#define MALI_HW_CORE_NO_COUNTER ((u32)-1)
+
+
+#define MALI_S32_MAX 0x7fffffff
+
+/**
+ * @brief OSK Error codes
+ *
+ * Each OS may use its own set of error codes, and may require that the
+ * User/Kernel interface return certain error codes. This means that the common
+ * error codes need to be sufficiently rich to pass the correct error code
+ * through from the OSK to the U/K layer, across all OSs.
+ *
+ * The result is that some error codes will appear redundant on some OSs.
+ * Under all OSs, the OSK layer must translate native OS error codes to
+ * _mali_osk_errcode_t codes. Similarly, the U/K layer must translate from
+ * _mali_osk_errcode_t codes to native OS error codes.
+ */
+typedef enum {
+ _MALI_OSK_ERR_OK = 0, /**< Success. */
+ _MALI_OSK_ERR_FAULT = -1, /**< General non-success */
+ _MALI_OSK_ERR_INVALID_FUNC = -2, /**< Invalid function requested through User/Kernel interface (e.g. bad IOCTL number) */
+ _MALI_OSK_ERR_INVALID_ARGS = -3, /**< Invalid arguments passed through User/Kernel interface */
+ _MALI_OSK_ERR_NOMEM = -4, /**< Insufficient memory */
+ _MALI_OSK_ERR_TIMEOUT = -5, /**< Timeout occurred */
+ _MALI_OSK_ERR_RESTARTSYSCALL = -6, /**< Special: On certain OSs, must report when an interruptible mutex is interrupted. Ignore otherwise. */
+ _MALI_OSK_ERR_ITEM_NOT_FOUND = -7, /**< Table Lookup failed */
+ _MALI_OSK_ERR_BUSY = -8, /**< Device/operation is busy. Try again later */
+ _MALI_OSK_ERR_UNSUPPORTED = -9, /**< Optional part of the interface used, and is unsupported */
+} _mali_osk_errcode_t;
+
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wq OSK work queues
+ * @{ */
+
+/** @brief Private type for work objects */
+typedef struct _mali_osk_wq_work_s _mali_osk_wq_work_t;
+typedef struct _mali_osk_wq_delayed_work_s _mali_osk_wq_delayed_work_t;
+
+/** @brief Work queue handler function
+ *
+ * This function type is called when the work is scheduled by the work queue,
+ * e.g. as an IRQ bottom-half handler.
+ *
+ * Refer to \ref _mali_osk_wq_schedule_work() for more information on the
+ * work-queue and work handlers.
+ *
+ * @param arg resource-specific data
+ */
+typedef void (*_mali_osk_wq_work_handler_t)(void *arg);
+
+/* @} */ /* end group _mali_osk_wq */
+
+/** @defgroup _mali_osk_irq OSK IRQ handling
+ * @{ */
+
+/** @brief Private type for IRQ handling objects */
+typedef struct _mali_osk_irq_t_struct _mali_osk_irq_t;
+
+/** @brief Optional function to trigger an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data */
+typedef void (*_mali_osk_irq_trigger_t)(void *arg);
+
+/** @brief Optional function to acknowledge an irq from a resource
+ *
+ * This function is implemented by the common layer to allow probing of a resource's IRQ.
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was successful, or a suitable _mali_osk_errcode_t on failure. */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_ack_t)(void *arg);
+
+/** @brief IRQ 'upper-half' handler callback.
+ *
+ * This function is implemented by the common layer to do the initial handling of a
+ * resource's IRQ. This maps on to the concept of an ISR that does the minimum
+ * work necessary before handing off to an IST.
+ *
+ * The communication of the resource-specific data from the ISR to the IST is
+ * handled by the OSK implementation.
+ *
+ * On most systems, the IRQ upper-half handler executes in IRQ context.
+ * Therefore, the system may have restrictions about what can be done in this
+ * context.
+ *
+ * If an IRQ upper-half handler requires more work to be done than can be
+ * achieved in an IRQ context, then it may defer the work with
+ * _mali_osk_wq_schedule_work(). Refer to \ref _mali_osk_wq_create_work() for
+ * more information.
+ *
+ * @param arg resource-specific data
+ * @return _MALI_OSK_ERR_OK if the IRQ was correctly handled, or a suitable
+ * _mali_osk_errcode_t otherwise.
+ */
+typedef _mali_osk_errcode_t (*_mali_osk_irq_uhandler_t)(void *arg);
+
+
+/** @} */ /* end group _mali_osk_irq */
+
+
+/** @defgroup _mali_osk_atomic OSK Atomic counters
+ * @{ */
+
+/** @brief Public type of atomic counters
+ *
+ * This is public for allocation on stack. On systems that support it, this is just a single 32-bit value.
+ * On others, it may encapsulate an object stored elsewhere.
+ *
+ * Regardless of implementation, the \ref _mali_osk_atomic functions \b must be used
+ * for all accesses to the variable's value, even if atomicity is not required.
+ * Do not access u.val or u.obj directly.
+ */
+typedef struct {
+ union {
+ u32 val;
+ void *obj;
+ } u;
+} _mali_osk_atomic_t;
+/** @} */ /* end group _mali_osk_atomic */
+
+
+/** @defgroup _mali_osk_lock OSK Mutual Exclusion Locks
+ * @{ */
+
+
+/** @brief OSK Mutual Exclusion Lock ordered list
+ *
+ * This lists the various types of locks in the system and is used to check
+ * that locks are taken in the correct order.
+ *
+ * - Holding more than one lock of the same order at the same time is not
+ * allowed.
+ * - Taking a lock of a lower order than the highest-order lock currently held
+ * is not allowed.
+ *
+ */
+typedef enum {
+ /* || Locks || */
+ /* || must be || */
+ /* _||_ taken in _||_ */
+ /* \ / this \ / */
+ /* \/ order! \/ */
+
+ _MALI_OSK_LOCK_ORDER_FIRST = 0,
+
+ _MALI_OSK_LOCK_ORDER_SESSIONS,
+ _MALI_OSK_LOCK_ORDER_MEM_SESSION,
+ _MALI_OSK_LOCK_ORDER_MEM_INFO,
+ _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE,
+ _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP,
+ _MALI_OSK_LOCK_ORDER_PM_EXECUTION,
+ _MALI_OSK_LOCK_ORDER_EXECUTOR,
+ _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM,
+ _MALI_OSK_LOCK_ORDER_SCHEDULER,
+ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED,
+ _MALI_OSK_LOCK_ORDER_PROFILING,
+ _MALI_OSK_LOCK_ORDER_L2,
+ _MALI_OSK_LOCK_ORDER_L2_COMMAND,
+ _MALI_OSK_LOCK_ORDER_UTILIZATION,
+ _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS,
+ _MALI_OSK_LOCK_ORDER_PM_STATE,
+
+ _MALI_OSK_LOCK_ORDER_LAST,
+} _mali_osk_lock_order_t;
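+
+/* Example (illustrative): a thread that already holds a lock of order
+ * _MALI_OSK_LOCK_ORDER_SCHEDULER may go on to take a lock of the higher
+ * order _MALI_OSK_LOCK_ORDER_PROFILING, but taking a lock of the lower
+ * order _MALI_OSK_LOCK_ORDER_SESSIONS, or a second lock of order
+ * _MALI_OSK_LOCK_ORDER_SCHEDULER, is a locking-order violation. */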
+
+
+/** @brief OSK Mutual Exclusion Lock flags type
+ *
+ * - Any lock can use the order parameter.
+ */
+typedef enum {
+ _MALI_OSK_LOCKFLAG_UNORDERED = 0x1, /**< Indicate that the order of this lock should not be checked */
+ _MALI_OSK_LOCKFLAG_ORDERED = 0x2,
+ /** @enum _mali_osk_lock_flags_t
+ *
+ * Flags from 0x10000--0x80000000 are RESERVED for User-mode */
+
+} _mali_osk_lock_flags_t;
+
+/** @brief Mutual Exclusion Lock Mode Optimization hint
+ *
+ * The lock mode is used to implement read/write locking through the
+ * _mali_osk_mutex_rw_init/wait/signal/term functions. In this case, the RO mode can
+ * be used to allow multiple concurrent readers, but no writers. The RW mode is used for
+ * writers, and so will wait for all readers to release the lock (if any are present).
+ * Further readers and writers will wait until the writer releases the lock.
+ *
+ * The mode is purely an optimization hint: for example, it is permissible for
+ * all locks to behave in RW mode, regardless of the mode supplied.
+ *
+ * It is an error to attempt to use locks in anything other than RW mode when
+ * calling _mali_osk_mutex_rw_wait/signal().
+ *
+ */
+typedef enum {
+ _MALI_OSK_LOCKMODE_UNDEF = -1, /**< Undefined lock mode. For internal use only */
+ _MALI_OSK_LOCKMODE_RW = 0x0, /**< Read-write mode, default. All readers and writers are mutually-exclusive */
+ _MALI_OSK_LOCKMODE_RO, /**< Read-only mode, to support multiple concurrent readers, but mutual exclusion in the presence of writers. */
+ /** @enum _mali_osk_lock_mode_t
+ *
+ * Lock modes 0x40--0x7F are RESERVED for User-mode */
+} _mali_osk_lock_mode_t;
+
+/** @brief Private types for Mutual Exclusion lock objects */
+typedef struct _mali_osk_lock_debug_s _mali_osk_lock_debug_t;
+typedef struct _mali_osk_spinlock_s _mali_osk_spinlock_t;
+typedef struct _mali_osk_spinlock_irq_s _mali_osk_spinlock_irq_t;
+typedef struct _mali_osk_mutex_s _mali_osk_mutex_t;
+typedef struct _mali_osk_mutex_rw_s _mali_osk_mutex_rw_t;
+
+/** @} */ /* end group _mali_osk_lock */
+
+/** @defgroup _mali_osk_low_level_memory OSK Low-level Memory Operations
+ * @{ */
+
+/**
+ * @brief Private data type for use in IO accesses to/from devices.
+ *
+ * This represents some range that is accessible from the device. Examples
+ * include:
+ * - Device Registers, which could be readable and/or writeable.
+ * - Memory that the device has access to, for storing configuration structures.
+ *
+ * Access to this range must be made through the _mali_osk_mem_ioread32() and
+ * _mali_osk_mem_iowrite32() functions.
+ */
+typedef struct _mali_io_address *mali_io_address;
+
+/** @defgroup _MALI_OSK_CPU_PAGE CPU Physical page size macros.
+ *
+ * The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The CPU Physical Page Size has been assumed to be the same as the Mali
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** CPU Page Order, as log to base 2 of the Page size. @see _MALI_OSK_CPU_PAGE_SIZE */
+#define _MALI_OSK_CPU_PAGE_ORDER ((u32)12)
+/** CPU Page Size, in bytes. */
+#define _MALI_OSK_CPU_PAGE_SIZE (((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER))
+/** CPU Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_CPU_PAGE_MASK (~((((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) - ((u32)1)))
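+
+/* Worked example (illustrative): with the 4KB page size above,
+ *   addr                            = 0x12345678
+ *   addr & _MALI_OSK_CPU_PAGE_MASK  = 0x12345000   (start of the page)
+ *   addr & ~_MALI_OSK_CPU_PAGE_MASK = 0x00000678   (offset within the page)
+ */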
+/** @} */ /* end of group _MALI_OSK_CPU_PAGE */
+
+/** @defgroup _MALI_OSK_MALI_PAGE Mali Physical Page size macros
+ *
+ * Mali Physical page size macros. The order of the page size is supplied for
+ * ease of use by algorithms that might require it, since it is easier to know
+ * it ahead of time rather than calculating it.
+ *
+ * The Mali Page Mask macro masks off the lower bits of a physical address to
+ * give the start address of the page for that physical address.
+ *
+ * @note The Mali device driver code is designed for systems with 4KB page size.
+ * Changing these macros will not make the entire Mali device driver work with
+ * page sizes other than 4KB.
+ *
+ * @note The Mali Physical Page Size has been assumed to be the same as the CPU
+ * Physical Page Size.
+ *
+ * @{
+ */
+
+/** Mali Page Order, as log to base 2 of the Page size. @see _MALI_OSK_MALI_PAGE_SIZE */
+#define _MALI_OSK_MALI_PAGE_ORDER PAGE_SHIFT
+/** Mali Page Size, in bytes. */
+#define _MALI_OSK_MALI_PAGE_SIZE PAGE_SIZE
+/** Mali Page Mask, which masks off the offset within a page */
+#define _MALI_OSK_MALI_PAGE_MASK PAGE_MASK
+/** @} */ /* end of group _MALI_OSK_MALI_PAGE*/
+
+/** @brief flags for mapping a user-accessible memory range
+ *
+ * Where a function with prefix '_mali_osk_mem_mapregion' accepts flags as one
+ * of the function parameters, it will use one of these. These allow per-page
+ * control over mappings. Compare with the mali_memory_allocation_flag type,
+ * which acts over an entire range
+ *
+ * These may be OR'd together with bitwise OR (|), but must be cast back into
+ * the type after OR'ing.
+ */
+typedef enum {
+ _MALI_OSK_MEM_MAPREGION_FLAG_OS_ALLOCATED_PHYSADDR = 0x1, /**< Physical address is OS Allocated */
+} _mali_osk_mem_mapregion_flags_t;
+/** @} */ /* end group _mali_osk_low_level_memory */
+
+/** @defgroup _mali_osk_notification OSK Notification Queues
+ * @{ */
+
+/** @brief Private type for notification queue objects */
+typedef struct _mali_osk_notification_queue_t_struct _mali_osk_notification_queue_t;
+
+/** @brief Public notification data object type */
+typedef struct _mali_osk_notification_t_struct {
+ u32 notification_type; /**< The notification type */
+ u32 result_buffer_size; /**< Size of the result buffer to copy to user space */
+ void *result_buffer; /**< Buffer containing any type specific data */
+} _mali_osk_notification_t;
+
+/** @} */ /* end group _mali_osk_notification */
+
+
+/** @defgroup _mali_osk_timer OSK Timer Callbacks
+ * @{ */
+
+/** @brief Function to call when a timer expires
+ *
+ * When a timer expires, this function is called. Note that on many systems,
+ * a timer callback will be executed in IRQ context. Therefore, restrictions
+ * may apply on what can be done inside the timer callback.
+ *
+ * If a timer requires more work to be done than can be achieved in an IRQ
+ * context, then it may defer the work with a work-queue. For example, it may
+ * use \ref _mali_osk_wq_schedule_work() to make use of a bottom-half handler
+ * to carry out the remaining work.
+ *
+ * Stopping the timer with \ref _mali_osk_timer_del() blocks on completion of
+ * the callback. Therefore, the callback may not obtain any mutexes also held
+ * by any callers of _mali_osk_timer_del(). Otherwise, a deadlock may occur.
+ *
+ * @param arg Function-specific data */
+typedef void (*_mali_osk_timer_callback_t)(void *arg);
+
+/** @brief Private type for Timer Callback Objects */
+typedef struct _mali_osk_timer_t_struct _mali_osk_timer_t;
+/** @} */ /* end group _mali_osk_timer */
+
+
+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists
+ * @{ */
+
+/** @brief Public List objects.
+ *
+ * To use, add a _mali_osk_list_t member to the structure that may become part
+ * of a list. When traversing the _mali_osk_list_t objects, use the
+ * _MALI_OSK_CONTAINER_OF() macro to recover the structure from its
+ * _mali_osk_list_t member.
+ *
+ * Each structure may have multiple _mali_osk_list_t members, so that the
+ * structure is part of multiple lists. When traversing lists, ensure that the
+ * correct _mali_osk_list_t member is used, because type-checking will be
+ * lost by the compiler.
+ */
+typedef struct _mali_osk_list_s {
+ struct _mali_osk_list_s *next;
+ struct _mali_osk_list_s *prev;
+} _mali_osk_list_t;
+/** @} */ /* end group _mali_osk_list */
+
+/** @addtogroup _mali_osk_miscellaneous
+ * @{ */
+
+/** @brief resource description struct
+ *
+ * Platform independent representation of a Mali HW resource
+ */
+typedef struct _mali_osk_resource {
+ const char *description; /**< short description of the resource */
+ uintptr_t base; /**< Physical base address of the resource, as seen by Mali resources. */
+ const char *irq_name; /**< Name of the IRQ belonging to this resource */
+ u32 irq; /**< IRQ number delivered to the CPU, or -1 to tell the driver to probe for it (if possible) */
+} _mali_osk_resource_t;
+/** @} */ /* end group _mali_osk_miscellaneous */
+
+/** @defgroup _mali_osk_wait_queue OSK Wait Queue functionality
+ * @{ */
+/** @brief Private type for wait queue objects */
+typedef struct _mali_osk_wait_queue_t_struct _mali_osk_wait_queue_t;
+/** @} */ /* end group _mali_osk_wait_queue */
+
+/** @} */ /* end group osuapi */
+
+/** @} */ /* end group uddapi */
+
+/** @brief Mali print ctx type which uses seq_file
+ */
+typedef struct seq_file _mali_osk_print_ctx;
+
+#define _MALI_OSK_BITMAP_INVALIDATE_INDEX -1
+
+typedef struct _mali_osk_bitmap {
+ u32 reserve;
+ u32 last;
+ u32 max;
+ u32 avail;
+ _mali_osk_spinlock_t *lock;
+ unsigned long *table;
+} _mali_osk_bitmap_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_OSK_TYPES_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_pm.c b/drivers/gpu/arm/utgard/common/mali_pm.c
new file mode 100644
index 000000000000..dbd94d310741
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pm.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_pm.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_scheduler.h"
+#include "mali_group.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+
+#include "mali_executor.h"
+#include "mali_control_timer.h"
+
+#if defined(DEBUG)
+u32 num_pm_runtime_resume = 0;
+u32 num_pm_updates = 0;
+u32 num_pm_updates_up = 0;
+u32 num_pm_updates_down = 0;
+#endif
+
+#define MALI_PM_DOMAIN_DUMMY_MASK (1 << MALI_DOMAIN_INDEX_DUMMY)
+
+/* lock protecting power state (including pm_domains) */
+static _mali_osk_spinlock_irq_t *pm_lock_state = NULL;
+
+/* the wanted domain mask (protected by pm_lock_state) */
+static u32 pd_mask_wanted = 0;
+
+/* work item used to defer the actual power changes */
+static _mali_osk_wq_work_t *pm_work = NULL;
+
+/* lock protecting power change execution */
+static _mali_osk_mutex_t *pm_lock_exec = NULL;
+
+/* PMU domains which are actually powered on (protected by pm_lock_exec) */
+static u32 pmu_mask_current = 0;
+
+/*
+ * domains which are marked as powered on (protected by pm_lock_exec).
+ * This can be different from pmu_mask_current right after GPU power on
+ * if the PMU domains default to powered up.
+ */
+static u32 pd_mask_current = 0;
+
+static u16 domain_config[MALI_MAX_NUMBER_OF_DOMAINS] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1 << MALI_DOMAIN_INDEX_DUMMY
+};
+
+/* The relative core power cost */
+#define MALI_GP_COST 3
+#define MALI_PP_COST 6
+#define MALI_L2_COST 1
+
+/*
+ * We have MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1 rows in this matrix
+ * because we must store the masks for each possible number of PP cores:
+ * 0, 1, 2, 3, 4, 5, 6, 7, 8.
+ */
+static int mali_pm_domain_power_cost_result[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1][MALI_MAX_NUMBER_OF_DOMAINS];
+/*
+ * Keep track of runtime PM state, so that we know
+ * how to resume during OS resume.
+ */
+#ifdef CONFIG_PM_RUNTIME
+static mali_bool mali_pm_runtime_active = MALI_FALSE;
+#else
+/* When the kernel is built without PM_RUNTIME, keep this flag always
+ * true, since the GPU will never be powered off by runtime PM. */
+static mali_bool mali_pm_runtime_active = MALI_TRUE;
+#endif
+
+static void mali_pm_state_lock(void);
+static void mali_pm_state_unlock(void);
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void);
+static void mali_pm_set_pmu_domain_config(void);
+static u32 mali_pm_get_registered_cores_mask(void);
+static void mali_pm_update_sync_internal(void);
+static mali_bool mali_pm_common_suspend(void);
+static void mali_pm_update_work(void *data);
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+const char *mali_pm_group_stats_to_string(void);
+#endif
+
+_mali_osk_errcode_t mali_pm_initialize(void)
+{
+ _mali_osk_errcode_t err;
+ struct mali_pmu_core *pmu;
+
+ pm_lock_state = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED,
+ _MALI_OSK_LOCK_ORDER_PM_STATE);
+ if (NULL == pm_lock_state) {
+ mali_pm_terminate();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ pm_lock_exec = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+ _MALI_OSK_LOCK_ORDER_PM_STATE);
+ if (NULL == pm_lock_exec) {
+ mali_pm_terminate();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ pm_work = _mali_osk_wq_create_work(mali_pm_update_work, NULL);
+ if (NULL == pm_work) {
+ mali_pm_terminate();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ pmu = mali_pmu_get_global_pmu_core();
+ if (NULL != pmu) {
+ /*
+ * We have a Mali PMU, set the correct domain
+ * configuration (default or custom)
+ */
+
+ u32 registered_cores_mask;
+
+ mali_pm_set_pmu_domain_config();
+
+ registered_cores_mask = mali_pm_get_registered_cores_mask();
+ mali_pmu_set_registered_cores_mask(pmu, registered_cores_mask);
+
+ MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+ }
+
+ /* Create all power domains needed (at least one dummy domain) */
+ err = mali_pm_create_pm_domains();
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_pm_terminate();
+ return err;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_pm_terminate(void)
+{
+ if (NULL != pm_work) {
+ _mali_osk_wq_delete_work(pm_work);
+ pm_work = NULL;
+ }
+
+ mali_pm_domain_terminate();
+
+ if (NULL != pm_lock_exec) {
+ _mali_osk_mutex_term(pm_lock_exec);
+ pm_lock_exec = NULL;
+ }
+
+ if (NULL != pm_lock_state) {
+ _mali_osk_spinlock_irq_term(pm_lock_state);
+ pm_lock_state = NULL;
+ }
+}
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+ struct mali_l2_cache_core *l2_cache)
+{
+ struct mali_pm_domain *domain;
+
+ domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+ if (NULL == domain) {
+ MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+ domain = mali_pm_domain_get_from_index(
+ MALI_DOMAIN_INDEX_DUMMY);
+ domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+ } else {
+ MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+ }
+
+ MALI_DEBUG_ASSERT(NULL != domain);
+
+ mali_pm_domain_add_l2_cache(domain, l2_cache);
+
+ return domain; /* return the actual domain this was registered in */
+}
+
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+ struct mali_group *group)
+{
+ struct mali_pm_domain *domain;
+
+ domain = mali_pm_domain_get_from_mask(domain_config[domain_index]);
+ if (NULL == domain) {
+ MALI_DEBUG_ASSERT(0 == domain_config[domain_index]);
+ domain = mali_pm_domain_get_from_index(
+ MALI_DOMAIN_INDEX_DUMMY);
+ domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK;
+ } else {
+ MALI_DEBUG_ASSERT(0 != domain_config[domain_index]);
+ }
+
+ MALI_DEBUG_ASSERT(NULL != domain);
+
+ mali_pm_domain_add_group(domain, group);
+
+ return domain; /* return the actual domain this was registered in */
+}
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+ struct mali_group **groups,
+ u32 num_domains)
+{
+ mali_bool ret = MALI_TRUE; /* Assume all is powered on instantly */
+ u32 i;
+
+ mali_pm_state_lock();
+
+ for (i = 0; i < num_domains; i++) {
+ MALI_DEBUG_ASSERT_POINTER(domains[i]);
+ pd_mask_wanted |= mali_pm_domain_ref_get(domains[i]);
+ if (MALI_FALSE == mali_pm_domain_power_is_on(domains[i])) {
+ /*
+ * Tell caller that the corresponding group
+ * was not already powered on.
+ */
+ ret = MALI_FALSE;
+ } else {
+ /*
+			 * There is a time gap between powering on the domain
+			 * and marking the corresponding groups as powered on.
+ */
+ if (NULL != groups[i] &&
+ MALI_FALSE == mali_group_power_is_on(groups[i])) {
+ ret = MALI_FALSE;
+ }
+ }
+ }
+
+ MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (get refs)\n", pd_mask_wanted));
+
+ mali_pm_state_unlock();
+
+ return ret;
+}
+
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+ u32 num_domains)
+{
+ u32 mask = 0;
+ mali_bool ret;
+ u32 i;
+
+ mali_pm_state_lock();
+
+ for (i = 0; i < num_domains; i++) {
+ MALI_DEBUG_ASSERT_POINTER(domains[i]);
+ mask |= mali_pm_domain_ref_put(domains[i]);
+ }
+
+ if (0 == mask) {
+ /* return false, all domains should still stay on */
+ ret = MALI_FALSE;
+ } else {
+ /* Assert that we are dealing with a change */
+ MALI_DEBUG_ASSERT((pd_mask_wanted & mask) == mask);
+
+ /* Update our desired domain mask */
+ pd_mask_wanted &= ~mask;
+
+ /* return true; one or more domains can now be powered down */
+ ret = MALI_TRUE;
+ }
+
+ MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (put refs)\n", pd_mask_wanted));
+
+ mali_pm_state_unlock();
+
+ return ret;
+}
+
+void mali_pm_init_begin(void)
+{
+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+ _mali_osk_pm_dev_ref_get_sync();
+
+ /* Ensure all PMU domains are on */
+ if (NULL != pmu) {
+ mali_pmu_power_up_all(pmu);
+ }
+}
+
+void mali_pm_init_end(void)
+{
+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+ /* Ensure all PMU domains are off */
+ if (NULL != pmu) {
+ mali_pmu_power_down_all(pmu);
+ }
+
+ _mali_osk_pm_dev_ref_put();
+}
+
+void mali_pm_update_sync(void)
+{
+ mali_pm_exec_lock();
+
+ if (MALI_TRUE == mali_pm_runtime_active) {
+ /*
+ * Only update if GPU is powered on.
+ * Deactivation of the last group will result in both a
+ * deferred runtime PM suspend operation and
+ * deferred execution of this function.
+ * mali_pm_runtime_active will be false if runtime PM
+ * executed first and thus the GPU is now fully powered off.
+ */
+ mali_pm_update_sync_internal();
+ }
+
+ mali_pm_exec_unlock();
+}
+
+void mali_pm_update_async(void)
+{
+ _mali_osk_wq_schedule_work(pm_work);
+}
+
+void mali_pm_os_suspend(mali_bool os_suspend)
+{
+ int ret;
+
+ MALI_DEBUG_PRINT(3, ("Mali PM: OS suspend\n"));
+
+ /* Suspend execution of all jobs, and go to inactive state */
+ mali_executor_suspend();
+
+ if (os_suspend) {
+ mali_control_timer_suspend(MALI_TRUE);
+ }
+
+ mali_pm_exec_lock();
+
+ ret = mali_pm_common_suspend();
+
+ MALI_DEBUG_ASSERT(MALI_TRUE == ret);
+ MALI_IGNORE(ret);
+
+ mali_pm_exec_unlock();
+}
+
+void mali_pm_os_resume(void)
+{
+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+ MALI_DEBUG_PRINT(3, ("Mali PM: OS resume\n"));
+
+ mali_pm_exec_lock();
+
+#if defined(DEBUG)
+ mali_pm_state_lock();
+
+ /* Assert that things are as we left them in os_suspend(). */
+ MALI_DEBUG_ASSERT(0 == pd_mask_wanted);
+ MALI_DEBUG_ASSERT(0 == pd_mask_current);
+ MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+ mali_pm_state_unlock();
+#endif
+
+ if (MALI_TRUE == mali_pm_runtime_active) {
+ /* Runtime PM was active, so reset PMU */
+ if (NULL != pmu) {
+ mali_pmu_reset(pmu);
+ pmu_mask_current = mali_pmu_get_mask(pmu);
+
+ MALI_DEBUG_PRINT(3, ("Mali PM: OS resume 0x%x \n", pmu_mask_current));
+ }
+
+ mali_pm_update_sync_internal();
+ }
+
+ mali_pm_exec_unlock();
+
+ /* Start executing jobs again */
+ mali_executor_resume();
+}
+
+mali_bool mali_pm_runtime_suspend(void)
+{
+ mali_bool ret;
+
+ MALI_DEBUG_PRINT(3, ("Mali PM: Runtime suspend\n"));
+
+ mali_pm_exec_lock();
+
+ /*
+	 * Put the SW state directly into the "off" state, and do not bother
+	 * to power down each power domain, because the entire GPU will be
+	 * powered off when we return.
+ * For runtime PM suspend, in contrast to OS suspend, there is a race
+ * between this function and the mali_pm_update_sync_internal(), which
+ * is fine...
+ */
+ ret = mali_pm_common_suspend();
+ if (MALI_TRUE == ret) {
+ mali_pm_runtime_active = MALI_FALSE;
+ } else {
+ /*
+ * Process the "power up" instead,
+ * which could have been "lost"
+ */
+ mali_pm_update_sync_internal();
+ }
+
+ mali_pm_exec_unlock();
+
+ return ret;
+}
+
+void mali_pm_runtime_resume(void)
+{
+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+ mali_pm_exec_lock();
+
+ mali_pm_runtime_active = MALI_TRUE;
+
+#if defined(DEBUG)
+ ++num_pm_runtime_resume;
+
+ mali_pm_state_lock();
+
+ /*
+ * Assert that things are as we left them in runtime_suspend(),
+ * except for pd_mask_wanted which normally will be the reason we
+ * got here (job queued => domains wanted)
+ */
+ MALI_DEBUG_ASSERT(0 == pd_mask_current);
+ MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+ mali_pm_state_unlock();
+#endif
+
+ if (NULL != pmu) {
+ mali_pmu_reset(pmu);
+ pmu_mask_current = mali_pmu_get_mask(pmu);
+ MALI_DEBUG_PRINT(3, ("Mali PM: Runtime resume 0x%x \n", pmu_mask_current));
+ }
+
+ /*
+ * Normally we are resumed because a job has just been queued.
+ * pd_mask_wanted should thus be != 0.
+ * It is however possible for others to take a Mali Runtime PM ref
+ * without having a job queued.
+ * We should however always call mali_pm_update_sync_internal(),
+ * because this will take care of any potential mismatch between
+ * pmu_mask_current and pd_mask_current.
+ */
+ mali_pm_update_sync_internal();
+
+ mali_pm_exec_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+ char *buf, u32 size)
+{
+ int n = 0;
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tPower domain: id %u\n",
+ mali_pm_domain_get_id(domain));
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\t\tMask: 0x%04x\n",
+ mali_pm_domain_get_mask(domain));
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\t\tUse count: %u\n",
+ mali_pm_domain_get_use_count(domain));
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\t\tCurrent power state: %s\n",
+ (mali_pm_domain_get_mask(domain) & pd_mask_current) ?
+ "On" : "Off");
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\t\tWanted power state: %s\n",
+ (mali_pm_domain_get_mask(domain) & pd_mask_wanted) ?
+ "On" : "Off");
+
+ return n;
+}
+#endif
+
+static void mali_pm_state_lock(void)
+{
+ _mali_osk_spinlock_irq_lock(pm_lock_state);
+}
+
+static void mali_pm_state_unlock(void)
+{
+ _mali_osk_spinlock_irq_unlock(pm_lock_state);
+}
+
+void mali_pm_exec_lock(void)
+{
+ _mali_osk_mutex_wait(pm_lock_exec);
+}
+
+void mali_pm_exec_unlock(void)
+{
+ _mali_osk_mutex_signal(pm_lock_exec);
+}
+
+static void mali_pm_domain_power_up(u32 power_up_mask,
+ struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS],
+ u32 *num_groups_up,
+ struct mali_l2_cache_core *l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+ u32 *num_l2_up)
+{
+ u32 domain_bit;
+ u32 notify_mask = power_up_mask;
+
+ MALI_DEBUG_ASSERT(0 != power_up_mask);
+ MALI_DEBUG_ASSERT_POINTER(groups_up);
+ MALI_DEBUG_ASSERT_POINTER(num_groups_up);
+ MALI_DEBUG_ASSERT(0 == *num_groups_up);
+ MALI_DEBUG_ASSERT_POINTER(l2_up);
+ MALI_DEBUG_ASSERT_POINTER(num_l2_up);
+ MALI_DEBUG_ASSERT(0 == *num_l2_up);
+
+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+ MALI_DEBUG_PRINT(5,
+ ("PM update: Powering up domains: . [%s]\n",
+ mali_pm_mask_to_string(power_up_mask)));
+
+ pd_mask_current |= power_up_mask;
+
+ domain_bit = _mali_osk_fls(notify_mask);
+ while (0 != domain_bit) {
+ u32 domain_id = domain_bit - 1;
+ struct mali_pm_domain *domain =
+ mali_pm_domain_get_from_index(
+ domain_id);
+ struct mali_l2_cache_core *l2_cache;
+ struct mali_l2_cache_core *l2_cache_tmp;
+ struct mali_group *group;
+ struct mali_group *group_tmp;
+
+ /* Mark domain as powered up */
+ mali_pm_domain_set_power_on(domain, MALI_TRUE);
+
+ /*
+ * Make a note of the L2 and/or group(s) to notify
+ * (need to release the PM state lock before doing so)
+ */
+
+ _MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+ l2_cache_tmp,
+ mali_pm_domain_get_l2_cache_list(
+ domain),
+ struct mali_l2_cache_core,
+ pm_domain_list) {
+ MALI_DEBUG_ASSERT(*num_l2_up <
+ MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+ l2_up[*num_l2_up] = l2_cache;
+ (*num_l2_up)++;
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group,
+ group_tmp,
+ mali_pm_domain_get_group_list(domain),
+ struct mali_group,
+ pm_domain_list) {
+ MALI_DEBUG_ASSERT(*num_groups_up <
+ MALI_MAX_NUMBER_OF_GROUPS);
+ groups_up[*num_groups_up] = group;
+
+ (*num_groups_up)++;
+ }
+
+ /* Remove current bit and find next */
+ notify_mask &= ~(1 << (domain_id));
+ domain_bit = _mali_osk_fls(notify_mask);
+ }
+}
+static void mali_pm_domain_power_down(u32 power_down_mask,
+ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS],
+ u32 *num_groups_down,
+ struct mali_l2_cache_core *l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES],
+ u32 *num_l2_down)
+{
+ u32 domain_bit;
+ u32 notify_mask = power_down_mask;
+
+ MALI_DEBUG_ASSERT(0 != power_down_mask);
+ MALI_DEBUG_ASSERT_POINTER(groups_down);
+ MALI_DEBUG_ASSERT_POINTER(num_groups_down);
+ MALI_DEBUG_ASSERT(0 == *num_groups_down);
+ MALI_DEBUG_ASSERT_POINTER(l2_down);
+ MALI_DEBUG_ASSERT_POINTER(num_l2_down);
+ MALI_DEBUG_ASSERT(0 == *num_l2_down);
+
+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state);
+
+ MALI_DEBUG_PRINT(5,
+ ("PM update: Powering down domains: [%s]\n",
+ mali_pm_mask_to_string(power_down_mask)));
+
+ pd_mask_current &= ~power_down_mask;
+
+ domain_bit = _mali_osk_fls(notify_mask);
+ while (0 != domain_bit) {
+ u32 domain_id = domain_bit - 1;
+ struct mali_pm_domain *domain =
+ mali_pm_domain_get_from_index(domain_id);
+ struct mali_l2_cache_core *l2_cache;
+ struct mali_l2_cache_core *l2_cache_tmp;
+ struct mali_group *group;
+ struct mali_group *group_tmp;
+
+ /* Mark domain as powered down */
+ mali_pm_domain_set_power_on(domain, MALI_FALSE);
+
+ /*
+ * Make a note of the L2s and/or groups to notify
+ * (need to release the PM state lock before doing so)
+ */
+
+ _MALI_OSK_LIST_FOREACHENTRY(l2_cache,
+ l2_cache_tmp,
+ mali_pm_domain_get_l2_cache_list(domain),
+ struct mali_l2_cache_core,
+ pm_domain_list) {
+ MALI_DEBUG_ASSERT(*num_l2_down <
+ MALI_MAX_NUMBER_OF_L2_CACHE_CORES);
+ l2_down[*num_l2_down] = l2_cache;
+ (*num_l2_down)++;
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(group,
+ group_tmp,
+ mali_pm_domain_get_group_list(domain),
+ struct mali_group,
+ pm_domain_list) {
+ MALI_DEBUG_ASSERT(*num_groups_down <
+ MALI_MAX_NUMBER_OF_GROUPS);
+ groups_down[*num_groups_down] = group;
+ (*num_groups_down)++;
+ }
+
+ /* Remove current bit and find next */
+ notify_mask &= ~(1 << (domain_id));
+ domain_bit = _mali_osk_fls(notify_mask);
+ }
+}
+
+/*
+ * Execute pending power domain changes.
+ * The pm_lock_exec lock must be held by the caller.
+ */
+static void mali_pm_update_sync_internal(void)
+{
+ /*
+ * This should only be called in non-atomic context
+ * (normally as deferred work)
+ *
+ * Look at the pending power domain changes, and execute these.
+ * Make sure group and schedulers are notified about changes.
+ */
+
+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+
+ u32 power_down_mask;
+ u32 power_up_mask;
+
+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+
+#if defined(DEBUG)
+ ++num_pm_updates;
+#endif
+
+ /* Hold PM state lock while we look at (and obey) the wanted state */
+ mali_pm_state_lock();
+
+ MALI_DEBUG_PRINT(5, ("PM update pre: Wanted domain mask: .. [%s]\n",
+ mali_pm_mask_to_string(pd_mask_wanted)));
+ MALI_DEBUG_PRINT(5, ("PM update pre: Current domain mask: . [%s]\n",
+ mali_pm_mask_to_string(pd_mask_current)));
+ MALI_DEBUG_PRINT(5, ("PM update pre: Current PMU mask: .... [%s]\n",
+ mali_pm_mask_to_string(pmu_mask_current)));
+ MALI_DEBUG_PRINT(5, ("PM update pre: Group power stats: ... <%s>\n",
+ mali_pm_group_stats_to_string()));
+
+ /* Figure out which cores we need to power on */
+ power_up_mask = pd_mask_wanted &
+ (pd_mask_wanted ^ pd_mask_current);
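+	/*
+	 * Worked example (illustrative values): if pd_mask_wanted is 0x0A and
+	 * pd_mask_current is 0x06, then wanted ^ current is 0x0C; ANDed with
+	 * pd_mask_wanted this gives 0x08, so only domain bit 3 must power up.
+	 * The symmetric expression further down gives 0x04 for power down.
+	 */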
+
+ if (0 != power_up_mask) {
+ u32 power_up_mask_pmu;
+ struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS];
+ u32 num_groups_up = 0;
+ struct mali_l2_cache_core *
+ l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+ u32 num_l2_up = 0;
+ u32 i;
+
+#if defined(DEBUG)
+ ++num_pm_updates_up;
+#endif
+
+ /*
+ * Make sure dummy/global domain is always included when
+ * powering up, since this is controlled by runtime PM,
+ * and device power is on at this stage.
+ */
+ power_up_mask |= MALI_PM_DOMAIN_DUMMY_MASK;
+
+ /* Power up only real PMU domains */
+ power_up_mask_pmu = power_up_mask & ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+ /* But not those that happen to be powered on already */
+ power_up_mask_pmu &= (power_up_mask ^ pmu_mask_current) &
+ power_up_mask;
+
+ if (0 != power_up_mask_pmu) {
+ MALI_DEBUG_ASSERT(NULL != pmu);
+ pmu_mask_current |= power_up_mask_pmu;
+ mali_pmu_power_up(pmu, power_up_mask_pmu);
+ }
+
+ /*
+ * Put the domains themselves in power up state.
+ * We get the groups and L2s to notify in return.
+ */
+ mali_pm_domain_power_up(power_up_mask,
+ groups_up, &num_groups_up,
+ l2_up, &num_l2_up);
+
+ /* Need to unlock PM state lock before notifying L2 + groups */
+ mali_pm_state_unlock();
+
+	/* Notify each L2 cache that it has been powered up */
+ for (i = 0; i < num_l2_up; i++) {
+ mali_l2_cache_power_up(l2_up[i]);
+ }
+
+ /*
+ * Tell execution module about all the groups we have
+ * powered up. Groups will be notified as a result of this.
+ */
+ mali_executor_group_power_up(groups_up, num_groups_up);
+
+ /* Lock state again before checking for power down */
+ mali_pm_state_lock();
+ }
+
+ /* Figure out which cores we need to power off */
+ power_down_mask = pd_mask_current &
+ (pd_mask_wanted ^ pd_mask_current);
+
+ /*
+	 * Never power down the dummy/global domain here. This is to be done
+	 * from a suspend request (since this domain is only physically powered
+	 * down at that point).
+ */
+ power_down_mask &= ~MALI_PM_DOMAIN_DUMMY_MASK;
+
+ if (0 != power_down_mask) {
+ u32 power_down_mask_pmu;
+ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+ u32 num_groups_down = 0;
+ struct mali_l2_cache_core *
+ l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+ u32 num_l2_down = 0;
+ u32 i;
+
+#if defined(DEBUG)
+ ++num_pm_updates_down;
+#endif
+
+ /*
+ * Put the domains themselves in power down state.
+ * We get the groups and L2s to notify in return.
+ */
+ mali_pm_domain_power_down(power_down_mask,
+ groups_down, &num_groups_down,
+ l2_down, &num_l2_down);
+
+ /* Need to unlock PM state lock before notifying L2 + groups */
+ mali_pm_state_unlock();
+
+ /*
+ * Tell execution module about all the groups we will be
+ * powering down. Groups will be notified as a result of this.
+ */
+ if (0 < num_groups_down) {
+ mali_executor_group_power_down(groups_down, num_groups_down);
+ }
+
+ /* Notify each L2 cache that we will be powering down */
+ for (i = 0; i < num_l2_down; i++) {
+ mali_l2_cache_power_down(l2_down[i]);
+ }
+
+ /*
+		 * Power down only PMU domains which should not stay on.
+		 * Some domains might, for instance, currently be incorrectly
+		 * powered up if the default domain power state is all on.
+ */
+ power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+ if (0 != power_down_mask_pmu) {
+ MALI_DEBUG_ASSERT(NULL != pmu);
+ pmu_mask_current &= ~power_down_mask_pmu;
+ mali_pmu_power_down(pmu, power_down_mask_pmu);
+
+ }
+ } else {
+ /*
+		 * Power down only PMU domains which should not stay on.
+		 * Some domains might, for instance, currently be incorrectly
+		 * powered up if the default domain power state is all on.
+ */
+ u32 power_down_mask_pmu;
+
+ /* No need for state lock since we'll only update PMU */
+ mali_pm_state_unlock();
+
+ power_down_mask_pmu = pmu_mask_current & (~pd_mask_current);
+
+ if (0 != power_down_mask_pmu) {
+ MALI_DEBUG_ASSERT(NULL != pmu);
+ pmu_mask_current &= ~power_down_mask_pmu;
+ mali_pmu_power_down(pmu, power_down_mask_pmu);
+ }
+ }
+
+ MALI_DEBUG_PRINT(5, ("PM update post: Current domain mask: . [%s]\n",
+ mali_pm_mask_to_string(pd_mask_current)));
+ MALI_DEBUG_PRINT(5, ("PM update post: Current PMU mask: .... [%s]\n",
+ mali_pm_mask_to_string(pmu_mask_current)));
+ MALI_DEBUG_PRINT(5, ("PM update post: Group power stats: ... <%s>\n",
+ mali_pm_group_stats_to_string()));
+}
+
+static mali_bool mali_pm_common_suspend(void)
+{
+ mali_pm_state_lock();
+
+ if (0 != pd_mask_wanted) {
+ MALI_DEBUG_PRINT(5, ("PM: Aborting suspend operation\n\n\n"));
+ mali_pm_state_unlock();
+ return MALI_FALSE;
+ }
+
+ MALI_DEBUG_PRINT(5, ("PM suspend pre: Wanted domain mask: .. [%s]\n",
+ mali_pm_mask_to_string(pd_mask_wanted)));
+ MALI_DEBUG_PRINT(5, ("PM suspend pre: Current domain mask: . [%s]\n",
+ mali_pm_mask_to_string(pd_mask_current)));
+ MALI_DEBUG_PRINT(5, ("PM suspend pre: Current PMU mask: .... [%s]\n",
+ mali_pm_mask_to_string(pmu_mask_current)));
+ MALI_DEBUG_PRINT(5, ("PM suspend pre: Group power stats: ... <%s>\n",
+ mali_pm_group_stats_to_string()));
+
+ if (0 != pd_mask_current) {
+ /*
+		 * We still have some domains powered on.
+		 * It is, for instance, perfectly normal that at least the
+		 * dummy/global domain is marked as powered on at this point
+		 * (because it is physically powered on until this function
+		 * returns).
+ */
+
+ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS];
+ u32 num_groups_down = 0;
+ struct mali_l2_cache_core *
+ l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES];
+ u32 num_l2_down = 0;
+ u32 i;
+
+ /*
+ * Put the domains themselves in power down state.
+ * We get the groups and L2s to notify in return.
+ */
+ mali_pm_domain_power_down(pd_mask_current,
+ groups_down,
+ &num_groups_down,
+ l2_down,
+ &num_l2_down);
+
+ MALI_DEBUG_ASSERT(0 == pd_mask_current);
+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+ /* Need to unlock PM state lock before notifying L2 + groups */
+ mali_pm_state_unlock();
+
+ /*
+ * Tell execution module about all the groups we will be
+ * powering down. Groups will be notified as a result of this.
+ */
+ if (0 < num_groups_down) {
+ mali_executor_group_power_down(groups_down, num_groups_down);
+ }
+
+ /* Notify each L2 cache that we will be powering down */
+ for (i = 0; i < num_l2_down; i++) {
+ mali_l2_cache_power_down(l2_down[i]);
+ }
+
+ pmu_mask_current = 0;
+ } else {
+ MALI_DEBUG_ASSERT(0 == pmu_mask_current);
+
+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused());
+
+ mali_pm_state_unlock();
+ }
+
+ MALI_DEBUG_PRINT(5, ("PM suspend post: Current domain mask: [%s]\n",
+ mali_pm_mask_to_string(pd_mask_current)));
+ MALI_DEBUG_PRINT(5, ("PM suspend post: Current PMU mask: ... [%s]\n",
+ mali_pm_mask_to_string(pmu_mask_current)));
+ MALI_DEBUG_PRINT(5, ("PM suspend post: Group power stats: .. <%s>\n",
+ mali_pm_group_stats_to_string()));
+
+ return MALI_TRUE;
+}
+
+static void mali_pm_update_work(void *data)
+{
+ MALI_IGNORE(data);
+ mali_pm_update_sync();
+}
+
+static _mali_osk_errcode_t mali_pm_create_pm_domains(void)
+{
+ int i;
+
+ /* Create all domains (including dummy domain) */
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ if (0x0 == domain_config[i]) continue;
+
+ if (NULL == mali_pm_domain_create(domain_config[i])) {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+static void mali_pm_set_default_pm_domain_config(void)
+{
+ MALI_DEBUG_ASSERT(0 != _mali_osk_resource_base_address());
+
+ /* GP core */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_GP, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_GP] = 0x01;
+ }
+
+ /* PP0 - PP3 core */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_PP0, NULL)) {
+ if (mali_is_mali400()) {
+ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 2;
+ } else if (mali_is_mali450()) {
+ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 1;
+ } else if (mali_is_mali470()) {
+ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 0;
+ }
+ }
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_PP1, NULL)) {
+ if (mali_is_mali400()) {
+ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 3;
+ } else if (mali_is_mali450()) {
+ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 2;
+ } else if (mali_is_mali470()) {
+ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 1;
+ }
+ }
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_PP2, NULL)) {
+ if (mali_is_mali400()) {
+ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 4;
+ } else if (mali_is_mali450()) {
+ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 2;
+ } else if (mali_is_mali470()) {
+ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 1;
+ }
+ }
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_PP3, NULL)) {
+ if (mali_is_mali400()) {
+ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 5;
+ } else if (mali_is_mali450()) {
+ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 2;
+ } else if (mali_is_mali470()) {
+ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 1;
+ }
+ }
+
+ /* PP4 - PP7 */
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_PP4, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_PP4] = 0x01 << 3;
+ }
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_PP5, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_PP5] = 0x01 << 3;
+ }
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_PP6, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_PP6] = 0x01 << 3;
+ }
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI_OFFSET_PP7, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_PP7] = 0x01 << 3;
+ }
+
+ /* L2gp/L2PP0/L2PP4 */
+ if (mali_is_mali400()) {
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI400_OFFSET_L2_CACHE0, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 1;
+ }
+ } else if (mali_is_mali450()) {
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI450_OFFSET_L2_CACHE0, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 0;
+ }
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI450_OFFSET_L2_CACHE1, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 1;
+ }
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI450_OFFSET_L2_CACHE2, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_L22] = 0x01 << 3;
+ }
+ } else if (mali_is_mali470()) {
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(
+ MALI470_OFFSET_L2_CACHE1, NULL)) {
+ domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 0;
+ }
+ }
+}
+
+static u32 mali_pm_get_registered_cores_mask(void)
+{
+ int i = 0;
+ u32 mask = 0;
+
+ for (i = 0; i < MALI_DOMAIN_INDEX_DUMMY; i++) {
+ mask |= domain_config[i];
+ }
+
+ return mask;
+}
+
+static void mali_pm_set_pmu_domain_config(void)
+{
+ int i = 0;
+
+ _mali_osk_device_data_pmu_config_get(domain_config, MALI_MAX_NUMBER_OF_DOMAINS - 1);
+
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+ if (0 != domain_config[i]) {
+			MALI_DEBUG_PRINT(2, ("Using custom PMU config:\n"));
+ break;
+ }
+ }
+
+ if (MALI_MAX_NUMBER_OF_DOMAINS - 1 == i) {
+		MALI_DEBUG_PRINT(2, ("Using HW-detected PMU config:\n"));
+ mali_pm_set_default_pm_domain_config();
+ }
+
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) {
+ if (domain_config[i]) {
+ MALI_DEBUG_PRINT(2, ("domain_config[%d] = 0x%x \n", i, domain_config[i]));
+ }
+ }
+ /* Can't override dummy domain mask */
+ domain_config[MALI_DOMAIN_INDEX_DUMMY] =
+ 1 << MALI_DOMAIN_INDEX_DUMMY;
+}
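+
+/*
+ * Illustration (hypothetical platform data): a platform that places the GP
+ * core and all PP cores behind a single PMU domain bit could supply a
+ * domain_config entry of 0x01 for each of those indices via
+ * _mali_osk_device_data_pmu_config_get(); the HW-detect fallback above is
+ * then skipped, because at least one entry is non-zero.
+ */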
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask)
+{
+ static char bit_str[MALI_MAX_NUMBER_OF_DOMAINS + 1];
+ int bit;
+ int str_pos = 0;
+
+ /* Must be protected by lock since we use shared string buffer */
+ if (NULL != pm_lock_exec) {
+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+ }
+
+ for (bit = MALI_MAX_NUMBER_OF_DOMAINS - 1; bit >= 0; bit--) {
+ if (mask & (1 << bit)) {
+ bit_str[str_pos] = 'X';
+ } else {
+ bit_str[str_pos] = '-';
+ }
+ str_pos++;
+ }
+
+ bit_str[MALI_MAX_NUMBER_OF_DOMAINS] = '\0';
+
+ return bit_str;
+}
+
+const char *mali_pm_group_stats_to_string(void)
+{
+ static char bit_str[MALI_MAX_NUMBER_OF_GROUPS + 1];
+ u32 num_groups = mali_group_get_glob_num_groups();
+ u32 i;
+
+ /* Must be protected by lock since we use shared string buffer */
+ if (NULL != pm_lock_exec) {
+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec);
+ }
+
+ for (i = 0; i < num_groups && i < MALI_MAX_NUMBER_OF_GROUPS; i++) {
+ struct mali_group *group;
+
+ group = mali_group_get_glob_group(i);
+
+ if (MALI_TRUE == mali_group_power_is_on(group)) {
+ bit_str[i] = 'X';
+ } else {
+ bit_str[i] = '-';
+ }
+ }
+
+ bit_str[i] = '\0';
+
+ return bit_str;
+}
+#endif
+
+/*
+ * num_pp returns the number of PP cores which will be powered on by this mask;
+ * cost returns the total power cost of the cores powered on by this mask.
+ */
+static void mali_pm_stat_from_mask(u32 mask, u32 *num_pp, u32 *cost)
+{
+ u32 i;
+
+ /* loop through all cores */
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ if (!(domain_config[i] & mask)) {
+ continue;
+ }
+
+ switch (i) {
+ case MALI_DOMAIN_INDEX_GP:
+ *cost += MALI_GP_COST;
+
+ break;
+ case MALI_DOMAIN_INDEX_PP0: /* Fall through */
+ case MALI_DOMAIN_INDEX_PP1: /* Fall through */
+ case MALI_DOMAIN_INDEX_PP2: /* Fall through */
+ case MALI_DOMAIN_INDEX_PP3:
+ if (mali_is_mali400()) {
+ if ((domain_config[MALI_DOMAIN_INDEX_L20] & mask)
+ || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+ == domain_config[MALI_DOMAIN_INDEX_L20])) {
+ *num_pp += 1;
+ }
+ } else {
+ if ((domain_config[MALI_DOMAIN_INDEX_L21] & mask)
+ || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+ == domain_config[MALI_DOMAIN_INDEX_L21])) {
+ *num_pp += 1;
+ }
+ }
+
+ *cost += MALI_PP_COST;
+ break;
+ case MALI_DOMAIN_INDEX_PP4: /* Fall through */
+ case MALI_DOMAIN_INDEX_PP5: /* Fall through */
+ case MALI_DOMAIN_INDEX_PP6: /* Fall through */
+ case MALI_DOMAIN_INDEX_PP7:
+ MALI_DEBUG_ASSERT(mali_is_mali450());
+
+ if ((domain_config[MALI_DOMAIN_INDEX_L22] & mask)
+ || (domain_config[MALI_DOMAIN_INDEX_DUMMY]
+ == domain_config[MALI_DOMAIN_INDEX_L22])) {
+ *num_pp += 1;
+ }
+
+ *cost += MALI_PP_COST;
+ break;
+ case MALI_DOMAIN_INDEX_L20: /* Fall through */
+ case MALI_DOMAIN_INDEX_L21: /* Fall through */
+ case MALI_DOMAIN_INDEX_L22:
+ *cost += MALI_L2_COST;
+
+ break;
+ }
+ }
+}
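+
+/*
+ * Worked example (using the default Mali-400 config set up above, where
+ * GP = 0x01, PP0 = 0x04, PP1 = 0x08 and L20 = 0x02): the mask 0x0E covers
+ * PP0, PP1 and the L2 cache, so mali_pm_stat_from_mask() would report
+ * num_pp = 2 and cost = 2 * MALI_PP_COST + MALI_L2_COST = 13.
+ */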
+
+void mali_pm_power_cost_setup(void)
+{
+ /*
+	 * Two parallel arrays which store the best domain mask and its cost.
+	 * The index is the number of PP cores minus one; e.g. index 0 holds
+	 * the best mask for the 1-PP option, which might be 0x2 with a cost
+	 * of 1. A lower cost is better.
+ */
+ u32 best_mask[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+ u32 best_cost[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 };
+	/* cores_in_domain stores the number of PP cores in each PM domain. */
+	u32 cores_in_domain[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 };
+	/* max_domain_mask/max_domain_id track the highest PM domain in use. */
+ u32 max_domain_mask = 0;
+ u32 max_domain_id = 0;
+ u32 always_on_pp_cores = 0;
+
+ u32 num_pp, cost, mask;
+ u32 i, j , k;
+
+ /* Initialize statistics */
+ for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) {
+ best_mask[i] = 0;
+ best_cost[i] = 0xFFFFFFFF; /* lower cost is better */
+ }
+
+ for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1; i++) {
+ for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+ mali_pm_domain_power_cost_result[i][j] = 0;
+ }
+ }
+
+	/* Calculate the number of PP cores in the given domain config. */
+ for (i = MALI_DOMAIN_INDEX_PP0; i <= MALI_DOMAIN_INDEX_PP7; i++) {
+ if (0 < domain_config[i]) {
+			/* Get the max domain mask value used to calculate the power
+			 * cost; always-on PP cores are not counted here. */
+ if (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i]
+ && max_domain_mask < domain_config[i]) {
+ max_domain_mask = domain_config[i];
+ }
+
+ if (MALI_PM_DOMAIN_DUMMY_MASK == domain_config[i]) {
+ always_on_pp_cores++;
+ }
+ }
+ }
+ max_domain_id = _mali_osk_fls(max_domain_mask);
+
+ /*
+ * Try all combinations of power domains and check how many PP cores
+ * they have and their power cost.
+ */
+ for (mask = 0; mask < (1 << max_domain_id); mask++) {
+ num_pp = 0;
+ cost = 0;
+
+ mali_pm_stat_from_mask(mask, &num_pp, &cost);
+
+		/* This mask is usable for every variant from MP1 up to num_pp PP cores; update the statistics for each. */
+ for (i = 0; i < num_pp; i++) {
+ if (best_cost[i] >= cost) {
+ best_cost[i] = cost;
+ best_mask[i] = mask;
+ }
+ }
+ }
+
+ /*
+	 * If we want to enable x PP cores and x is less than the number of
+	 * always-on PP cores, all of the cores we enable must be always-on
+	 * PP cores.
+ */
+ for (i = 0; i < mali_executor_get_num_cores_total(); i++) {
+ if (i < always_on_pp_cores) {
+ mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+ = i + 1;
+ } else {
+ mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1]
+ = always_on_pp_cores;
+ }
+ }
+
+	/* In this loop, i represents the number of non-always-on PP cores we want to enable. */
+ for (i = 0; i < (mali_executor_get_num_cores_total() - always_on_pp_cores); i++) {
+ if (best_mask[i] == 0) {
+ /* This MP variant is not available */
+ continue;
+ }
+
+ for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) {
+ cores_in_domain[j] = 0;
+ }
+
+ for (j = MALI_DOMAIN_INDEX_PP0; j <= MALI_DOMAIN_INDEX_PP7; j++) {
+ if (0 < domain_config[j]
+			    && (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[j])) {
+ cores_in_domain[_mali_osk_fls(domain_config[j]) - 1]++;
+ }
+ }
+
+		/* In this loop, j counts how many cores we have already assigned. */
+ for (j = 0; j <= i;) {
+			/* k visits every domain to see how many PP cores remain in it. */
+ for (k = 0; k < max_domain_id; k++) {
+				/* If domain k is enabled in best_mask[i] and this
+				 * domain still has spare PP cores, pick one PP core
+				 * from it, then move on to the next enabled PM domain. */
+ if ((best_mask[i] & (0x1 << k)) && (0 < cores_in_domain[k])) {
+ cores_in_domain[k]--;
+ mali_pm_domain_power_cost_result[always_on_pp_cores + i + 1][k]++;
+ j++;
+ if (j > i) {
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+/*
+ * When we are doing core scaling, this function is called to return the
+ * per-domain core counts that achieve the best PP group power cost.
+ */
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst)
+{
+ MALI_DEBUG_ASSERT((mali_executor_get_num_cores_total() >= num_requested) && (0 <= num_requested));
+
+ _mali_osk_memcpy(dst, mali_pm_domain_power_cost_result[num_requested], MALI_MAX_NUMBER_OF_DOMAINS * sizeof(int));
+}
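+
+/*
+ * Usage sketch (illustrative): to core-scale down to two PP cores, a
+ * caller could do
+ *
+ *   int cores_per_domain[MALI_MAX_NUMBER_OF_DOMAINS];
+ *   mali_pm_get_best_power_cost_mask(2, cores_per_domain);
+ *
+ * and then enable cores_per_domain[i] PP cores in each domain i.
+ */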
+
+u32 mali_pm_get_current_mask(void)
+{
+ return pd_mask_current;
+}
+
+u32 mali_pm_get_wanted_mask(void)
+{
+ return pd_mask_wanted;
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_pm.h b/drivers/gpu/arm/utgard/common/mali_pm.h
new file mode 100644
index 000000000000..d72c732e698d
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pm.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_H__
+#define __MALI_PM_H__
+
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+
+#define MALI_DOMAIN_INDEX_GP 0
+#define MALI_DOMAIN_INDEX_PP0 1
+#define MALI_DOMAIN_INDEX_PP1 2
+#define MALI_DOMAIN_INDEX_PP2 3
+#define MALI_DOMAIN_INDEX_PP3 4
+#define MALI_DOMAIN_INDEX_PP4 5
+#define MALI_DOMAIN_INDEX_PP5 6
+#define MALI_DOMAIN_INDEX_PP6 7
+#define MALI_DOMAIN_INDEX_PP7 8
+#define MALI_DOMAIN_INDEX_L20 9
+#define MALI_DOMAIN_INDEX_L21 10
+#define MALI_DOMAIN_INDEX_L22 11
+/*
+ * The dummy domain is used when there is no physical power domain
+ * (e.g. no PMU or always on cores)
+ */
+#define MALI_DOMAIN_INDEX_DUMMY 12
+#define MALI_MAX_NUMBER_OF_DOMAINS 13
+
+/**
+ * Initialize the Mali PM module
+ *
+ * PM module covers Mali PM core, PM domains and Mali PMU
+ */
+_mali_osk_errcode_t mali_pm_initialize(void);
+
+/**
+ * Terminate the Mali PM module
+ */
+void mali_pm_terminate(void);
+
+void mali_pm_exec_lock(void);
+void mali_pm_exec_unlock(void);
+
+
+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index,
+ struct mali_l2_cache_core *l2_cache);
+struct mali_pm_domain *mali_pm_register_group(u32 domain_index,
+ struct mali_group *group);
+
+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains,
+ struct mali_group **groups,
+ u32 num_domains);
+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains,
+ u32 num_domains);
+
+void mali_pm_init_begin(void);
+void mali_pm_init_end(void);
+
+void mali_pm_update_sync(void);
+void mali_pm_update_async(void);
+
+/* Callback functions for system power management */
+void mali_pm_os_suspend(mali_bool os_suspend);
+void mali_pm_os_resume(void);
+
+mali_bool mali_pm_runtime_suspend(void);
+void mali_pm_runtime_resume(void);
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain,
+ char *buf, u32 size);
+#endif
+
+void mali_pm_power_cost_setup(void);
+
+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst);
+
+#if defined(DEBUG)
+const char *mali_pm_mask_to_string(u32 mask);
+#endif
+
+u32 mali_pm_get_current_mask(void);
+u32 mali_pm_get_wanted_mask(void);
+#endif /* __MALI_PM_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_pm_domain.c b/drivers/gpu/arm/utgard/common/mali_pm_domain.c
new file mode 100644
index 000000000000..dbf985e6d37b
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pm_domain.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm_domain.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_pm.h"
+
+static struct mali_pm_domain *mali_pm_domains[MALI_MAX_NUMBER_OF_DOMAINS] =
+{ NULL, };
+
+void mali_pm_domain_initialize(void)
+{
+ /* Domains will be initialized/created on demand */
+}
+
+void mali_pm_domain_terminate(void)
+{
+ int i;
+
+	/* Delete all domains that have been created */
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ mali_pm_domain_delete(mali_pm_domains[i]);
+ mali_pm_domains[i] = NULL;
+ }
+}
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask)
+{
+ struct mali_pm_domain *domain = NULL;
+ u32 domain_id = 0;
+
+ domain = mali_pm_domain_get_from_mask(pmu_mask);
+ if (NULL != domain) return domain;
+
+ MALI_DEBUG_PRINT(2,
+ ("Mali PM domain: Creating Mali PM domain (mask=0x%08X)\n",
+ pmu_mask));
+
+ domain = (struct mali_pm_domain *)_mali_osk_malloc(
+ sizeof(struct mali_pm_domain));
+ if (NULL != domain) {
+ domain->power_is_on = MALI_FALSE;
+ domain->pmu_mask = pmu_mask;
+ domain->use_count = 0;
+ _mali_osk_list_init(&domain->group_list);
+ _mali_osk_list_init(&domain->l2_cache_list);
+
+ domain_id = _mali_osk_fls(pmu_mask) - 1;
+ /* Verify the domain_id */
+ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > domain_id);
+		/* Verify that only one bit is set in pmu_mask */
+ MALI_DEBUG_ASSERT((1 << domain_id) == pmu_mask);
+ mali_pm_domains[domain_id] = domain;
+
+ return domain;
+ } else {
+ MALI_DEBUG_PRINT_ERROR(("Unable to create PM domain\n"));
+ }
+
+ return NULL;
+}
+
+void mali_pm_domain_delete(struct mali_pm_domain *domain)
+{
+ if (NULL == domain) {
+ return;
+ }
+
+ _mali_osk_list_delinit(&domain->group_list);
+ _mali_osk_list_delinit(&domain->l2_cache_list);
+
+ _mali_osk_free(domain);
+}
+
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+ struct mali_group *group)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ /*
+	 * Use addtail because the virtual group is created last and needs
+	 * to be at the end of the list (in order to be activated after
+	 * all of its children).
+ */
+ _mali_osk_list_addtail(&group->pm_domain_list, &domain->group_list);
+}
+
+void mali_pm_domain_add_l2_cache(struct mali_pm_domain *domain,
+ struct mali_l2_cache_core *l2_cache)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ MALI_DEBUG_ASSERT_POINTER(l2_cache);
+ _mali_osk_list_add(&l2_cache->pm_domain_list, &domain->l2_cache_list);
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask)
+{
+ u32 id = 0;
+
+ if (0 == mask) {
+ return NULL;
+ }
+
+ id = _mali_osk_fls(mask) - 1;
+
+ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that only one bit is set in the mask */
+ MALI_DEBUG_ASSERT((1 << id) == mask);
+
+ return mali_pm_domains[id];
+}
+
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id)
+{
+ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+
+ return mali_pm_domains[id];
+}
+
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+
+ if (0 == domain->use_count) {
+ _mali_osk_pm_dev_ref_get_async();
+ }
+
+ ++domain->use_count;
+ MALI_DEBUG_PRINT(4, ("PM domain %p: ref_get, use_count => %u\n", domain, domain->use_count));
+
+ /* Return our mask so caller can check this against wanted mask */
+ return domain->pmu_mask;
+}
+
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+
+ --domain->use_count;
+ MALI_DEBUG_PRINT(4, ("PM domain %p: ref_put, use_count => %u\n", domain, domain->use_count));
+
+ if (0 == domain->use_count) {
+ _mali_osk_pm_dev_ref_put();
+ }
+
+ /*
+	 * Return the PMU mask which could now be powered down
+	 * (the bit for this domain).
+	 * Acting on this is the responsibility of the caller (mali_pm).
+ */
+ return (0 == domain->use_count ? domain->pmu_mask : 0);
+}
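+
+/*
+ * Minimal get/put usage sketch (illustrative; the locking done by the
+ * callers in mali_pm.c is elided here):
+ *
+ *   u32 wanted = 0;
+ *   wanted |= mali_pm_domain_ref_get(domain);       // mask of this domain
+ *   ...
+ *   u32 droppable = mali_pm_domain_ref_put(domain); // mask if now unused
+ */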
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain)
+{
+ u32 id = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ MALI_DEBUG_ASSERT(0 != domain->pmu_mask);
+
+ id = _mali_osk_fls(domain->pmu_mask) - 1;
+
+ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id);
+	/* Verify that only one bit is set in pmu_mask */
+ MALI_DEBUG_ASSERT((1 << id) == domain->pmu_mask);
+ /* Verify that we have stored the domain at right id/index */
+ MALI_DEBUG_ASSERT(domain == mali_pm_domains[id]);
+
+ return id;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void)
+{
+ int i;
+
+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) {
+ if (NULL == mali_pm_domains[i]) {
+ /* Nothing to check */
+ continue;
+ }
+
+ if (MALI_TRUE == mali_pm_domains[i]->power_is_on) {
+ /* Not ready for suspend! */
+ return MALI_FALSE;
+ }
+
+ if (0 != mali_pm_domains[i]->use_count) {
+ /* Not ready for suspend! */
+ return MALI_FALSE;
+ }
+ }
+
+ return MALI_TRUE;
+}
+#endif
diff --git a/drivers/gpu/arm/utgard/common/mali_pm_domain.h b/drivers/gpu/arm/utgard/common/mali_pm_domain.h
new file mode 100644
index 000000000000..aceb3449359a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pm_domain.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_PM_DOMAIN_H__
+#define __MALI_PM_DOMAIN_H__
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+#include "mali_l2_cache.h"
+#include "mali_group.h"
+#include "mali_pmu.h"
+
+/* Instances are protected by PM state lock */
+struct mali_pm_domain {
+ mali_bool power_is_on;
+ s32 use_count;
+ u32 pmu_mask;
+
+ /* Zero or more groups can belong to this domain */
+ _mali_osk_list_t group_list;
+
+ /* Zero or more L2 caches can belong to this domain */
+ _mali_osk_list_t l2_cache_list;
+};
+
+
+void mali_pm_domain_initialize(void);
+void mali_pm_domain_terminate(void);
+
+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask);
+void mali_pm_domain_delete(struct mali_pm_domain *domain);
+
+void mali_pm_domain_add_l2_cache(
+ struct mali_pm_domain *domain,
+ struct mali_l2_cache_core *l2_cache);
+void mali_pm_domain_add_group(struct mali_pm_domain *domain,
+ struct mali_group *group);
+
+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask);
+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id);
+
+/* Ref counting */
+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain);
+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_group_list(
+ struct mali_pm_domain *domain)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ return &domain->group_list;
+}
+
+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_l2_cache_list(
+ struct mali_pm_domain *domain)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ return &domain->l2_cache_list;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pm_domain_power_is_on(
+ struct mali_pm_domain *domain)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ return domain->power_is_on;
+}
+
+MALI_STATIC_INLINE void mali_pm_domain_set_power_on(
+ struct mali_pm_domain *domain,
+ mali_bool power_is_on)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ domain->power_is_on = power_is_on;
+}
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_use_count(
+ struct mali_pm_domain *domain)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ return domain->use_count;
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain);
+
+MALI_STATIC_INLINE u32 mali_pm_domain_get_mask(struct mali_pm_domain *domain)
+{
+ MALI_DEBUG_ASSERT_POINTER(domain);
+ return domain->pmu_mask;
+}
+#endif
+
+#if defined(DEBUG)
+mali_bool mali_pm_domain_all_unused(void);
+#endif
+
+#endif /* __MALI_PM_DOMAIN_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_pmu.c b/drivers/gpu/arm/utgard/common/mali_pmu.c
new file mode 100644
index 000000000000..2a3008a6dd83
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pmu.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu.c
+ * Mali driver functions for Mali 400 PMU hardware
+ */
+#include "mali_hw_core.h"
+#include "mali_pmu.h"
+#include "mali_pp.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_pm.h"
+#include "mali_osk_mali.h"
+
+struct mali_pmu_core *mali_global_pmu_core = NULL;
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+ struct mali_pmu_core *pmu);
+
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource)
+{
+ struct mali_pmu_core *pmu;
+
+ MALI_DEBUG_ASSERT(NULL == mali_global_pmu_core);
+ MALI_DEBUG_PRINT(2, ("Mali PMU: Creating Mali PMU core\n"));
+
+ pmu = (struct mali_pmu_core *)_mali_osk_malloc(
+ sizeof(struct mali_pmu_core));
+ if (NULL != pmu) {
+ pmu->registered_cores_mask = 0; /* to be set later */
+
+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&pmu->hw_core,
+ resource, PMU_REGISTER_ADDRESS_SPACE_SIZE)) {
+
+ pmu->switch_delay = _mali_osk_get_pmu_switch_delay();
+
+ mali_global_pmu_core = pmu;
+
+ return pmu;
+ }
+ _mali_osk_free(pmu);
+ }
+
+ return NULL;
+}
+
+void mali_pmu_delete(struct mali_pmu_core *pmu)
+{
+ MALI_DEBUG_ASSERT_POINTER(pmu);
+ MALI_DEBUG_ASSERT(pmu == mali_global_pmu_core);
+
+ MALI_DEBUG_PRINT(2, ("Mali PMU: Deleting Mali PMU core\n"));
+
+ mali_global_pmu_core = NULL;
+
+ mali_hw_core_delete(&pmu->hw_core);
+ _mali_osk_free(pmu);
+}
+
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask)
+{
+ pmu->registered_cores_mask = mask;
+}
+
+void mali_pmu_reset(struct mali_pmu_core *pmu)
+{
+ MALI_DEBUG_ASSERT_POINTER(pmu);
+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+ /* Setup the desired defaults */
+ mali_hw_core_register_write_relaxed(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_INT_MASK, 0);
+ mali_hw_core_register_write_relaxed(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_SW_DELAY, pmu->switch_delay);
+}
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu)
+{
+ u32 stat;
+
+ MALI_DEBUG_ASSERT_POINTER(pmu);
+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+ mali_pm_exec_lock();
+
+ mali_pmu_reset(pmu);
+
+ /* Now simply power up the domains which are marked as powered down */
+ stat = mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_STATUS);
+ mali_pmu_power_up(pmu, stat);
+
+ mali_pm_exec_unlock();
+}
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu)
+{
+ u32 stat;
+
+ MALI_DEBUG_ASSERT_POINTER(pmu);
+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+
+ mali_pm_exec_lock();
+
+ /* Now simply power down the domains which are marked as powered up */
+ stat = mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_STATUS);
+ mali_pmu_power_down(pmu, (~stat) & pmu->registered_cores_mask);
+
+ mali_pm_exec_unlock();
+}
+
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask)
+{
+ u32 stat;
+ _mali_osk_errcode_t err;
+
+ MALI_DEBUG_ASSERT_POINTER(pmu);
+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+ MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+ MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+ PMU_REG_VAL_IRQ));
+
+ MALI_DEBUG_PRINT(3,
+ ("PMU power down: ...................... [%s]\n",
+ mali_pm_mask_to_string(mask)));
+
+ stat = mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_STATUS);
+
+ /*
+ * Assert that we are not powering down domains which are already
+ * powered down.
+ */
+ MALI_DEBUG_ASSERT(0 == (stat & mask));
+
+ mask &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+
+ if (0 == mask || 0 == ((~stat) & mask)) return _MALI_OSK_ERR_OK;
+
+ mali_hw_core_register_write(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_POWER_DOWN, mask);
+
+ /*
+ * Do not wait for interrupt on Mali-300/400 if all domains are
+ * powered off by our power down command, because the HW will simply
+ * not generate an interrupt in this case.
+ */
+ if (mali_is_mali450() || mali_is_mali470() || pmu->registered_cores_mask != (mask | stat)) {
+ err = mali_pmu_wait_for_command_finish(pmu);
+ if (_MALI_OSK_ERR_OK != err) {
+ return err;
+ }
+ } else {
+ mali_hw_core_register_write(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+ }
+
+#if defined(DEBUG)
+ /* Verify power status of domains after power down */
+ stat = mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_STATUS);
+ MALI_DEBUG_ASSERT(mask == (stat & mask));
+#endif
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask)
+{
+ u32 stat;
+ _mali_osk_errcode_t err;
+#if !defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+ u32 current_domain;
+#endif
+
+ MALI_DEBUG_ASSERT_POINTER(pmu);
+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0);
+ MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask);
+ MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_INT_RAWSTAT) &
+ PMU_REG_VAL_IRQ));
+
+ MALI_DEBUG_PRINT(3,
+ ("PMU power up: ........................ [%s]\n",
+ mali_pm_mask_to_string(mask)));
+
+ stat = mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_STATUS);
+ stat &= pmu->registered_cores_mask;
+
+ mask &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY);
+ if (0 == mask || 0 == (stat & mask)) return _MALI_OSK_ERR_OK;
+
+ /*
+ * Assert that we are only powering up domains which are currently
+ * powered down.
+ */
+ MALI_DEBUG_ASSERT(mask == (stat & mask));
+
+#if defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP)
+ mali_hw_core_register_write(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_POWER_UP, mask);
+
+ err = mali_pmu_wait_for_command_finish(pmu);
+ if (_MALI_OSK_ERR_OK != err) {
+ return err;
+ }
+#else
+ for (current_domain = 1;
+ current_domain <= pmu->registered_cores_mask;
+ current_domain <<= 1) {
+ if (current_domain & mask & stat) {
+ mali_hw_core_register_write(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_POWER_UP,
+ current_domain);
+
+ err = mali_pmu_wait_for_command_finish(pmu);
+ if (_MALI_OSK_ERR_OK != err) {
+ return err;
+ }
+ }
+ }
+#endif
+
+#if defined(DEBUG)
+ /* Verify power status of domains after power up */
+ stat = mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_STATUS);
+ MALI_DEBUG_ASSERT(0 == (stat & mask));
+#endif /* defined(DEBUG) */
+
+ return _MALI_OSK_ERR_OK;
+}
+
+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish(
+ struct mali_pmu_core *pmu)
+{
+ u32 rawstat;
+ u32 timeout = MALI_REG_POLL_COUNT_SLOW;
+
+ MALI_DEBUG_ASSERT(pmu);
+
+ /* Wait for the command to complete */
+ do {
+ rawstat = mali_hw_core_register_read(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_INT_RAWSTAT);
+ --timeout;
+ } while (0 == (rawstat & PMU_REG_VAL_IRQ) && 0 < timeout);
+
+ MALI_DEBUG_ASSERT(0 < timeout);
+
+ if (0 == timeout) {
+ return _MALI_OSK_ERR_TIMEOUT;
+ }
+
+ mali_hw_core_register_write(&pmu->hw_core,
+ PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ);
+
+ return _MALI_OSK_ERR_OK;
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_pmu.h b/drivers/gpu/arm/utgard/common/mali_pmu.h
new file mode 100644
index 000000000000..5ca78795f535
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pmu.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu.h
+ * Mali driver functions for the Mali PMU hardware
+ */
+
+#ifndef __MALI_PMU_H__
+#define __MALI_PMU_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_hw_core.h"
+
+/** @brief Mali in-built PMU hardware info; the PMU hardware knows the power mask of the cores
+ */
+struct mali_pmu_core {
+ struct mali_hw_core hw_core;
+ u32 registered_cores_mask;
+ u32 switch_delay;
+};
+
+/** @brief Register layout for hardware PMU
+ */
+typedef enum {
+	PMU_REG_ADDR_MGMT_POWER_UP = 0x00, /**< Power up register */
+	PMU_REG_ADDR_MGMT_POWER_DOWN = 0x04, /**< Power down register */
+	PMU_REG_ADDR_MGMT_STATUS = 0x08, /**< Core sleep status register */
+	PMU_REG_ADDR_MGMT_INT_MASK = 0x0C, /**< Interrupt mask register */
+	PMU_REG_ADDR_MGMT_INT_RAWSTAT = 0x10, /**< Interrupt raw status register */
+	PMU_REG_ADDR_MGMT_INT_CLEAR = 0x18, /**< Interrupt clear register */
+	PMU_REG_ADDR_MGMT_SW_DELAY = 0x1C, /**< Switch delay register */
+	PMU_REGISTER_ADDRESS_SPACE_SIZE = 0x28, /**< Size of register space */
+} pmu_reg_addr_mgmt_addr;
+
+#define PMU_REG_VAL_IRQ 1
+
+extern struct mali_pmu_core *mali_global_pmu_core;
+
+/** @brief Initialisation of MALI PMU
+ *
+ * This is called from the entry point of the driver in order to create and initialize the PMU resource.
+ *
+ * @param resource Pointer to the PMU resource
+ * @return The created PMU object, or NULL in case of failure.
+ */
+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource);
+
+/** @brief Deallocate the PMU resource
+ *
+ * This is called on exit of the driver to terminate the PMU resource.
+ *
+ * @param pmu Pointer to PMU core object to delete
+ */
+void mali_pmu_delete(struct mali_pmu_core *pmu);
+
+/** @brief Set registered cores mask
+ *
+ * @param pmu Pointer to PMU core object
+ * @param mask All available/valid domain bits
+ */
+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief Retrieves the Mali PMU core object (if any)
+ *
+ * @return The Mali PMU object, or NULL if no PMU exists.
+ */
+MALI_STATIC_INLINE struct mali_pmu_core *mali_pmu_get_global_pmu_core(void)
+{
+ return mali_global_pmu_core;
+}
+
+/** @brief Reset PMU core
+ *
+ * @param pmu Pointer to PMU core object to reset
+ */
+void mali_pmu_reset(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_up_all(struct mali_pmu_core *pmu);
+
+void mali_pmu_power_down_all(struct mali_pmu_core *pmu);
+
+/** @brief Returns a mask of the currently powered up domains
+ *
+ * @param pmu Pointer to PMU core object
+ */
+MALI_STATIC_INLINE u32 mali_pmu_get_mask(struct mali_pmu_core *pmu)
+{
+ u32 stat = mali_hw_core_register_read(&pmu->hw_core, PMU_REG_ADDR_MGMT_STATUS);
+ return ((~stat) & pmu->registered_cores_mask);
+}
+
+/** @brief MALI GPU power down using MALI in-built PMU
+ *
+ * Called to power down the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power down
+ * @param mask Mask specifying which power domains to power down
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask);
+
+/** @brief MALI GPU power up using MALI in-built PMU
+ *
+ * Called to power up the specified cores.
+ *
+ * @param pmu Pointer to PMU core object to power up
+ * @param mask Mask specifying which power domains to power up
+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error.
+ */
+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask);
+
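+/*
+ * Illustrative sketch (not compiled in): how the accessors above can be
+ * combined. example_power_cycle_domain() and the domain mask value 0x1
+ * are hypothetical, for illustration only.
+ */
+#if 0
+static void example_power_cycle_domain(void)
+{
+	struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core();
+	u32 domain_mask = 0x1; /* hypothetical power domain of interest */
+
+	if (NULL == pmu) {
+		return;
+	}
+
+	/* Power the domain down, verify it left the powered-up mask,
+	 * then power it back up */
+	if (_MALI_OSK_ERR_OK == mali_pmu_power_down(pmu, domain_mask)) {
+		MALI_DEBUG_ASSERT(0 == (mali_pmu_get_mask(pmu) & domain_mask));
+		mali_pmu_power_up(pmu, domain_mask);
+	}
+}
+#endif
+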
+#endif /* __MALI_PMU_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_pp.c b/drivers/gpu/arm/utgard/common/mali_pp.c
new file mode 100644
index 000000000000..68bfd50bf9ae
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pp.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_pp_job.h"
+#include "mali_pp.h"
+#include "mali_hw_core.h"
+#include "mali_group.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_core.h"
+#if defined(CONFIG_MALI400_PROFILING)
+#include "mali_osk_profiling.h"
+#endif
+
+/* Number of frame registers on Mali-200 */
+#define MALI_PP_MALI200_NUM_FRAME_REGISTERS ((0x04C/4)+1)
+/* Number of frame registers on Mali-300 and later */
+#define MALI_PP_MALI400_NUM_FRAME_REGISTERS ((0x058/4)+1)
+
+static struct mali_pp_core *mali_global_pp_cores[MALI_MAX_NUMBER_OF_PP_CORES] = { NULL };
+static u32 mali_global_num_pp_cores = 0;
+
+/* Interrupt handlers */
+static void mali_pp_irq_probe_trigger(void *data);
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id)
+{
+ struct mali_pp_core *core = NULL;
+
+ MALI_DEBUG_PRINT(2, ("Mali PP: Creating Mali PP core: %s\n", resource->description));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Base address of PP core: 0x%x\n", resource->base));
+
+ if (mali_global_num_pp_cores >= MALI_MAX_NUMBER_OF_PP_CORES) {
+ MALI_PRINT_ERROR(("Mali PP: Too many PP core objects created\n"));
+ return NULL;
+ }
+
+ core = _mali_osk_calloc(1, sizeof(struct mali_pp_core));
+ if (NULL != core) {
+ core->core_id = mali_global_num_pp_cores;
+ core->bcast_id = bcast_id;
+
+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI200_REG_SIZEOF_REGISTER_BANK)) {
+ _mali_osk_errcode_t ret;
+
+ if (!is_virtual) {
+ ret = mali_pp_reset(core);
+ } else {
+ ret = _MALI_OSK_ERR_OK;
+ }
+
+ if (_MALI_OSK_ERR_OK == ret) {
+ ret = mali_group_add_pp_core(group, core);
+ if (_MALI_OSK_ERR_OK == ret) {
+ /* Setup IRQ handlers (which will do IRQ probing if needed) */
+ MALI_DEBUG_ASSERT(!is_virtual || -1 != resource->irq);
+
+ core->irq = _mali_osk_irq_init(resource->irq,
+ mali_group_upper_half_pp,
+ group,
+ mali_pp_irq_probe_trigger,
+ mali_pp_irq_probe_ack,
+ core,
+ resource->description);
+ if (NULL != core->irq) {
+ mali_global_pp_cores[mali_global_num_pp_cores] = core;
+ mali_global_num_pp_cores++;
+
+ return core;
+ } else {
+ MALI_PRINT_ERROR(("Mali PP: Failed to setup interrupt handlers for PP core %s\n", core->hw_core.description));
+ }
+ mali_group_remove_pp_core(group);
+ } else {
+ MALI_PRINT_ERROR(("Mali PP: Failed to add core %s to group\n", core->hw_core.description));
+ }
+ }
+ mali_hw_core_delete(&core->hw_core);
+ }
+
+ _mali_osk_free(core);
+ } else {
+ MALI_PRINT_ERROR(("Mali PP: Failed to allocate memory for PP core\n"));
+ }
+
+ return NULL;
+}
+
+void mali_pp_delete(struct mali_pp_core *core)
+{
+ u32 i;
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ _mali_osk_irq_term(core->irq);
+ mali_hw_core_delete(&core->hw_core);
+
+ /* Remove core from global list */
+ for (i = 0; i < mali_global_num_pp_cores; i++) {
+ if (mali_global_pp_cores[i] == core) {
+ mali_global_pp_cores[i] = NULL;
+ mali_global_num_pp_cores--;
+
+ if (i != mali_global_num_pp_cores) {
+ /* We removed a PP core from the middle of the array -- move the last
+ * PP core to the current position to close the gap */
+ mali_global_pp_cores[i] = mali_global_pp_cores[mali_global_num_pp_cores];
+ mali_global_pp_cores[mali_global_num_pp_cores] = NULL;
+ }
+
+ break;
+ }
+ }
+
+ _mali_osk_free(core);
+}
+
+void mali_pp_stop_bus(struct mali_pp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+ /* Will only send the stop bus command, and not wait for it to complete */
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_STOP_BUS);
+}
+
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core)
+{
+ int i;
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ /* Send the stop bus command. */
+ mali_pp_stop_bus(core);
+
+ /* Wait for bus to be stopped */
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+ if (mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS) & MALI200_REG_VAL_STATUS_BUS_STOPPED)
+ break;
+ }
+
+ if (MALI_REG_POLL_COUNT_FAST == i) {
+ MALI_PRINT_ERROR(("Mali PP: Failed to stop bus on %s. Status: 0x%08x\n", core->hw_core.description, mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+ return _MALI_OSK_ERR_FAULT;
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+/* Frame register reset values.
+ * Taken from the Mali400 TRM, 3.6. Pixel processor control register summary */
+static const u32 mali_frame_registers_reset_values[_MALI_PP_MAX_FRAME_REGISTERS] = {
+ 0x0, /* Renderer List Address Register */
+ 0x0, /* Renderer State Word Base Address Register */
+ 0x0, /* Renderer Vertex Base Register */
+ 0x2, /* Feature Enable Register */
+ 0x0, /* Z Clear Value Register */
+ 0x0, /* Stencil Clear Value Register */
+ 0x0, /* ABGR Clear Value 0 Register */
+ 0x0, /* ABGR Clear Value 1 Register */
+ 0x0, /* ABGR Clear Value 2 Register */
+ 0x0, /* ABGR Clear Value 3 Register */
+ 0x0, /* Bounding Box Left Right Register */
+ 0x0, /* Bounding Box Bottom Register */
+ 0x0, /* FS Stack Address Register */
+ 0x0, /* FS Stack Size and Initial Value Register */
+ 0x0, /* Reserved */
+ 0x0, /* Reserved */
+ 0x0, /* Origin Offset X Register */
+ 0x0, /* Origin Offset Y Register */
+ 0x75, /* Subpixel Specifier Register */
+ 0x0, /* Tiebreak mode Register */
+ 0x0, /* Polygon List Format Register */
+ 0x0, /* Scaling Register */
+ 0x0 /* Tilebuffer configuration Register */
+};
+
+/* WBx register reset values */
+static const u32 mali_wb_registers_reset_values[_MALI_PP_MAX_WB_REGISTERS] = {
+ 0x0, /* WBx Source Select Register */
+ 0x0, /* WBx Target Address Register */
+ 0x0, /* WBx Target Pixel Format Register */
+ 0x0, /* WBx Target AA Format Register */
+ 0x0, /* WBx Target Layout */
+ 0x0, /* WBx Target Scanline Length */
+ 0x0, /* WBx Target Flags Register */
+ 0x0, /* WBx MRT Enable Register */
+ 0x0, /* WBx MRT Offset Register */
+ 0x0, /* WBx Global Test Enable Register */
+ 0x0, /* WBx Global Test Reference Value Register */
+ 0x0 /* WBx Global Test Compare Function Register */
+};
+
+/* Performance Counter 0 Enable Register reset value */
+static const u32 mali_perf_cnt_enable_reset_value = 0;
+
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core)
+{
+ /* Bus must be stopped before calling this function */
+ const u32 reset_wait_target_register = MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT;
+ const u32 reset_invalid_value = 0xC0FFE000;
+ const u32 reset_check_value = 0xC01A0000;
+ int i;
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+ MALI_DEBUG_PRINT(2, ("Mali PP: Hard reset of core %s\n", core->hw_core.description));
+
+ /* Set register to a bogus value. The register will be used to detect when reset is complete */
+ mali_hw_core_register_write_relaxed(&core->hw_core, reset_wait_target_register, reset_invalid_value);
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+
+ /* Force core to reset */
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET);
+
+ /* Wait for reset to be complete */
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value);
+ if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) {
+ break;
+ }
+ }
+
+ if (MALI_REG_POLL_COUNT_FAST == i) {
+ MALI_PRINT_ERROR(("Mali PP: The hard reset loop didn't work, unable to recover\n"));
+ }
+
+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, 0x00000000); /* set it back to the default */
+ /* Re-enable interrupts */
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_pp_reset_async(struct mali_pp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ MALI_DEBUG_PRINT(4, ("Mali PP: Reset of core %s\n", core->hw_core.description));
+
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_MASK_ALL);
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET);
+}
+
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core)
+{
+ int i;
+ u32 rawstat = 0;
+
+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) {
+ u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+ if (!(status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE)) {
+ rawstat = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+ if (rawstat == MALI400PP_REG_VAL_IRQ_RESET_COMPLETED) {
+ break;
+ }
+ }
+ }
+
+ if (i == MALI_REG_POLL_COUNT_FAST) {
+ MALI_PRINT_ERROR(("Mali PP: Failed to reset core %s, rawstat: 0x%08x\n",
+ core->hw_core.description, rawstat));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Re-enable interrupts */
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL);
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core)
+{
+ mali_pp_reset_async(core);
+ return mali_pp_reset_wait(core);
+}
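+
+/*
+ * Illustrative sketch (not compiled in): the async/wait split above lets a
+ * caller overlap soft resets across several cores instead of serialising
+ * them. example_reset_two_cores() is hypothetical, for illustration only.
+ */
+#if 0
+static void example_reset_two_cores(struct mali_pp_core *a,
+				    struct mali_pp_core *b)
+{
+	/* Kick off both soft resets before waiting on either */
+	mali_pp_reset_async(a);
+	mali_pp_reset_async(b);
+
+	mali_pp_reset_wait(a);
+	mali_pp_reset_wait(b);
+}
+#endif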
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual)
+{
+ u32 relative_address;
+ u32 start_index;
+ u32 nr_of_regs;
+ u32 *frame_registers = mali_pp_job_get_frame_registers(job);
+ u32 *wb0_registers = mali_pp_job_get_wb0_registers(job);
+ u32 *wb1_registers = mali_pp_job_get_wb1_registers(job);
+ u32 *wb2_registers = mali_pp_job_get_wb2_registers(job);
+ u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job);
+ u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job);
+
+ MALI_DEBUG_ASSERT_POINTER(core);
+
+ /* Write frame registers */
+
+ /*
+ * There are two frame registers which are different for each sub job:
+ * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME)
+ * 2. The FS Stack Address Register (MALI200_REG_ADDR_STACK)
+ */
+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]);
+
+ /* For virtual jobs, the stack address shouldn't be broadcast but written individually */
+ if (!mali_pp_job_is_virtual(job) || restart_virtual) {
+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]);
+ }
+
+ /* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */
+ relative_address = MALI200_REG_ADDR_RSW;
+ start_index = MALI200_REG_ADDR_RSW / sizeof(u32);
+ nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32);
+
+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+ relative_address, &frame_registers[start_index],
+ nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+ /* MALI200_REG_ADDR_STACK_SIZE */
+ relative_address = MALI200_REG_ADDR_STACK_SIZE;
+ start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32);
+
+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core,
+ relative_address, frame_registers[start_index],
+ mali_frame_registers_reset_values[start_index]);
+
+ /* Skip 2 reserved registers */
+
+ /* Write remaining registers */
+ relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X;
+ start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+ nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32);
+
+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core,
+ relative_address, &frame_registers[start_index],
+ nr_of_regs, &mali_frame_registers_reset_values[start_index]);
+
+ /* Write WBx registers */
+ if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */
+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+ }
+
+ if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */
+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+ }
+
+ if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */
+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values);
+ }
+
+ if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0);
+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+ }
+ if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1);
+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value);
+ }
+
+#ifdef CONFIG_MALI400_HEATMAPS_ENABLED
+ if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) {
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1);
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8);
+ }
+#endif /* CONFIG_MALI400_HEATMAPS_ENABLED */
+
+ MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description));
+
+	/* Add a barrier to make sure all register writes are finished */
+ _mali_osk_write_mem_barrier();
+
+ /* This is the command that starts the core.
+ *
+	 * Don't actually run the job if PROFILING_SKIP_PP_JOBS is set; just
+	 * force the core to assert the completion interrupt.
+ */
+#if !defined(PROFILING_SKIP_PP_JOBS)
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING);
+#else
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME);
+#endif
+
+	/* Add a barrier to make sure the previous register writes are finished */
+ _mali_osk_write_mem_barrier();
+}
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+ return mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION);
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index)
+{
+ if (mali_global_num_pp_cores > index) {
+ return mali_global_pp_cores[index];
+ }
+
+ return NULL;
+}
+
+u32 mali_pp_get_glob_num_pp_cores(void)
+{
+ return mali_global_num_pp_cores;
+}
+
+/* ------------- interrupt handling below ------------------ */
+static void mali_pp_irq_probe_trigger(void *data)
+{
+ struct mali_pp_core *core = (struct mali_pp_core *)data;
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_BUS_ERROR);
+ _mali_osk_mem_barrier();
+}
+
+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data)
+{
+ struct mali_pp_core *core = (struct mali_pp_core *)data;
+ u32 irq_readout;
+
+ irq_readout = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS);
+ if (MALI200_REG_VAL_IRQ_BUS_ERROR & irq_readout) {
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_BUS_ERROR);
+ _mali_osk_mem_barrier();
+ return _MALI_OSK_ERR_OK;
+ }
+
+ return _MALI_OSK_ERR_FAULT;
+}
+
+
+#if 0
+static void mali_pp_print_registers(struct mali_pp_core *core)
+{
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_VERSION = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_MASK = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC)));
+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE)));
+}
+#endif
+
+#if 0
+void mali_pp_print_state(struct mali_pp_core *core)
+{
+ MALI_DEBUG_PRINT(2, ("Mali PP: State: 0x%08x\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS)));
+}
+#endif
+
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob)
+{
+ u32 val0 = 0;
+ u32 val1 = 0;
+ u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, subjob);
+ u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, subjob);
+#if defined(CONFIG_MALI400_PROFILING)
+ int counter_index = COUNTER_FP_0_C0 + (2 * child->core_id);
+#endif
+
+ if (MALI_HW_CORE_NO_COUNTER != counter_src0) {
+ val0 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE);
+ mali_pp_job_set_perf_counter_value0(job, subjob, val0);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ _mali_osk_profiling_report_hw_counter(counter_index, val0);
+ _mali_osk_profiling_record_global_counters(counter_index, val0);
+#endif
+ }
+
+ if (MALI_HW_CORE_NO_COUNTER != counter_src1) {
+ val1 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE);
+ mali_pp_job_set_perf_counter_value1(job, subjob, val1);
+
+#if defined(CONFIG_MALI400_PROFILING)
+ _mali_osk_profiling_report_hw_counter(counter_index + 1, val1);
+ _mali_osk_profiling_record_global_counters(counter_index + 1, val1);
+#endif
+ }
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size)
+{
+ int n = 0;
+
+ n += _mali_osk_snprintf(buf + n, size - n, "\tPP #%d: %s\n", core->core_id, core->hw_core.description);
+
+ return n;
+}
+#endif
diff --git a/drivers/gpu/arm/utgard/common/mali_pp.h b/drivers/gpu/arm/utgard/common/mali_pp.h
new file mode 100644
index 000000000000..45712a30e831
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pp.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_H__
+#define __MALI_PP_H__
+
+#include "mali_osk.h"
+#include "mali_pp_job.h"
+#include "mali_hw_core.h"
+
+struct mali_group;
+
+#define MALI_MAX_NUMBER_OF_PP_CORES 9
+
+/**
+ * Definition of the PP core struct
+ * Used to track a PP core in the system.
+ */
+struct mali_pp_core {
+ struct mali_hw_core hw_core; /**< Common for all HW cores */
+ _mali_osk_irq_t *irq; /**< IRQ handler */
+ u32 core_id; /**< Unique core ID */
+ u32 bcast_id; /**< The "flag" value used by the Mali-450 broadcast and DLBU unit */
+};
+
+_mali_osk_errcode_t mali_pp_initialize(void);
+void mali_pp_terminate(void);
+
+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id);
+void mali_pp_delete(struct mali_pp_core *core);
+
+void mali_pp_stop_bus(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core);
+void mali_pp_reset_async(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core);
+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core);
+
+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual);
+
+u32 mali_pp_core_get_version(struct mali_pp_core *core);
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_id(struct mali_pp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+ return core->core_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_core_get_bcast_id(struct mali_pp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+ return core->bcast_id;
+}
+
+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index);
+u32 mali_pp_get_glob_num_pp_cores(void);
+
+/* Debug */
+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size);
+
+/**
+ * Put instrumented HW counters from the core(s) to the job object (if enabled)
+ *
+ * parent and child are always the same, except for virtual jobs on Mali-450.
+ * In this case, the counters will be enabled on the virtual core (parent),
+ * but the values need to be read from the child cores.
+ *
+ * @param parent The core used to see if the counters were enabled
+ * @param child The core to actually read the values from
+ * @param job Job object to update with counter values (if enabled)
+ * @param subjob Which subjob the counters are applicable for (core ID for virtual jobs)
+ */
+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob);
+
+MALI_STATIC_INLINE const char *mali_pp_core_description(struct mali_pp_core *core)
+{
+ return core->hw_core.description;
+}
+
+MALI_STATIC_INLINE enum mali_interrupt_result mali_pp_get_interrupt_result(struct mali_pp_core *core)
+{
+ u32 rawstat_used = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT) &
+ MALI200_REG_VAL_IRQ_MASK_USED;
+ if (0 == rawstat_used) {
+ return MALI_INTERRUPT_RESULT_NONE;
+ } else if (MALI200_REG_VAL_IRQ_END_OF_FRAME == rawstat_used) {
+ return MALI_INTERRUPT_RESULT_SUCCESS;
+ }
+ return MALI_INTERRUPT_RESULT_ERROR;
+}
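+
+/*
+ * Illustrative sketch (not compiled in): how a bottom-half handler might
+ * act on the classification above. example_handle_irq() is hypothetical,
+ * for illustration only.
+ */
+#if 0
+static void example_handle_irq(struct mali_pp_core *core)
+{
+	switch (mali_pp_get_interrupt_result(core)) {
+	case MALI_INTERRUPT_RESULT_NONE:
+		break; /* No used bit raised: not our interrupt */
+	case MALI_INTERRUPT_RESULT_SUCCESS:
+		break; /* End of frame only: sub job completed */
+	default:
+		break; /* Any other used bit: treat sub job as failed */
+	}
+}
+#endif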
+
+MALI_STATIC_INLINE u32 mali_pp_get_rawstat(struct mali_pp_core *core)
+{
+ MALI_DEBUG_ASSERT_POINTER(core);
+ return mali_hw_core_register_read(&core->hw_core,
+ MALI200_REG_ADDR_MGMT_INT_RAWSTAT);
+}
+
+
+MALI_STATIC_INLINE u32 mali_pp_is_active(struct mali_pp_core *core)
+{
+ u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS);
+ return (status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_mask_all_interrupts(struct mali_pp_core *core)
+{
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE);
+}
+
+MALI_STATIC_INLINE void mali_pp_enable_interrupts(struct mali_pp_core *core)
+{
+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED);
+}
+
+MALI_STATIC_INLINE void mali_pp_write_addr_renderer_list(struct mali_pp_core *core,
+ struct mali_pp_job *job, u32 subjob)
+{
+ u32 addr = mali_pp_job_get_addr_frame(job, subjob);
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_FRAME, addr);
+}
+
+
+MALI_STATIC_INLINE void mali_pp_write_addr_stack(struct mali_pp_core *core, struct mali_pp_job *job)
+{
+ u32 addr = mali_pp_job_get_addr_stack(job, core->core_id);
+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_STACK, addr);
+}
+
+#endif /* __MALI_PP_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_pp_job.c b/drivers/gpu/arm/utgard/common/mali_pp_job.c
new file mode 100644
index 000000000000..5528360841af
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pp_job.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_pp.h"
+#include "mali_pp_job.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+#include "mali_memory_swap_alloc.h"
+#include "mali_scheduler.h"
+
+static u32 pp_counter_src0 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */
+static u32 pp_counter_src1 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */
+static _mali_osk_atomic_t pp_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+static u32 pp_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+static u32 pp_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER };
+
+void mali_pp_job_initialize(void)
+{
+ _mali_osk_atomic_init(&pp_counter_per_sub_job_count, 0);
+}
+
+void mali_pp_job_terminate(void)
+{
+ _mali_osk_atomic_term(&pp_counter_per_sub_job_count);
+}
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session,
+ _mali_uk_pp_start_job_s __user *uargs, u32 id)
+{
+ struct mali_pp_job *job;
+ u32 perf_counter_flag;
+
+ job = _mali_osk_calloc(1, sizeof(struct mali_pp_job));
+ if (NULL != job) {
+
+ _mali_osk_list_init(&job->list);
+ _mali_osk_list_init(&job->session_fb_lookup_list);
+
+ if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s))) {
+ goto fail;
+ }
+
+ if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) {
+ MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n"));
+ goto fail;
+ }
+
+ if (!mali_pp_job_use_no_notification(job)) {
+ job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s));
+ if (NULL == job->finished_notification) goto fail;
+ }
+
+ perf_counter_flag = mali_pp_job_get_perf_counter_flag(job);
+
+		/* Case where no counters came from user space,
+		 * so pass the debugfs / DS-5 provided global ones to the job object */
+ if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) ||
+ (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) {
+ u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count);
+
+ /* These counters apply for all virtual jobs, and where no per sub job counter is specified */
+ job->uargs.perf_counter_src0 = pp_counter_src0;
+ job->uargs.perf_counter_src1 = pp_counter_src1;
+
+ /* We only copy the per sub job array if it is enabled with at least one counter */
+ if (0 < sub_job_count) {
+ job->perf_counter_per_sub_job_count = sub_job_count;
+ _mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0));
+ _mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1));
+ }
+ }
+
+ job->session = session;
+ job->id = id;
+
+ job->sub_jobs_num = job->uargs.num_cores ? job->uargs.num_cores : 1;
+ job->pid = _mali_osk_get_pid();
+ job->tid = _mali_osk_get_tid();
+
+ _mali_osk_atomic_init(&job->sub_jobs_completed, 0);
+ _mali_osk_atomic_init(&job->sub_job_errors, 0);
+ job->swap_status = MALI_NO_SWAP_IN;
+ job->user_notification = MALI_FALSE;
+ job->num_pp_cores_in_virtual = 0;
+
+ if (job->uargs.num_memory_cookies > session->allocation_mgr.mali_allocation_num) {
+			MALI_PRINT_ERROR(("Mali PP job: The number of memory cookies is invalid!\n"));
+ goto fail;
+ }
+
+ if (job->uargs.num_memory_cookies > 0) {
+ u32 size;
+ u32 __user *memory_cookies = (u32 __user *)(uintptr_t)job->uargs.memory_cookies;
+
+ size = sizeof(*memory_cookies) * (job->uargs.num_memory_cookies);
+
+ job->memory_cookies = _mali_osk_malloc(size);
+ if (NULL == job->memory_cookies) {
+ MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size));
+ goto fail;
+ }
+
+ if (0 != _mali_osk_copy_from_user(job->memory_cookies, memory_cookies, size)) {
+ MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size));
+ goto fail;
+ }
+ }
+
+ if (_MALI_OSK_ERR_OK != mali_pp_job_check(job)) {
+ /* Not a valid job. */
+ goto fail;
+ }
+
+ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job);
+ mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence));
+
+ mali_mem_swap_in_pages(job);
+
+ return job;
+ }
+
+fail:
+ if (NULL != job) {
+ mali_pp_job_delete(job);
+ }
+
+ return NULL;
+}
+
+void mali_pp_job_delete(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list));
+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->session_fb_lookup_list));
+
+ if (NULL != job->memory_cookies) {
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+ /* Unmap buffers attached to job */
+ mali_dma_buf_unmap_job(job);
+#endif
+ if (MALI_NO_SWAP_IN != job->swap_status) {
+ mali_mem_swap_out_pages(job);
+ }
+
+ _mali_osk_free(job->memory_cookies);
+ }
+
+ if (job->user_notification) {
+ mali_scheduler_return_pp_job_to_user(job,
+ job->num_pp_cores_in_virtual);
+ }
+
+ if (NULL != job->finished_notification) {
+ _mali_osk_notification_delete(job->finished_notification);
+ }
+
+ _mali_osk_atomic_term(&job->sub_jobs_completed);
+ _mali_osk_atomic_term(&job->sub_job_errors);
+
+ _mali_osk_free(job);
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list)
+{
+ struct mali_pp_job *iter;
+ struct mali_pp_job *tmp;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+ /* Find position in list/queue where job should be added. */
+ _MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list,
+ struct mali_pp_job, list) {
+ /* job should be started after iter if iter is in progress. */
+ if (0 < iter->sub_jobs_started) {
+ break;
+ }
+
+ /*
+ * job should be started after iter if it has a higher
+ * job id. A span is used to handle job id wrapping.
+ */
+ if ((mali_pp_job_get_id(job) -
+ mali_pp_job_get_id(iter)) <
+ MALI_SCHEDULER_JOB_ID_SPAN) {
+ break;
+ }
+ }
+
+ _mali_osk_list_add(&job->list, &iter->list);
+}
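+
+/*
+ * Illustrative sketch (not compiled in): the unsigned subtraction above is
+ * wrap-safe. example_job_id_span() and its id values are hypothetical,
+ * for illustration only.
+ */
+#if 0
+static void example_job_id_span(void)
+{
+	u32 old_id = 0xFFFFFFFEu; /* id assigned just before u32 wrap */
+	u32 new_id = 0x00000001u; /* id assigned just after u32 wrap */
+
+	/* 0x00000001 - 0xFFFFFFFE == 0x00000003 in u32 arithmetic, well
+	 * within the span, so new_id correctly orders after old_id */
+	MALI_DEBUG_ASSERT((new_id - old_id) < MALI_SCHEDULER_JOB_ID_SPAN);
+}
+#endif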
+
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter, as do jobs with no per sub job counters at all */
+ if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+ return job->uargs.perf_counter_src0;
+ }
+
+ /* Use per sub job counter if enabled... */
+ if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src0[sub_job]) {
+ return job->perf_counter_per_sub_job_src0[sub_job];
+ }
+
+ /* ...else default to global job counter */
+ return job->uargs.perf_counter_src0;
+}
+
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job)
+{
+	/* Virtual jobs always use the global job counter, as do jobs with no per sub job counters at all */
+	if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) {
+		return job->uargs.perf_counter_src1;
+ }
+
+ /* Use per sub job counter if enabled... */
+ if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src1[sub_job]) {
+ return job->perf_counter_per_sub_job_src1[sub_job];
+ }
+
+ /* ...else default to global job counter */
+ return job->uargs.perf_counter_src1;
+}
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter)
+{
+ pp_counter_src0 = counter;
+}
+
+void mali_pp_job_set_pp_counter_global_src1(u32 counter)
+{
+ pp_counter_src1 = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter)
+{
+ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+ if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src0[sub_job]) {
+ /* increment count since existing counter was disabled */
+ _mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+ }
+
+ if (MALI_HW_CORE_NO_COUNTER == counter) {
+ /* decrement count since new counter is disabled */
+ _mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+ }
+
+	/* Note: a change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will both increment and decrement, so the net change is zero */
+
+ pp_counter_per_sub_job_src0[sub_job] = counter;
+}
+
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter)
+{
+ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+
+ if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src1[sub_job]) {
+ /* increment count since existing counter was disabled */
+ _mali_osk_atomic_inc(&pp_counter_per_sub_job_count);
+ }
+
+ if (MALI_HW_CORE_NO_COUNTER == counter) {
+ /* decrement count since new counter is disabled */
+ _mali_osk_atomic_dec(&pp_counter_per_sub_job_count);
+ }
+
+	/* Note: a change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will both increment and decrement, so the net change is zero */
+
+ pp_counter_per_sub_job_src1[sub_job] = counter;
+}
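+
+/*
+ * Illustrative sketch (not compiled in): the increment/decrement pairing
+ * above keeps pp_counter_per_sub_job_count equal to the number of enabled
+ * slots. example_toggle_sub_job_counter() and the counter source id 42
+ * are hypothetical, for illustration only.
+ */
+#if 0
+static void example_toggle_sub_job_counter(void)
+{
+	/* Count goes from 0 to 1: slot 0 changes from disabled to enabled */
+	mali_pp_job_set_pp_counter_sub_job_src0(0, 42);
+
+	/* Count returns to 0: slot 0 changes from enabled to disabled */
+	mali_pp_job_set_pp_counter_sub_job_src0(0, MALI_HW_CORE_NO_COUNTER);
+}
+#endif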
+
+u32 mali_pp_job_get_pp_counter_global_src0(void)
+{
+ return pp_counter_src0;
+}
+
+u32 mali_pp_job_get_pp_counter_global_src1(void)
+{
+ return pp_counter_src1;
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job)
+{
+ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+ return pp_counter_per_sub_job_src0[sub_job];
+}
+
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job)
+{
+ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS);
+ return pp_counter_per_sub_job_src1[sub_job];
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_pp_job.h b/drivers/gpu/arm/utgard/common/mali_pp_job.h
new file mode 100644
index 000000000000..7b9d2efa3019
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_pp_job.h
@@ -0,0 +1,574 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_PP_JOB_H__
+#define __MALI_PP_JOB_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_uk_types.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "regs/mali_200_regs.h"
+#include "mali_kernel_core.h"
+#include "mali_dlbu.h"
+#include "mali_timeline.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#include "linux/mali_memory_dma_buf.h"
+#endif
+
+typedef enum pp_job_status {
+ MALI_NO_SWAP_IN,
+ MALI_SWAP_IN_FAIL,
+ MALI_SWAP_IN_SUCC,
+} pp_job_status;
+
+/**
+ * This structure represents a PP job, including all sub jobs.
+ *
+ * The PP job object itself is not protected by any single lock,
+ * but relies on other locks instead (scheduler, executor and timeline lock).
+ * Think of the job object as moving between these subsystems throughout
+ * its lifetime. Different parts of the PP job struct are used by different
+ * subsystems. Accessor functions ensure that the correct lock is taken.
+ * Do NOT access any data members directly from outside this module!
+ */
+struct mali_pp_job {
+ /*
+ * These members are typically only set at creation,
+ * and only read later on.
+ * They do not require any lock protection.
+ */
+ _mali_uk_pp_start_job_s uargs; /**< Arguments from user space */
+ struct mali_session_data *session; /**< Session which submitted this job */
+ u32 pid; /**< Process ID of submitting process */
+ u32 tid; /**< Thread ID of submitting thread */
+ u32 id; /**< Identifier for this job in kernel space (sequential numbering) */
+ u32 cache_order; /**< Cache order used for L2 cache flushing (sequential numbering) */
+ struct mali_timeline_tracker tracker; /**< Timeline tracker for this job */
+ _mali_osk_notification_t *finished_notification; /**< Notification sent back to userspace on job complete */
+ u32 perf_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */
+ u32 perf_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src0 */
+ u32 perf_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src1 */
+ u32 sub_jobs_num; /**< Number of subjobs; set to 1 for Mali-450 if DLBU is used, otherwise equals number of PP cores */
+
+ pp_job_status swap_status; /**< Used to track each PP job swap status, if fail, we need to drop them in scheduler part */
+ mali_bool user_notification; /**< When we deferred delete PP job, we need to judge if we need to send job finish notification to user space */
+ u32 num_pp_cores_in_virtual; /**< How many PP cores we have when job finished */
+
+ /*
+ * These members are used by both scheduler and executor.
+ * They are "protected" by atomic operations.
+ */
+ _mali_osk_atomic_t sub_jobs_completed; /**< Number of completed sub-jobs in this superjob */
+ _mali_osk_atomic_t sub_job_errors; /**< Bitfield with errors (errors for each single sub-job is or'ed together) */
+
+ /*
+ * These members are used by scheduler, but only when no one else
+ * knows about this job object but the working function.
+ * No lock is thus needed for these.
+ */
+ u32 *memory_cookies; /**< Memory cookies attached to job */
+
+ /*
+ * These members are used by the scheduler,
+ * protected by scheduler lock
+ */
+ _mali_osk_list_t list; /**< Used to link jobs together in the scheduler queue */
+ _mali_osk_list_t session_fb_lookup_list; /**< Used to link jobs together from the same frame builder in the session */
+ u32 sub_jobs_started; /**< Total number of sub-jobs started (always started in ascending order) */
+
+ /*
+ * Set by executor/group on job completion, read by scheduler when
+ * returning job to user. Hold executor lock when setting,
+ * no lock needed when reading
+ */
+ u32 perf_counter_value0[_MALI_PP_MAX_SUB_JOBS]; /**< Value of performance counter 0 (to be returned to user space), one for each sub job */
+ u32 perf_counter_value1[_MALI_PP_MAX_SUB_JOBS]; /**< Value of performance counter 1 (to be returned to user space), one for each sub job */
+};
+
+void mali_pp_job_initialize(void);
+void mali_pp_job_terminate(void);
+
+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s __user *uargs, u32 id);
+void mali_pp_job_delete(struct mali_pp_job *job);
+
+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job);
+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job);
+
+void mali_pp_job_set_pp_counter_global_src0(u32 counter);
+void mali_pp_job_set_pp_counter_global_src1(u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter);
+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter);
+
+u32 mali_pp_job_get_pp_counter_global_src0(void);
+u32 mali_pp_job_get_pp_counter_global_src1(void);
+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job);
+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job);
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_id(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (NULL == job) ? 0 : job->id;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_cache_order(struct mali_pp_job *job,
+ u32 cache_order)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ job->cache_order = cache_order;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_cache_order(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (NULL == job) ? 0 : job->cache_order;
+}
+
+MALI_STATIC_INLINE u64 mali_pp_job_get_user_id(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.user_job_ptr;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_frame_builder_id(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.frame_builder_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_flush_id(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.flush_id;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_pid(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->pid;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_tid(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->tid;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_frame_registers(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.frame_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_dlbu_registers(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.dlbu_registers;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_virtual(struct mali_pp_job *job)
+{
+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470))
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (0 == job->uargs.num_cores) ? MALI_TRUE : MALI_FALSE;
+#else
+ return MALI_FALSE;
+#endif
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_frame(struct mali_pp_job *job, u32 sub_job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ if (mali_pp_job_is_virtual(job)) {
+ return MALI_DLBU_VIRT_ADDR;
+ } else if (0 == sub_job) {
+ return job->uargs.frame_registers[MALI200_REG_ADDR_FRAME / sizeof(u32)];
+ } else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+ return job->uargs.frame_registers_addr_frame[sub_job - 1];
+ }
+
+ return 0;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_stack(struct mali_pp_job *job, u32 sub_job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ if (0 == sub_job) {
+ return job->uargs.frame_registers[MALI200_REG_ADDR_STACK / sizeof(u32)];
+ } else if (sub_job < _MALI_PP_MAX_SUB_JOBS) {
+ return job->uargs.frame_registers_addr_stack[sub_job - 1];
+ }
+
+ return 0;
+}
+
+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list);
+
+MALI_STATIC_INLINE void mali_pp_job_list_addtail(struct mali_pp_job *job,
+ _mali_osk_list_t *list)
+{
+ _mali_osk_list_addtail(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_move(struct mali_pp_job *job,
+ _mali_osk_list_t *list)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list));
+ _mali_osk_list_move(&job->list, list);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_list_remove(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ _mali_osk_list_delinit(&job->list);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb0_registers(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.wb0_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb1_registers(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.wb1_registers;
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb2_registers(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.wb2_registers;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb0_source_addr(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb1_source_addr(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_wb2_source_addr(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb0(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb1(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_disable_wb2(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_all_writeback_unit_disabled(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ if (job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+ job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ||
+ job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT]
+ ) {
+ /* At least one output unit active */
+ return MALI_FALSE;
+ }
+
+ /* All outputs are disabled - we can abort the job */
+ return MALI_TRUE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_add(struct mali_pp_job *job)
+{
+ u32 fb_lookup_id;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+ fb_lookup_id = MALI_PP_JOB_FB_LOOKUP_LIST_MASK & job->uargs.frame_builder_id;
+
+ MALI_DEBUG_ASSERT(MALI_PP_JOB_FB_LOOKUP_LIST_SIZE > fb_lookup_id);
+
+ _mali_osk_list_addtail(&job->session_fb_lookup_list,
+ &job->session->pp_job_fb_lookup_list[fb_lookup_id]);
+}
+
+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_remove(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ _mali_osk_list_delinit(&job->session_fb_lookup_list);
+}
+
+MALI_STATIC_INLINE struct mali_session_data *mali_pp_job_get_session(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->session;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_started_sub_jobs(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ return (0 < job->sub_jobs_started) ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_has_unstarted_sub_jobs(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ return (job->sub_jobs_started < job->sub_jobs_num) ? MALI_TRUE : MALI_FALSE;
+}
+
+/* Used when terminating a session with jobs still queued: marks all unstarted
+   sub jobs as failed and makes sure that no new sub jobs are started. */
+MALI_STATIC_INLINE void mali_pp_job_mark_unstarted_failed(struct mali_pp_job *job)
+{
+ u32 jobs_remaining;
+ u32 i;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+ jobs_remaining = job->sub_jobs_num - job->sub_jobs_started;
+ job->sub_jobs_started += jobs_remaining;
+
+ /* Not the most optimal way, but this is only used in error cases */
+ for (i = 0; i < jobs_remaining; i++) {
+ _mali_osk_atomic_inc(&job->sub_jobs_completed);
+ _mali_osk_atomic_inc(&job->sub_job_errors);
+ }
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_complete(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (job->sub_jobs_num ==
+ _mali_osk_atomic_read(&job->sub_jobs_completed)) ?
+ MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_first_unstarted_sub_job(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ return job->sub_jobs_started;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_sub_job_count(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->sub_jobs_num;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_unstarted_sub_job_count(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ MALI_DEBUG_ASSERT(job->sub_jobs_num >= job->sub_jobs_started);
+ return (job->sub_jobs_num - job->sub_jobs_started);
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_num_memory_cookies(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.num_memory_cookies;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_memory_cookie(
+ struct mali_pp_job *job, u32 index)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT(index < job->uargs.num_memory_cookies);
+ MALI_DEBUG_ASSERT_POINTER(job->memory_cookies);
+ return job->memory_cookies[index];
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_needs_dma_buf_mapping(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ if (0 < job->uargs.num_memory_cookies) {
+ return MALI_TRUE;
+ }
+
+ return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_started(struct mali_pp_job *job, u32 sub_job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+
+ /* Assert that we are marking the "first unstarted sub job" as started */
+ MALI_DEBUG_ASSERT(job->sub_jobs_started == sub_job);
+
+ job->sub_jobs_started++;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_completed(struct mali_pp_job *job, mali_bool success)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ _mali_osk_atomic_inc(&job->sub_jobs_completed);
+ if (MALI_FALSE == success) {
+ _mali_osk_atomic_inc(&job->sub_job_errors);
+ }
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_was_success(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ if (0 == _mali_osk_atomic_read(&job->sub_job_errors)) {
+ return MALI_TRUE;
+ }
+ return MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_use_no_notification(
+ struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (job->uargs.flags & _MALI_PP_JOB_FLAG_NO_NOTIFICATION) ?
+ MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_pilot_job(struct mali_pp_job *job)
+{
+ /*
+	 * A pilot job is currently identified as a job which
+	 * requires no callback notification.
+ */
+ return mali_pp_job_use_no_notification(job);
+}
+
+MALI_STATIC_INLINE _mali_osk_notification_t *
+mali_pp_job_get_finished_notification(struct mali_pp_job *job)
+{
+ _mali_osk_notification_t *notification;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_POINTER(job->finished_notification);
+
+ notification = job->finished_notification;
+ job->finished_notification = NULL;
+
+ return notification;
+}
+
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_window_surface(
+ struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (job->uargs.flags & _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE)
+ ? MALI_TRUE : MALI_FALSE;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_flag(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->uargs.perf_counter_flag;
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value0(struct mali_pp_job *job, u32 sub_job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->perf_counter_value0[sub_job];
+}
+
+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value1(struct mali_pp_job *job, u32 sub_job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return job->perf_counter_value1[sub_job];
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value0(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ job->perf_counter_value0[sub_job] = value;
+}
+
+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value1(struct mali_pp_job *job, u32 sub_job, u32 value)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD();
+ job->perf_counter_value1[sub_job] = value;
+}
+
+MALI_STATIC_INLINE _mali_osk_errcode_t mali_pp_job_check(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ if (mali_pp_job_is_virtual(job) && job->sub_jobs_num != 1) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Returns MALI_TRUE if this job has more than two sub jobs and all sub jobs are unstarted.
+ *
+ * @param job Job to check.
+ * @return MALI_TRUE if job has more than two sub jobs and all sub jobs are unstarted, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_pp_job_is_large_and_unstarted(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ MALI_DEBUG_ASSERT(!mali_pp_job_is_virtual(job));
+
+ return (0 == job->sub_jobs_started && 2 < job->sub_jobs_num);
+}
+
+/**
+ * Get PP job's Timeline tracker.
+ *
+ * @param job PP job.
+ * @return Pointer to Timeline tracker for the job.
+ */
+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_pp_job_get_tracker(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return &(job->tracker);
+}
+
+MALI_STATIC_INLINE u32 *mali_pp_job_get_timeline_point_ptr(
+ struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr;
+}
+
+
+#endif /* __MALI_PP_JOB_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_scheduler.c b/drivers/gpu/arm/utgard/common/mali_scheduler.c
new file mode 100644
index 000000000000..5547159db94c
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_scheduler.c
@@ -0,0 +1,1354 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_scheduler.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_profiling.h"
+#include "mali_kernel_utilization.h"
+#include "mali_timeline.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+#include "mali_group.h"
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#endif
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+#include <linux/sched.h>
+#include <trace/events/gpu.h>
+#endif
+
+/*
+ * ---------- static defines/constants ----------
+ */
+
+/*
+ * If dma_buf with map on demand is used, we defer PP job queueing
+ * to a work queue, since mapping the dma_buf attachments might sleep
+ * and jobs can be activated from atomic context.
+ */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE 1
+#endif
+#endif
+
+
+/*
+ * ---------- global variables (exported due to inline functions) ----------
+ */
+
+/* Lock protecting this module */
+_mali_osk_spinlock_irq_t *mali_scheduler_lock_obj = NULL;
+
+/* Queue of jobs to be executed on the GP group */
+struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+struct mali_scheduler_job_queue job_queue_pp;
+
+_mali_osk_atomic_t mali_job_id_autonumber;
+_mali_osk_atomic_t mali_job_cache_order_autonumber;
+
+/*
+ * ---------- static variables ----------
+ */
+
+_mali_osk_wq_work_t *scheduler_wq_pp_job_delete = NULL;
+_mali_osk_spinlock_irq_t *scheduler_pp_job_delete_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_deletion_queue);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static _mali_osk_wq_work_t *scheduler_wq_pp_job_queue = NULL;
+static _mali_osk_spinlock_irq_t *scheduler_pp_job_queue_lock = NULL;
+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_queue_list);
+#endif
+
+/*
+ * ---------- Forward declaration of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+ struct mali_session_data *session, struct mali_gp_job *job);
+static mali_timeline_point mali_scheduler_submit_pp_job(
+ struct mali_session_data *session, struct mali_pp_job *job);
+
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job);
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job);
+
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+ mali_bool success);
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job);
+void mali_scheduler_do_pp_job_delete(void *arg);
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job);
+static void mali_scheduler_do_pp_job_queue(void *arg);
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+/*
+ * ---------- Actual implementation ----------
+ */
+
+_mali_osk_errcode_t mali_scheduler_initialize(void)
+{
+ _mali_osk_atomic_init(&mali_job_id_autonumber, 0);
+ _mali_osk_atomic_init(&mali_job_cache_order_autonumber, 0);
+
+ _MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.normal_pri);
+ _MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.high_pri);
+ job_queue_gp.depth = 0;
+ job_queue_gp.big_job_num = 0;
+
+ _MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.normal_pri);
+ _MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.high_pri);
+ job_queue_pp.depth = 0;
+ job_queue_pp.big_job_num = 0;
+
+ mali_scheduler_lock_obj = _mali_osk_spinlock_irq_init(
+ _MALI_OSK_LOCKFLAG_ORDERED,
+ _MALI_OSK_LOCK_ORDER_SCHEDULER);
+ if (NULL == mali_scheduler_lock_obj) {
+ mali_scheduler_terminate();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ scheduler_wq_pp_job_delete = _mali_osk_wq_create_work(
+ mali_scheduler_do_pp_job_delete, NULL);
+ if (NULL == scheduler_wq_pp_job_delete) {
+ mali_scheduler_terminate();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ scheduler_pp_job_delete_lock = _mali_osk_spinlock_irq_init(
+ _MALI_OSK_LOCKFLAG_ORDERED,
+ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+ if (NULL == scheduler_pp_job_delete_lock) {
+ mali_scheduler_terminate();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+ scheduler_wq_pp_job_queue = _mali_osk_wq_create_work(
+ mali_scheduler_do_pp_job_queue, NULL);
+ if (NULL == scheduler_wq_pp_job_queue) {
+ mali_scheduler_terminate();
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ scheduler_pp_job_queue_lock = _mali_osk_spinlock_irq_init(
+ _MALI_OSK_LOCKFLAG_ORDERED,
+ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED);
+ if (NULL == scheduler_pp_job_queue_lock) {
+ mali_scheduler_terminate();
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_scheduler_terminate(void)
+{
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+ if (NULL != scheduler_pp_job_queue_lock) {
+ _mali_osk_spinlock_irq_term(scheduler_pp_job_queue_lock);
+ scheduler_pp_job_queue_lock = NULL;
+ }
+
+ if (NULL != scheduler_wq_pp_job_queue) {
+ _mali_osk_wq_delete_work(scheduler_wq_pp_job_queue);
+ scheduler_wq_pp_job_queue = NULL;
+ }
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+ if (NULL != scheduler_pp_job_delete_lock) {
+ _mali_osk_spinlock_irq_term(scheduler_pp_job_delete_lock);
+ scheduler_pp_job_delete_lock = NULL;
+ }
+
+ if (NULL != scheduler_wq_pp_job_delete) {
+ _mali_osk_wq_delete_work(scheduler_wq_pp_job_delete);
+ scheduler_wq_pp_job_delete = NULL;
+ }
+
+ if (NULL != mali_scheduler_lock_obj) {
+ _mali_osk_spinlock_irq_term(mali_scheduler_lock_obj);
+ mali_scheduler_lock_obj = NULL;
+ }
+
+ _mali_osk_atomic_term(&mali_job_cache_order_autonumber);
+ _mali_osk_atomic_term(&mali_job_id_autonumber);
+}
+
+u32 mali_scheduler_job_physical_head_count(void)
+{
+ /*
+ * Count how many physical sub jobs are present from the head of the
+ * queue until the first virtual job is found.
+ * Early out when we have reached the maximum number of PP cores (8).
+ */
+ u32 count = 0;
+ struct mali_pp_job *job;
+ struct mali_pp_job *temp;
+
+ /* Check for partially started normal pri jobs */
+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+ job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+ struct mali_pp_job, list);
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) {
+ /*
+ * Remember: virtual jobs can't be queued and started
+ * at the same time, so this must be a physical job
+ */
+ count += mali_pp_job_unstarted_sub_job_count(job);
+ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+ }
+ }
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri,
+ struct mali_pp_job, list) {
+ if (MALI_FALSE == mali_pp_job_is_virtual(job)) {
+ count += mali_pp_job_unstarted_sub_job_count(job);
+ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) {
+ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+ }
+ } else {
+ /* Came across a virtual job, so stop counting */
+ return count;
+ }
+ }
+
+ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri,
+ struct mali_pp_job, list) {
+ if (MALI_FALSE == mali_pp_job_is_virtual(job)) {
+ /* any partially started job is already counted */
+ if (MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) {
+ count += mali_pp_job_unstarted_sub_job_count(job);
+ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <=
+ count) {
+ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS;
+ }
+ }
+ } else {
+ /* Came across a virtual job, so stop counting */
+ return count;
+ }
+ }
+
+ return count;
+}
+
+mali_bool mali_scheduler_job_next_is_virtual(void)
+{
+ struct mali_pp_job *job;
+
+ job = mali_scheduler_job_pp_virtual_peek();
+ if (NULL != job) {
+ MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job));
+
+ return MALI_TRUE;
+ }
+
+ return MALI_FALSE;
+}
+
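+/*
+ * Take the next GP job off the queue. High priority jobs are always
+ * preferred over normal priority ones. The caller must hold the
+ * scheduler lock and the queue must be non-empty.
+ */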
+struct mali_gp_job *mali_scheduler_job_gp_get(void)
+{
+ _mali_osk_list_t *queue;
+ struct mali_gp_job *job = NULL;
+
+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+ MALI_DEBUG_ASSERT(0 < job_queue_gp.depth);
+ MALI_DEBUG_ASSERT(job_queue_gp.big_job_num <= job_queue_gp.depth);
+
+ if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+ queue = &job_queue_gp.high_pri;
+ } else {
+ queue = &job_queue_gp.normal_pri;
+ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(queue));
+ }
+
+ job = _MALI_OSK_LIST_ENTRY(queue->next, struct mali_gp_job, list);
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ mali_gp_job_list_remove(job);
+ job_queue_gp.depth--;
+ if (job->big_job) {
+ job_queue_gp.big_job_num--;
+ if (job_queue_gp.big_job_num < MALI_MAX_PENDING_BIG_JOB) {
+ /* Wake up any process waiting to submit a big job */
+ wait_queue_head_t *wait_queue = mali_session_get_wait_queue();
+ wake_up(wait_queue);
+ }
+ }
+ return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void)
+{
+ struct mali_pp_job *job = NULL;
+ struct mali_pp_job *tmp_job = NULL;
+
+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+ /*
+ * For PP jobs we favour partially started jobs in normal
+ * priority queue over unstarted jobs in high priority queue
+ */
+
+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+ struct mali_pp_job, list);
+ MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+ if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+ job = tmp_job;
+ }
+ }
+
+ if (NULL == job ||
+ MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) {
+ /*
+ * There isn't a partially started job in normal queue, so
+ * look in high priority queue.
+ */
+ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+ struct mali_pp_job, list);
+ MALI_DEBUG_ASSERT(NULL != tmp_job);
+
+ if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) {
+ job = tmp_job;
+ }
+ }
+ }
+
+ return job;
+}
+
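+/*
+ * Peek at the next virtual PP job without dequeueing it, checking
+ * the high priority queue before the normal priority queue.
+ */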
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void)
+{
+ struct mali_pp_job *job = NULL;
+ struct mali_pp_job *tmp_job = NULL;
+
+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next,
+ struct mali_pp_job, list);
+
+ if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+ job = tmp_job;
+ }
+ }
+
+ if (NULL == job) {
+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth);
+
+ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next,
+ struct mali_pp_job, list);
+
+ if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) {
+ job = tmp_job;
+ }
+ }
+ }
+
+ return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job)
+{
+ struct mali_pp_job *job = mali_scheduler_job_pp_physical_peek();
+
+ if (NULL != job) {
+ /* Peek may return NULL, so only assert on an actual job */
+ MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_virtual(job));
+
+ *sub_job = mali_pp_job_get_first_unstarted_sub_job(job);
+
+ mali_pp_job_mark_sub_job_started(job, *sub_job);
+ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(job)) {
+ /* Remove from queue when last sub job has been retrieved */
+ mali_pp_job_list_remove(job);
+ }
+
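+ /* Queue depth counts sub jobs, so only this one sub job is taken out */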
+ job_queue_pp.depth--;
+
+ /*
+ * Job is about to start, so it is no longer
+ * possible to discard WB
+ */
+ mali_pp_job_fb_lookup_remove(job);
+ }
+
+ return job;
+}
+
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void)
+{
+ struct mali_pp_job *job = mali_scheduler_job_pp_virtual_peek();
+
+ if (NULL != job) {
+ /* Peek may return NULL, so only assert on an actual job */
+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_virtual(job));
+ MALI_DEBUG_ASSERT(0 ==
+ mali_pp_job_get_first_unstarted_sub_job(job));
+ MALI_DEBUG_ASSERT(1 ==
+ mali_pp_job_get_sub_job_count(job));
+
+ mali_pp_job_mark_sub_job_started(job, 0);
+
+ mali_pp_job_list_remove(job);
+
+ job_queue_pp.depth--;
+
+ /*
+ * Job is about to start, so it is no longer
+ * possible to discard WB
+ */
+ mali_pp_job_fb_lookup_remove(job);
+ }
+
+ return job;
+}
+
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Timeline activation for job %u (0x%08X).\n",
+ mali_gp_job_get_id(job), job));
+
+ mali_scheduler_lock();
+
+ if (!mali_scheduler_queue_gp_job(job)) {
+ /* Failed to enqueue job, release job (with error) */
+
+ mali_scheduler_unlock();
+
+ mali_timeline_tracker_release(mali_gp_job_get_tracker(job));
+ mali_gp_job_signal_pp_tracker(job, MALI_FALSE);
+
+ /* This will notify user space and close the job object */
+ mali_scheduler_complete_gp_job(job, MALI_FALSE,
+ MALI_TRUE, MALI_FALSE);
+
+ return MALI_SCHEDULER_MASK_EMPTY;
+ }
+
+ mali_scheduler_unlock();
+
+ return MALI_SCHEDULER_MASK_GP;
+}
+
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Timeline activation for job %u (0x%08X).\n",
+ mali_pp_job_get_id(job), job));
+
+ if (MALI_TRUE == mali_timeline_tracker_activation_error(
+ mali_pp_job_get_tracker(job))) {
+ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Job %u (0x%08X) activated with error, aborting.\n",
+ mali_pp_job_get_id(job), job));
+
+ mali_scheduler_lock();
+ mali_pp_job_fb_lookup_remove(job);
+ mali_pp_job_mark_unstarted_failed(job);
+ mali_scheduler_unlock();
+
+ mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+ /* This will notify user space and close the job object */
+ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+ return MALI_SCHEDULER_MASK_EMPTY;
+ }
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+ if (mali_pp_job_needs_dma_buf_mapping(job)) {
+ mali_scheduler_deferred_pp_job_queue(job);
+ return MALI_SCHEDULER_MASK_EMPTY;
+ }
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+ mali_scheduler_lock();
+
+ if (!mali_scheduler_queue_pp_job(job)) {
+ /* Failed to enqueue job, release job (with error) */
+ mali_pp_job_fb_lookup_remove(job);
+ mali_pp_job_mark_unstarted_failed(job);
+ mali_scheduler_unlock();
+
+ mali_timeline_tracker_release(mali_pp_job_get_tracker(job));
+
+ /* This will notify user space and close the job object */
+ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE);
+
+ return MALI_SCHEDULER_MASK_EMPTY;
+ }
+
+ mali_scheduler_unlock();
+ return MALI_SCHEDULER_MASK_PP;
+}
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+ mali_bool success,
+ mali_bool user_notification,
+ mali_bool dequeued)
+{
+ if (user_notification) {
+ mali_scheduler_return_gp_job_to_user(job, success);
+ }
+
+ if (dequeued) {
+ _mali_osk_pm_dev_ref_put();
+
+ if (mali_utilization_enabled()) {
+ mali_utilization_gp_end();
+ }
+ }
+
+ mali_gp_job_delete(job);
+}
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+ u32 num_cores_in_virtual,
+ mali_bool user_notification,
+ mali_bool dequeued)
+{
+ job->user_notification = user_notification;
+ job->num_pp_cores_in_virtual = num_cores_in_virtual;
+
+ if (dequeued) {
+#if defined(CONFIG_MALI_DVFS)
+ if (mali_pp_job_is_window_surface(job)) {
+ struct mali_session_data *session;
+ session = mali_pp_job_get_session(job);
+ mali_session_inc_num_window_jobs(session);
+ }
+#endif
+
+ _mali_osk_pm_dev_ref_put();
+
+ if (mali_utilization_enabled()) {
+ mali_utilization_pp_end();
+ }
+ }
+
+ /* With the ZRAM feature enabled, all PP jobs are forced to use deferred deletion. */
+ mali_scheduler_deferred_pp_job_delete(job);
+}
+
+void mali_scheduler_abort_session(struct mali_session_data *session)
+{
+ struct mali_gp_job *gp_job;
+ struct mali_gp_job *gp_tmp;
+ struct mali_pp_job *pp_job;
+ struct mali_pp_job *pp_tmp;
+ _MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_gp);
+ _MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_pp);
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT(session->is_aborting);
+
+ MALI_DEBUG_PRINT(3, ("Mali scheduler: Aborting all queued jobs from session 0x%08X.\n",
+ session));
+
+ mali_scheduler_lock();
+
+ /* Remove from GP normal priority queue */
+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.normal_pri,
+ struct mali_gp_job, list) {
+ if (mali_gp_job_get_session(gp_job) == session) {
+ mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+ job_queue_gp.depth--;
+ job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+ }
+ }
+
+ /* Remove from GP high priority queue */
+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.high_pri,
+ struct mali_gp_job, list) {
+ if (mali_gp_job_get_session(gp_job) == session) {
+ mali_gp_job_list_move(gp_job, &removed_jobs_gp);
+ job_queue_gp.depth--;
+ job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0;
+ }
+ }
+
+ /* Remove from PP normal priority queue */
+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+ &job_queue_pp.normal_pri,
+ struct mali_pp_job, list) {
+ if (mali_pp_job_get_session(pp_job) == session) {
+ mali_pp_job_fb_lookup_remove(pp_job);
+
+ job_queue_pp.depth -=
+ mali_pp_job_unstarted_sub_job_count(
+ pp_job);
+ mali_pp_job_mark_unstarted_failed(pp_job);
+
+ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+ if (mali_pp_job_is_complete(pp_job)) {
+ mali_pp_job_list_move(pp_job,
+ &removed_jobs_pp);
+ } else {
+ mali_pp_job_list_remove(pp_job);
+ }
+ }
+ }
+ }
+
+ /* Remove from PP high priority queue */
+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp,
+ &job_queue_pp.high_pri,
+ struct mali_pp_job, list) {
+ if (mali_pp_job_get_session(pp_job) == session) {
+ mali_pp_job_fb_lookup_remove(pp_job);
+
+ job_queue_pp.depth -=
+ mali_pp_job_unstarted_sub_job_count(
+ pp_job);
+ mali_pp_job_mark_unstarted_failed(pp_job);
+
+ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) {
+ if (mali_pp_job_is_complete(pp_job)) {
+ mali_pp_job_list_move(pp_job,
+ &removed_jobs_pp);
+ } else {
+ mali_pp_job_list_remove(pp_job);
+ }
+ }
+ }
+ }
+
+ /*
+ * Release scheduler lock so we can release trackers
+ * (which will potentially queue new jobs)
+ */
+ mali_scheduler_unlock();
+
+ /* Release and complete all (non-running) found GP jobs */
+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &removed_jobs_gp,
+ struct mali_gp_job, list) {
+ mali_timeline_tracker_release(mali_gp_job_get_tracker(gp_job));
+ mali_gp_job_signal_pp_tracker(gp_job, MALI_FALSE);
+ _mali_osk_list_delinit(&gp_job->list);
+ mali_scheduler_complete_gp_job(gp_job,
+ MALI_FALSE, MALI_FALSE, MALI_TRUE);
+ }
+
+ /* Release and complete non-running PP jobs */
+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, &removed_jobs_pp,
+ struct mali_pp_job, list) {
+ mali_timeline_tracker_release(mali_pp_job_get_tracker(pp_job));
+ _mali_osk_list_delinit(&pp_job->list);
+ mali_scheduler_complete_pp_job(pp_job, 0,
+ MALI_FALSE, MALI_TRUE);
+ }
+}
+
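+/*
+ * User space entry point for starting a GP job: create the job, submit
+ * it through the Timeline system, and write the resulting timeline
+ * point back to user space.
+ */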
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx,
+ _mali_uk_gp_start_job_s *uargs)
+{
+ struct mali_session_data *session;
+ struct mali_gp_job *job;
+ mali_timeline_point point;
+ u32 __user *point_ptr = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(uargs);
+ MALI_DEBUG_ASSERT_POINTER(ctx);
+
+ session = (struct mali_session_data *)(uintptr_t)ctx;
+
+ job = mali_gp_job_create(session, uargs, mali_scheduler_get_new_id(),
+ NULL);
+ if (NULL == job) {
+ MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ point_ptr = (u32 __user *)(uintptr_t)mali_gp_job_get_timeline_point_ptr(job);
+
+ point = mali_scheduler_submit_gp_job(session, job);
+
+ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+ /*
+ * Let user space know that something failed
+ * after the job was started.
+ */
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx,
+ _mali_uk_pp_start_job_s *uargs)
+{
+ struct mali_session_data *session;
+ struct mali_pp_job *job;
+ mali_timeline_point point;
+ u32 __user *point_ptr = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(uargs);
+ MALI_DEBUG_ASSERT_POINTER(ctx);
+
+ session = (struct mali_session_data *)(uintptr_t)ctx;
+
+ job = mali_pp_job_create(session, uargs, mali_scheduler_get_new_id());
+ if (NULL == job) {
+ MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(job);
+
+ point = mali_scheduler_submit_pp_job(session, job);
+ job = NULL;
+
+ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+ /*
+ * Let user space know that something failed
+ * after the job was started.
+ */
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
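+/*
+ * Submit a GP job and a PP job as one unit. The GP job is created with
+ * the PP job's Timeline tracker, so the PP job is not activated until
+ * the GP job has completed.
+ */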
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx,
+ _mali_uk_pp_and_gp_start_job_s *uargs)
+{
+ struct mali_session_data *session;
+ _mali_uk_pp_and_gp_start_job_s kargs;
+ struct mali_pp_job *pp_job;
+ struct mali_gp_job *gp_job;
+ u32 __user *point_ptr = NULL;
+ mali_timeline_point point;
+ _mali_uk_pp_start_job_s __user *pp_args;
+ _mali_uk_gp_start_job_s __user *gp_args;
+
+ MALI_DEBUG_ASSERT_POINTER(ctx);
+ MALI_DEBUG_ASSERT_POINTER(uargs);
+
+ session = (struct mali_session_data *) ctx;
+
+ if (0 != _mali_osk_copy_from_user(&kargs, uargs,
+ sizeof(_mali_uk_pp_and_gp_start_job_s))) {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ pp_args = (_mali_uk_pp_start_job_s __user *)(uintptr_t)kargs.pp_args;
+ gp_args = (_mali_uk_gp_start_job_s __user *)(uintptr_t)kargs.gp_args;
+
+ pp_job = mali_pp_job_create(session, pp_args,
+ mali_scheduler_get_new_id());
+ if (NULL == pp_job) {
+ MALI_PRINT_ERROR(("Failed to create PP job.\n"));
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ gp_job = mali_gp_job_create(session, gp_args,
+ mali_scheduler_get_new_id(),
+ mali_pp_job_get_tracker(pp_job));
+ if (NULL == gp_job) {
+ MALI_PRINT_ERROR(("Failed to create GP job.\n"));
+ mali_pp_job_delete(pp_job);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(pp_job);
+
+ /* Submit GP job. */
+ mali_scheduler_submit_gp_job(session, gp_job);
+ gp_job = NULL;
+
+ /* Submit PP job. */
+ point = mali_scheduler_submit_pp_job(session, pp_job);
+ pp_job = NULL;
+
+ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) {
+ /*
+ * Let user space know that something failed
+ * after the jobs were started.
+ */
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
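+/*
+ * Disable write-back units on queued PP jobs rendering to the given
+ * frame builder, whose output is known to be discarded. The per-session
+ * frame builder lookup list makes finding the matching jobs cheap.
+ */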
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args)
+{
+ struct mali_session_data *session;
+ struct mali_pp_job *job;
+ struct mali_pp_job *tmp;
+ u32 fb_lookup_id;
+
+ MALI_DEBUG_ASSERT_POINTER(args);
+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx);
+
+ session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+ fb_lookup_id = args->fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK;
+
+ mali_scheduler_lock();
+
+ /* Iterate over all jobs for the given frame builder id. */
+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp,
+ &session->pp_job_fb_lookup_list[fb_lookup_id],
+ struct mali_pp_job, session_fb_lookup_list) {
+ MALI_DEBUG_CODE(u32 disable_mask = 0);
+
+ if (mali_pp_job_get_frame_builder_id(job) !=
+ (u32) args->fb_id) {
+ MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Disable WB mismatching FB.\n"));
+ continue;
+ }
+
+ MALI_DEBUG_CODE(disable_mask |= 0xD << (4 * 3));
+
+ if (mali_pp_job_get_wb0_source_addr(job) == args->wb0_memory) {
+ MALI_DEBUG_CODE(disable_mask |= 0x1 << (4 * 1));
+ mali_pp_job_disable_wb0(job);
+ }
+
+ if (mali_pp_job_get_wb1_source_addr(job) == args->wb1_memory) {
+ MALI_DEBUG_CODE(disable_mask |= 0x2 << (4 * 2));
+ mali_pp_job_disable_wb1(job);
+ }
+
+ if (mali_pp_job_get_wb2_source_addr(job) == args->wb2_memory) {
+ MALI_DEBUG_CODE(disable_mask |= 0x3 << (4 * 3));
+ mali_pp_job_disable_wb2(job);
+ }
+ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Disable WB: 0x%X.\n",
+ disable_mask));
+ }
+
+ mali_scheduler_unlock();
+}
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size)
+{
+ int n = 0;
+
+ n += _mali_osk_snprintf(buf + n, size - n, "GP queues\n");
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tQueue depth: %u\n", job_queue_gp.depth);
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tNormal priority queue is %s\n",
+ _mali_osk_list_empty(&job_queue_gp.normal_pri) ?
+ "empty" : "not empty");
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tHigh priority queue is %s\n",
+ _mali_osk_list_empty(&job_queue_gp.high_pri) ?
+ "empty" : "not empty");
+
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "PP queues\n");
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tQueue depth: %u\n", job_queue_pp.depth);
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tNormal priority queue is %s\n",
+ _mali_osk_list_empty(&job_queue_pp.normal_pri)
+ ? "empty" : "not empty");
+ n += _mali_osk_snprintf(buf + n, size - n,
+ "\tHigh priority queue is %s\n",
+ _mali_osk_list_empty(&job_queue_pp.high_pri)
+ ? "empty" : "not empty");
+
+ n += _mali_osk_snprintf(buf + n, size - n, "\n");
+
+ return n;
+}
+#endif
+
+/*
+ * ---------- Implementation of static functions ----------
+ */
+
+static mali_timeline_point mali_scheduler_submit_gp_job(
+ struct mali_session_data *session, struct mali_gp_job *job)
+{
+ mali_timeline_point point;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ /* Add job to Timeline system. */
+ point = mali_timeline_system_add_tracker(session->timeline_system,
+ mali_gp_job_get_tracker(job), MALI_TIMELINE_GP);
+
+ return point;
+}
+
+static mali_timeline_point mali_scheduler_submit_pp_job(
+ struct mali_session_data *session, struct mali_pp_job *job)
+{
+ mali_timeline_point point;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ mali_scheduler_lock();
+ /*
+ * Add the job to the lookup list used to quickly discard
+ * writeback units of queued jobs.
+ */
+ mali_pp_job_fb_lookup_add(job);
+ mali_scheduler_unlock();
+
+ /* Add job to Timeline system. */
+ point = mali_timeline_system_add_tracker(session->timeline_system,
+ mali_pp_job_get_tracker(job), MALI_TIMELINE_PP);
+
+ return point;
+}
+
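+/*
+ * Put a GP job on the session's priority queue. Returns MALI_FALSE
+ * (job not queued) if the session is aborting.
+ */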
+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job)
+{
+ struct mali_session_data *session;
+ _mali_osk_list_t *queue;
+
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ session = mali_gp_job_get_session(job);
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (unlikely(session->is_aborting)) {
+ MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+ mali_gp_job_get_id(job), job));
+ return MALI_FALSE; /* job not queued */
+ }
+
+ mali_gp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+ /* Determine which queue the job should be added to. */
+ if (session->use_high_priority_job_queue) {
+ queue = &job_queue_gp.high_pri;
+ } else {
+ queue = &job_queue_gp.normal_pri;
+ }
+
+ job_queue_gp.depth += 1;
+ job_queue_gp.big_job_num += (job->big_job) ? 1 : 0;
+
+ /* Add job to queue (mali_gp_job_list_add finds the correct place). */
+ mali_gp_job_list_add(job, queue);
+
+ /*
+ * We hold a PM reference for every job we have queued (and running).
+ * It is important that we take this reference after the job has been
+ * added to the queue, so that any runtime resume can schedule this
+ * job right there and then.
+ */
+ _mali_osk_pm_dev_ref_get_async();
+
+ if (mali_utilization_enabled()) {
+ /*
+ * We cheat a little bit by counting the GP as busy from the
+ * time a GP job is queued. This is fine because we only
+ * lose the tiny idle gap between jobs, and in return we
+ * get less utilization work to do (fewer locks taken).
+ */
+ mali_utilization_gp_start();
+ }
+
+ /* Add profiling event for the enqueued job */
+ _mali_osk_profiling_add_event(
+ MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE,
+ mali_gp_job_get_pid(job),
+ mali_gp_job_get_tid(job),
+ mali_gp_job_get_frame_builder_id(job),
+ mali_gp_job_get_flush_id(job),
+ 0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+ trace_gpu_job_enqueue(mali_gp_job_get_tid(job),
+ mali_gp_job_get_id(job), "GP");
+#endif
+
+ MALI_DEBUG_PRINT(3, ("Mali GP scheduler: Job %u (0x%08X) queued\n",
+ mali_gp_job_get_id(job), job));
+
+ return MALI_TRUE; /* job queued */
+}
+
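+/*
+ * Put a PP job on the session's priority queue. Returns MALI_FALSE
+ * (job not queued) if the session is aborting or swap in has failed.
+ */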
+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job)
+{
+ struct mali_session_data *session;
+ _mali_osk_list_t *queue = NULL;
+
+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD();
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ session = mali_pp_job_get_session(job);
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (unlikely(session->is_aborting)) {
+ MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while session is aborting.\n",
+ mali_pp_job_get_id(job), job));
+ return MALI_FALSE; /* job not queued */
+ } else if (unlikely(MALI_SWAP_IN_FAIL == job->swap_status)) {
+ MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while swap in failed.\n",
+ mali_pp_job_get_id(job), job));
+ return MALI_FALSE;
+ }
+
+ mali_pp_job_set_cache_order(job, mali_scheduler_get_new_cache_order());
+
+ if (session->use_high_priority_job_queue) {
+ queue = &job_queue_pp.high_pri;
+ } else {
+ queue = &job_queue_pp.normal_pri;
+ }
+
+ job_queue_pp.depth +=
+ mali_pp_job_get_sub_job_count(job);
+
+ /* Add job to queue (mali_pp_job_list_add finds the correct place). */
+ mali_pp_job_list_add(job, queue);
+
+ /*
+ * We hold a PM reference for every job we have queued (and running).
+ * It is important that we take this reference after the job has been
+ * added to the queue, so that any runtime resume can schedule this
+ * job right there and then.
+ */
+ _mali_osk_pm_dev_ref_get_async();
+
+ if (mali_utilization_enabled()) {
+ /*
+ * We cheat a little bit by counting the PP as busy from the
+ * time a PP job is queued. This is fine because we only
+ * lose the tiny idle gap between jobs, and in return we
+ * get less utilization work to do (fewer locks taken).
+ */
+ mali_utilization_pp_start();
+ }
+
+ /* Add profiling event for the enqueued job */
+
+ _mali_osk_profiling_add_event(
+ MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE,
+ mali_pp_job_get_pid(job),
+ mali_pp_job_get_tid(job),
+ mali_pp_job_get_frame_builder_id(job),
+ mali_pp_job_get_flush_id(job),
+ 0);
+
+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS)
+ trace_gpu_job_enqueue(mali_pp_job_get_tid(job),
+ mali_pp_job_get_id(job), "PP");
+#endif
+
+ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: %s job %u (0x%08X) with %u parts queued.\n",
+ mali_pp_job_is_virtual(job)
+ ? "Virtual" : "Physical",
+ mali_pp_job_get_id(job), job,
+ mali_pp_job_get_sub_job_count(job)));
+
+ return MALI_TRUE; /* job queued */
+}
+
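+/*
+ * Fill in the finished notification with the GP job result and send
+ * it to the session's ioctl queue.
+ */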
+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job,
+ mali_bool success)
+{
+ _mali_uk_gp_job_finished_s *jobres;
+ struct mali_session_data *session;
+ _mali_osk_notification_t *notification;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ session = mali_gp_job_get_session(job);
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ notification = mali_gp_job_get_finished_notification(job);
+ MALI_DEBUG_ASSERT_POINTER(notification);
+
+ jobres = notification->result_buffer;
+ MALI_DEBUG_ASSERT_POINTER(jobres);
+
+ jobres->pending_big_job_num = mali_scheduler_job_gp_big_job_count();
+
+ jobres->user_job_ptr = mali_gp_job_get_user_id(job);
+ if (MALI_TRUE == success) {
+ jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+ } else {
+ jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+ }
+ jobres->heap_current_addr = mali_gp_job_get_current_heap_addr(job);
+ jobres->perf_counter0 = mali_gp_job_get_perf_counter_value0(job);
+ jobres->perf_counter1 = mali_gp_job_get_perf_counter_value1(job);
+
+ mali_session_send_notification(session, notification);
+}
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+ u32 num_cores_in_virtual)
+{
+ u32 i;
+ u32 num_counters_to_copy;
+ _mali_uk_pp_job_finished_s *jobres;
+ struct mali_session_data *session;
+ _mali_osk_notification_t *notification;
+
+ if (MALI_TRUE == mali_pp_job_use_no_notification(job)) {
+ return;
+ }
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ session = mali_pp_job_get_session(job);
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ notification = mali_pp_job_get_finished_notification(job);
+ MALI_DEBUG_ASSERT_POINTER(notification);
+
+ jobres = notification->result_buffer;
+ MALI_DEBUG_ASSERT_POINTER(jobres);
+
+ jobres->user_job_ptr = mali_pp_job_get_user_id(job);
+ if (MALI_TRUE == mali_pp_job_was_success(job)) {
+ jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS;
+ } else {
+ jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR;
+ }
+
+ if (mali_pp_job_is_virtual(job)) {
+ num_counters_to_copy = num_cores_in_virtual;
+ } else {
+ num_counters_to_copy = mali_pp_job_get_sub_job_count(job);
+ }
+
+ for (i = 0; i < num_counters_to_copy; i++) {
+ jobres->perf_counter0[i] =
+ mali_pp_job_get_perf_counter_value0(job, i);
+ jobres->perf_counter1[i] =
+ mali_pp_job_get_perf_counter_value1(job, i);
+ jobres->perf_counter_src0 =
+ mali_pp_job_get_pp_counter_global_src0();
+ jobres->perf_counter_src1 =
+ mali_pp_job_get_pp_counter_global_src1();
+ }
+
+ mali_session_send_notification(session, notification);
+}
+
+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ _mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+ mali_pp_job_list_addtail(job, &scheduler_pp_job_deletion_queue);
+ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+ _mali_osk_wq_schedule_work(scheduler_wq_pp_job_delete);
+}
+
+void mali_scheduler_do_pp_job_delete(void *arg)
+{
+ _MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+ struct mali_pp_job *job;
+ struct mali_pp_job *tmp;
+
+ MALI_IGNORE(arg);
+
+ /*
+ * Quickly "unhook" the jobs pending to be deleted, so we can release
+ * the lock before we start deleting the job objects
+ * (without any locks held)
+ */
+ _mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock);
+ _mali_osk_list_move_list(&scheduler_pp_job_deletion_queue, &list);
+ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock);
+
+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+ struct mali_pp_job, list) {
+ _mali_osk_list_delinit(&job->list);
+
+ mali_pp_job_delete(job); /* delete the job object itself */
+ }
+}
+
+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE)
+
+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ _mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+ mali_pp_job_list_addtail(job, &scheduler_pp_job_queue_list);
+ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+ _mali_osk_wq_schedule_work(scheduler_wq_pp_job_queue);
+}
+
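+/*
+ * Work queue handler: first map dma_bufs for the deferred jobs without
+ * holding any locks, then queue the jobs under the scheduler lock.
+ */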
+static void mali_scheduler_do_pp_job_queue(void *arg)
+{
+ _MALI_OSK_LIST_HEAD_STATIC_INIT(list);
+ struct mali_pp_job *job;
+ struct mali_pp_job *tmp;
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_IGNORE(arg);
+
+ /*
+ * Quickly "unhook" the jobs pending to be queued, so we can release
+ * the lock before we start queueing the job objects
+ * (without any locks held)
+ */
+ _mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock);
+ _mali_osk_list_move_list(&scheduler_pp_job_queue_list, &list);
+ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock);
+
+ /* First loop through all jobs and do the pre-work (no locks needed) */
+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+ struct mali_pp_job, list) {
+ if (mali_pp_job_needs_dma_buf_mapping(job)) {
+ /*
+ * This operation could fail, but we continue anyway,
+ * because the worst that could happen is that this
+ * job will fail due to a Mali page fault.
+ */
+ mali_dma_buf_map_job(job);
+ }
+ }
+
+ mali_scheduler_lock();
+
+ /* Then loop through all jobs again to queue them (lock needed) */
+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list,
+ struct mali_pp_job, list) {
+
+ /* Remove from scheduler_pp_job_queue_list before queueing */
+ mali_pp_job_list_remove(job);
+
+ if (mali_scheduler_queue_pp_job(job)) {
+ /* Job queued successfully */
+ schedule_mask |= MALI_SCHEDULER_MASK_PP;
+ } else {
+ /* Failed to enqueue job, release job (with error) */
+ mali_pp_job_fb_lookup_remove(job);
+ mali_pp_job_mark_unstarted_failed(job);
+
+ /* unlock scheduler in this uncommon case */
+ mali_scheduler_unlock();
+
+ schedule_mask |= mali_timeline_tracker_release(
+ mali_pp_job_get_tracker(job));
+
+ /* Notify user space and close the job object */
+ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE,
+ MALI_FALSE);
+
+ mali_scheduler_lock();
+ }
+ }
+
+ mali_scheduler_unlock();
+
+ /* Trigger scheduling of jobs */
+ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */
+
+void mali_scheduler_gp_pp_job_queue_print(void)
+{
+ struct mali_gp_job *gp_job = NULL;
+ struct mali_gp_job *tmp_gp_job = NULL;
+ struct mali_pp_job *pp_job = NULL;
+ struct mali_pp_job *tmp_pp_job = NULL;
+
+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj);
+
+ /* dump job queue status */
+ if ((0 == job_queue_gp.depth) && (0 == job_queue_pp.depth)) {
+ MALI_PRINT(("No GP or PP jobs in the job queue.\n"));
+ return;
+ }
+
+ MALI_PRINT(("Total (%d) GP job in the job queue.\n", job_queue_gp.depth));
+ if (job_queue_gp.depth > 0) {
+ if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) {
+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.high_pri,
+ struct mali_gp_job, list) {
+ MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job high_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+ }
+ }
+
+ if (!_mali_osk_list_empty(&job_queue_gp.normal_pri)) {
+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.normal_pri,
+ struct mali_gp_job, list) {
+ MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job normal_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid));
+ }
+ }
+ }
+
+ MALI_PRINT(("Total (%d) PP job in the job queue.\n", job_queue_pp.depth));
+ if (job_queue_pp.depth > 0) {
+ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) {
+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.high_pri,
+ struct mali_pp_job, list) {
+ if (mali_pp_job_is_virtual(pp_job)) {
+ MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+ } else {
+ MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+ }
+ }
+ }
+
+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) {
+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.normal_pri,
+ struct mali_pp_job, list) {
+ if (mali_pp_job_is_virtual(pp_job)) {
+ MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+ } else {
+ MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid));
+ }
+ }
+ }
+ }
+
+ /* dump group running job status */
+ mali_executor_running_status_print();
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_scheduler.h b/drivers/gpu/arm/utgard/common/mali_scheduler.h
new file mode 100644
index 000000000000..f24cf42b8a79
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_scheduler.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_H__
+#define __MALI_SCHEDULER_H__
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_scheduler_types.h"
+#include "mali_session.h"
+
+struct mali_scheduler_job_queue {
+ _MALI_OSK_LIST_HEAD(normal_pri); /* Queued jobs with normal priority */
+ _MALI_OSK_LIST_HEAD(high_pri); /* Queued jobs with high priority */
+ u32 depth; /* Depth of combined queues; for PP, counted in sub jobs. */
+ u32 big_job_num; /* Number of queued big jobs (used for the GP queue only). */
+};
+
+extern _mali_osk_spinlock_irq_t *mali_scheduler_lock_obj;
+
+/* Queue of jobs to be executed on the GP group */
+extern struct mali_scheduler_job_queue job_queue_gp;
+
+/* Queue of PP jobs */
+extern struct mali_scheduler_job_queue job_queue_pp;
+
+extern _mali_osk_atomic_t mali_job_id_autonumber;
+extern _mali_osk_atomic_t mali_job_cache_order_autonumber;
+
+#define MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj);
+
+_mali_osk_errcode_t mali_scheduler_initialize(void);
+void mali_scheduler_terminate(void);
+
+MALI_STATIC_INLINE void mali_scheduler_lock(void)
+{
+ _mali_osk_spinlock_irq_lock(mali_scheduler_lock_obj);
+ MALI_DEBUG_PRINT(5, ("Mali scheduler: scheduler lock taken.\n"));
+}
+
+MALI_STATIC_INLINE void mali_scheduler_unlock(void)
+{
+ MALI_DEBUG_PRINT(5, ("Mali scheduler: Releasing scheduler lock.\n"));
+ _mali_osk_spinlock_irq_unlock(mali_scheduler_lock_obj);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_count(void)
+{
+ return job_queue_gp.depth;
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_big_job_count(void)
+{
+ return job_queue_gp.big_job_num;
+}
+
+u32 mali_scheduler_job_physical_head_count(void);
+
+mali_bool mali_scheduler_job_next_is_virtual(void);
+
+struct mali_gp_job *mali_scheduler_job_gp_get(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void);
+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job);
+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void);
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_id(void)
+{
+ return _mali_osk_atomic_inc_return(&mali_job_id_autonumber);
+}
+
+MALI_STATIC_INLINE u32 mali_scheduler_get_new_cache_order(void)
+{
+ return _mali_osk_atomic_inc_return(&mali_job_cache_order_autonumber);
+}
+
+/**
+ * @brief Used by the Timeline system to queue a GP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The GP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job);
+
+/**
+ * @brief Used by the Timeline system to queue a PP job.
+ *
+ * @note @ref mali_executor_schedule_from_mask() should be called if this
+ * function returns non-zero.
+ *
+ * @param job The PP job that is being activated.
+ *
+ * @return A scheduling bitmask that can be used to decide if scheduling is
+ * necessary after this call.
+ */
+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job);
+
+void mali_scheduler_complete_gp_job(struct mali_gp_job *job,
+ mali_bool success,
+ mali_bool user_notification,
+ mali_bool dequeued);
+
+void mali_scheduler_complete_pp_job(struct mali_pp_job *job,
+ u32 num_cores_in_virtual,
+ mali_bool user_notification,
+ mali_bool dequeued);
+
+void mali_scheduler_abort_session(struct mali_session_data *session);
+
+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job,
+ u32 num_cores_in_virtual);
+
+#if MALI_STATE_TRACKING
+u32 mali_scheduler_dump_state(char *buf, u32 size);
+#endif
+
+void mali_scheduler_gp_pp_job_queue_print(void);
+
+#endif /* __MALI_SCHEDULER_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_scheduler_types.h b/drivers/gpu/arm/utgard/common/mali_scheduler_types.h
new file mode 100644
index 000000000000..f862961d146e
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_scheduler_types.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_SCHEDULER_TYPES_H__
+#define __MALI_SCHEDULER_TYPES_H__
+
+#include "mali_osk.h"
+
+#define MALI_SCHEDULER_JOB_ID_SPAN 65535
+
+/**
+ * Bitmask used for deferred scheduling of subsystems.
+ */
+typedef u32 mali_scheduler_mask;
+
+#define MALI_SCHEDULER_MASK_GP (1<<0)
+#define MALI_SCHEDULER_MASK_PP (1<<1)
+
+#define MALI_SCHEDULER_MASK_EMPTY 0
+#define MALI_SCHEDULER_MASK_ALL (MALI_SCHEDULER_MASK_GP | MALI_SCHEDULER_MASK_PP)
+
+#endif /* __MALI_SCHEDULER_TYPES_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_session.c b/drivers/gpu/arm/utgard/common/mali_session.c
new file mode 100644
index 000000000000..e0a2805b13d3
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_session.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_session.h"
+#include "mali_ukk.h"
+#ifdef MALI_MEM_SWAP_TRACKING
+#include "mali_memory_swap_alloc.h"
+#endif
+
+_MALI_OSK_LIST_HEAD(mali_sessions);
+static u32 mali_session_count = 0;
+
+_mali_osk_spinlock_irq_t *mali_sessions_lock = NULL;
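+/* Processes submitting big GP jobs wait here while too many are pending */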
+wait_queue_head_t pending_queue;
+
+_mali_osk_errcode_t mali_session_initialize(void)
+{
+ _MALI_OSK_INIT_LIST_HEAD(&mali_sessions);
+ /* init wait queue for big varying jobs */
+ init_waitqueue_head(&pending_queue);
+
+ mali_sessions_lock = _mali_osk_spinlock_irq_init(
+ _MALI_OSK_LOCKFLAG_ORDERED,
+ _MALI_OSK_LOCK_ORDER_SESSIONS);
+ if (NULL == mali_sessions_lock) {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_session_terminate(void)
+{
+ if (NULL != mali_sessions_lock) {
+ _mali_osk_spinlock_irq_term(mali_sessions_lock);
+ mali_sessions_lock = NULL;
+ }
+}
+
+void mali_session_add(struct mali_session_data *session)
+{
+ mali_session_lock();
+ _mali_osk_list_add(&session->link, &mali_sessions);
+ mali_session_count++;
+ mali_session_unlock();
+}
+
+void mali_session_remove(struct mali_session_data *session)
+{
+ mali_session_lock();
+ _mali_osk_list_delinit(&session->link);
+ mali_session_count--;
+ mali_session_unlock();
+}
+
+u32 mali_session_get_count(void)
+{
+ return mali_session_count;
+}
+
+wait_queue_head_t *mali_session_get_wait_queue(void)
+{
+ return &pending_queue;
+}
+
+/*
+ * Get the max number of completed window jobs among all active sessions,
+ * which is used in the window-render frames-per-second calculation.
+ */
+#if defined(CONFIG_MALI_DVFS)
+u32 mali_session_max_window_num(void)
+{
+ struct mali_session_data *session, *tmp;
+ u32 max_window_num = 0;
+ u32 tmp_number = 0;
+
+ mali_session_lock();
+
+ MALI_SESSION_FOREACH(session, tmp, link) {
+ tmp_number = _mali_osk_atomic_xchg(
+ &session->number_of_window_jobs, 0);
+ if (max_window_num < tmp_number) {
+ max_window_num = tmp_number;
+ }
+ }
+
+ mali_session_unlock();
+
+ return max_window_num;
+}
+#endif
+
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx)
+{
+ struct mali_session_data *session, *tmp;
+ u32 mali_mem_usage;
+ u32 total_mali_mem_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+ u32 swap_pool_size;
+ u32 swap_unlock_size;
+#endif
+
+ MALI_DEBUG_ASSERT_POINTER(print_ctx);
+ mali_session_lock();
+ MALI_SESSION_FOREACH(session, tmp, link) {
+#ifdef MALI_MEM_SWAP_TRACKING
+ _mali_osk_ctxprintf(print_ctx, " %-25s %-10u %-10u %-15u %-15u %-10u %-10u %-10u\n",
+ session->comm, session->pid,
+ (atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE,
+ session->max_mali_mem_allocated_size,
+ (atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE,
+ (atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE,
+ (atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE,
+ (atomic_read(&session->mali_mem_array[MALI_MEM_SWAP])) * _MALI_OSK_MALI_PAGE_SIZE
+ );
+#else
+ _mali_osk_ctxprintf(print_ctx, " %-25s %-10u %-10u %-15u %-15u %-10u %-10u \n",
+ session->comm, session->pid,
+ (atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE,
+ session->max_mali_mem_allocated_size,
+ (atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE,
+ (atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE,
+ (atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE
+ );
+#endif
+ }
+ mali_session_unlock();
+ mali_mem_usage = _mali_ukk_report_memory_usage();
+ total_mali_mem_size = _mali_ukk_report_total_memory_size();
+ _mali_osk_ctxprintf(print_ctx, "Mali mem usage: %u\nMali mem limit: %u\n", mali_mem_usage, total_mali_mem_size);
+#ifdef MALI_MEM_SWAP_TRACKING
+ mali_mem_swap_tracking(&swap_pool_size, &swap_unlock_size);
+ _mali_osk_ctxprintf(print_ctx, "Mali swap mem pool : %u\nMali swap mem unlock: %u\n", swap_pool_size, swap_unlock_size);
+#endif
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_session.h b/drivers/gpu/arm/utgard/common/mali_session.h
new file mode 100644
index 000000000000..6791b2b5f110
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_session.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_SESSION_H__
+#define __MALI_SESSION_H__
+
+#include "mali_mmu_page_directory.h"
+#include "mali_osk.h"
+#include "mali_osk_list.h"
+#include "mali_memory_types.h"
+#include "mali_memory_manager.h"
+
+struct mali_timeline_system;
+struct mali_soft_system;
+
+/* Number of frame builder job lists per session. */
+#define MALI_PP_JOB_FB_LOOKUP_LIST_SIZE 16
+#define MALI_PP_JOB_FB_LOOKUP_LIST_MASK (MALI_PP_JOB_FB_LOOKUP_LIST_SIZE - 1)
+/* Max number of pending big jobs allowed in the kernel */
+#define MALI_MAX_PENDING_BIG_JOB (2)
+
+struct mali_session_data {
+ _mali_osk_notification_queue_t *ioctl_queue;
+
+ _mali_osk_mutex_t *memory_lock; /**< Lock protecting the vm manipulation */
+#if 0
+ _mali_osk_list_t memory_head; /**< Track all the memory allocated in this session, for freeing on abnormal termination */
+#endif
+ struct mali_page_directory *page_directory; /**< MMU page directory for this session */
+
+ _MALI_OSK_LIST_HEAD(link); /**< Link for list of all sessions */
+ _MALI_OSK_LIST_HEAD(pp_job_list); /**< List of all PP jobs on this session */
+
+#if defined(CONFIG_MALI_DVFS)
+ _mali_osk_atomic_t number_of_window_jobs; /**< Record the window jobs completed on this session in a period */
+#endif
+
+ _mali_osk_list_t pp_job_fb_lookup_list[MALI_PP_JOB_FB_LOOKUP_LIST_SIZE]; /**< List of PP job lists per frame builder id. Used to link jobs from same frame builder. */
+
+ struct mali_soft_job_system *soft_job_system; /**< Soft job system for this session. */
+ struct mali_timeline_system *timeline_system; /**< Timeline system for this session. */
+
+ mali_bool is_aborting; /**< MALI_TRUE if the session is aborting, MALI_FALSE if not. */
+ mali_bool use_high_priority_job_queue; /**< If MALI_TRUE, jobs added from this session will use the high priority job queues. */
+ u32 pid;
+ char *comm;
+ atomic_t mali_mem_array[MALI_MEM_TYPE_MAX]; /**< The array to record mem types' usage for this session. */
+ atomic_t mali_mem_allocated_pages; /**< The currently allocated mali memory pages, including mali os memory and mali dedicated memory. */
+ size_t max_mali_mem_allocated_size; /**< The peak mali memory allocated size so far, including mali os memory and mali dedicated memory. */
+ /* Added for the new memory system */
+ struct mali_allocation_manager allocation_mgr;
+};
+
+_mali_osk_errcode_t mali_session_initialize(void);
+void mali_session_terminate(void);
+
+/* List of all sessions. Actual list head in mali_kernel_core.c */
+extern _mali_osk_list_t mali_sessions;
+/* Lock to protect modification and access to the mali_sessions list */
+extern _mali_osk_spinlock_irq_t *mali_sessions_lock;
+
+MALI_STATIC_INLINE void mali_session_lock(void)
+{
+ _mali_osk_spinlock_irq_lock(mali_sessions_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_unlock(void)
+{
+ _mali_osk_spinlock_irq_unlock(mali_sessions_lock);
+}
+
+void mali_session_add(struct mali_session_data *session);
+void mali_session_remove(struct mali_session_data *session);
+u32 mali_session_get_count(void);
+wait_queue_head_t *mali_session_get_wait_queue(void);
+
+#define MALI_SESSION_FOREACH(session, tmp, link) \
+ _MALI_OSK_LIST_FOREACHENTRY(session, tmp, &mali_sessions, struct mali_session_data, link)
+
+MALI_STATIC_INLINE struct mali_page_directory *mali_session_get_page_directory(struct mali_session_data *session)
+{
+ return session->page_directory;
+}
+
+MALI_STATIC_INLINE void mali_session_memory_lock(struct mali_session_data *session)
+{
+ MALI_DEBUG_ASSERT_POINTER(session);
+ _mali_osk_mutex_wait(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_memory_unlock(struct mali_session_data *session)
+{
+ MALI_DEBUG_ASSERT_POINTER(session);
+ _mali_osk_mutex_signal(session->memory_lock);
+}
+
+MALI_STATIC_INLINE void mali_session_send_notification(struct mali_session_data *session, _mali_osk_notification_t *object)
+{
+ _mali_osk_notification_queue_send(session->ioctl_queue, object);
+}
+
+#if defined(CONFIG_MALI_DVFS)
+
+MALI_STATIC_INLINE void mali_session_inc_num_window_jobs(struct mali_session_data *session)
+{
+ MALI_DEBUG_ASSERT_POINTER(session);
+ _mali_osk_atomic_inc(&session->number_of_window_jobs);
+}
+
+/*
+ * Get the maximum number of completed window jobs across all active
+ * sessions; used when calculating window render frames per second.
+ */
+u32 mali_session_max_window_num(void);
+
+#endif
+
+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx);
+
+#endif /* __MALI_SESSION_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_soft_job.c b/drivers/gpu/arm/utgard/common/mali_soft_job.c
new file mode 100644
index 000000000000..36ac982e1df0
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_soft_job.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_soft_job.h"
+#include "mali_osk.h"
+#include "mali_timeline.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_scheduler.h"
+#include "mali_executor.h"
+
+MALI_STATIC_INLINE void mali_soft_job_system_lock(struct mali_soft_job_system *system)
+{
+ MALI_DEBUG_ASSERT_POINTER(system);
+ _mali_osk_spinlock_irq_lock(system->lock);
+ MALI_DEBUG_PRINT(5, ("Mali Soft Job: soft system %p lock taken\n", system));
+ MALI_DEBUG_ASSERT(0 == system->lock_owner);
+ MALI_DEBUG_CODE(system->lock_owner = _mali_osk_get_tid());
+}
+
+MALI_STATIC_INLINE void mali_soft_job_system_unlock(struct mali_soft_job_system *system)
+{
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_PRINT(5, ("Mali Soft Job: releasing soft system %p lock\n", system));
+ MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+ MALI_DEBUG_CODE(system->lock_owner = 0);
+ _mali_osk_spinlock_irq_unlock(system->lock);
+}
+
+#if defined(DEBUG)
+MALI_STATIC_INLINE void mali_soft_job_system_assert_locked(struct mali_soft_job_system *system)
+{
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner);
+}
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) mali_soft_job_system_assert_locked(system)
+#else
+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system)
+#endif /* defined(DEBUG) */
+
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session)
+{
+ struct mali_soft_job_system *system;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ system = (struct mali_soft_job_system *) _mali_osk_calloc(1, sizeof(struct mali_soft_job_system));
+ if (NULL == system) {
+ return NULL;
+ }
+
+ system->session = session;
+
+ system->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_SCHEDULER);
+ if (NULL == system->lock) {
+ mali_soft_job_system_destroy(system);
+ return NULL;
+ }
+ system->lock_owner = 0;
+ system->last_job_id = 0;
+
+ _MALI_OSK_INIT_LIST_HEAD(&(system->jobs_used));
+
+ return system;
+}
+
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system)
+{
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ /* All jobs should be free at this point. */
+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&(system->jobs_used)));
+
+ if (NULL != system) {
+ if (NULL != system->lock) {
+ _mali_osk_spinlock_irq_term(system->lock);
+ }
+ _mali_osk_free(system);
+ }
+}
+
+static void mali_soft_job_system_free_job(struct mali_soft_job_system *system, struct mali_soft_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ mali_soft_job_system_lock(job->system);
+
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+ MALI_DEBUG_ASSERT(system == job->system);
+
+ _mali_osk_list_del(&(job->system_list));
+
+ mali_soft_job_system_unlock(job->system);
+
+ _mali_osk_free(job);
+}
+
+MALI_STATIC_INLINE struct mali_soft_job *mali_soft_job_system_lookup_job(struct mali_soft_job_system *system, u32 job_id)
+{
+ struct mali_soft_job *job, *tmp;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system);
+
+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+ if (job->id == job_id)
+ return job;
+ }
+
+ return NULL;
+}
+
+void mali_soft_job_destroy(struct mali_soft_job *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_POINTER(job->system);
+
+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: destroying soft job %u (0x%08X)\n", job->id, job));
+
+ if (NULL != job) {
+ if (0 < _mali_osk_atomic_dec_return(&job->refcount)) return;
+
+ _mali_osk_atomic_term(&job->refcount);
+
+ if (NULL != job->activated_notification) {
+ _mali_osk_notification_delete(job->activated_notification);
+ job->activated_notification = NULL;
+ }
+
+ mali_soft_job_system_free_job(job->system, job);
+ }
+}
+
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job)
+{
+ struct mali_soft_job *job;
+ _mali_osk_notification_t *notification = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT((MALI_SOFT_JOB_TYPE_USER_SIGNALED == type) ||
+ (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == type));
+
+ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_SOFT_ACTIVATED, sizeof(_mali_uk_soft_job_activated_s));
+ if (unlikely(NULL == notification)) {
+		MALI_PRINT_ERROR(("Mali Soft Job: failed to allocate notification\n"));
+ return NULL;
+ }
+
+	job = _mali_osk_malloc(sizeof(struct mali_soft_job));
+	if (unlikely(NULL == job)) {
+		MALI_DEBUG_PRINT(2, ("Mali Soft Job: failed to allocate job\n"));
+		/* Release the notification allocated above so it is not leaked. */
+		_mali_osk_notification_delete(notification);
+		return NULL;
+	}
+
+ mali_soft_job_system_lock(system);
+
+ job->system = system;
+ job->id = system->last_job_id++;
+ job->state = MALI_SOFT_JOB_STATE_ALLOCATED;
+
+ _mali_osk_list_add(&(job->system_list), &(system->jobs_used));
+
+ job->type = type;
+ job->user_job = user_job;
+ job->activated = MALI_FALSE;
+
+ job->activated_notification = notification;
+
+ _mali_osk_atomic_init(&job->refcount, 1);
+
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+ MALI_DEBUG_ASSERT(system == job->system);
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id);
+
+ mali_soft_job_system_unlock(system);
+
+ return job;
+}
+
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence)
+{
+ mali_timeline_point point;
+ struct mali_soft_job_system *system;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_POINTER(fence);
+
+ MALI_DEBUG_ASSERT_POINTER(job->system);
+ system = job->system;
+
+ MALI_DEBUG_ASSERT_POINTER(system->session);
+ MALI_DEBUG_ASSERT_POINTER(system->session->timeline_system);
+
+ mali_soft_job_system_lock(system);
+
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state);
+ job->state = MALI_SOFT_JOB_STATE_STARTED;
+
+ mali_soft_job_system_unlock(system);
+
+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: starting soft job %u (0x%08X)\n", job->id, job));
+
+ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job);
+ point = mali_timeline_system_add_tracker(system->session->timeline_system, &job->tracker, MALI_TIMELINE_SOFT);
+
+ return point;
+}
+
+static mali_bool mali_soft_job_is_activated(void *data)
+{
+ struct mali_soft_job *job;
+
+ job = (struct mali_soft_job *) data;
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ return job->activated;
+}
+
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id)
+{
+ struct mali_soft_job *job;
+ struct mali_timeline_system *timeline_system;
+ mali_scheduler_mask schedule_mask;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ mali_soft_job_system_lock(system);
+
+ job = mali_soft_job_system_lookup_job(system, job_id);
+
+ if ((NULL == job) || (MALI_SOFT_JOB_TYPE_USER_SIGNALED != job->type)
+ || !(MALI_SOFT_JOB_STATE_STARTED == job->state || MALI_SOFT_JOB_STATE_TIMED_OUT == job->state)) {
+ mali_soft_job_system_unlock(system);
+		MALI_PRINT_ERROR(("Mali Soft Job: invalid soft job id %u\n", job_id));
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ }
+
+ if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+ job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+ mali_soft_job_system_unlock(system);
+
+ MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: soft job %u (0x%08X) was timed out\n", job->id, job));
+ mali_soft_job_destroy(job);
+
+ return _MALI_OSK_ERR_TIMEOUT;
+ }
+
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+ job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+ mali_soft_job_system_unlock(system);
+
+	/* Since the job is now in the signaled state, timeouts from the timeline system will be
+	 * ignored, and it is not possible to signal this job again. */
+
+ timeline_system = system->session->timeline_system;
+ MALI_DEBUG_ASSERT_POINTER(timeline_system);
+
+ /* Wait until activated. */
+ _mali_osk_wait_queue_wait_event(timeline_system->wait_queue, mali_soft_job_is_activated, (void *) job);
+
+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: signaling soft job %u (0x%08X)\n", job->id, job));
+
+ schedule_mask = mali_timeline_tracker_release(&job->tracker);
+ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+
+ mali_soft_job_destroy(job);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+static void mali_soft_job_send_activated_notification(struct mali_soft_job *job)
+{
+ if (NULL != job->activated_notification) {
+ _mali_uk_soft_job_activated_s *res = job->activated_notification->result_buffer;
+ res->user_job = job->user_job;
+ mali_session_send_notification(job->system->session, job->activated_notification);
+ }
+ job->activated_notification = NULL;
+}
+
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job)
+{
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_POINTER(job->system);
+ MALI_DEBUG_ASSERT_POINTER(job->system->session);
+
+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline activation for soft job %u (0x%08X).\n", job->id, job));
+
+ mali_soft_job_system_lock(job->system);
+
+ if (unlikely(job->system->session->is_aborting)) {
+ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Soft job %u (0x%08X) activated while session is aborting.\n", job->id, job));
+
+ mali_soft_job_system_unlock(job->system);
+
+ /* Since we are in shutdown, we can ignore the scheduling bitmask. */
+ mali_timeline_tracker_release(&job->tracker);
+ mali_soft_job_destroy(job);
+ return schedule_mask;
+ }
+
+ /* Send activated notification. */
+ mali_soft_job_send_activated_notification(job);
+
+ /* Wake up sleeping signaler. */
+ job->activated = MALI_TRUE;
+
+	/* If the job type is self-signaled, release the tracker, move the soft job to the free list, and schedule at once */
+ if (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == job->type) {
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+ job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+ mali_soft_job_system_unlock(job->system);
+
+ schedule_mask |= mali_timeline_tracker_release(&job->tracker);
+
+ mali_soft_job_destroy(job);
+ } else {
+ _mali_osk_wait_queue_wake_up(job->tracker.system->wait_queue);
+
+ mali_soft_job_system_unlock(job->system);
+ }
+
+ return schedule_mask;
+}
+
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job)
+{
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+ MALI_DEBUG_ASSERT_POINTER(job->system);
+ MALI_DEBUG_ASSERT_POINTER(job->system->session);
+ MALI_DEBUG_ASSERT(MALI_TRUE == job->activated);
+
+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline timeout for soft job %u (0x%08X).\n", job->id, job));
+
+ mali_soft_job_system_lock(job->system);
+
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state ||
+ MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+ if (unlikely(job->system->session->is_aborting)) {
+ /* The session is aborting. This job will be released and destroyed by @ref
+ * mali_soft_job_system_abort(). */
+ mali_soft_job_system_unlock(job->system);
+
+ return MALI_SCHEDULER_MASK_EMPTY;
+ }
+
+ if (MALI_SOFT_JOB_STATE_STARTED != job->state) {
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state);
+
+ /* The job is about to be signaled, ignore timeout. */
+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeout on soft job %u (0x%08X) in signaled state.\n", job->id, job));
+ mali_soft_job_system_unlock(job->system);
+ return schedule_mask;
+ }
+
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state);
+
+ job->state = MALI_SOFT_JOB_STATE_TIMED_OUT;
+ _mali_osk_atomic_inc(&job->refcount);
+
+ mali_soft_job_system_unlock(job->system);
+
+ schedule_mask = mali_timeline_tracker_release(&job->tracker);
+
+ mali_soft_job_destroy(job);
+
+ return schedule_mask;
+}
+
+void mali_soft_job_system_abort(struct mali_soft_job_system *system)
+{
+ struct mali_soft_job *job, *tmp;
+ _MALI_OSK_LIST_HEAD_STATIC_INIT(jobs);
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(system->session);
+ MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting soft job system for session 0x%08X.\n", system->session));
+
+ mali_soft_job_system_lock(system);
+
+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) {
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state ||
+ MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+ if (MALI_SOFT_JOB_STATE_STARTED == job->state) {
+ /* If the job has been activated, we have to release the tracker and destroy
+ * the job. If not, the tracker will be released and the job destroyed when
+ * it is activated. */
+ if (MALI_TRUE == job->activated) {
+ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting unsignaled soft job %u (0x%08X).\n", job->id, job));
+
+ job->state = MALI_SOFT_JOB_STATE_SIGNALED;
+ _mali_osk_list_move(&job->system_list, &jobs);
+ }
+ } else if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) {
+ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting timed out soft job %u (0x%08X).\n", job->id, job));
+
+ /* We need to destroy this soft job. */
+ _mali_osk_list_move(&job->system_list, &jobs);
+ }
+ }
+
+ mali_soft_job_system_unlock(system);
+
+ /* Release and destroy jobs. */
+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &jobs, struct mali_soft_job, system_list) {
+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state ||
+ MALI_SOFT_JOB_STATE_TIMED_OUT == job->state);
+
+ if (MALI_SOFT_JOB_STATE_SIGNALED == job->state) {
+ mali_timeline_tracker_release(&job->tracker);
+ }
+
+ /* Move job back to used list before destroying. */
+ _mali_osk_list_move(&job->system_list, &system->jobs_used);
+
+ mali_soft_job_destroy(job);
+ }
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_soft_job.h b/drivers/gpu/arm/utgard/common/mali_soft_job.h
new file mode 100644
index 000000000000..f35394e60384
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_soft_job.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_SOFT_JOB_H__
+#define __MALI_SOFT_JOB_H__
+
+#include "mali_osk.h"
+
+#include "mali_timeline.h"
+
+struct mali_timeline_fence;
+struct mali_session_data;
+struct mali_soft_job;
+struct mali_soft_job_system;
+
+/**
+ * Soft job types.
+ *
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED will only complete after activation if they
+ * are either signaled by user-space (@ref mali_soft_job_system_signal_job) or timed out by the
+ * Timeline system.
+ * Soft jobs of type MALI_SOFT_JOB_TYPE_SELF_SIGNALED have their job resources released
+ * automatically by the kernel when the job is activated.
+ */
+typedef enum mali_soft_job_type {
+ MALI_SOFT_JOB_TYPE_SELF_SIGNALED,
+ MALI_SOFT_JOB_TYPE_USER_SIGNALED,
+} mali_soft_job_type;
+
+/**
+ * Soft job state.
+ *
+ * A job is first allocated with its state set to MALI_SOFT_JOB_STATE_ALLOCATED. Once the job
+ * is added to the timeline system (@ref mali_soft_job_start), the state changes to
+ * MALI_SOFT_JOB_STATE_STARTED.
+ *
+ * For soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED the state is changed to
+ * MALI_SOFT_JOB_STATE_SIGNALED when @ref mali_soft_job_system_signal_job is called and the soft
+ * job's state is MALI_SOFT_JOB_STATE_STARTED or MALI_SOFT_JOB_STATE_TIMED_OUT.
+ *
+ * If a soft job of type MALI_SOFT_JOB_TYPE_USER_SIGNALED is timed out before being signaled, the
+ * state is changed to MALI_SOFT_JOB_STATE_TIMED_OUT. This can only happen to soft jobs in state
+ * MALI_SOFT_JOB_STATE_STARTED.
+ *
+ */
+typedef enum mali_soft_job_state {
+ MALI_SOFT_JOB_STATE_ALLOCATED,
+ MALI_SOFT_JOB_STATE_STARTED,
+ MALI_SOFT_JOB_STATE_SIGNALED,
+ MALI_SOFT_JOB_STATE_TIMED_OUT,
+} mali_soft_job_state;
+
+#define MALI_SOFT_JOB_INVALID_ID ((u32) -1)
+
+/**
+ * Soft job struct.
+ *
+ * Soft job can be used to represent any kind of CPU work done in kernel-space.
+ */
+typedef struct mali_soft_job {
+ mali_soft_job_type type; /**< Soft job type. Must be one of MALI_SOFT_JOB_TYPE_*. */
+ u64 user_job; /**< Identifier for soft job in user space. */
+ _mali_osk_atomic_t refcount; /**< Soft jobs are reference counted to prevent premature deletion. */
+ struct mali_timeline_tracker tracker; /**< Timeline tracker for soft job. */
+ mali_bool activated; /**< MALI_TRUE if the job has been activated, MALI_FALSE if not. */
+ _mali_osk_notification_t *activated_notification; /**< Pre-allocated notification object for ACTIVATED_NOTIFICATION. */
+
+ /* Protected by soft job system lock. */
+ u32 id; /**< Used by user-space to find corresponding soft job in kernel-space. */
+ mali_soft_job_state state; /**< State of soft job, must be one of MALI_SOFT_JOB_STATE_*. */
+ struct mali_soft_job_system *system; /**< The soft job system this job is in. */
+ _mali_osk_list_t system_list; /**< List element used by soft job system. */
+} mali_soft_job;
+
+/**
+ * Per-session soft job system.
+ *
+ * The soft job system is used to manage all soft jobs that belong to a session.
+ */
+typedef struct mali_soft_job_system {
+ struct mali_session_data *session; /**< The session this soft job system belongs to. */
+ _MALI_OSK_LIST_HEAD(jobs_used); /**< List of all allocated soft jobs. */
+
+ _mali_osk_spinlock_irq_t *lock; /**< Lock used to protect soft job system and its soft jobs. */
+ u32 lock_owner; /**< Contains tid of thread that locked the system or 0, if not locked. */
+	u32 last_job_id; /**< Records the last job ID; protected by the lock. */
+} mali_soft_job_system;
+
+/**
+ * Create a soft job system.
+ *
+ * @param session The session this soft job system will belong to.
+ * @return The new soft job system, or NULL if unsuccessful.
+ */
+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session);
+
+/**
+ * Destroy a soft job system.
+ *
+ * @note The soft job system must not have any started or activated jobs. Call @ref
+ * mali_soft_job_system_abort first.
+ *
+ * @param system The soft job system we are destroying.
+ */
+void mali_soft_job_system_destroy(struct mali_soft_job_system *system);
+
+/**
+ * Create a soft job.
+ *
+ * @param system Soft job system to create soft job from.
+ * @param type Type of the soft job.
+ * @param user_job Identifier for soft job in user space.
+ * @return New soft job if successful, NULL if not.
+ */
+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job);
+
+/**
+ * Destroy soft job.
+ *
+ * @param job Soft job to destroy.
+ */
+void mali_soft_job_destroy(struct mali_soft_job *job);
+
+/**
+ * Start a soft job.
+ *
+ * The soft job will be added to the Timeline system which will then activate it after all
+ * dependencies have been resolved.
+ *
+ * Create soft jobs with @ref mali_soft_job_create before starting them.
+ *
+ * @param job Soft job to start.
+ * @param fence Fence representing dependencies for this soft job.
+ * @return Point on soft job timeline.
+ */
+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence);
+
+/**
+ * Used by user-space to signal that a soft job has completed.
+ *
+ * @note Only valid for soft jobs with type MALI_SOFT_JOB_TYPE_USER_SIGNALED.
+ *
+ * @note The soft job must be in state MALI_SOFT_JOB_STATE_STARTED for the signal to be successful.
+ *
+ * @note If the soft job was signaled successfully, or it received a time out, the soft job will be
+ * destroyed after this call and should no longer be used.
+ *
+ * @note This function will block until the soft job has been activated.
+ *
+ * @param system The soft job system the job was started in.
+ * @param job_id ID of soft job we are signaling.
+ *
+ * @return _MALI_OSK_ERR_ITEM_NOT_FOUND if the soft job ID was invalid, _MALI_OSK_ERR_TIMEOUT if the
+ * soft job was timed out or _MALI_OSK_ERR_OK if we successfully signaled the soft job.
+ */
+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id);
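+
+/*
+ * Illustrative sketch (not part of the original patch): a typical
+ * user-signaled soft job lifecycle, assuming a valid session with an
+ * initialized soft job system and an already built timeline fence. The
+ * function name is hypothetical.
+ */
+#if 0
+static void example_user_signaled_job(struct mali_session_data *session,
+				      struct mali_timeline_fence *fence)
+{
+	struct mali_soft_job *job;
+	u32 job_id;
+
+	job = mali_soft_job_create(session->soft_job_system,
+				   MALI_SOFT_JOB_TYPE_USER_SIGNALED, 0);
+	if (NULL == job)
+		return;
+	job_id = job->id;
+
+	/* Adds the job to the Timeline system; it activates once the fence resolves. */
+	(void) mali_soft_job_start(job, fence);
+
+	/* Later, completion is reported (normally from the ioctl path); this
+	 * call blocks until the job has been activated. */
+	(void) mali_soft_job_system_signal_job(session->soft_job_system, job_id);
+}
+#endif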
+
+/**
+ * Used by the Timeline system to activate a soft job.
+ *
+ * @param job The soft job that is being activated.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job);
+
+/**
+ * Used by the Timeline system to timeout a soft job.
+ *
+ * A soft job is timed out if it completes or is signaled later than MALI_TIMELINE_TIMEOUT_HZ after
+ * activation.
+ *
+ * @param job The soft job that is being timed out.
+ * @return A scheduling bitmask.
+ */
+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job);
+
+/**
+ * Used to clean up activated soft jobs in the soft job system on session abort.
+ *
+ * @param system The soft job system that is being aborted.
+ */
+void mali_soft_job_system_abort(struct mali_soft_job_system *system);
+
+#endif /* __MALI_SOFT_JOB_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_spinlock_reentrant.c b/drivers/gpu/arm/utgard/common/mali_spinlock_reentrant.c
new file mode 100644
index 000000000000..178abaf43ba1
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_spinlock_reentrant.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_spinlock_reentrant.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order)
+{
+ struct mali_spinlock_reentrant *spinlock;
+
+ spinlock = _mali_osk_calloc(1, sizeof(struct mali_spinlock_reentrant));
+ if (NULL == spinlock) {
+ return NULL;
+ }
+
+ spinlock->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, lock_order);
+ if (NULL == spinlock->lock) {
+ mali_spinlock_reentrant_term(spinlock);
+ return NULL;
+ }
+
+ return spinlock;
+}
+
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock)
+{
+ MALI_DEBUG_ASSERT_POINTER(spinlock);
+ MALI_DEBUG_ASSERT(0 == spinlock->counter && 0 == spinlock->owner);
+
+ if (NULL != spinlock->lock) {
+ _mali_osk_spinlock_irq_term(spinlock->lock);
+ }
+
+ _mali_osk_free(spinlock);
+}
+
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+ MALI_DEBUG_ASSERT_POINTER(spinlock);
+ MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+ MALI_DEBUG_ASSERT(0 != tid);
+
+ MALI_DEBUG_PRINT(5, ("%s ^\n", __FUNCTION__));
+
+ if (tid != spinlock->owner) {
+ _mali_osk_spinlock_irq_lock(spinlock->lock);
+ MALI_DEBUG_ASSERT(0 == spinlock->owner && 0 == spinlock->counter);
+ spinlock->owner = tid;
+ }
+
+ MALI_DEBUG_PRINT(5, ("%s v\n", __FUNCTION__));
+
+ ++spinlock->counter;
+}
+
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+ MALI_DEBUG_ASSERT_POINTER(spinlock);
+ MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+ MALI_DEBUG_ASSERT(0 != tid && tid == spinlock->owner);
+
+ --spinlock->counter;
+ if (0 == spinlock->counter) {
+ spinlock->owner = 0;
+ MALI_DEBUG_PRINT(5, ("%s release last\n", __FUNCTION__));
+ _mali_osk_spinlock_irq_unlock(spinlock->lock);
+ }
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_spinlock_reentrant.h b/drivers/gpu/arm/utgard/common/mali_spinlock_reentrant.h
new file mode 100644
index 000000000000..6a62df850b2f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_spinlock_reentrant.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_SPINLOCK_REENTRANT_H__
+#define __MALI_SPINLOCK_REENTRANT_H__
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+/**
+ * Reentrant spinlock.
+ */
+struct mali_spinlock_reentrant {
+ _mali_osk_spinlock_irq_t *lock;
+ u32 owner;
+ u32 counter;
+};
+
+/**
+ * Create a new reentrant spinlock.
+ *
+ * @param lock_order Lock order.
+ * @return New reentrant spinlock.
+ */
+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order);
+
+/**
+ * Terminate reentrant spinlock and free any associated resources.
+ *
+ * @param spinlock Reentrant spinlock to terminate.
+ */
+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock);
+
+/**
+ * Wait for reentrant spinlock to be signaled.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Signal reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ */
+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid);
+
+/**
+ * Check if thread is holding reentrant spinlock.
+ *
+ * @param spinlock Reentrant spinlock.
+ * @param tid Thread ID.
+ * @return MALI_TRUE if thread is holding spinlock, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_spinlock_reentrant_is_held(struct mali_spinlock_reentrant *spinlock, u32 tid)
+{
+ MALI_DEBUG_ASSERT_POINTER(spinlock->lock);
+ return (tid == spinlock->owner && 0 < spinlock->counter);
+}
+
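+/*
+ * Illustrative sketch (not part of the original patch): the lock may be taken
+ * recursively by the same thread; every wait() must be balanced by a signal()
+ * using the same tid, and the last signal() releases the underlying spinlock.
+ * The function name is hypothetical.
+ */
+#if 0
+static void example_reentrant_lock(struct mali_spinlock_reentrant *lock)
+{
+	u32 tid = _mali_osk_get_tid();
+
+	mali_spinlock_reentrant_wait(lock, tid);
+	mali_spinlock_reentrant_wait(lock, tid);   /* re-entry: counter == 2 */
+	MALI_DEBUG_ASSERT(mali_spinlock_reentrant_is_held(lock, tid));
+	mali_spinlock_reentrant_signal(lock, tid);
+	mali_spinlock_reentrant_signal(lock, tid); /* last signal releases the lock */
+}
+#endif
+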
+#endif /* __MALI_SPINLOCK_REENTRANT_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_timeline.c b/drivers/gpu/arm/utgard/common/mali_timeline.c
new file mode 100644
index 000000000000..5a767b39d56e
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_timeline.c
@@ -0,0 +1,1586 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_timeline.h"
+#include "mali_kernel_common.h"
+#include "mali_scheduler.h"
+#include "mali_soft_job.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+#include "mali_executor.h"
+#include "mali_pp_job.h"
+
+#define MALI_TIMELINE_SYSTEM_LOCKED(system) (mali_spinlock_reentrant_is_held((system)->spinlock, _mali_osk_get_tid()))
+
+/*
+ * The following three counters record how many GP, physical PP and
+ * virtual PP jobs are delayed in the whole timeline system; they are
+ * used to decide whether idle groups need to be deactivated.
+ */
+_mali_osk_atomic_t gp_tracker_count;
+_mali_osk_atomic_t phy_pp_tracker_count;
+_mali_osk_atomic_t virt_pp_tracker_count;
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+ struct mali_timeline_waiter *waiter);
+
+#if defined(CONFIG_SYNC)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+struct mali_deferred_fence_put_entry {
+ struct hlist_node list;
+ struct sync_fence *fence;
+};
+
+static HLIST_HEAD(mali_timeline_sync_fence_to_free_list);
+static DEFINE_SPINLOCK(mali_timeline_sync_fence_to_free_lock);
+
+static void put_sync_fences(struct work_struct *ignore)
+{
+ struct hlist_head list;
+ struct hlist_node *tmp, *pos;
+ unsigned long flags;
+ struct mali_deferred_fence_put_entry *o;
+
+ spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+ hlist_move_list(&mali_timeline_sync_fence_to_free_list, &list);
+ spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+ hlist_for_each_entry_safe(o, pos, tmp, &list, list) {
+ sync_fence_put(o->fence);
+ kfree(o);
+ }
+}
+
+static DECLARE_DELAYED_WORK(delayed_sync_fence_put, put_sync_fences);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+/* Callback that is called when a sync fence that a tracker is waiting on is signaled. */
+static void mali_timeline_sync_fence_callback(struct sync_fence *sync_fence, struct sync_fence_waiter *sync_fence_waiter)
+{
+ struct mali_timeline_system *system;
+ struct mali_timeline_waiter *waiter;
+ struct mali_timeline_tracker *tracker;
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+ u32 tid = _mali_osk_get_tid();
+ mali_bool is_aborting = MALI_FALSE;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ int fence_status = sync_fence->status;
+#else
+ int fence_status = atomic_read(&sync_fence->status);
+#endif
+
+ MALI_DEBUG_ASSERT_POINTER(sync_fence);
+ MALI_DEBUG_ASSERT_POINTER(sync_fence_waiter);
+
+ tracker = _MALI_OSK_CONTAINER_OF(sync_fence_waiter, struct mali_timeline_tracker, sync_fence_waiter);
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+
+ system = tracker->system;
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(system->session);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ is_aborting = system->session->is_aborting;
+ if (!is_aborting && (0 > fence_status)) {
+ MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, fence_status));
+ tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+ }
+
+ waiter = tracker->waiter_sync;
+ MALI_DEBUG_ASSERT_POINTER(waiter);
+
+ tracker->sync_fence = NULL;
+ tracker->fence.sync_fd = -1;
+
+ schedule_mask |= mali_timeline_system_release_waiter(system, waiter);
+
+ /* If aborting, wake up sleepers that are waiting for sync fence callbacks to complete. */
+ if (is_aborting) {
+ _mali_osk_wait_queue_wake_up(system->wait_queue);
+ }
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+ /*
+	 * Older versions of Linux, before 3.5, don't support fput() in interrupt
+ * context. For those older kernels, allocate a list object and put the
+ * fence object on that and defer the call to sync_fence_put() to a workqueue.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+ {
+ struct mali_deferred_fence_put_entry *obj;
+
+ obj = kzalloc(sizeof(struct mali_deferred_fence_put_entry), GFP_ATOMIC);
+ if (obj) {
+ unsigned long flags;
+ mali_bool schedule = MALI_FALSE;
+
+ obj->fence = sync_fence;
+
+ spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags);
+ if (hlist_empty(&mali_timeline_sync_fence_to_free_list))
+ schedule = MALI_TRUE;
+ hlist_add_head(&obj->list, &mali_timeline_sync_fence_to_free_list);
+ spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags);
+
+ if (schedule)
+ schedule_delayed_work(&delayed_sync_fence_put, 0);
+ }
+ }
+#else
+ sync_fence_put(sync_fence);
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */
+
+ if (!is_aborting) {
+ mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE);
+ }
+}
+#endif /* defined(CONFIG_SYNC) */
+
+static mali_scheduler_mask mali_timeline_tracker_time_out(struct mali_timeline_tracker *tracker)
+{
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_SOFT == tracker->type);
+
+ return mali_soft_job_system_timeout_job((struct mali_soft_job *) tracker->job);
+}
+
+static void mali_timeline_timer_callback(void *data)
+{
+ struct mali_timeline_system *system;
+ struct mali_timeline_tracker *tracker;
+ struct mali_timeline *timeline;
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+ u32 tid = _mali_osk_get_tid();
+
+ timeline = (struct mali_timeline *) data;
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ system = timeline->system;
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ if (!system->timer_enabled) {
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+ return;
+ }
+
+ tracker = timeline->tracker_tail;
+ timeline->timer_active = MALI_FALSE;
+
+ if (NULL != tracker && MALI_TRUE == tracker->timer_active) {
+		/* This is likely delayed work that was already scheduled before it could be cancelled. */
+ if (MALI_TIMELINE_TIMEOUT_HZ > (_mali_osk_time_tickcount() - tracker->os_tick_activate)) {
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+ return;
+ }
+
+ schedule_mask = mali_timeline_tracker_time_out(tracker);
+ tracker->timer_active = MALI_FALSE;
+ } else {
+ MALI_PRINT_ERROR(("Mali Timeline: Soft job timer callback without a waiting tracker.\n"));
+ }
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system)
+{
+ u32 i;
+ u32 tid = _mali_osk_get_tid();
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+ system->timer_enabled = MALI_FALSE;
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ struct mali_timeline *timeline = system->timelines[i];
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ if (NULL != timeline->delayed_work) {
+ _mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+ timeline->timer_active = MALI_FALSE;
+ }
+ }
+}
+
+static void mali_timeline_destroy(struct mali_timeline *timeline)
+{
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ if (NULL != timeline) {
+ /* Assert that the timeline object has been properly cleaned up before destroying it. */
+ MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+ MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+ MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+ MALI_DEBUG_ASSERT(NULL != timeline->system);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_MAX > timeline->id);
+
+#if defined(CONFIG_SYNC)
+ if (NULL != timeline->sync_tl) {
+ sync_timeline_destroy(timeline->sync_tl);
+ }
+#endif /* defined(CONFIG_SYNC) */
+
+ if (NULL != timeline->delayed_work) {
+ _mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work);
+ _mali_osk_wq_delayed_delete_work_nonflush(timeline->delayed_work);
+ }
+
+#ifndef CONFIG_SYNC
+ _mali_osk_free(timeline);
+#endif
+ }
+}
+
+static struct mali_timeline *mali_timeline_create(struct mali_timeline_system *system, enum mali_timeline_id id)
+{
+ struct mali_timeline *timeline;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT(id < MALI_TIMELINE_MAX);
+
+ timeline = (struct mali_timeline *) _mali_osk_calloc(1, sizeof(struct mali_timeline));
+ if (NULL == timeline) {
+ return NULL;
+ }
+
+ /* Initially the timeline is empty. */
+#if defined(MALI_TIMELINE_DEBUG_START_POINT)
+ /* Start the timeline a bit before wrapping when debugging. */
+ timeline->point_next = UINT_MAX - MALI_TIMELINE_MAX_POINT_SPAN - 128;
+#else
+ timeline->point_next = 1;
+#endif
+ timeline->point_oldest = timeline->point_next;
+
+ /* The tracker and waiter lists will initially be empty. */
+
+ timeline->system = system;
+ timeline->id = id;
+
+ timeline->delayed_work = _mali_osk_wq_delayed_create_work(mali_timeline_timer_callback, timeline);
+ if (NULL == timeline->delayed_work) {
+ mali_timeline_destroy(timeline);
+ return NULL;
+ }
+
+ timeline->timer_active = MALI_FALSE;
+
+#if defined(CONFIG_SYNC)
+ {
+ char timeline_name[32];
+
+ switch (id) {
+ case MALI_TIMELINE_GP:
+ _mali_osk_snprintf(timeline_name, 32, "mali-%u-gp", _mali_osk_get_pid());
+ break;
+ case MALI_TIMELINE_PP:
+ _mali_osk_snprintf(timeline_name, 32, "mali-%u-pp", _mali_osk_get_pid());
+ break;
+ case MALI_TIMELINE_SOFT:
+ _mali_osk_snprintf(timeline_name, 32, "mali-%u-soft", _mali_osk_get_pid());
+ break;
+ default:
+ MALI_PRINT_ERROR(("Mali Timeline: Invalid timeline id %d\n", id));
+ mali_timeline_destroy(timeline);
+ return NULL;
+ }
+
+ timeline->destroyed = MALI_FALSE;
+
+ timeline->sync_tl = mali_sync_timeline_create(timeline, timeline_name);
+ if (NULL == timeline->sync_tl) {
+ mali_timeline_destroy(timeline);
+ return NULL;
+ }
+
+ timeline->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+ if (NULL == timeline->spinlock) {
+ mali_timeline_destroy(timeline);
+ return NULL;
+ }
+ }
+#endif /* defined(CONFIG_SYNC) */
+
+ return timeline;
+}
+
+static void mali_timeline_insert_tracker(struct mali_timeline *timeline, struct mali_timeline_tracker *tracker)
+{
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+
+ if (mali_timeline_is_full(timeline)) {
+ /* Don't add tracker if timeline is full. */
+ tracker->point = MALI_TIMELINE_NO_POINT;
+ return;
+ }
+
+ tracker->timeline = timeline;
+ tracker->point = timeline->point_next;
+
+ /* Find next available point. */
+ timeline->point_next++;
+ if (MALI_TIMELINE_NO_POINT == timeline->point_next) {
+ timeline->point_next++;
+ }
+
+ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+
+ if (MALI_TIMELINE_TRACKER_GP == tracker->type) {
+ _mali_osk_atomic_inc(&gp_tracker_count);
+ } else if (MALI_TIMELINE_TRACKER_PP == tracker->type) {
+ if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+ _mali_osk_atomic_inc(&virt_pp_tracker_count);
+ } else {
+ _mali_osk_atomic_inc(&phy_pp_tracker_count);
+ }
+ }
+
+ /* Add tracker as new head on timeline's tracker list. */
+ if (NULL == timeline->tracker_head) {
+ /* Tracker list is empty. */
+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+
+ timeline->tracker_tail = tracker;
+
+ MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+ MALI_DEBUG_ASSERT(NULL == tracker->timeline_prev);
+ } else {
+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+
+ tracker->timeline_prev = timeline->tracker_head;
+ timeline->tracker_head->timeline_next = tracker;
+
+ MALI_DEBUG_ASSERT(NULL == tracker->timeline_next);
+ }
+ timeline->tracker_head = tracker;
+
+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next);
+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail->timeline_prev);
+}
+
+/* Inserting the waiter object into the given timeline */
+static void mali_timeline_insert_waiter(struct mali_timeline *timeline, struct mali_timeline_waiter *waiter_new)
+{
+ struct mali_timeline_waiter *waiter_prev;
+ struct mali_timeline_waiter *waiter_next;
+
+	/* The waiter's point must be between the timeline head and tail, and there must
+	 * be fewer than MALI_TIMELINE_MAX_POINT_SPAN elements between them. */
+ MALI_DEBUG_ASSERT((waiter_new->point - timeline->point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN);
+ MALI_DEBUG_ASSERT((-waiter_new->point + timeline->point_next) < MALI_TIMELINE_MAX_POINT_SPAN);
+
+	/* Find out where to put this waiter in the linked waiter list of the given timeline */
+ waiter_prev = timeline->waiter_head; /* Insert new after waiter_prev */
+ waiter_next = NULL; /* Insert new before waiter_next */
+
+ /* Iterating backwards from head (newest) to tail (oldest) until we
+ * find the correct spot to insert the new waiter */
+ while (waiter_prev && mali_timeline_point_after(waiter_prev->point, waiter_new->point)) {
+ waiter_next = waiter_prev;
+ waiter_prev = waiter_prev->timeline_prev;
+ }
+
+ if (NULL == waiter_prev && NULL == waiter_next) {
+ /* list is empty */
+ timeline->waiter_head = waiter_new;
+ timeline->waiter_tail = waiter_new;
+ } else if (NULL == waiter_next) {
+ /* insert at head */
+ waiter_new->timeline_prev = timeline->waiter_head;
+ timeline->waiter_head->timeline_next = waiter_new;
+ timeline->waiter_head = waiter_new;
+ } else if (NULL == waiter_prev) {
+ /* insert at tail */
+ waiter_new->timeline_next = timeline->waiter_tail;
+ timeline->waiter_tail->timeline_prev = waiter_new;
+ timeline->waiter_tail = waiter_new;
+ } else {
+ /* insert between */
+ waiter_new->timeline_next = waiter_next;
+ waiter_new->timeline_prev = waiter_prev;
+ waiter_next->timeline_prev = waiter_new;
+ waiter_prev->timeline_next = waiter_new;
+ }
+}
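+
+/*
+ * Illustrative sketch (not part of the original patch): point ordering is
+ * computed with unsigned wraparound arithmetic, so relative-distance checks
+ * stay valid even when point_next wraps past UINT_MAX. The function name is
+ * hypothetical.
+ */
+#if 0
+static void example_point_wraparound(void)
+{
+	u32 point_oldest = UINT_MAX - 2;
+	u32 point = point_oldest + 5; /* wraps around to 2 */
+
+	/* The relative distance is still small, so the point is on the list. */
+	MALI_DEBUG_ASSERT((point - point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN);
+}
+#endif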
+
+static void mali_timeline_update_delayed_work(struct mali_timeline *timeline)
+{
+ struct mali_timeline_system *system;
+ struct mali_timeline_tracker *oldest_tracker;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id);
+
+ system = timeline->system;
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ /* Timer is disabled, early out. */
+ if (!system->timer_enabled) return;
+
+ oldest_tracker = timeline->tracker_tail;
+ if (NULL != oldest_tracker && 0 == oldest_tracker->trigger_ref_count) {
+ if (MALI_FALSE == oldest_tracker->timer_active) {
+ if (MALI_TRUE == timeline->timer_active) {
+ _mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+ }
+ _mali_osk_wq_delayed_schedule_work(timeline->delayed_work, MALI_TIMELINE_TIMEOUT_HZ);
+ oldest_tracker->timer_active = MALI_TRUE;
+ timeline->timer_active = MALI_TRUE;
+ }
+ } else if (MALI_TRUE == timeline->timer_active) {
+ _mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work);
+ timeline->timer_active = MALI_FALSE;
+ }
+}
+
+static mali_scheduler_mask mali_timeline_update_oldest_point(struct mali_timeline *timeline)
+{
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ MALI_DEBUG_CODE({
+ struct mali_timeline_system *system = timeline->system;
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+ });
+
+ if (NULL != timeline->tracker_tail) {
+ /* Set oldest point to oldest tracker's point */
+ timeline->point_oldest = timeline->tracker_tail->point;
+ } else {
+ /* No trackers, mark point list as empty */
+ timeline->point_oldest = timeline->point_next;
+ }
+
+ /* Release all waiters no longer on the timeline's point list.
+ * Releasing a waiter can trigger this function to be called again, so
+ * we do not store any pointers on stack. */
+ while (NULL != timeline->waiter_tail) {
+ u32 waiter_time_relative;
+ u32 time_head_relative;
+ struct mali_timeline_waiter *waiter = timeline->waiter_tail;
+
+ time_head_relative = timeline->point_next - timeline->point_oldest;
+ waiter_time_relative = waiter->point - timeline->point_oldest;
+
+ if (waiter_time_relative < time_head_relative) {
+ /* This and all following waiters are on the point list, so we are done. */
+ break;
+ }
+
+ /* Remove waiter from timeline's waiter list. */
+ if (NULL != waiter->timeline_next) {
+ waiter->timeline_next->timeline_prev = NULL;
+ } else {
+ /* This was the last waiter */
+ timeline->waiter_head = NULL;
+ }
+ timeline->waiter_tail = waiter->timeline_next;
+
+ /* Release waiter. This could activate a tracker, if this was
+ * the last waiter for the tracker. */
+ schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter);
+ }
+
+ return schedule_mask;
+}
+
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+ mali_timeline_tracker_type type,
+ struct mali_timeline_fence *fence,
+ void *job)
+{
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > type);
+
+ /* Zero out all tracker members. */
+ _mali_osk_memset(tracker, 0, sizeof(*tracker));
+
+ tracker->type = type;
+ tracker->job = job;
+	tracker->trigger_ref_count = 1; /* Prevents any callback from triggering while the tracker is being added */
+ tracker->os_tick_create = _mali_osk_time_tickcount();
+ MALI_DEBUG_CODE(tracker->magic = MALI_TIMELINE_TRACKER_MAGIC);
+
+ tracker->activation_error = MALI_TIMELINE_ACTIVATION_ERROR_NONE;
+
+ /* Copy fence. */
+ if (NULL != fence) {
+ _mali_osk_memcpy(&tracker->fence, fence, sizeof(struct mali_timeline_fence));
+ }
+}
+
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker)
+{
+ struct mali_timeline *timeline;
+ struct mali_timeline_system *system;
+ struct mali_timeline_tracker *tracker_next, *tracker_prev;
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+ u32 tid = _mali_osk_get_tid();
+
+ /* Upon entry a group lock will be held, but not a scheduler lock. */
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+ /* Tracker should have been triggered */
+ MALI_DEBUG_ASSERT(0 == tracker->trigger_ref_count);
+
+ /* All waiters should have been released at this point */
+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+
+ MALI_DEBUG_PRINT(3, ("Mali Timeline: releasing tracker for job 0x%08X\n", tracker->job));
+
+ timeline = tracker->timeline;
+ if (NULL == timeline) {
+ /* Tracker was not on a timeline, there is nothing to release. */
+ return MALI_SCHEDULER_MASK_EMPTY;
+ }
+
+ system = timeline->system;
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ /* Tracker should still be on timeline */
+ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+ MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, tracker->point));
+
+ /* Tracker is no longer valid. */
+ MALI_DEBUG_CODE(tracker->magic = 0);
+
+ tracker_next = tracker->timeline_next;
+ tracker_prev = tracker->timeline_prev;
+ tracker->timeline_next = NULL;
+ tracker->timeline_prev = NULL;
+
+ /* Removing tracker from timeline's tracker list */
+ if (NULL == tracker_next) {
+ /* This tracker was the head */
+ timeline->tracker_head = tracker_prev;
+ } else {
+ tracker_next->timeline_prev = tracker_prev;
+ }
+
+ if (NULL == tracker_prev) {
+ /* This tracker was the tail */
+ timeline->tracker_tail = tracker_next;
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+ /* Update the timeline's oldest time and release any waiters */
+ schedule_mask |= mali_timeline_update_oldest_point(timeline);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+ } else {
+ tracker_prev->timeline_next = tracker_next;
+ }
+
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ /* Update delayed work only when it is the soft job timeline */
+ if (MALI_TIMELINE_SOFT == tracker->timeline->id) {
+ mali_timeline_update_delayed_work(tracker->timeline);
+ }
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+ return schedule_mask;
+}
+
+void mali_timeline_system_release_waiter_list(struct mali_timeline_system *system,
+ struct mali_timeline_waiter *tail,
+ struct mali_timeline_waiter *head)
+{
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(head);
+ MALI_DEBUG_ASSERT_POINTER(tail);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ head->tracker_next = system->waiter_empty_list;
+ system->waiter_empty_list = tail;
+}
+
+static mali_scheduler_mask mali_timeline_tracker_activate(struct mali_timeline_tracker *tracker)
+{
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+ struct mali_timeline_system *system;
+ struct mali_timeline *timeline;
+ u32 tid = _mali_osk_get_tid();
+
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+ system = tracker->system;
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ tracker->os_tick_activate = _mali_osk_time_tickcount();
+
+ if (NULL != tracker->waiter_head) {
+ mali_timeline_system_release_waiter_list(system, tracker->waiter_tail, tracker->waiter_head);
+ tracker->waiter_head = NULL;
+ tracker->waiter_tail = NULL;
+ }
+
+ switch (tracker->type) {
+ case MALI_TIMELINE_TRACKER_GP:
+ schedule_mask = mali_scheduler_activate_gp_job((struct mali_gp_job *) tracker->job);
+
+ _mali_osk_atomic_dec(&gp_tracker_count);
+ break;
+ case MALI_TIMELINE_TRACKER_PP:
+ if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) {
+ _mali_osk_atomic_dec(&virt_pp_tracker_count);
+ } else {
+ _mali_osk_atomic_dec(&phy_pp_tracker_count);
+ }
+ schedule_mask = mali_scheduler_activate_pp_job((struct mali_pp_job *) tracker->job);
+ break;
+ case MALI_TIMELINE_TRACKER_SOFT:
+ timeline = tracker->timeline;
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ schedule_mask |= mali_soft_job_system_activate_job((struct mali_soft_job *) tracker->job);
+
+		/* Start a soft timer to make sure the soft job is released within a limited time */
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+ mali_timeline_update_delayed_work(timeline);
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+ break;
+ case MALI_TIMELINE_TRACKER_WAIT:
+ mali_timeline_fence_wait_activate((struct mali_timeline_fence_wait_tracker *) tracker->job);
+ break;
+ case MALI_TIMELINE_TRACKER_SYNC:
+#if defined(CONFIG_SYNC)
+ mali_timeline_sync_fence_activate((struct mali_timeline_sync_fence_tracker *) tracker->job);
+#else
+		MALI_PRINT_ERROR(("Mali Timeline: sync tracker not supported\n"));
+#endif /* defined(CONFIG_SYNC) */
+ break;
+ default:
+ MALI_PRINT_ERROR(("Mali Timeline - Illegal tracker type: %d\n", tracker->type));
+ break;
+ }
+
+ return schedule_mask;
+}
+
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker)
+{
+ u32 tid = _mali_osk_get_tid();
+
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+ tracker->trigger_ref_count++;
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error)
+{
+ u32 tid = _mali_osk_get_tid();
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+ tracker->trigger_ref_count--;
+
+ tracker->activation_error |= activation_error;
+
+ if (0 == tracker->trigger_ref_count) {
+ schedule_mask |= mali_timeline_tracker_activate(tracker);
+ tracker = NULL;
+ }
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+ return schedule_mask;
+}
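+
+/*
+ * Illustrative sketch (not part of the original patch): each external
+ * dependency takes a reference with tracker_get() and drops it with
+ * tracker_put(); the tracker activates once the initial reference taken in
+ * mali_timeline_tracker_init() and all gets have been balanced by puts. The
+ * function name is hypothetical.
+ */
+#if 0
+static void example_tracker_refcount(struct mali_timeline_system *system,
+				     struct mali_timeline_tracker *tracker)
+{
+	mali_scheduler_mask mask;
+
+	mali_timeline_system_tracker_get(system, tracker);
+	/* ... dependency resolved asynchronously ... */
+	mask = mali_timeline_system_tracker_put(system, tracker,
+						MALI_TIMELINE_ACTIVATION_ERROR_NONE);
+	mali_executor_schedule_from_mask(mask, MALI_FALSE);
+}
+#endif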
+
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence)
+{
+ u32 i;
+
+ MALI_DEBUG_ASSERT_POINTER(fence);
+ MALI_DEBUG_ASSERT_POINTER(uk_fence);
+
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ fence->points[i] = uk_fence->points[i];
+ }
+
+ fence->sync_fd = uk_fence->sync_fd;
+}
+
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session)
+{
+ u32 i;
+ struct mali_timeline_system *system;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_PRINT(4, ("Mali Timeline: creating timeline system\n"));
+
+ system = (struct mali_timeline_system *) _mali_osk_calloc(1, sizeof(struct mali_timeline_system));
+ if (NULL == system) {
+ return NULL;
+ }
+
+ system->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM);
+ if (NULL == system->spinlock) {
+ mali_timeline_system_destroy(system);
+ return NULL;
+ }
+
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ system->timelines[i] = mali_timeline_create(system, (enum mali_timeline_id)i);
+ if (NULL == system->timelines[i]) {
+ mali_timeline_system_destroy(system);
+ return NULL;
+ }
+ }
+
+#if defined(CONFIG_SYNC)
+ system->signaled_sync_tl = mali_sync_timeline_create(NULL, "mali-always-signaled");
+ if (NULL == system->signaled_sync_tl) {
+ mali_timeline_system_destroy(system);
+ return NULL;
+ }
+#endif /* defined(CONFIG_SYNC) */
+
+ system->waiter_empty_list = NULL;
+ system->session = session;
+ system->timer_enabled = MALI_TRUE;
+
+ system->wait_queue = _mali_osk_wait_queue_init();
+ if (NULL == system->wait_queue) {
+ mali_timeline_system_destroy(system);
+ return NULL;
+ }
+
+ return system;
+}
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Check if there are any trackers left on timeline.
+ *
+ * Used as a wait queue conditional.
+ *
+ * @param data Timeline.
+ * @return MALI_TRUE if there are no trackers on timeline, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_has_no_trackers(void *data)
+{
+ struct mali_timeline *timeline = (struct mali_timeline *) data;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ return mali_timeline_is_empty(timeline);
+}
+
+/**
+ * Cancel sync fence waiters waited upon by trackers on all timelines.
+ *
+ * Will return after all timelines have no trackers left.
+ *
+ * @param system Timeline system.
+ */
+static void mali_timeline_cancel_sync_fence_waiters(struct mali_timeline_system *system)
+{
+ u32 i;
+ u32 tid = _mali_osk_get_tid();
+ struct mali_timeline_tracker *tracker, *tracker_next;
+ _MALI_OSK_LIST_HEAD_STATIC_INIT(tracker_list);
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(system->session);
+ MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ /* Cancel sync fence waiters. */
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ struct mali_timeline *timeline = system->timelines[i];
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ tracker_next = timeline->tracker_tail;
+ while (NULL != tracker_next) {
+ tracker = tracker_next;
+ tracker_next = tracker->timeline_next;
+
+ if (NULL == tracker->sync_fence) continue;
+
+ MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling sync fence wait for tracker 0x%08X.\n", tracker));
+
+ /* Cancel sync fence waiter. */
+ if (0 == sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) {
+ /* Callback was not called, move tracker to local list. */
+ _mali_osk_list_add(&tracker->sync_fence_cancel_list, &tracker_list);
+ }
+ }
+ }
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+ /* Manually call sync fence callback in order to release waiter and trigger activation of tracker. */
+ _MALI_OSK_LIST_FOREACHENTRY(tracker, tracker_next, &tracker_list, struct mali_timeline_tracker, sync_fence_cancel_list) {
+ mali_timeline_sync_fence_callback(tracker->sync_fence, &tracker->sync_fence_waiter);
+ }
+
+ /* Sleep until all sync fence callbacks are done and all timelines are empty. */
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ struct mali_timeline *timeline = system->timelines[i];
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ _mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline);
+ }
+}
+
+#endif /* defined(CONFIG_SYNC) */
+
+void mali_timeline_system_abort(struct mali_timeline_system *system)
+{
+ MALI_DEBUG_CODE(u32 tid = _mali_osk_get_tid(););
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(system->session);
+ MALI_DEBUG_ASSERT(system->session->is_aborting);
+
+ MALI_DEBUG_PRINT(3, ("Mali Timeline: Aborting timeline system for session 0x%08X.\n", system->session));
+
+#if defined(CONFIG_SYNC)
+ mali_timeline_cancel_sync_fence_waiters(system);
+#endif /* defined(CONFIG_SYNC) */
+
+ /* Should not be any waiters or trackers left at this point. */
+ MALI_DEBUG_CODE({
+ u32 i;
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i)
+ {
+ struct mali_timeline *timeline = system->timelines[i];
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next);
+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head);
+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail);
+ MALI_DEBUG_ASSERT(NULL == timeline->waiter_head);
+ MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail);
+ }
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+ });
+}
+
+void mali_timeline_system_destroy(struct mali_timeline_system *system)
+{
+ u32 i;
+ struct mali_timeline_waiter *waiter, *next;
+#if defined(CONFIG_SYNC)
+ u32 tid = _mali_osk_get_tid();
+#endif
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(system->session);
+
+ MALI_DEBUG_PRINT(4, ("Mali Timeline: destroying timeline system\n"));
+
+ if (NULL != system) {
+
+ /* There should be no waiters left on this queue. */
+ if (NULL != system->wait_queue) {
+ _mali_osk_wait_queue_term(system->wait_queue);
+ system->wait_queue = NULL;
+ }
+
+ /* Free all waiters in empty list */
+ waiter = system->waiter_empty_list;
+ while (NULL != waiter) {
+ next = waiter->tracker_next;
+ _mali_osk_free(waiter);
+ waiter = next;
+ }
+
+#if defined(CONFIG_SYNC)
+ if (NULL != system->signaled_sync_tl) {
+ sync_timeline_destroy(system->signaled_sync_tl);
+ }
+
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ if ((NULL != system->timelines[i]) && (NULL != system->timelines[i]->spinlock)) {
+ mali_spinlock_reentrant_wait(system->timelines[i]->spinlock, tid);
+ system->timelines[i]->destroyed = MALI_TRUE;
+ mali_spinlock_reentrant_signal(system->timelines[i]->spinlock, tid);
+ }
+ }
+#endif /* defined(CONFIG_SYNC) */
+
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ if (NULL != system->timelines[i]) {
+ mali_timeline_destroy(system->timelines[i]);
+ }
+ }
+
+ if (NULL != system->spinlock) {
+ mali_spinlock_reentrant_term(system->spinlock);
+ }
+
+ _mali_osk_free(system);
+ }
+}
+
+/**
+ * Find how many waiters are needed for a given fence.
+ *
+ * @param fence The fence to check.
+ * @return Number of waiters needed for fence.
+ */
+static u32 mali_timeline_fence_num_waiters(struct mali_timeline_fence *fence)
+{
+ u32 i, num_waiters = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(fence);
+
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ if (MALI_TIMELINE_NO_POINT != fence->points[i]) {
+ ++num_waiters;
+ }
+ }
+
+#if defined(CONFIG_SYNC)
+ if (-1 != fence->sync_fd) ++num_waiters;
+#endif /* defined(CONFIG_SYNC) */
+
+ return num_waiters;
+}
+
+static struct mali_timeline_waiter *mali_timeline_system_get_zeroed_waiter(struct mali_timeline_system *system)
+{
+ struct mali_timeline_waiter *waiter;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ waiter = system->waiter_empty_list;
+ if (NULL != waiter) {
+ /* Remove waiter from empty list and zero it */
+ system->waiter_empty_list = waiter->tracker_next;
+ _mali_osk_memset(waiter, 0, sizeof(*waiter));
+ }
+
+ /* Return NULL if list was empty. */
+ return waiter;
+}
+
+static void mali_timeline_system_allocate_waiters(struct mali_timeline_system *system,
+ struct mali_timeline_waiter **tail,
+ struct mali_timeline_waiter **head,
+ int max_num_waiters)
+{
+ u32 i, tid = _mali_osk_get_tid();
+ mali_bool do_alloc;
+ struct mali_timeline_waiter *waiter;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(tail);
+ MALI_DEBUG_ASSERT_POINTER(head);
+
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ *head = *tail = NULL;
+ do_alloc = MALI_FALSE;
+ i = 0;
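+ /* First reuse waiters from the system's empty list while holding the
+ * lock; if that list runs dry, drop the lock and fall back to allocating
+ * fresh waiter objects, re-taking the lock below before returning. */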
+ while (i < max_num_waiters) {
+ if (MALI_FALSE == do_alloc) {
+ waiter = mali_timeline_system_get_zeroed_waiter(system);
+ if (NULL == waiter) {
+ do_alloc = MALI_TRUE;
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+ continue;
+ }
+ } else {
+ waiter = _mali_osk_calloc(1, sizeof(struct mali_timeline_waiter));
+ if (NULL == waiter) break;
+ }
+ ++i;
+ if (NULL == *tail) {
+ *tail = waiter;
+ *head = waiter;
+ } else {
+ (*head)->tracker_next = waiter;
+ *head = waiter;
+ }
+ }
+ if (MALI_TRUE == do_alloc) {
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+ }
+}
+
+/**
+ * Create waiters for the given tracker. The tracker is activated once all of its waiters have been released.
+ *
+ * @note Tracker can potentially be activated before this function returns.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker we will create waiters for.
+ * @param waiter_tail Tail of the list of pre-allocated waiters.
+ * @param waiter_head Head of the list of pre-allocated waiters.
+ */
+static void mali_timeline_system_create_waiters_and_unlock(struct mali_timeline_system *system,
+ struct mali_timeline_tracker *tracker,
+ struct mali_timeline_waiter *waiter_tail,
+ struct mali_timeline_waiter *waiter_head)
+{
+ int i;
+ u32 tid = _mali_osk_get_tid();
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+#if defined(CONFIG_SYNC)
+ struct sync_fence *sync_fence = NULL;
+#endif /* defined(CONFIG_SYNC) */
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_head);
+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+ MALI_DEBUG_ASSERT(NULL != tracker->job);
+
+ /* Creating waiter object for all the timelines the fence is put on. Inserting this waiter
+ * into the timelines sorted list of waiters */
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ mali_timeline_point point;
+ struct mali_timeline *timeline;
+ struct mali_timeline_waiter *waiter;
+
+ /* Get point on current timeline from tracker's fence. */
+ point = tracker->fence.points[i];
+
+ if (likely(MALI_TIMELINE_NO_POINT == point)) {
+ /* Fence contains no point on this timeline so we don't need a waiter. */
+ continue;
+ }
+
+ timeline = system->timelines[i];
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+ MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n",
+ point, timeline->point_oldest, timeline->point_next));
+ continue;
+ }
+
+ if (likely(mali_timeline_is_point_released(timeline, point))) {
+ /* Tracker representing the point has been released so we don't need a
+ * waiter. */
+ continue;
+ }
+
+ /* The point is on timeline. */
+ MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, point));
+
+ /* Get a new zeroed waiter object. */
+ if (likely(NULL != waiter_tail)) {
+ waiter = waiter_tail;
+ waiter_tail = waiter_tail->tracker_next;
+ } else {
+ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+ continue;
+ }
+
+ /* Increase the trigger ref count of the tracker; it cannot activate until this waiter is released. */
+ tracker->trigger_ref_count++;
+
+ waiter->point = point;
+ waiter->tracker = tracker;
+
+ /* Insert waiter on tracker's singly-linked waiter list. */
+ if (NULL == tracker->waiter_head) {
+ /* list is empty */
+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+ tracker->waiter_tail = waiter;
+ } else {
+ tracker->waiter_head->tracker_next = waiter;
+ }
+ tracker->waiter_head = waiter;
+
+ /* Add waiter to timeline. */
+ mali_timeline_insert_waiter(timeline, waiter);
+ }
+#if defined(CONFIG_SYNC)
+ if (-1 != tracker->fence.sync_fd) {
+ int ret;
+ struct mali_timeline_waiter *waiter;
+
+ sync_fence = sync_fence_fdget(tracker->fence.sync_fd);
+ if (unlikely(NULL == sync_fence)) {
+ MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", tracker->fence.sync_fd));
+ goto exit;
+ }
+
+ /* Check if we have a zeroed waiter object available. */
+ if (unlikely(NULL == waiter_tail)) {
+ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n"));
+ goto exit;
+ }
+
+ /* Start asynchronous wait that will release waiter when the fence is signaled. */
+ sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback);
+ ret = sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter);
+ if (1 == ret) {
+ /* Fence already signaled, no waiter needed. */
+ tracker->fence.sync_fd = -1;
+ goto exit;
+ } else if (0 != ret) {
+ MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, ret));
+ tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT;
+ goto exit;
+ }
+
+ /* Grab new zeroed waiter object. */
+ waiter = waiter_tail;
+ waiter_tail = waiter_tail->tracker_next;
+
+ /* Increase the trigger ref count of the tracker. */
+ tracker->trigger_ref_count++;
+
+ waiter->point = MALI_TIMELINE_NO_POINT;
+ waiter->tracker = tracker;
+
+ /* Insert waiter on tracker's singly-linked waiter list. */
+ if (NULL == tracker->waiter_head) {
+ /* list is empty */
+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail);
+ tracker->waiter_tail = waiter;
+ } else {
+ tracker->waiter_head->tracker_next = waiter;
+ }
+ tracker->waiter_head = waiter;
+
+ /* Also store waiter in separate field for easy access by sync callback. */
+ tracker->waiter_sync = waiter;
+
+ /* Store the sync fence in tracker so we can retrieve in abort session, if needed. */
+ tracker->sync_fence = sync_fence;
+
+ sync_fence = NULL;
+ }
+exit:
+#endif /* defined(CONFIG_SYNC) */
+
+ if (NULL != waiter_tail) {
+ mali_timeline_system_release_waiter_list(system, waiter_tail, waiter_head);
+ }
+
+ /* Release the initial trigger ref count. */
+ tracker->trigger_ref_count--;
+
+ /* If there were no waiters added to this tracker we activate immediately. */
+ if (0 == tracker->trigger_ref_count) {
+ schedule_mask |= mali_timeline_tracker_activate(tracker);
+ }
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+ if (NULL != sync_fence) {
+ sync_fence_put(sync_fence);
+ }
+#endif /* defined(CONFIG_SYNC) */
+
+ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE);
+}
+
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+ struct mali_timeline_tracker *tracker,
+ enum mali_timeline_id timeline_id)
+{
+ int num_waiters = 0;
+ struct mali_timeline_waiter *waiter_tail, *waiter_head;
+ u32 tid = _mali_osk_get_tid();
+ mali_timeline_point point = MALI_TIMELINE_NO_POINT;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(system->session);
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+
+ MALI_DEBUG_ASSERT(MALI_FALSE == system->session->is_aborting);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > tracker->type);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic);
+
+ MALI_DEBUG_PRINT(4, ("Mali Timeline: adding tracker for job %p, timeline: %d\n", tracker->job, timeline_id));
+
+ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count);
+ tracker->system = system;
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ num_waiters = mali_timeline_fence_num_waiters(&tracker->fence);
+
+ /* Allocate waiters. */
+ mali_timeline_system_allocate_waiters(system, &waiter_tail, &waiter_head, num_waiters);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ /* Add tracker to timeline. This will allocate a point for the tracker on the timeline. If
+ * timeline ID is MALI_TIMELINE_NONE the tracker will NOT be added to a timeline and the
+ * point will be MALI_TIMELINE_NO_POINT.
+ *
+ * NOTE: the tracker can fail to be added if the timeline is full. If this happens, the
+ * point will be MALI_TIMELINE_NO_POINT. */
+ MALI_DEBUG_ASSERT(timeline_id < MALI_TIMELINE_MAX || timeline_id == MALI_TIMELINE_NONE);
+ if (likely(timeline_id < MALI_TIMELINE_MAX)) {
+ struct mali_timeline *timeline = system->timelines[timeline_id];
+ mali_timeline_insert_tracker(timeline, tracker);
+ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline));
+ }
+
+ point = tracker->point;
+
+ /* Create waiters for tracker based on supplied fence. Each waiter will increase the
+ * trigger ref count. */
+ mali_timeline_system_create_waiters_and_unlock(system, tracker, waiter_tail, waiter_head);
+ tracker = NULL;
+
+ /* At this point the tracker object might have been freed so we should no longer
+ * access it. */
+
+
+ /* The tracker will always be activated after calling add_tracker, even if NO_POINT is
+ * returned. */
+ return point;
+}
+
+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system,
+ struct mali_timeline_waiter *waiter)
+{
+ struct mali_timeline_tracker *tracker;
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(waiter);
+
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system));
+
+ tracker = waiter->tracker;
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+
+ /* At this point the waiter has been removed from the timeline's waiter list, but it is
+ * still on the tracker's waiter list. All of the tracker's waiters will be released when
+ * the tracker is activated. */
+
+ waiter->point = MALI_TIMELINE_NO_POINT;
+ waiter->tracker = NULL;
+
+ tracker->trigger_ref_count--;
+ if (0 == tracker->trigger_ref_count) {
+ /* This was the last waiter; activate tracker */
+ schedule_mask |= mali_timeline_tracker_activate(tracker);
+ tracker = NULL;
+ }
+
+ return schedule_mask;
+}
+
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+ enum mali_timeline_id timeline_id)
+{
+ mali_timeline_point point;
+ struct mali_timeline *timeline;
+ u32 tid = _mali_osk_get_tid();
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ if (MALI_TIMELINE_MAX <= timeline_id) {
+ return MALI_TIMELINE_NO_POINT;
+ }
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ timeline = system->timelines[timeline_id];
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ point = MALI_TIMELINE_NO_POINT;
+ if (timeline->point_oldest != timeline->point_next) {
+ point = timeline->point_next - 1;
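+ /* Point 0 is reserved as MALI_TIMELINE_NO_POINT, so step past it when
+ * the point counter has just wrapped. */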
+ if (MALI_TIMELINE_NO_POINT == point) point--;
+ }
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+ return point;
+}
+
+void mali_timeline_initialize(void)
+{
+ _mali_osk_atomic_init(&gp_tracker_count, 0);
+ _mali_osk_atomic_init(&phy_pp_tracker_count, 0);
+ _mali_osk_atomic_init(&virt_pp_tracker_count, 0);
+}
+
+void mali_timeline_terminate(void)
+{
+ _mali_osk_atomic_term(&gp_tracker_count);
+ _mali_osk_atomic_term(&phy_pp_tracker_count);
+ _mali_osk_atomic_term(&virt_pp_tracker_count);
+}
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+static mali_bool is_waiting_on_timeline(struct mali_timeline_tracker *tracker, enum mali_timeline_id id)
+{
+ struct mali_timeline *timeline;
+ struct mali_timeline_system *system;
+
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+
+ MALI_DEBUG_ASSERT_POINTER(tracker->timeline);
+ timeline = tracker->timeline;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline->system);
+ system = timeline->system;
+
+ if (MALI_TIMELINE_MAX > id) {
+ if (MALI_TIMELINE_NO_POINT != tracker->fence.points[id]) {
+ return mali_timeline_is_point_on(system->timelines[id], tracker->fence.points[id]);
+ } else {
+ return MALI_FALSE;
+ }
+ } else {
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NONE == id);
+ return MALI_FALSE;
+ }
+}
+
+static const char *timeline_id_to_string(enum mali_timeline_id id)
+{
+ switch (id) {
+ case MALI_TIMELINE_GP:
+ return "GP";
+ case MALI_TIMELINE_PP:
+ return "PP";
+ case MALI_TIMELINE_SOFT:
+ return "SOFT";
+ default:
+ return "NONE";
+ }
+}
+
+static const char *timeline_tracker_type_to_string(enum mali_timeline_tracker_type type)
+{
+ switch (type) {
+ case MALI_TIMELINE_TRACKER_GP:
+ return "GP";
+ case MALI_TIMELINE_TRACKER_PP:
+ return "PP";
+ case MALI_TIMELINE_TRACKER_SOFT:
+ return "SOFT";
+ case MALI_TIMELINE_TRACKER_WAIT:
+ return "WAIT";
+ case MALI_TIMELINE_TRACKER_SYNC:
+ return "SYNC";
+ default:
+ return "INVALID";
+ }
+}
+
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker)
+{
+ struct mali_timeline *timeline = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+ timeline = tracker->timeline;
+
+ if (0 != tracker->trigger_ref_count) {
+ return MALI_TIMELINE_TS_WAITING;
+ }
+
+ if (timeline && (timeline->tracker_tail == tracker || NULL != tracker->timeline_prev)) {
+ return MALI_TIMELINE_TS_ACTIVE;
+ }
+
+ if (timeline && (MALI_TIMELINE_NO_POINT == tracker->point)) {
+ return MALI_TIMELINE_TS_INIT;
+ }
+
+ return MALI_TIMELINE_TS_FINISH;
+}
+
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx)
+{
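+ /* One state character per mali_timeline_tracker_state: Init, Waiting, Active, Finish. */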
+ const char *tracker_state = "IWAF";
+ char state_char = 'I';
+ char tracker_type[32] = {0};
+
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+
+ state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+ _mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+ if (0 != tracker->trigger_ref_count) {
+ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)] job:(0x%08X)\n",
+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+ tracker->fence.sync_fd, tracker->sync_fence, tracker->job);
+ } else {
+ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c fd:%d fence:(0x%08X) job:(0x%08X)\n",
+ tracker_type, tracker->point, state_char,
+ tracker->fence.sync_fd, tracker->sync_fence, tracker->job);
+ }
+#else
+ if (0 != tracker->trigger_ref_count) {
+ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)] job:(0x%08X)\n",
+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+ tracker->job);
+ } else {
+ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c job:(0x%08X)\n",
+ tracker_type, tracker->point, state_char,
+ tracker->job);
+ }
+#endif
+}
+
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx)
+{
+ struct mali_timeline_tracker *tracker = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ tracker = timeline->tracker_tail;
+ while (NULL != tracker) {
+ mali_timeline_debug_print_tracker(tracker, print_ctx);
+ tracker = tracker->timeline_next;
+ }
+}
+
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0))
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker)
+{
+ const char *tracker_state = "IWAF";
+ char state_char = 'I';
+ char tracker_type[32] = {0};
+
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+
+ state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker));
+ _mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type));
+
+#if defined(CONFIG_SYNC)
+ if (0 != tracker->trigger_ref_count) {
+ MALI_PRINT(("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)] job:(0x%08X)\n",
+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+ tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+ } else {
+ MALI_PRINT(("TL: %s %u %c fd:%d fence:(0x%08X) job:(0x%08X)\n",
+ tracker_type, tracker->point, state_char,
+ tracker->fence.sync_fd, tracker->sync_fence, tracker->job));
+ }
+#else
+ if (0 != tracker->trigger_ref_count) {
+ MALI_PRINT(("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)] job:(0x%08X)\n",
+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count,
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0],
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1],
+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2],
+ tracker->job));
+ } else {
+ MALI_PRINT(("TL: %s %u %c job:(0x%08X)\n",
+ tracker_type, tracker->point, state_char,
+ tracker->job));
+ }
+#endif
+}
+
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline)
+{
+ struct mali_timeline_tracker *tracker = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ tracker = timeline->tracker_tail;
+ while (NULL != tracker) {
+ mali_timeline_debug_direct_print_tracker(tracker);
+ tracker = tracker->timeline_next;
+ }
+}
+
+#endif
+
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx)
+{
+ int i;
+ int num_printed = 0;
+ u32 tid = _mali_osk_get_tid();
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ /* Print all timelines */
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ struct mali_timeline *timeline = system->timelines[i];
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ if (NULL == timeline->tracker_head) continue;
+
+ _mali_osk_ctxprintf(print_ctx, "TL: Timeline %s:\n",
+ timeline_id_to_string((enum mali_timeline_id)i));
+
+ mali_timeline_debug_print_timeline(timeline, print_ctx);
+ num_printed++;
+ }
+
+ if (0 == num_printed) {
+ _mali_osk_ctxprintf(print_ctx, "TL: All timelines empty\n");
+ }
+
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+}
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
diff --git a/drivers/gpu/arm/utgard/common/mali_timeline.h b/drivers/gpu/arm/utgard/common/mali_timeline.h
new file mode 100644
index 000000000000..58d83839f4fe
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_timeline.h
@@ -0,0 +1,535 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMELINE_H__
+#define __MALI_TIMELINE_H__
+
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_session.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+#include "mali_sync.h"
+#include "mali_scheduler_types.h"
+#include <linux/version.h>
+
+/**
+ * Soft job timeout.
+ *
+ * Soft jobs have to be signaled as complete after activation. Normally this is done by user space,
+ * but in order to guarantee that every soft job is completed, we also have a timer.
+ */
+#define MALI_TIMELINE_TIMEOUT_HZ ((unsigned long) (HZ * 3 / 2)) /* 1500 ms. */
+
+/**
+ * Timeline type.
+ */
+typedef enum mali_timeline_id {
+ MALI_TIMELINE_GP = MALI_UK_TIMELINE_GP, /**< GP job timeline. */
+ MALI_TIMELINE_PP = MALI_UK_TIMELINE_PP, /**< PP job timeline. */
+ MALI_TIMELINE_SOFT = MALI_UK_TIMELINE_SOFT, /**< Soft job timeline. */
+ MALI_TIMELINE_MAX = MALI_UK_TIMELINE_MAX
+} mali_timeline_id;
+
+/**
+ * Used by trackers that should not be added to a timeline (@ref mali_timeline_system_add_tracker).
+ */
+#define MALI_TIMELINE_NONE MALI_TIMELINE_MAX
+
+/**
+ * Tracker type.
+ */
+typedef enum mali_timeline_tracker_type {
+ MALI_TIMELINE_TRACKER_GP = 0, /**< Tracker used by GP jobs. */
+ MALI_TIMELINE_TRACKER_PP = 1, /**< Tracker used by PP jobs. */
+ MALI_TIMELINE_TRACKER_SOFT = 2, /**< Tracker used by soft jobs. */
+ MALI_TIMELINE_TRACKER_WAIT = 3, /**< Tracker used for fence wait. */
+ MALI_TIMELINE_TRACKER_SYNC = 4, /**< Tracker used for sync fence. */
+ MALI_TIMELINE_TRACKER_MAX = 5,
+} mali_timeline_tracker_type;
+
+/**
+ * Tracker activation error.
+ */
+typedef u32 mali_timeline_activation_error;
+#define MALI_TIMELINE_ACTIVATION_ERROR_NONE 0
+#define MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT (1<<1)
+#define MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT (1<<0)
+
+/**
+ * Type used to represent a point on a timeline.
+ */
+typedef u32 mali_timeline_point;
+
+/**
+ * Used to represent the absence of a point on a timeline.
+ */
+#define MALI_TIMELINE_NO_POINT ((mali_timeline_point) 0)
+
+/**
+ * The maximum span of points on a timeline. A timeline will be considered full if the difference
+ * between the oldest and newest points is equal to or larger than this value.
+ */
+#define MALI_TIMELINE_MAX_POINT_SPAN 65536
+
+/**
+ * Magic value used to assert on validity of trackers.
+ */
+#define MALI_TIMELINE_TRACKER_MAGIC 0xabcdabcd
+
+struct mali_timeline;
+struct mali_timeline_waiter;
+struct mali_timeline_tracker;
+
+/**
+ * Timeline fence.
+ */
+struct mali_timeline_fence {
+ mali_timeline_point points[MALI_TIMELINE_MAX]; /**< For each timeline, a point or MALI_TIMELINE_NO_POINT. */
+ s32 sync_fd; /**< A file descriptor representing a sync fence, or -1. */
+};
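+
+/*
+ * Illustrative sketch (not used by the driver): an "empty" fence has every
+ * entry set to MALI_TIMELINE_NO_POINT (which is 0) and sync_fd set to -1.
+ * A dependency on a PP point can then be added like this, where "pp_point"
+ * is a hypothetical point obtained from a timeline:
+ *
+ * @code
+ * struct mali_timeline_fence fence;
+ * _mali_osk_memset(&fence, 0, sizeof(fence));
+ * fence.sync_fd = -1;
+ * fence.points[MALI_TIMELINE_PP] = pp_point;
+ * @endcode
+ */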
+
+/**
+ * Timeline system.
+ *
+ * The Timeline system has a set of timelines associated with a session.
+ */
+struct mali_timeline_system {
+ struct mali_spinlock_reentrant *spinlock; /**< Spin lock protecting the timeline system */
+ struct mali_timeline *timelines[MALI_TIMELINE_MAX]; /**< The timelines in this system */
+
+ /* Singly-linked list of unused waiter objects, linked through the waiter's tracker_next field. */
+ struct mali_timeline_waiter *waiter_empty_list;
+
+ struct mali_session_data *session; /**< Session that owns this system. */
+
+ mali_bool timer_enabled; /**< Set to MALI_TRUE if soft job timer should be enabled, MALI_FALSE if not. */
+
+ _mali_osk_wait_queue_t *wait_queue; /**< Wait queue. */
+
+#if defined(CONFIG_SYNC)
+ struct sync_timeline *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */
+#endif /* defined(CONFIG_SYNC) */
+};
+
+/**
+ * Timeline. Each Timeline system will have MALI_TIMELINE_MAX timelines.
+ */
+struct mali_timeline {
+ mali_timeline_point point_next; /**< The next available point. */
+ mali_timeline_point point_oldest; /**< The oldest point not released. */
+
+ /* Double-linked list of trackers. Sorted in ascending order by tracker->time_number with
+ * tail pointing to the tracker with the oldest time. */
+ struct mali_timeline_tracker *tracker_head;
+ struct mali_timeline_tracker *tracker_tail;
+
+ /* Double-linked list of waiters. Sorted in ascending order by waiter->time_number_wait
+ * with tail pointing to the waiter with oldest wait time. */
+ struct mali_timeline_waiter *waiter_head;
+ struct mali_timeline_waiter *waiter_tail;
+
+ struct mali_timeline_system *system; /**< Timeline system this timeline belongs to. */
+ enum mali_timeline_id id; /**< Timeline type. */
+
+#if defined(CONFIG_SYNC)
+ struct sync_timeline *sync_tl; /**< Sync timeline that corresponds to this timeline. */
+ mali_bool destroyed;
+ struct mali_spinlock_reentrant *spinlock; /**< Spin lock protecting the timeline system */
+#endif /* defined(CONFIG_SYNC) */
+
+ /* The following fields are used to time out soft job trackers. */
+ _mali_osk_wq_delayed_work_t *delayed_work;
+ mali_bool timer_active;
+};
+
+/**
+ * Timeline waiter.
+ */
+struct mali_timeline_waiter {
+ mali_timeline_point point; /**< Point on timeline we are waiting for to be released. */
+ struct mali_timeline_tracker *tracker; /**< Tracker that is waiting. */
+
+ struct mali_timeline_waiter *timeline_next; /**< Next waiter on timeline's waiter list. */
+ struct mali_timeline_waiter *timeline_prev; /**< Previous waiter on timeline's waiter list. */
+
+ struct mali_timeline_waiter *tracker_next; /**< Next waiter on tracker's waiter list. */
+};
+
+/**
+ * Timeline tracker.
+ */
+struct mali_timeline_tracker {
+ MALI_DEBUG_CODE(u32 magic); /**< Should always be MALI_TIMELINE_TRACKER_MAGIC for a valid tracker. */
+
+ mali_timeline_point point; /**< Point on timeline for this tracker */
+
+ struct mali_timeline_tracker *timeline_next; /**< Next tracker on timeline's tracker list */
+ struct mali_timeline_tracker *timeline_prev; /**< Previous tracker on timeline's tracker list */
+
+ u32 trigger_ref_count; /**< When zero tracker will be activated */
+ mali_timeline_activation_error activation_error; /**< Activation error. */
+ struct mali_timeline_fence fence; /**< Fence used to create this tracker */
+
+ /* Singly-linked list of waiters, in insertion order, with the tail
+ * pointing to the first waiter inserted. */
+ struct mali_timeline_waiter *waiter_head;
+ struct mali_timeline_waiter *waiter_tail;
+
+#if defined(CONFIG_SYNC)
+ /* These are only used if the tracker is waiting on a sync fence. */
+ struct mali_timeline_waiter *waiter_sync; /**< A direct pointer to timeline waiter representing sync fence. */
+ struct sync_fence_waiter sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
+ struct sync_fence *sync_fence; /**< The sync fence this tracker is waiting on. */
+ _mali_osk_list_t sync_fence_cancel_list; /**< List node used to cancel sync fence waiters. */
+#endif /* defined(CONFIG_SYNC) */
+
+ struct mali_timeline_system *system; /**< Timeline system. */
+ struct mali_timeline *timeline; /**< Timeline, or NULL if not on a timeline. */
+ enum mali_timeline_tracker_type type; /**< Type of tracker. */
+ void *job; /**< Owner of tracker. */
+
+ /* The following fields are used to time out soft job trackers. */
+ unsigned long os_tick_create;
+ unsigned long os_tick_activate;
+ mali_bool timer_active;
+};
+
+extern _mali_osk_atomic_t gp_tracker_count;
+extern _mali_osk_atomic_t phy_pp_tracker_count;
+extern _mali_osk_atomic_t virt_pp_tracker_count;
+
+/**
+ * What follows is a set of functions to check the state of a timeline and to determine where on a
+ * timeline a given point is. Most of these checks will translate the timeline so the oldest point
+ * on the timeline is aligned with zero. Remember that all of these calculations are done on
+ * unsigned integers.
+ *
+ * The following example illustrates the three different states a point can be in. The timeline has
+ * been translated to put the oldest point at zero:
+ *
+ *
+ *
+ *                            [ point is in forbidden zone ]
+ *                                       64k wide
+ *                            MALI_TIMELINE_MAX_POINT_SPAN
+ *
+ * [ point is on timeline    )                              (     point is released    ]
+ *
+ * 0--------------------------##############################--------------------2^32 - 1
+ * ^                          ^
+ * \                          |
+ *  oldest point on timeline  |
+ *                            \
+ *                             next point on timeline
+ */
+
+/**
+ * Compare two timeline points
+ *
+ * Returns true if a is after b, false if a is before or equal to b.
+ *
+ * This function ignores MALI_TIMELINE_MAX_POINT_SPAN. Wrapping is supported and
+ * the result will be correct if the points are less than UINT_MAX/2 apart.
+ *
+ * @param a Point on timeline
+ * @param b Point on timeline
+ * @return MALI_TRUE if a is after b
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_point_after(mali_timeline_point a, mali_timeline_point b)
+{
+ return 0 > ((s32)b) - ((s32)a);
+}
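+
+/*
+ * Worked example (illustrative only):
+ *
+ * @code
+ * mali_timeline_point a = 5;
+ * mali_timeline_point b = 0xFFFFFFFFu;
+ * mali_bool after = mali_timeline_point_after(a, b);
+ * @endcode
+ *
+ * Here (s32)b - (s32)a == -6, so "after" is MALI_TRUE: a is six points past b
+ * across the 2^32 wrap, even though a < b as unsigned values.
+ */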
+
+/**
+ * Check if a point is on timeline. A point is on a timeline if it is greater than, or equal to,
+ * the oldest point, and less than the next point.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is on timeline, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_on(struct mali_timeline *timeline, mali_timeline_point point)
+{
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+ return (point - timeline->point_oldest) < (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Check if a point has been released. A point is released if it lies outside both the live
+ * range [oldest, next) on the timeline and the forbidden zone that follows the next point.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point has been released, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_released(struct mali_timeline *timeline, mali_timeline_point point)
+{
+ mali_timeline_point point_normalized;
+ mali_timeline_point next_normalized;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+ point_normalized = point - timeline->point_oldest;
+ next_normalized = timeline->point_next - timeline->point_oldest;
+
+ return point_normalized > (next_normalized + MALI_TIMELINE_MAX_POINT_SPAN);
+}
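+
+/*
+ * Worked example (illustrative only): assume point_oldest = 100 and
+ * point_next = 200. For point = 50, point_normalized wraps to 0xFFFFFFCE
+ * (50 - 100 as u32), which is far greater than next_normalized +
+ * MALI_TIMELINE_MAX_POINT_SPAN (100 + 65536), so the point is reported as
+ * released. For point = 150 the normalized value is 50, which is below that
+ * bound (that point is still on the timeline), so it is not.
+ */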
+
+/**
+ * Check if a point is valid. A point is valid if it is on the timeline or has been released.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return MALI_TRUE if point is valid, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_valid(struct mali_timeline *timeline, mali_timeline_point point)
+{
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ return mali_timeline_is_point_on(timeline, point) || mali_timeline_is_point_released(timeline, point);
+}
+
+/**
+ * Check if timeline is empty (has no points on it). A timeline is empty if next == oldest.
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is empty, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_empty(struct mali_timeline *timeline)
+{
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ return timeline->point_next == timeline->point_oldest;
+}
+
+/**
+ * Check if timeline is full. A valid timeline cannot span more than 64k points (@ref
+ * MALI_TIMELINE_MAX_POINT_SPAN).
+ *
+ * @param timeline Timeline.
+ * @return MALI_TRUE if timeline is full, MALI_FALSE if not.
+ */
+MALI_STATIC_INLINE mali_bool mali_timeline_is_full(struct mali_timeline *timeline)
+{
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ return MALI_TIMELINE_MAX_POINT_SPAN <= (timeline->point_next - timeline->point_oldest);
+}
+
+/**
+ * Create a new timeline system.
+ *
+ * @param session The session this timeline system will belong to.
+ * @return New timeline system.
+ */
+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session);
+
+/**
+ * Abort timeline system.
+ *
+ * This will release all pending waiters in the timeline system causing all trackers to be
+ * activated.
+ *
+ * @param system Timeline system to abort all jobs from.
+ */
+void mali_timeline_system_abort(struct mali_timeline_system *system);
+
+/**
+ * Destroy an empty timeline system.
+ *
+ * @note @ref mali_timeline_system_abort() should be called prior to this function.
+ *
+ * @param system Timeline system to destroy.
+ */
+void mali_timeline_system_destroy(struct mali_timeline_system *system);
+
+/**
+ * Stop the soft job timer.
+ *
+ * @param system Timeline system
+ */
+void mali_timeline_system_stop_timer(struct mali_timeline_system *system);
+
+/**
+ * Add a tracker to a timeline system and optionally also on a timeline.
+ *
+ * Once added to the timeline system, the tracker is guaranteed to be activated. The tracker can be
+ * activated before this function returns. Thus, it is also possible that the tracker is released
+ * before this function returns, depending on the tracker type.
+ *
+ * @note Tracker must be initialized (@ref mali_timeline_tracker_init) before being added to the
+ * timeline system.
+ *
+ * @param system Timeline system the tracker will be added to.
+ * @param tracker The tracker to be added.
+ * @param timeline_id Id of the timeline the tracker will be added to, or
+ * MALI_TIMELINE_NONE if it should not be added on a timeline.
+ * @return Point on timeline identifying this tracker, or MALI_TIMELINE_NO_POINT if not on timeline.
+ */
+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system,
+ struct mali_timeline_tracker *tracker,
+ enum mali_timeline_id timeline_id);
+
+/**
+ * Get latest point on timeline.
+ *
+ * @param system Timeline system.
+ * @param timeline_id Id of timeline to get latest point from.
+ * @return Latest point on timeline, or MALI_TIMELINE_NO_POINT if the timeline is empty.
+ */
+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system,
+ enum mali_timeline_id timeline_id);
+
+/**
+ * Initialize tracker.
+ *
+ * Must be called before tracker is added to timeline system (@ref mali_timeline_system_add_tracker).
+ *
+ * @param tracker Tracker to initialize.
+ * @param type Type of tracker.
+ * @param fence Fence used to set up dependencies for tracker.
+ * @param job Pointer to job struct this tracker is associated with.
+ */
+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker,
+ mali_timeline_tracker_type type,
+ struct mali_timeline_fence *fence,
+ void *job);
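+
+/*
+ * Typical flow (sketch; "system", "fence" and "my_job" are placeholders for
+ * illustration, mirroring the wait tracker in mali_timeline_fence_wait.c):
+ *
+ * @code
+ * struct mali_timeline_tracker tracker;
+ * mali_timeline_tracker_init(&tracker, MALI_TIMELINE_TRACKER_WAIT, &fence, my_job);
+ * mali_timeline_system_add_tracker(system, &tracker, MALI_TIMELINE_NONE);
+ * @endcode
+ */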
+
+/**
+ * Grab trigger ref count on tracker.
+ *
+ * This will prevent tracker from being activated until the trigger ref count reaches zero.
+ *
+ * @note Tracker must have been initialized (@ref mali_timeline_tracker_init).
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ */
+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker);
+
+/**
+ * Release trigger ref count on tracker.
+ *
+ * If the trigger ref count reaches zero, the tracker will be activated.
+ *
+ * @param system Timeline system.
+ * @param tracker Tracker.
+ * @param activation_error Error bitmask if activated with error, or MALI_TIMELINE_ACTIVATION_ERROR_NONE if no error.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error);
+
+/**
+ * Release a tracker from the timeline system.
+ *
+ * This is used to signal that the job being tracked is finished, either due to normal circumstances
+ * (job complete/abort) or due to a timeout.
+ *
+ * We may need to schedule some subsystems after a tracker has been released and the returned
+ * bitmask will tell us if it is necessary. If the return value is non-zero, this value needs to be
+ * sent as an input parameter to @ref mali_scheduler_schedule_from_mask() to do the scheduling.
+ *
+ * @note Tracker must have been activated before being released.
+ * @warning Not calling @ref mali_scheduler_schedule_from_mask() after releasing a tracker can lead
+ * to a deadlock.
+ *
+ * @param tracker Tracker being released.
+ * @return Scheduling bitmask.
+ */
+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker);
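+
+/*
+ * Sketch of the release-then-schedule pattern described above. The entry
+ * point name is taken from the @ref above; the MALI_FALSE argument is an
+ * assumption mirroring the mali_executor_schedule_from_mask() call in
+ * mali_timeline.c:
+ *
+ * @code
+ * mali_scheduler_mask mask = mali_timeline_tracker_release(tracker);
+ * if (MALI_SCHEDULER_MASK_EMPTY != mask) {
+ *         mali_scheduler_schedule_from_mask(mask, MALI_FALSE);
+ * }
+ * @endcode
+ */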
+
+MALI_STATIC_INLINE mali_bool mali_timeline_tracker_activation_error(
+ struct mali_timeline_tracker *tracker)
+{
+ MALI_DEBUG_ASSERT_POINTER(tracker);
+ return (MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT &
+ tracker->activation_error) ? MALI_TRUE : MALI_FALSE;
+}
+
+/**
+ * Copy data from a UK fence to a Timeline fence.
+ *
+ * @param fence Timeline fence.
+ * @param uk_fence UK fence.
+ */
+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence);
+
+void mali_timeline_initialize(void);
+
+void mali_timeline_terminate(void);
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_gp_job(void)
+{
+ return 0 < _mali_osk_atomic_read(&gp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_physical_pp_job(void)
+{
+ return 0 < _mali_osk_atomic_read(&phy_pp_tracker_count);
+}
+
+MALI_STATIC_INLINE mali_bool mali_timeline_has_virtual_pp_job(void)
+{
+ return 0 < _mali_osk_atomic_read(&virt_pp_tracker_count);
+}
+
+#if defined(DEBUG)
+#define MALI_TIMELINE_DEBUG_FUNCTIONS
+#endif /* DEBUG */
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+
+/**
+ * Tracker state. Used for debug printing.
+ */
+typedef enum mali_timeline_tracker_state {
+ MALI_TIMELINE_TS_INIT = 0,
+ MALI_TIMELINE_TS_WAITING = 1,
+ MALI_TIMELINE_TS_ACTIVE = 2,
+ MALI_TIMELINE_TS_FINISH = 3,
+} mali_timeline_tracker_state;
+
+/**
+ * Get tracker state.
+ *
+ * @param tracker Tracker to check.
+ * @return State of tracker.
+ */
+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker);
+
+/**
+ * Print debug information about tracker.
+ *
+ * @param tracker Tracker to print.
+ */
+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx);
+
+/**
+ * Print debug information about timeline.
+ *
+ * @param timeline Timeline to print.
+ */
+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx);
+
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0))
+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker);
+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline);
+#endif
+
+/**
+ * Print debug information about timeline system.
+ *
+ * @param system Timeline system to print.
+ */
+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx);
+
+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */
+
+#endif /* __MALI_TIMELINE_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_timeline_fence_wait.c b/drivers/gpu/arm/utgard/common/mali_timeline_fence_wait.c
new file mode 100644
index 000000000000..3c58928dd3a2
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_timeline_fence_wait.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_timeline_fence_wait.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_spinlock_reentrant.h"
+
+/**
+ * Allocate a fence waiter tracker.
+ *
+ * @return New fence waiter if successful, NULL if not.
+ */
+static struct mali_timeline_fence_wait_tracker *mali_timeline_fence_wait_tracker_alloc(void)
+{
+ return (struct mali_timeline_fence_wait_tracker *) _mali_osk_calloc(1, sizeof(struct mali_timeline_fence_wait_tracker));
+}
+
+/**
+ * Free fence waiter tracker.
+ *
+ * @param wait Fence wait tracker to free.
+ */
+static void mali_timeline_fence_wait_tracker_free(struct mali_timeline_fence_wait_tracker *wait)
+{
+ MALI_DEBUG_ASSERT_POINTER(wait);
+ _mali_osk_atomic_term(&wait->refcount);
+ _mali_osk_free(wait);
+}
+
+/**
+ * Check if fence wait tracker has been activated. Used as a wait queue condition.
+ *
+ * @param data Fence waiter.
+ * @return MALI_TRUE if tracker has been activated, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_tracker_is_activated(void *data)
+{
+ struct mali_timeline_fence_wait_tracker *wait;
+
+ wait = (struct mali_timeline_fence_wait_tracker *) data;
+ MALI_DEBUG_ASSERT_POINTER(wait);
+
+ return wait->activated;
+}
+
+/**
+ * Check if fence has been signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Timeline fence.
+ * @return MALI_TRUE if fence is signaled, MALI_FALSE if not.
+ */
+static mali_bool mali_timeline_fence_wait_check_status(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+ int i;
+ u32 tid = _mali_osk_get_tid();
+ mali_bool ret = MALI_TRUE;
+#if defined(CONFIG_SYNC)
+ struct sync_fence *sync_fence = NULL;
+#endif
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(fence);
+
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ struct mali_timeline *timeline;
+ mali_timeline_point point;
+
+ point = fence->points[i];
+
+ if (likely(MALI_TIMELINE_NO_POINT == point)) {
+ /* Fence contains no point on this timeline. */
+ continue;
+ }
+
+ timeline = system->timelines[i];
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ if (unlikely(!mali_timeline_is_point_valid(timeline, point))) {
+ MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", point, timeline->point_oldest, timeline->point_next));
+ }
+
+ if (!mali_timeline_is_point_released(timeline, point)) {
+ ret = MALI_FALSE;
+ goto exit;
+ }
+ }
+
+#if defined(CONFIG_SYNC)
+ if (-1 != fence->sync_fd) {
+ sync_fence = sync_fence_fdget(fence->sync_fd);
+ if (likely(NULL != sync_fence)) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ if (0 == sync_fence->status) {
+#else
+ if (0 == atomic_read(&sync_fence->status)) {
+#endif
+ ret = MALI_FALSE;
+ }
+ } else {
+ MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", fence->sync_fd));
+ }
+ }
+#endif /* defined(CONFIG_SYNC) */
+
+exit:
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+
+#if defined(CONFIG_SYNC)
+ if (NULL != sync_fence) {
+ sync_fence_put(sync_fence);
+ }
+#endif /* defined(CONFIG_SYNC) */
+
+ return ret;
+}
+
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout)
+{
+ struct mali_timeline_fence_wait_tracker *wait;
+ mali_timeline_point point;
+ mali_bool ret;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(fence);
+
+ MALI_DEBUG_PRINT(4, ("Mali Timeline: wait on fence\n"));
+
+ if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY == timeout) {
+ return mali_timeline_fence_wait_check_status(system, fence);
+ }
+
+ wait = mali_timeline_fence_wait_tracker_alloc();
+ if (unlikely(NULL == wait)) {
+ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate data for fence wait\n"));
+ return MALI_FALSE;
+ }
+
+ wait->activated = MALI_FALSE;
+ wait->system = system;
+
+ /* Initialize refcount to two references. The first reference will be released by this
+ * function after the wait is over. The second reference will be released when the tracker
+ * is activated. */
+ _mali_osk_atomic_init(&wait->refcount, 2);
+
+ /* Add tracker to timeline system, but not to a timeline. */
+ mali_timeline_tracker_init(&wait->tracker, MALI_TIMELINE_TRACKER_WAIT, fence, wait);
+ point = mali_timeline_system_add_tracker(system, &wait->tracker, MALI_TIMELINE_NONE);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+ MALI_IGNORE(point);
+
+ /* Wait for the tracker to be activated or time out. */
+ if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER == timeout) {
+ _mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait);
+ } else {
+ _mali_osk_wait_queue_wait_event_timeout(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait, timeout);
+ }
+
+ ret = wait->activated;
+
+ if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+ mali_timeline_fence_wait_tracker_free(wait);
+ }
+
+ return ret;
+}
+
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *wait)
+{
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_DEBUG_ASSERT_POINTER(wait);
+ MALI_DEBUG_ASSERT_POINTER(wait->system);
+
+ MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for fence wait tracker\n"));
+
+ MALI_DEBUG_ASSERT(MALI_FALSE == wait->activated);
+ wait->activated = MALI_TRUE;
+
+ _mali_osk_wait_queue_wake_up(wait->system->wait_queue);
+
+ /* Nothing can wait on this tracker, so nothing to schedule after release. */
+ schedule_mask = mali_timeline_tracker_release(&wait->tracker);
+ MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+ MALI_IGNORE(schedule_mask);
+
+ if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) {
+ mali_timeline_fence_wait_tracker_free(wait);
+ }
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_timeline_fence_wait.h b/drivers/gpu/arm/utgard/common/mali_timeline_fence_wait.h
new file mode 100644
index 000000000000..f5440ab6fc6d
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_timeline_fence_wait.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_fence_wait.h
+ *
+ * This file contains functions used to wait until a Timeline fence is signaled.
+ */
+
+#ifndef __MALI_TIMELINE_FENCE_WAIT_H__
+#define __MALI_TIMELINE_FENCE_WAIT_H__
+
+#include "mali_osk.h"
+#include "mali_timeline.h"
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, a timer is not used and the
+ * function only returns when the fence is signaled.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER ((u32) -1)
+
+/**
+ * If used as the timeout argument in @ref mali_timeline_fence_wait, the function will return
+ * immediately with the current state of the fence.
+ */
+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY 0
+
+/**
+ * Fence wait tracker.
+ *
+ * The fence wait tracker is added to the Timeline system with the fence we are waiting on as a
+ * dependency. We will then perform a blocking wait, possibly with a timeout, until the tracker is
+ * activated, which happens when the fence is signaled.
+ */
+struct mali_timeline_fence_wait_tracker {
+ mali_bool activated; /**< MALI_TRUE if the tracker has been activated, MALI_FALSE if not. */
+ _mali_osk_atomic_t refcount; /**< Reference count. */
+ struct mali_timeline_system *system; /**< Timeline system. */
+ struct mali_timeline_tracker tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Wait for a fence to be signaled, or timeout is reached.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to wait on.
+ * @param timeout Timeout in ms, or MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER or
+ * MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY.
+ * @return MALI_TRUE if signaled, MALI_FALSE if timed out.
+ */
+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout);
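+
+/*
+ * Usage sketch (illustrative; "system" and "fence" are assumed to be set up
+ * as described above): block for at most 100 ms.
+ *
+ * @code
+ * if (MALI_FALSE == mali_timeline_fence_wait(system, &fence, 100)) {
+ *         MALI_DEBUG_PRINT(2, ("Mali Timeline: fence wait timed out\n"));
+ * }
+ * @endcode
+ */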
+
+/**
+ * Used by the Timeline system to activate a fence wait tracker.
+ *
+ * @param fence_wait_tracker Fence waiter tracker.
+ */
+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *fence_wait_tracker);
+
+#endif /* __MALI_TIMELINE_FENCE_WAIT_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_timeline_sync_fence.c b/drivers/gpu/arm/utgard/common/mali_timeline_sync_fence.c
new file mode 100644
index 000000000000..73843f07c990
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_timeline_sync_fence.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_timeline_sync_fence.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_sync.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Creates a sync fence tracker and a sync fence. Adds sync fence tracker to Timeline system and
+ * returns sync fence. The sync fence will be signaled when the sync fence tracker is activated.
+ *
+ * @param timeline Timeline.
+ * @param point Point on timeline.
+ * @return Sync fence that will be signaled when tracker is activated.
+ */
+static struct sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point)
+{
+ struct mali_timeline_sync_fence_tracker *sync_fence_tracker;
+ struct sync_fence *sync_fence;
+ struct mali_timeline_fence fence;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point);
+
+ /* Allocate sync fence tracker. */
+ sync_fence_tracker = _mali_osk_calloc(1, sizeof(struct mali_timeline_sync_fence_tracker));
+ if (NULL == sync_fence_tracker) {
+ MALI_PRINT_ERROR(("Mali Timeline: sync_fence_tracker allocation failed\n"));
+ return NULL;
+ }
+
+ /* Create sync flag. */
+ MALI_DEBUG_ASSERT_POINTER(timeline->sync_tl);
+ sync_fence_tracker->flag = mali_sync_flag_create(timeline->sync_tl, point);
+ if (NULL == sync_fence_tracker->flag) {
+ MALI_PRINT_ERROR(("Mali Timeline: sync_flag creation failed\n"));
+ _mali_osk_free(sync_fence_tracker);
+ return NULL;
+ }
+
+ /* Create sync fence from sync flag. */
+ sync_fence = mali_sync_flag_create_fence(sync_fence_tracker->flag);
+ if (NULL == sync_fence) {
+ MALI_PRINT_ERROR(("Mali Timeline: sync_fence creation failed\n"));
+ mali_sync_flag_put(sync_fence_tracker->flag);
+ _mali_osk_free(sync_fence_tracker);
+ return NULL;
+ }
+
+ /* Setup fence for tracker. */
+ _mali_osk_memset(&fence, 0, sizeof(struct mali_timeline_fence));
+ fence.sync_fd = -1;
+ fence.points[timeline->id] = point;
+
+ /* Finally, add the tracker to Timeline system. */
+ mali_timeline_tracker_init(&sync_fence_tracker->tracker, MALI_TIMELINE_TRACKER_SYNC, &fence, sync_fence_tracker);
+ point = mali_timeline_system_add_tracker(timeline->system, &sync_fence_tracker->tracker, MALI_TIMELINE_NONE);
+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point);
+
+ return sync_fence;
+}
+
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence)
+{
+ u32 i;
+ struct sync_fence *sync_fence_acc = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(system);
+ MALI_DEBUG_ASSERT_POINTER(fence);
+
+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) {
+ struct mali_timeline *timeline;
+ struct sync_fence *sync_fence;
+
+ if (MALI_TIMELINE_NO_POINT == fence->points[i]) continue;
+
+ timeline = system->timelines[i];
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ sync_fence = mali_timeline_sync_fence_create_and_add_tracker(timeline, fence->points[i]);
+ if (NULL == sync_fence) goto error;
+
+ if (NULL != sync_fence_acc) {
+ /* Merge sync fences. */
+ sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+ if (NULL == sync_fence_acc) goto error;
+ } else {
+ /* This was the first sync fence created. */
+ sync_fence_acc = sync_fence;
+ }
+ }
+
+ if (-1 != fence->sync_fd) {
+ struct sync_fence *sync_fence;
+
+ sync_fence = sync_fence_fdget(fence->sync_fd);
+ if (NULL == sync_fence) goto error;
+
+ if (NULL != sync_fence_acc) {
+ sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence);
+ if (NULL == sync_fence_acc) goto error;
+ } else {
+ sync_fence_acc = sync_fence;
+ }
+ }
+
+ if (NULL == sync_fence_acc) {
+ MALI_DEBUG_ASSERT_POINTER(system->signaled_sync_tl);
+
+ /* There was nothing to wait on, so return an already signaled fence. */
+
+ sync_fence_acc = mali_sync_timeline_create_signaled_fence(system->signaled_sync_tl);
+ if (NULL == sync_fence_acc) goto error;
+ }
+
+ /* Return file descriptor for the accumulated sync fence. */
+ return mali_sync_fence_fd_alloc(sync_fence_acc);
+
+error:
+ if (NULL != sync_fence_acc) {
+ sync_fence_put(sync_fence_acc);
+ }
+
+ return -1;
+}
+
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker)
+{
+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY;
+
+ MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker);
+ MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker->flag);
+
+ MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for sync fence tracker\n"));
+
+ /* Signal flag and release reference. */
+ mali_sync_flag_signal(sync_fence_tracker->flag, 0);
+ mali_sync_flag_put(sync_fence_tracker->flag);
+
+ /* Nothing can wait on this tracker, so nothing to schedule after release. */
+ schedule_mask = mali_timeline_tracker_release(&sync_fence_tracker->tracker);
+ MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask);
+
+ _mali_osk_free(sync_fence_tracker);
+}
+
+#endif /* defined(CONFIG_SYNC) */
diff --git a/drivers/gpu/arm/utgard/common/mali_timeline_sync_fence.h b/drivers/gpu/arm/utgard/common/mali_timeline_sync_fence.h
new file mode 100644
index 000000000000..29a3822457e9
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_timeline_sync_fence.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_timeline_sync_fence.h
+ *
+ * This file contains code related to creating sync fences from timeline fences.
+ */
+
+#ifndef __MALI_TIMELINE_SYNC_FENCE_H__
+#define __MALI_TIMELINE_SYNC_FENCE_H__
+
+#include "mali_timeline.h"
+
+#if defined(CONFIG_SYNC)
+
+/**
+ * Sync fence tracker.
+ */
+struct mali_timeline_sync_fence_tracker {
+ struct mali_sync_flag *flag; /**< Sync flag used to connect tracker and sync fence. */
+ struct mali_timeline_tracker tracker; /**< Timeline tracker. */
+};
+
+/**
+ * Create a sync fence that will be signaled when @ref fence is signaled.
+ *
+ * @param system Timeline system.
+ * @param fence Fence to create sync fence from.
+ * @return File descriptor for new sync fence, or -1 on error.
+ */
+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence);
+
+/**
+ * Used by the Timeline system to activate a sync fence tracker.
+ *
+ * @param sync_fence_tracker Sync fence tracker.
+ */
+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* __MALI_TIMELINE_SYNC_FENCE_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_ukk.h b/drivers/gpu/arm/utgard/common/mali_ukk.h
new file mode 100644
index 000000000000..597685a53f3b
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_ukk.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk.h
+ * Defines the kernel-side of the user-kernel (U/K) interface
+ */
+
+#ifndef __MALI_UKK_H__
+#define __MALI_UKK_H__
+
+#include "mali_osk.h"
+#include "mali_uk_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * - The _mali_uk functions are an abstraction of the interface to the device
+ * driver. On certain OSs, this would be implemented via the IOCTL interface.
+ * On other OSs, it could be via extension of some Device Driver Class, or
+ * direct function call for Bare metal/RTOSs.
+ * - It is important to note that:
+ * - The Device Driver has implemented the _mali_ukk set of functions
+ * - The Base Driver calls the corresponding set of _mali_uku functions.
+ * - What requires porting is solely the calling mechanism from User-side to
+ * Kernel-side, and propagating back the results.
+ * - Each U/K function is associated with a (group, number) pair from
+ * \ref _mali_uk_functions to make it possible for a common function in the
+ * Base Driver and Device Driver to route User/Kernel calls from/to the
+ * correct _mali_uk function. For example, in an IOCTL system, the IOCTL number
+ * would be formed based on the group and number assigned to the _mali_uk
+ * function, as listed in \ref _mali_uk_functions. On the user-side, each
+ * _mali_uku function would just make an IOCTL with the IOCTL-code being an
+ * encoded form of the (group, number) pair. On the kernel-side, the Device
+ * Driver's IOCTL handler decodes the IOCTL-code back into a (group, number)
+ * pair, and uses this to determine which corresponding _mali_ukk should be
+ * called.
+ * - Refer to \ref _mali_uk_functions for more information about this
+ * (group, number) pairing.
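+ * - As a purely illustrative sketch (the macro names below are invented for
+ * this example and are not the driver's actual scheme), such an encoding
+ * could pack the pair into a single IOCTL code:@code
+ * #define EXAMPLE_UK_CODE(group, number) (((group) << 8) | ((number) & 0xFF))
+ * #define EXAMPLE_UK_GROUP(code) ((code) >> 8)
+ * #define EXAMPLE_UK_NUMBER(code) ((code) & 0xFF)
+ * @endcode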
+ * - In a system where there is no distinction between user and kernel-side,
+ * the U/K interface may be implemented as:@code
+ * MALI_STATIC_INLINE _mali_osk_errcode_t _mali_uku_examplefunction( _mali_uk_examplefunction_s *args )
+ * {
+ * return mali_ukk_examplefunction( args );
+ * }
+ * @endcode
+ * - Therefore, all U/K calls behave \em as \em though they were direct
+ * function calls (but the \b implementation \em need \em not be a direct
+ * function call)
+ *
+ * @note Naming the _mali_uk functions the same on both User and Kernel sides
+ * on non-RTOS systems causes debugging issues when setting breakpoints. In
+ * this case, it is not clear which function the breakpoint is put on.
+ * Therefore the _mali_uk functions in user space are prefixed with \c _mali_uku
+ * and in kernel space with \c _mali_ukk. The naming for the argument
+ * structures is unaffected.
+ *
+ * - The _mali_uk functions are synchronous.
+ * - Arguments to the _mali_uk functions are passed in a structure. The only
+ * parameter passed to the _mali_uk functions is a pointer to this structure.
+ * The first member of this structure, ctx, is a pointer to a context returned
+ * by _mali_uku_open(). For example:@code
+ * typedef struct
+ * {
+ * void *ctx;
+ * u32 number_of_cores;
+ * } _mali_uk_get_gp_number_of_cores_s;
+ * @endcode
+ *
+ * - Each _mali_uk function has its own argument structure named after the
+ * function. The argument is distinguished by the _s suffix.
+ * - The argument types are defined by the base driver and user-kernel
+ * interface.
+ * - All _mali_uk functions return a standard \ref _mali_osk_errcode_t.
+ * - Only arguments of type input or input/output need be initialized before
+ * calling a _mali_uk function.
+ * - Arguments of type output and input/output are only valid when the
+ * _mali_uk function returns \ref _MALI_OSK_ERR_OK.
+ * - The \c ctx member is always invalid after it has been used by a
+ * _mali_uk function, except for the context management functions
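+ * - For example, a minimal user-side call sequence might look as follows
+ * (an illustrative sketch only: stored_context and use() are placeholders
+ * for the caller's own context handle and result handling):@code
+ * _mali_uk_get_gp_number_of_cores_s args = { 0 };
+ *
+ * args.ctx = stored_context;
+ * if (_MALI_OSK_ERR_OK == _mali_uku_get_gp_number_of_cores(&args))
+ *     use(args.number_of_cores);
+ * @endcode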
+ *
+ *
+ * \b Interface \b restrictions
+ *
+ * The requirements of the interface mean that an implementation of the
+ * User-kernel interface may do no 'real' work. For example, the following are
+ * illegal in the User-kernel implementation:
+ * - Calling functions necessary for operation on all systems, which would
+ * not otherwise get called on RTOS systems.
+ * - For example, a U/K interface that calls multiple _mali_ukk functions
+ * during one particular U/K call. This could not be achieved by the same code
+ * which uses direct function calls for the U/K interface.
+ * - Writing in values to the args members, when otherwise these members would
+ * not hold a useful value for a direct function call U/K interface.
+ * - For example, U/K interface implementations that take NULL members in
+ * their arguments structure from the user side, but those members are
+ * replaced with non-NULL values in the kernel-side of the U/K interface
+ * implementation. A scratch area for writing data is one such example. In this
+ * case, a direct function call U/K interface would segfault, because no code
+ * would be present to replace the NULL pointer with a meaningful pointer.
+ * - Note that we discourage the case where the U/K implementation changes
+ * a NULL argument member to non-NULL, and then the Device Driver code (outside
+ * of the U/K layer) re-checks this member for NULL, and corrects it when
+ * necessary. Whilst such code works even on direct function call U/K
+ * interfaces, it reduces the testing coverage of the Device Driver code. This
+ * is because we have no way of testing the NULL == value path on an OS
+ * implementation.
+ *
+ * A number of allowable examples exist where U/K interfaces do 'real' work:
+ * - The 'pointer switching' technique for \ref _mali_ukk_get_system_info
+ * - In this case, without the pointer switching on direct function call
+ * U/K interface, the Device Driver code still sees the same thing: a pointer
+ * to which it can write memory. This is because such a system has no
+ * distinction between a user and kernel pointer.
+ * - Writing an OS-specific value into the ukk_private member for
+ * _mali_ukk_mem_mmap().
+ * - In this case, this value is passed around by Device Driver code, but
+ * its actual value is never checked. Device Driver code simply passes it from
+ * the U/K layer to the OSK layer, where it can be acted upon. In this case,
+ * \em some OS implementations of the U/K (_mali_ukk_mem_mmap()) and OSK
+ * (_mali_osk_mem_mapregion_init()) functions will collaborate on the
+ * meaning of the ukk_private member. On other OSs, it may be unused by both
+ * U/K and OSK layers.
+ * - Therefore, on error inside the U/K interface implementation itself,
+ * it will be as though the _mali_ukk function itself had failed, and cleaned
+ * up after itself.
+ * - Compare this to a direct function call U/K implementation, where all
+ * error cleanup is handled by the _mali_ukk function itself. The direct
+ * function call U/K interface implementation is automatically atomic.
+ *
+ * The last example highlights a consequence of all U/K interface
+ * implementations: they must be atomic with respect to the Device Driver code.
+ * And therefore, should Device Driver code succeed but the U/K implementation
+ * fail afterwards (but before return to user-space), then the U/K
+ * implementation must cause appropriate cleanup actions to preserve the
+ * atomicity of the interface.
+ *
+ * @{
+ */
+
+
+/** @defgroup _mali_uk_context U/K Context management
+ *
+ * These functions allow for initialisation of the user-kernel interface once per process.
+ *
+ * Generally the context will store the OS specific object to communicate with the kernel device driver and further
+ * state information required by the specific implementation. The context is shareable among all threads in the caller process.
+ *
+ * On IOCTL systems, this is likely to be a file descriptor as a result of opening the kernel device driver.
+ *
+ * On a bare-metal/RTOS system with no distinction between kernel and
+ * user-space, the U/K interface simply calls the _mali_ukk variant of the
+ * function by direct function call. In this case, the context returned is the
+ * mali_session_data from _mali_ukk_open().
+ *
+ * The kernel side implementations of the U/K interface expect the first member of the argument structure to
+ * be the context created by _mali_uku_open(). On some OS implementations, the meaning of this context
+ * will be different between user-side and kernel-side. In which case, the kernel-side will need to replace this context
+ * with the kernel-side equivalent, because user-side will not have access to kernel-side data. The context parameter
+ * in the argument structure therefore has to be of type input/output.
+ *
+ * It should be noted that the caller cannot reuse the \c ctx member of U/K
+ * argument structure after a U/K call, because it may be overwritten. Instead,
+ * the context handle must always be stored elsewhere, and copied into
+ * the appropriate U/K argument structure for each user-side call to
+ * the U/K interface. This is not usually a problem, since U/K argument
+ * structures are usually placed on the stack.
+ *
+ * @{ */
+
+/** @brief Begin a new Mali Device Driver session
+ *
+ * This is used to obtain a per-process context handle for all future U/K calls.
+ *
+ * @param context pointer to storage to return a (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_open(void **context);
+
+/** @brief End a Mali Device Driver session
+ *
+ * This should be called when the process no longer requires use of the Mali Device Driver.
+ *
+ * The context handle must not be used after it has been closed.
+ *
+ * @param context pointer to a stored (void*)context handle.
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_close(void **context);
+
+/** @} */ /* end group _mali_uk_context */
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ *
+ * The core functions provide the following functionality:
+ * - verify that the user and kernel API are compatible
+ * - retrieve information about the cores and memory banks in the system
+ * - wait for the result of jobs started on a core
+ *
+ * @{ */
+
+/** @brief Waits for a job notification.
+ *
+ * Sleeps until notified or a timeout occurs. Returns information about the notification.
+ *
+ * @param args see _mali_uk_wait_for_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args);
+
+/** @brief Post a notification to the notification queue of this application.
+ *
+ * @param args see _mali_uk_post_notification_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * This function is obsolete, but kept to allow old, incompatible user space
+ * clients to robustly detect the incompatibility.
+ *
+ * @param args see _mali_uk_get_api_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args);
+
+/** @brief Verifies if the user and kernel side of this API are compatible.
+ *
+ * @param args see _mali_uk_get_api_version_v2_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args);
+
+/** @brief Get the user space settings applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_settings_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args);
+
+/** @brief Get a user space setting applicable for calling process.
+ *
+ * @param args see _mali_uk_get_user_setting_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args);
+
+/** @brief Grant or deny high priority scheduling for this session.
+ *
+ * @param args see _mali_uk_request_high_priority_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args);
+
+/** @brief Make the calling process sleep while the number of pending big jobs
+ * in the kernel is >= MALI_MAX_PENDING_BIG_JOB.
+ */
+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args);
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ *
+ * The memory functions provide functionality with and without a Mali-MMU present.
+ *
+ * For Mali-MMU based systems, the following functionality is provided:
+ * - Initialize and terminate MALI virtual address space
+ * - Allocate/deallocate physical memory to a MALI virtual address range and map into/unmap from the
+ * current process address space
+ * - Map/unmap external physical memory into the MALI virtual address range
+ *
+ * For Mali-nonMMU based systems:
+ * - Allocate/deallocate MALI memory
+ *
+ * @{ */
+
+/** @brief Map Mali Memory into the current user process
+ *
+ * Maps Mali memory into the current user process in a generic way.
+ *
+ * This function is to be used for Mali-MMU mode. The function is available in both Mali-MMU and Mali-nonMMU modes,
+ * but should not be called by a user process in Mali-nonMMU mode.
+ *
+ * The implementation and operation of _mali_ukk_mem_mmap() are dependent on whether the driver is built for Mali-MMU
+ * or Mali-nonMMU:
+ * - In the nonMMU case, _mali_ukk_mem_mmap() requires a physical address to be specified. For this reason, an OS U/K
+ * implementation should not allow this to be called from user-space. In any case, nonMMU implementations are
+ * inherently insecure, and so the overall impact is minimal. Mali-MMU mode should be used if security is desired.
+ * - In the MMU case, the _mali_uk_mem_mmap_s::phys_addr member passed to
+ * _mali_ukk_mem_mmap() is used for the \em Mali-virtual address desired for the mapping. The
+ * implementation of _mali_ukk_mem_mmap() will allocate both the CPU-virtual
+ * and CPU-physical addresses, and can cope with mapping a contiguous virtual
+ * address range to a sequence of non-contiguous physical pages. In this case,
+ * the CPU-physical addresses are not communicated back to the user-side, as
+ * they are unnecessary; the \em Mali-virtual address range must be used for
+ * programming Mali structures.
+ *
+ * In the second (MMU) case, _mali_ukk_mem_mmap() handles management of
+ * CPU-virtual and CPU-physical ranges, but the \em caller must manage the
+ * \em Mali-virtual address range from the user-side.
+ *
+ * @note Mali-virtual address ranges are entirely separate between processes.
+ * It is not possible for a process to accidentally corrupt another process'
+ * \em Mali-virtual address space.
+ *
+ * @param args see _mali_uk_mem_mmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_mmap(_mali_uk_mem_mmap_s *args);
+
+/** @brief Unmap Mali Memory from the current user process
+ *
+ * Unmaps Mali memory from the current user process in a generic way. This only operates on Mali memory supplied
+ * from _mali_ukk_mem_mmap().
+ *
+ * @param args see _mali_uk_mem_munmap_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_munmap(_mali_uk_mem_munmap_s *args);
+
+/** @brief Determine the buffer size necessary for an MMU page table dump.
+ * @param args see _mali_uk_query_mmu_page_table_dump_size_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args);
+/** @brief Dump MMU Page tables.
+ * @param args see _mali_uk_dump_mmu_page_table_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args);
+
+/** @brief Write user data to specified Mali memory without causing segfaults.
+ * @param args see _mali_uk_mem_write_safe_s in mali_utgard_uk_types.h
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args);
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ *
+ * The Fragment Processor (aka PP (Pixel Processor)) functions provide the following functionality:
+ * - retrieving version of the fragment processors
+ * - determine number of fragment processors
+ * - starting a job on a fragment processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Fragment Processor.
+ *
+ * If the request fails, args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
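+ *
+ * As a purely illustrative sketch (not verbatim driver code;
+ * requeue_job_and_retry_later() is a placeholder for the caller's own
+ * requeue logic), the status field might be examined as follows:@code
+ * if (_MALI_OSK_ERR_OK == _mali_ukk_pp_start_job(ctx, uargs) &&
+ *     _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE == uargs->status) {
+ *     requeue_job_and_retry_later();
+ * }
+ * @endcode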
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, _mali_uk_pp_start_job_s *uargs);
+
+/**
+ * @brief Issue a request to start new jobs on both Vertex Processor and Fragment Processor.
+ *
+ * @note Will call into @ref _mali_ukk_pp_start_job and @ref _mali_ukk_gp_start_job.
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_pp_and_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, _mali_uk_pp_and_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Fragment Processors in the system
+ *
+ * @param args see _mali_uk_get_pp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Fragment Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_ukk_get_pp_number_of_cores() indicated at least one Fragment
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args);
+
+/** @brief Disable Write-back unit(s) on specified job
+ *
+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h"
+ */
+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args);
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ *
+ * The Vertex Processor (aka GP (Geometry Processor)) functions provide the following functionality:
+ * - retrieving version of the Vertex Processors
+ * - determine number of Vertex Processors available
+ * - starting a job on a Vertex Processor
+ *
+ * @{ */
+
+/** @brief Issue a request to start a new job on a Vertex Processor.
+ *
+ * If the request fails, args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can
+ * try to start the job again.
+ *
+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job
+ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the
+ * existing one returned, otherwise the new job is started and the status field args->status is set to
+ * _MALI_UK_START_JOB_STARTED.
+ *
+ * Job completion can be awaited with _mali_ukk_wait_for_notification().
+ *
+ * @param ctx user-kernel context (mali_session)
+ * @param uargs see _mali_uk_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data!
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, _mali_uk_gp_start_job_s *uargs);
+
+/** @brief Returns the number of Vertex Processors in the system.
+ *
+ * @param args see _mali_uk_get_gp_number_of_cores_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args);
+
+/** @brief Returns the version that all Vertex Processor cores are compatible with.
+ *
+ * This function may only be called when _mali_ukk_get_gp_number_of_cores() indicated at least one Vertex
+ * Processor core is available.
+ *
+ * @param args see _mali_uk_get_gp_core_version_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args);
+
+/** @brief Resume or abort suspended Vertex Processor jobs.
+ *
+ * After receiving notification that a Vertex Processor job was suspended from
+ * _mali_ukk_wait_for_notification() you can use this function to resume or abort the job.
+ *
+ * @param args see _mali_uk_gp_suspend_response_s in "mali_utgard_uk_types.h"
+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure.
+ */
+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args);
+
+/** @} */ /* end group _mali_uk_gp */
+
+#if defined(CONFIG_MALI400_PROFILING)
+/** @addtogroup _mali_uk_profiling U/K Timeline profiling module
+ * @{ */
+
+/** @brief Add event to profiling buffer.
+ *
+ * @param args see _mali_uk_profiling_add_event_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args);
+
+/** @brief Get profiling stream fd.
+ *
+ * @param args see _mali_uk_profiling_stream_fd_get_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args);
+
+/** @brief Profiling control set.
+ *
+ * @param args see _mali_uk_profiling_control_set_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args);
+
+/** @} */ /* end group _mali_uk_profiling */
+#endif
+
+/** @addtogroup _mali_uk_vsync U/K VSYNC reporting module
+ * @{ */
+
+/** @brief Report events related to vsync.
+ *
+ * @note Events should be reported when starting to wait for vsync and when the
+ * waiting is finished. This information can then be used in kernel space to
+ * complement the GPU utilization metric.
+ *
+ * @param args see _mali_uk_vsync_event_report_s in "mali_utgard_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args);
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @addtogroup _mali_sw_counters_report U/K Software counter reporting
+ * @{ */
+
+/** @brief Report software counters.
+ *
+ * @param args see _mali_uk_sw_counters_report_s in "mali_uk_types.h"
+ */
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args);
+
+/** @} */ /* end group _mali_sw_counters_report */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+u32 _mali_ukk_report_memory_usage(void);
+
+u32 _mali_ukk_report_total_memory_size(void);
+
+u32 _mali_ukk_utilization_gp_pp(void);
+
+u32 _mali_ukk_utilization_gp(void);
+
+u32 _mali_ukk_utilization_pp(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_H__ */
diff --git a/drivers/gpu/arm/utgard/common/mali_user_settings_db.c b/drivers/gpu/arm/utgard/common/mali_user_settings_db.c
new file mode 100644
index 000000000000..54e1580fad1a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_user_settings_db.c
@@ -0,0 +1,147 @@
+/**
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_user_settings_db.h"
+#include "mali_session.h"
+
+static u32 mali_user_settings[_MALI_UK_USER_SETTING_MAX];
+const char *_mali_uk_user_setting_descriptions[] = _MALI_UK_USER_SETTING_DESCRIPTIONS;
+
+static void mali_user_settings_notify(_mali_uk_user_setting_t setting, u32 value)
+{
+ mali_bool done = MALI_FALSE;
+
+ /*
+ * This function gets a bit complicated because we can't hold the session lock while
+ * allocating notification objects.
+ */
+
+ while (!done) {
+ u32 i;
+ u32 num_sessions_alloc;
+ u32 num_sessions_with_lock;
+ u32 used_notification_objects = 0;
+ _mali_osk_notification_t **notobjs;
+
+ /* Pre-allocate the number of notification objects we need right now (might change after the lock has been taken) */
+ num_sessions_alloc = mali_session_get_count();
+ if (0 == num_sessions_alloc) {
+ /* No sessions to report to */
+ return;
+ }
+
+ notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc);
+ if (NULL == notobjs) {
+ MALI_PRINT_ERROR(("Failed to notify user space sessions about setting change (alloc failure)\n"));
+ return;
+ }
+
+ for (i = 0; i < num_sessions_alloc; i++) {
+ notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_SETTINGS_CHANGED,
+ sizeof(_mali_uk_settings_changed_s));
+ if (NULL != notobjs[i]) {
+ _mali_uk_settings_changed_s *data;
+ data = notobjs[i]->result_buffer;
+
+ data->setting = setting;
+ data->value = value;
+ } else {
+ MALI_PRINT_ERROR(("Failed to notify user space session about setting change (alloc failure %u)\n", i));
+ }
+ }
+
+ mali_session_lock();
+
+ /* number of sessions will not change while we hold the lock */
+ num_sessions_with_lock = mali_session_get_count();
+
+ if (num_sessions_alloc >= num_sessions_with_lock) {
+ /* We have allocated enough notification objects for all the sessions at the moment */
+ struct mali_session_data *session, *tmp;
+ MALI_SESSION_FOREACH(session, tmp, link) {
+ MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc);
+ if (NULL != notobjs[used_notification_objects]) {
+ mali_session_send_notification(session, notobjs[used_notification_objects]);
+ notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */
+ }
+ used_notification_objects++;
+ }
+ done = MALI_TRUE;
+ }
+
+ mali_session_unlock();
+
+ /* Delete any remaining/unused notification objects */
+ for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) {
+ if (NULL != notobjs[used_notification_objects]) {
+ _mali_osk_notification_delete(notobjs[used_notification_objects]);
+ }
+ }
+
+ _mali_osk_free(notobjs);
+ }
+}
+
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value)
+{
+ mali_bool notify = MALI_FALSE;
+
+ if (setting >= _MALI_UK_USER_SETTING_MAX) {
+ MALI_DEBUG_PRINT_ERROR(("Invalid user setting %u\n", (u32)setting));
+ return;
+ }
+
+ if (mali_user_settings[setting] != value) {
+ notify = MALI_TRUE;
+ }
+
+ mali_user_settings[setting] = value;
+
+ if (notify) {
+ mali_user_settings_notify(setting, value);
+ }
+}
+
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting)
+{
+ if (setting >= _MALI_UK_USER_SETTING_MAX) {
+ return 0;
+ }
+
+ return mali_user_settings[setting];
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args)
+{
+ _mali_uk_user_setting_t setting;
+ MALI_DEBUG_ASSERT_POINTER(args);
+
+ setting = args->setting;
+
+ if (_MALI_UK_USER_SETTING_MAX > setting) {
+ args->value = mali_user_settings[setting];
+ return _MALI_OSK_ERR_OK;
+ } else {
+ return _MALI_OSK_ERR_INVALID_ARGS;
+ }
+}
+
+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args)
+{
+ MALI_DEBUG_ASSERT_POINTER(args);
+
+ _mali_osk_memcpy(args->settings, mali_user_settings, sizeof(mali_user_settings));
+
+ return _MALI_OSK_ERR_OK;
+}
diff --git a/drivers/gpu/arm/utgard/common/mali_user_settings_db.h b/drivers/gpu/arm/utgard/common/mali_user_settings_db.h
new file mode 100644
index 000000000000..0732c3e56e2a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/common/mali_user_settings_db.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_USER_SETTINGS_DB_H__
+#define __MALI_USER_SETTINGS_DB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+
+/** @brief Set Mali user setting in DB
+ *
+ * Update the DB with a new value for \a setting. If the value differs from the previously set value, running sessions will be notified of the change.
+ *
+ * @param setting the setting to be changed
+ * @param value the new value to set
+ */
+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value);
+
+/** @brief Get current Mali user setting value from DB
+ *
+ * @param setting the setting to extract
+ * @return the value of the selected setting
+ */
+u32 mali_get_user_setting(_mali_uk_user_setting_t setting);
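+
+/*
+ * Illustrative usage (a sketch only; "s" stands for any valid
+ * _mali_uk_user_setting_t value). Sessions are notified only when the
+ * stored value actually changes:
+ *
+ *     mali_set_user_setting(s, 1);
+ *     value = mali_get_user_setting(s);
+ */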
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __MALI_USER_SETTINGS_DB_H__ */
diff --git a/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard.h b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard.h
new file mode 100644
index 000000000000..4ea02fe65cb5
--- /dev/null
+++ b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard.h
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_utgard.h
+ * Defines types and interface exposed by the Mali Utgard device driver
+ */
+
+#ifndef __MALI_UTGARD_H__
+#define __MALI_UTGARD_H__
+
+#include "mali_osk_types.h"
+
+#define MALI_GPU_NAME_UTGARD "mali-utgard"
+
+
+#define MALI_OFFSET_GP 0x00000
+#define MALI_OFFSET_GP_MMU 0x03000
+
+#define MALI_OFFSET_PP0 0x08000
+#define MALI_OFFSET_PP0_MMU 0x04000
+#define MALI_OFFSET_PP1 0x0A000
+#define MALI_OFFSET_PP1_MMU 0x05000
+#define MALI_OFFSET_PP2 0x0C000
+#define MALI_OFFSET_PP2_MMU 0x06000
+#define MALI_OFFSET_PP3 0x0E000
+#define MALI_OFFSET_PP3_MMU 0x07000
+
+#define MALI_OFFSET_PP4 0x28000
+#define MALI_OFFSET_PP4_MMU 0x1C000
+#define MALI_OFFSET_PP5 0x2A000
+#define MALI_OFFSET_PP5_MMU 0x1D000
+#define MALI_OFFSET_PP6 0x2C000
+#define MALI_OFFSET_PP6_MMU 0x1E000
+#define MALI_OFFSET_PP7 0x2E000
+#define MALI_OFFSET_PP7_MMU 0x1F000
+
+#define MALI_OFFSET_L2_RESOURCE0 0x01000
+#define MALI_OFFSET_L2_RESOURCE1 0x10000
+#define MALI_OFFSET_L2_RESOURCE2 0x11000
+
+#define MALI400_OFFSET_L2_CACHE0 MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE0 MALI_OFFSET_L2_RESOURCE1
+#define MALI450_OFFSET_L2_CACHE1 MALI_OFFSET_L2_RESOURCE0
+#define MALI450_OFFSET_L2_CACHE2 MALI_OFFSET_L2_RESOURCE2
+#define MALI470_OFFSET_L2_CACHE1 MALI_OFFSET_L2_RESOURCE0
+
+#define MALI_OFFSET_BCAST 0x13000
+#define MALI_OFFSET_DLBU 0x14000
+
+#define MALI_OFFSET_PP_BCAST 0x16000
+#define MALI_OFFSET_PP_BCAST_MMU 0x15000
+
+#define MALI_OFFSET_PMU 0x02000
+#define MALI_OFFSET_DMA 0x12000
+
+/* Mali-300 */
+
+#define MALI_GPU_RESOURCES_MALI300(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+ MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI300_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \
+ MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq)
+
+/* Mali-400 */
+
+#define MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+ MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+ MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+ MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq)
+
+#define MALI_GPU_RESOURCES_MALI400_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+ MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+ /* Mali-450 */
+#define MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI450_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP4, pp3_irq, base_addr + MALI_OFFSET_PP4_MMU, pp3_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP5, pp4_irq, base_addr + MALI_OFFSET_PP5_MMU, pp4_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP6, pp5_irq, base_addr + MALI_OFFSET_PP6_MMU, pp5_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP6_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP4, pp4_irq, base_addr + MALI_OFFSET_PP4_MMU, pp4_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP5, pp5_irq, base_addr + MALI_OFFSET_PP5_MMU, pp5_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(6, base_addr + MALI_OFFSET_PP6, pp6_irq, base_addr + MALI_OFFSET_PP6_MMU, pp6_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(7, base_addr + MALI_OFFSET_PP7, pp7_irq, base_addr + MALI_OFFSET_PP7_MMU, pp7_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \
+ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA)
+
+#define MALI_GPU_RESOURCES_MALI450_MP8_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+ /* Mali-470 */
+#define MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \
+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \
+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \
+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \
+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \
+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU)
+
+#define MALI_GPU_RESOURCES_MALI470_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \
+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \
+
+#define MALI_GPU_RESOURCE_L2(addr) \
+ { \
+ .name = "Mali_L2", \
+ .flags = IORESOURCE_MEM, \
+ .start = addr, \
+ .end = addr + 0x200, \
+ },
+
+#define MALI_GPU_RESOURCE_GP(gp_addr, gp_irq) \
+ { \
+ .name = "Mali_GP", \
+ .flags = IORESOURCE_MEM, \
+ .start = gp_addr, \
+ .end = gp_addr + 0x100, \
+ }, \
+ { \
+ .name = "Mali_GP_IRQ", \
+ .flags = IORESOURCE_IRQ, \
+ .start = gp_irq, \
+ .end = gp_irq, \
+ }, \
+
+#define MALI_GPU_RESOURCE_GP_WITH_MMU(gp_addr, gp_irq, gp_mmu_addr, gp_mmu_irq) \
+ { \
+ .name = "Mali_GP", \
+ .flags = IORESOURCE_MEM, \
+ .start = gp_addr, \
+ .end = gp_addr + 0x100, \
+ }, \
+ { \
+ .name = "Mali_GP_IRQ", \
+ .flags = IORESOURCE_IRQ, \
+ .start = gp_irq, \
+ .end = gp_irq, \
+ }, \
+ { \
+ .name = "Mali_GP_MMU", \
+ .flags = IORESOURCE_MEM, \
+ .start = gp_mmu_addr, \
+ .end = gp_mmu_addr + 0x100, \
+ }, \
+ { \
+ .name = "Mali_GP_MMU_IRQ", \
+ .flags = IORESOURCE_IRQ, \
+ .start = gp_mmu_irq, \
+ .end = gp_mmu_irq, \
+ },
+
+#define MALI_GPU_RESOURCE_PP(pp_addr, pp_irq) \
+ { \
+ .name = "Mali_PP", \
+ .flags = IORESOURCE_MEM, \
+ .start = pp_addr, \
+ .end = pp_addr + 0x1100, \
+ }, \
+ { \
+ .name = "Mali_PP_IRQ", \
+ .flags = IORESOURCE_IRQ, \
+ .start = pp_irq, \
+ .end = pp_irq, \
+ }, \
+
+#define MALI_GPU_RESOURCE_PP_WITH_MMU(id, pp_addr, pp_irq, pp_mmu_addr, pp_mmu_irq) \
+ { \
+ .name = "Mali_PP" #id, \
+ .flags = IORESOURCE_MEM, \
+ .start = pp_addr, \
+ .end = pp_addr + 0x1100, \
+ }, \
+ { \
+ .name = "Mali_PP" #id "_IRQ", \
+ .flags = IORESOURCE_IRQ, \
+ .start = pp_irq, \
+ .end = pp_irq, \
+ }, \
+ { \
+ .name = "Mali_PP" #id "_MMU", \
+ .flags = IORESOURCE_MEM, \
+ .start = pp_mmu_addr, \
+ .end = pp_mmu_addr + 0x100, \
+ }, \
+ { \
+ .name = "Mali_PP" #id "_MMU_IRQ", \
+ .flags = IORESOURCE_IRQ, \
+ .start = pp_mmu_irq, \
+ .end = pp_mmu_irq, \
+ },
+
+#define MALI_GPU_RESOURCE_MMU(mmu_addr, mmu_irq) \
+ { \
+ .name = "Mali_MMU", \
+ .flags = IORESOURCE_MEM, \
+ .start = mmu_addr, \
+ .end = mmu_addr + 0x100, \
+ }, \
+ { \
+ .name = "Mali_MMU_IRQ", \
+ .flags = IORESOURCE_IRQ, \
+ .start = mmu_irq, \
+ .end = mmu_irq, \
+ },
+
+#define MALI_GPU_RESOURCE_PMU(pmu_addr) \
+ { \
+ .name = "Mali_PMU", \
+ .flags = IORESOURCE_MEM, \
+ .start = pmu_addr, \
+ .end = pmu_addr + 0x100, \
+ },
+
+#define MALI_GPU_RESOURCE_DMA(dma_addr) \
+ { \
+ .name = "Mali_DMA", \
+ .flags = IORESOURCE_MEM, \
+ .start = dma_addr, \
+ .end = dma_addr + 0x100, \
+ },
+
+#define MALI_GPU_RESOURCE_DLBU(dlbu_addr) \
+ { \
+ .name = "Mali_DLBU", \
+ .flags = IORESOURCE_MEM, \
+ .start = dlbu_addr, \
+ .end = dlbu_addr + 0x100, \
+ },
+
+#define MALI_GPU_RESOURCE_BCAST(bcast_addr) \
+ { \
+ .name = "Mali_Broadcast", \
+ .flags = IORESOURCE_MEM, \
+ .start = bcast_addr, \
+ .end = bcast_addr + 0x100, \
+ },
+
+#define MALI_GPU_RESOURCE_PP_BCAST(pp_addr, pp_irq) \
+ { \
+ .name = "Mali_PP_Broadcast", \
+ .flags = IORESOURCE_MEM, \
+ .start = pp_addr, \
+ .end = pp_addr + 0x1100, \
+ }, \
+ { \
+ .name = "Mali_PP_Broadcast_IRQ", \
+ .flags = IORESOURCE_IRQ, \
+ .start = pp_irq, \
+ .end = pp_irq, \
+ }, \
+
+#define MALI_GPU_RESOURCE_PP_MMU_BCAST(pp_mmu_bcast_addr) \
+ { \
+ .name = "Mali_PP_MMU_Broadcast", \
+ .flags = IORESOURCE_MEM, \
+ .start = pp_mmu_bcast_addr, \
+ .end = pp_mmu_bcast_addr + 0x100, \
+ },
+
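Taken together, these macros expand to brace-initializers for an array of struct resource (from linux/ioport.h), so a whole GPU topology can be described in one line of platform code. A minimal sketch of such an array follows; the base address and IRQ numbers are invented for illustration:

/* Illustration only: 0x3f000000 and the IRQ numbers are made-up values;
 * a real platform takes them from the SoC memory map or device tree. */
static struct resource mali_gpu_resources[] = {
	MALI_GPU_RESOURCES_MALI470_MP4_PMU(0x3f000000,
					   70, 71,  /* GP, GP MMU       */
					   72, 73,  /* PP0, PP0 MMU     */
					   74, 75,  /* PP1, PP1 MMU     */
					   76, 77,  /* PP2, PP2 MMU     */
					   78, 79,  /* PP3, PP3 MMU     */
					   80)      /* PP broadcast IRQ */
};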
+ struct mali_gpu_utilization_data {
+ unsigned int utilization_gpu; /* Utilization for GP and all PP cores combined, 0 = no utilization, 256 = full utilization */
+ unsigned int utilization_gp; /* Utilization for GP core only, 0 = no utilization, 256 = full utilization */
+ unsigned int utilization_pp; /* Utilization for all PP cores combined, 0 = no utilization, 256 = full utilization */
+ };
+
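All three fields share a fixed-point scale where 256 means fully busy, so a consumer typically rescales them. A hypothetical kernel-context callback might look like this:

/* Hypothetical consumer of the utilization data; converts the
 * 0..256 fixed-point load figures to percentages for logging. */
static void example_utilization_cb(struct mali_gpu_utilization_data *data)
{
	unsigned int gpu_pct = (data->utilization_gpu * 100) / 256;

	pr_info("mali: gpu %u%% (gp %u/256, pp %u/256)\n",
		gpu_pct, data->utilization_gp, data->utilization_pp);
}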
+ struct mali_gpu_clk_item {
+ unsigned int clock; /* unit(MHz) */
+ unsigned int vol;
+ };
+
+ struct mali_gpu_clock {
+ struct mali_gpu_clk_item *item;
+ unsigned int num_of_steps;
+ };
+
+ struct mali_gpu_device_data {
+ /* Shared GPU memory */
+ unsigned long shared_mem_size;
+
+ /*
+ * Mali PMU switch delay.
+ * Only needed if the power gates are connected to the PMU in a high fanout
+ * network. This value is the number of Mali clock cycles it takes to
+ * enable the power gates and turn on the power mesh.
+ * This value will have no effect if a daisy chain implementation is used.
+ */
+ u32 pmu_switch_delay;
+
+ /* Mali dynamic power domain configuration, in sequence from 0-11:
+ * GP PP0 PP1 PP2 PP3 PP4 PP5 PP6 PP7 L2$0 L2$1 L2$2
+ */
+ u16 pmu_domain_config[12];
+
+ /* Dedicated GPU memory range (physical). */
+ unsigned long dedicated_mem_start;
+ unsigned long dedicated_mem_size;
+
+ /* Frame buffer memory to be accessible by Mali GPU (physical) */
+ unsigned long fb_start;
+ unsigned long fb_size;
+
+ /* Max runtime [ms] for jobs */
+ int max_job_runtime;
+
+ /* Interval (in ms) at which to report GPU utilization and perform related control */
+ unsigned long control_interval;
+
+ /* Function that will receive periodic GPU utilization numbers */
+ void (*utilization_callback)(struct mali_gpu_utilization_data *data);
+
+ /* Platform callback for setting the clock step, needed when CONFIG_MALI_DVFS is enabled */
+ int (*set_freq)(int setting_clock_step);
+ /* Platform callback reporting the clock steps the driver may set, needed when CONFIG_MALI_DVFS is enabled */
+ void (*get_clock_info)(struct mali_gpu_clock **data);
+ /* Platform callback returning the current clock step, needed when CONFIG_MALI_DVFS is enabled */
+ int (*get_freq)(void);
+ };
+
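This structure is handed to the driver as platform data. A hedged sketch of one possible definition, reusing the callback sketched earlier; every value here is an example, not a requirement:

/* Example values only: sizes, timeout and interval are placeholders. */
static struct mali_gpu_device_data mali_gpu_data = {
	.shared_mem_size      = 256 * 1024 * 1024, /* 256 MiB shared with the GPU */
	.max_job_runtime      = 2000,              /* abort jobs after 2000 ms    */
	.control_interval     = 200,               /* report utilization every 200 ms */
	.utilization_callback = example_utilization_cb,
};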
+ /**
+ * Pause the scheduling and power state changes of Mali device driver.
+ * mali_dev_resume() must always be called as soon as possible after this function
+ * in order to resume normal operation of the Mali driver.
+ */
+ void mali_dev_pause(void);
+
+ /**
+ * Resume scheduling and allow power changes in Mali device driver.
+ * This must always be called after mali_dev_pause().
+ */
+ void mali_dev_resume(void);
+
+ /** @brief Set the desired number of PP cores to use.
+ *
+ * The internal Mali PMU will be used, if present, to physically power off the PP cores.
+ *
+ * @param num_cores The number of desired cores
+ * @return 0 on success, otherwise error. -EINVAL means an invalid number of cores was specified.
+ */
+ int mali_perf_set_num_pp_cores(unsigned int num_cores);
+
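A sketch combining the three calls above. Whether the pause/resume bracket is strictly required around mali_perf_set_num_pp_cores() is not stated here; the sketch only follows the documented rule that mali_dev_resume() must follow mali_dev_pause() as soon as possible:

/* Sketch: throttle the GPU to two PP cores while the driver is quiesced. */
static int example_throttle_to_two_pp_cores(void)
{
	int err;

	mali_dev_pause();                    /* halt scheduling and power changes */
	err = mali_perf_set_num_pp_cores(2); /* 0 on success, -EINVAL otherwise   */
	mali_dev_resume();                   /* must follow mali_dev_pause()      */

	return err;
}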
+#endif
diff --git a/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_ioctl.h b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_ioctl.h
new file mode 100644
index 000000000000..6a6e69ab6978
--- /dev/null
+++ b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_ioctl.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library.
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_IOCTL_H__
+#define __MALI_UTGARD_IOCTL_H__
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/fs.h> /* file system operations */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file mali_utgard_ioctl.h
+ * Interface to the Linux device driver.
+ * This file describes the interface needed to use the Linux device driver.
+ * Its interface is designed to be used by the HAL implementation through a thin arch layer.
+ */
+
+/**
+ * ioctl commands
+ */
+
+#define MALI_IOC_BASE 0x82
+#define MALI_IOC_CORE_BASE (_MALI_UK_CORE_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_MEMORY_BASE (_MALI_UK_MEMORY_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_PP_BASE (_MALI_UK_PP_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_GP_BASE (_MALI_UK_GP_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_PROFILING_BASE (_MALI_UK_PROFILING_SUBSYSTEM + MALI_IOC_BASE)
+#define MALI_IOC_VSYNC_BASE (_MALI_UK_VSYNC_SUBSYSTEM + MALI_IOC_BASE)
+
+#define MALI_IOC_WAIT_FOR_NOTIFICATION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_WAIT_FOR_NOTIFICATION, _mali_uk_wait_for_notification_s)
+#define MALI_IOC_GET_API_VERSION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, u32)
+#define MALI_IOC_GET_API_VERSION_V2 _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, _mali_uk_get_api_version_v2_s)
+#define MALI_IOC_POST_NOTIFICATION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_POST_NOTIFICATION, _mali_uk_post_notification_s)
+#define MALI_IOC_GET_USER_SETTING _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTING, _mali_uk_get_user_setting_s)
+#define MALI_IOC_GET_USER_SETTINGS _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTINGS, _mali_uk_get_user_settings_s)
+#define MALI_IOC_REQUEST_HIGH_PRIORITY _IOW (MALI_IOC_CORE_BASE, _MALI_UK_REQUEST_HIGH_PRIORITY, _mali_uk_request_high_priority_s)
+#define MALI_IOC_TIMELINE_GET_LATEST_POINT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_GET_LATEST_POINT, _mali_uk_timeline_get_latest_point_s)
+#define MALI_IOC_TIMELINE_WAIT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_WAIT, _mali_uk_timeline_wait_s)
+#define MALI_IOC_TIMELINE_CREATE_SYNC_FENCE _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, _mali_uk_timeline_create_sync_fence_s)
+#define MALI_IOC_SOFT_JOB_START _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_START, _mali_uk_soft_job_start_s)
+#define MALI_IOC_SOFT_JOB_SIGNAL _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_SIGNAL, _mali_uk_soft_job_signal_s)
+#define MALI_IOC_PENDING_SUBMIT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_PENDING_SUBMIT, _mali_uk_pending_submit_s)
+
+#define MALI_IOC_MEM_ALLOC _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_ALLOC_MEM, _mali_uk_alloc_mem_s)
+#define MALI_IOC_MEM_FREE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_FREE_MEM, _mali_uk_free_mem_s)
+#define MALI_IOC_MEM_BIND _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_BIND_MEM, _mali_uk_bind_mem_s)
+#define MALI_IOC_MEM_UNBIND _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_UNBIND_MEM, _mali_uk_unbind_mem_s)
+#define MALI_IOC_MEM_COW _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MEM, _mali_uk_cow_mem_s)
+#define MALI_IOC_MEM_COW_MODIFY_RANGE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MODIFY_RANGE, _mali_uk_cow_modify_range_s)
+#define MALI_IOC_MEM_RESIZE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_RESIZE_MEM, _mali_uk_mem_resize_s)
+#define MALI_IOC_MEM_DMA_BUF_GET_SIZE _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_DMA_BUF_GET_SIZE, _mali_uk_dma_buf_get_size_s)
+#define MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE _IOR (MALI_IOC_MEMORY_BASE, _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, _mali_uk_query_mmu_page_table_dump_size_s)
+#define MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_DUMP_MMU_PAGE_TABLE, _mali_uk_dump_mmu_page_table_s)
+#define MALI_IOC_MEM_WRITE_SAFE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_MEM_WRITE_SAFE, _mali_uk_mem_write_safe_s)
+
+#define MALI_IOC_PP_START_JOB _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_START_JOB, _mali_uk_pp_start_job_s)
+#define MALI_IOC_PP_AND_GP_START_JOB _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_AND_GP_START_JOB, _mali_uk_pp_and_gp_start_job_s)
+#define MALI_IOC_PP_NUMBER_OF_CORES_GET _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_NUMBER_OF_CORES, _mali_uk_get_pp_number_of_cores_s)
+#define MALI_IOC_PP_CORE_VERSION_GET _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_CORE_VERSION, _mali_uk_get_pp_core_version_s)
+#define MALI_IOC_PP_DISABLE_WB _IOW (MALI_IOC_PP_BASE, _MALI_UK_PP_DISABLE_WB, _mali_uk_pp_disable_wb_s)
+
+#define MALI_IOC_GP2_START_JOB _IOWR(MALI_IOC_GP_BASE, _MALI_UK_GP_START_JOB, _mali_uk_gp_start_job_s)
+#define MALI_IOC_GP2_NUMBER_OF_CORES_GET _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_NUMBER_OF_CORES, _mali_uk_get_gp_number_of_cores_s)
+#define MALI_IOC_GP2_CORE_VERSION_GET _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_CORE_VERSION, _mali_uk_get_gp_core_version_s)
+#define MALI_IOC_GP2_SUSPEND_RESPONSE _IOW (MALI_IOC_GP_BASE, _MALI_UK_GP_SUSPEND_RESPONSE,_mali_uk_gp_suspend_response_s)
+
+#define MALI_IOC_PROFILING_ADD_EVENT _IOWR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_ADD_EVENT, _mali_uk_profiling_add_event_s)
+#define MALI_IOC_PROFILING_REPORT_SW_COUNTERS _IOW (MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_REPORT_SW_COUNTERS, _mali_uk_sw_counters_report_s)
+#define MALI_IOC_PROFILING_MEMORY_USAGE_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_MEMORY_USAGE_GET, _mali_uk_profiling_memory_usage_get_s)
+#define MALI_IOC_PROFILING_STREAM_FD_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_STREAM_FD_GET, _mali_uk_profiling_stream_fd_get_s)
+#define MALI_IOC_PROILING_CONTROL_SET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_CONTROL_SET, _mali_uk_profiling_control_set_s)
+
+#define MALI_IOC_VSYNC_EVENT_REPORT _IOW (MALI_IOC_VSYNC_BASE, _MALI_UK_VSYNC_EVENT_REPORT, _mali_uk_vsync_event_report_s)
+
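These encodings are consumed from user space with ioctl(2) on the Mali device node. A hedged sketch; the /dev/mali path and the version field of _mali_uk_get_api_version_v2_s (declared in mali_utgard_uk_types.h) are assumptions, not part of this patch:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
/* plus this header and mali_utgard_uk_types.h for the command and argument types */

int main(void)
{
	_mali_uk_get_api_version_v2_s args = { 0 };
	int fd = open("/dev/mali", O_RDWR);   /* device path is an assumption */

	if (fd < 0)
		return 1;
	if (ioctl(fd, MALI_IOC_GET_API_VERSION_V2, &args) == 0)
		printf("u/k API version: 0x%x\n", args.version); /* field name assumed */
	close(fd);
	return 0;
}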
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_IOCTL_H__ */
diff --git a/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_profiling_events.h b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_profiling_events.h
new file mode 100644
index 000000000000..279bf8ee38a4
--- /dev/null
+++ b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_profiling_events.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library.
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef _MALI_UTGARD_PROFILING_EVENTS_H_
+#define _MALI_UTGARD_PROFILING_EVENTS_H_
+
+/*
+ * The event ID is a 32-bit value consisting of the following fields:
+ * reserved, 4 bits, for future use
+ * event type, 4 bits, cinstr_profiling_event_type_t
+ * event channel, 8 bits, the source of the event
+ * event data, 16 bits, data depending on event type
+ */
+
+/**
+ * Specifies what kind of event this is
+ */
+typedef enum {
+ MALI_PROFILING_EVENT_TYPE_SINGLE = 0 << 24,
+ MALI_PROFILING_EVENT_TYPE_START = 1 << 24,
+ MALI_PROFILING_EVENT_TYPE_STOP = 2 << 24,
+ MALI_PROFILING_EVENT_TYPE_SUSPEND = 3 << 24,
+ MALI_PROFILING_EVENT_TYPE_RESUME = 4 << 24,
+} cinstr_profiling_event_type_t;
+
+
+/**
+ * Specifies the channel/source of the event
+ */
+typedef enum {
+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE = 0 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_GP0 = 1 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_PP0 = 5 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_PP1 = 6 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_PP2 = 7 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_PP3 = 8 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_PP4 = 9 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_PP5 = 10 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_PP6 = 11 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_PP7 = 12 << 16,
+ MALI_PROFILING_EVENT_CHANNEL_GPU = 21 << 16,
+} cinstr_profiling_event_channel_t;
+
+
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(num) (((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) + (num)) << 16)
+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(num) (((MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) + (num)) << 16)
+
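Putting the field layout and these helpers together, a complete 32-bit event ID is just the bitwise OR of type, channel, and 16-bit data. A small illustrative helper (the names here are hypothetical):

/* Hypothetical helper: OR together type (bits 27..24), channel (23..16)
 * and the 16-bit data payload (15..0). */
static inline u32 example_make_event_id(cinstr_profiling_event_type_t type,
					u32 channel, u16 data)
{
	return (u32)type | channel | (u32)data;
}

static void example_usage(void)
{
	/* "job started on PP core 2", with an empty data payload */
	u32 ev = example_make_event_id(MALI_PROFILING_EVENT_TYPE_START,
				       MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(2),
				       0);
	(void)ev; /* would typically be passed to the profiling add-event call */
}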
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from software channel
+ */
+typedef enum {
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_NONE = 0,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_NEW_FRAME = 1,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_FLUSH = 2,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SWAP_BUFFERS = 3,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_FB_EVENT = 4,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE = 5,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE = 6,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_READBACK = 7,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_WRITEBACK = 8,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_ENTER_API_FUNC = 10,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC = 11,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_DISCARD_ATTACHMENTS = 13,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_TRY_LOCK = 53,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_LOCK = 54,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_UNLOCK = 55,
+ MALI_PROFILING_EVENT_REASON_SINGLE_LOCK_CONTENDED = 56,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_MALI_FENCE_DUP = 57,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SET_PP_JOB_FENCE = 58,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_WAIT_SYNC = 59,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_FENCE_SYNC = 60,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_NATIVE_FENCE_SYNC = 61,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FENCE_FLUSH = 62,
+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FLUSH_SERVER_WAITS = 63,
+} cinstr_profiling_event_reason_single_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ * to inform whether the core is physical or virtual
+ */
+typedef enum {
+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL = 0,
+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL = 1,
+} cinstr_profiling_event_reason_start_stop_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel
+ */
+typedef enum {
+ /*MALI_PROFILING_EVENT_REASON_START_STOP_SW_NONE = 0,*/
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_MALI = 1,
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_CALLBACK_THREAD = 2,
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_WORKER_THREAD = 3,
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF = 4,
+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF = 5,
+} cinstr_profiling_event_reason_start_stop_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SUSPEND/RESUME is used from software channel
+ */
+typedef enum {
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_NONE = 0, /* used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PIPELINE_FULL = 1, /* NOT used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC = 26, /* used in some build configurations */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_WAIT = 27, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_SYNC = 28, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_FILTER_CLEANUP = 29, /* used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_TEXTURE = 30, /* used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_MIPLEVEL = 31, /* used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_READPIXELS = 32, /* used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SWAP_IMMEDIATE = 33, /* NOT used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_QUEUE_BUFFER = 34, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_DEQUEUE_BUFFER = 35, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_UMP_LOCK = 36, /* Not currently used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_GLOBAL_LOCK = 37, /* Not currently used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_SWAP = 38, /* Not currently used */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_MALI_EGL_IMAGE_SYNC_WAIT = 39, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GP_JOB_HANDLING = 40, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PP_JOB_HANDLING = 41, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_MERGE = 42, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_DUP = 43,
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_FLUSH_SERVER_WAITS = 44,
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SYNC = 45, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_JOBS_WAIT = 46, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOFRAMES_WAIT = 47, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOJOBS_WAIT = 48, /* USED */
+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_SUBMIT_LIMITER_WAIT = 49, /* USED */
+} cinstr_profiling_event_reason_suspend_resume_sw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from a HW channel (GPx+PPx)
+ */
+typedef enum {
+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_NONE = 0,
+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_INTERRUPT = 1,
+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH = 2,
+} cinstr_profiling_event_reason_single_hw_t;
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel
+ */
+typedef enum {
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_NONE = 0,
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1,
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS = 2,
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS = 3,
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS = 4,
+} cinstr_profiling_event_reason_single_gpu_t;
+
+/**
+ * These values are applicable for the 3rd data parameter when
+ * the type MALI_PROFILING_EVENT_TYPE_START is used from the software channel
+ * with the MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF reason.
+ */
+typedef enum {
+ MALI_PROFILING_EVENT_DATA_CORE_GP0 = 1,
+ MALI_PROFILING_EVENT_DATA_CORE_PP0 = 5,
+ MALI_PROFILING_EVENT_DATA_CORE_PP1 = 6,
+ MALI_PROFILING_EVENT_DATA_CORE_PP2 = 7,
+ MALI_PROFILING_EVENT_DATA_CORE_PP3 = 8,
+ MALI_PROFILING_EVENT_DATA_CORE_PP4 = 9,
+ MALI_PROFILING_EVENT_DATA_CORE_PP5 = 10,
+ MALI_PROFILING_EVENT_DATA_CORE_PP6 = 11,
+ MALI_PROFILING_EVENT_DATA_CORE_PP7 = 12,
+ MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU = 22, /* GP0 + 21 */
+ MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU = 26, /* PP0 + 21 */
+ MALI_PROFILING_EVENT_DATA_CORE_PP1_MMU = 27, /* PP1 + 21 */
+ MALI_PROFILING_EVENT_DATA_CORE_PP2_MMU = 28, /* PP2 + 21 */
+ MALI_PROFILING_EVENT_DATA_CORE_PP3_MMU = 29, /* PP3 + 21 */
+ MALI_PROFILING_EVENT_DATA_CORE_PP4_MMU = 30, /* PP4 + 21 */
+ MALI_PROFILING_EVENT_DATA_CORE_PP5_MMU = 31, /* PP5 + 21 */
+ MALI_PROFILING_EVENT_DATA_CORE_PP6_MMU = 32, /* PP6 + 21 */
+ MALI_PROFILING_EVENT_DATA_CORE_PP7_MMU = 33, /* PP7 + 21 */
+
+} cinstr_profiling_event_data_core_t;
+
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0 + (num))
+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU + (num))
+
+
+#endif /*_MALI_UTGARD_PROFILING_EVENTS_H_*/
diff --git a/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_profiling_gator_api.h b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_profiling_gator_api.h
new file mode 100644
index 000000000000..b922187a0b88
--- /dev/null
+++ b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library.
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+#define _MALI_SPCIAL_COUNTER_DESCRIPTIONS \
+ { \
+ "Filmstrip_cnt0", \
+ "Frequency", \
+ "Voltage", \
+ "vertex", \
+ "fragment", \
+ "Total_alloc_pages", \
+ };
+
+#define _MALI_MEM_COUTNER_DESCRIPTIONS \
+ { \
+ "untyped_memory", \
+ "vertex_index_buffer", \
+ "texture_buffer", \
+ "varying_buffer", \
+ "render_target", \
+ "pbuffer_buffer", \
+ "plbu_heap", \
+ "pointer_array_buffer", \
+ "slave_tilelist", \
+ "untyped_gp_cmdlist", \
+ "polygon_cmdlist", \
+ "texture_descriptor", \
+ "render_state_word", \
+ "shader", \
+ "stream_buffer", \
+ "fragment_stack", \
+ "uniform", \
+ "untyped_frame_pool", \
+ "untyped_surface", \
+ };
+
+/** The list of events supported by the Mali DDK. */
+typedef enum {
+ /* Vertex processor activity */
+ ACTIVITY_VP_0 = 0,
+
+ /* Fragment processor activity */
+ ACTIVITY_FP_0,
+ ACTIVITY_FP_1,
+ ACTIVITY_FP_2,
+ ACTIVITY_FP_3,
+ ACTIVITY_FP_4,
+ ACTIVITY_FP_5,
+ ACTIVITY_FP_6,
+ ACTIVITY_FP_7,
+
+ /* L2 cache counters */
+ COUNTER_L2_0_C0,
+ COUNTER_L2_0_C1,
+ COUNTER_L2_1_C0,
+ COUNTER_L2_1_C1,
+ COUNTER_L2_2_C0,
+ COUNTER_L2_2_C1,
+
+ /* Vertex processor counters */
+ COUNTER_VP_0_C0,
+ COUNTER_VP_0_C1,
+
+ /* Fragment processor counters */
+ COUNTER_FP_0_C0,
+ COUNTER_FP_0_C1,
+ COUNTER_FP_1_C0,
+ COUNTER_FP_1_C1,
+ COUNTER_FP_2_C0,
+ COUNTER_FP_2_C1,
+ COUNTER_FP_3_C0,
+ COUNTER_FP_3_C1,
+ COUNTER_FP_4_C0,
+ COUNTER_FP_4_C1,
+ COUNTER_FP_5_C0,
+ COUNTER_FP_5_C1,
+ COUNTER_FP_6_C0,
+ COUNTER_FP_6_C1,
+ COUNTER_FP_7_C0,
+ COUNTER_FP_7_C1,
+
+ /*
+ * If more hardware counters are added, the _mali_osk_hw_counter_table
+ * below should also be updated.
+ */
+
+ /* EGL software counters */
+ COUNTER_EGL_BLIT_TIME,
+
+ /* GLES software counters */
+ COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_ARRAYS_CALLS,
+ COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_POINTS,
+ COUNTER_GLES_DRAW_LINES,
+ COUNTER_GLES_DRAW_LINE_LOOP,
+ COUNTER_GLES_DRAW_LINE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLES,
+ COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLE_FAN,
+ COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+ COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+ COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+ COUNTER_GLES_UPLOAD_VBO_TIME,
+ COUNTER_GLES_NUM_FLUSHES,
+ COUNTER_GLES_NUM_VSHADERS_GENERATED,
+ COUNTER_GLES_NUM_FSHADERS_GENERATED,
+ COUNTER_GLES_VSHADER_GEN_TIME,
+ COUNTER_GLES_FSHADER_GEN_TIME,
+ COUNTER_GLES_INPUT_TRIANGLES,
+ COUNTER_GLES_VXCACHE_HIT,
+ COUNTER_GLES_VXCACHE_MISS,
+ COUNTER_GLES_VXCACHE_COLLISION,
+ COUNTER_GLES_CULLED_TRIANGLES,
+ COUNTER_GLES_CULLED_LINES,
+ COUNTER_GLES_BACKFACE_TRIANGLES,
+ COUNTER_GLES_GBCLIP_TRIANGLES,
+ COUNTER_GLES_GBCLIP_LINES,
+ COUNTER_GLES_TRIANGLES_DRAWN,
+ COUNTER_GLES_DRAWCALL_TIME,
+ COUNTER_GLES_TRIANGLES_COUNT,
+ COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+ COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+ COUNTER_GLES_FAN_TRIANGLES_COUNT,
+ COUNTER_GLES_LINES_COUNT,
+ COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+ COUNTER_GLES_STRIP_LINES_COUNT,
+ COUNTER_GLES_LOOP_LINES_COUNT,
+
+ /* Special counter */
+
+ /* Framebuffer capture pseudo-counter */
+ COUNTER_FILMSTRIP,
+ COUNTER_FREQUENCY,
+ COUNTER_VOLTAGE,
+ COUNTER_VP_ACTIVITY,
+ COUNTER_FP_ACTIVITY,
+ COUNTER_TOTAL_ALLOC_PAGES,
+
+ /* Memory usage counter */
+ COUNTER_MEM_UNTYPED,
+ COUNTER_MEM_VB_IB,
+ COUNTER_MEM_TEXTURE,
+ COUNTER_MEM_VARYING,
+ COUNTER_MEM_RT,
+ COUNTER_MEM_PBUFFER,
+ /* memory usages for gp command */
+ COUNTER_MEM_PLBU_HEAP,
+ COUNTER_MEM_POINTER_ARRAY,
+ COUNTER_MEM_SLAVE_TILELIST,
+ COUNTER_MEM_UNTYPE_GP_CMDLIST,
+ /* memory usages for polygon list command */
+ COUNTER_MEM_POLYGON_CMDLIST,
+ /* memory usages for pp command */
+ COUNTER_MEM_TD,
+ COUNTER_MEM_RSW,
+ /* other memory usages */
+ COUNTER_MEM_SHADER,
+ COUNTER_MEM_STREAMS,
+ COUNTER_MEM_FRAGMENT_STACK,
+ COUNTER_MEM_UNIFORM,
+ /* Special mem usage, which is used for mem pool allocation */
+ COUNTER_MEM_UNTYPE_MEM_POOL,
+ COUNTER_MEM_UNTYPE_SURFACE,
+
+ NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER COUNTER_L2_0_C0
+#define LAST_HW_COUNTER COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT
+
+#define FIRST_SPECIAL_COUNTER COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER COUNTER_TOTAL_ALLOC_PAGES
+
+#define FIRST_MEM_COUNTER COUNTER_MEM_UNTYPED
+#define LAST_MEM_COUNTER COUNTER_MEM_UNTYPE_SURFACE
+
+#define MALI_PROFILING_MEM_COUNTERS_NUM (LAST_MEM_COUNTER - FIRST_MEM_COUNTER + 1)
+#define MALI_PROFILING_SPECIAL_COUNTERS_NUM (LAST_SPECIAL_COUNTER - FIRST_SPECIAL_COUNTER + 1)
+#define MALI_PROFILING_SW_COUNTERS_NUM (LAST_SW_COUNTER - FIRST_SW_COUNTER + 1)
+
+/**
+ * Define the stream header types for the profiling stream.
+ */
+#define STREAM_HEADER_FRAMEBUFFER 0x05 /* The stream packet header type for framebuffer dumping. */
+#define STREAM_HEADER_COUNTER_VALUE 0x09 /* The stream packet header type for hw/sw/memory counter sampling. */
+#define STREAM_HEADER_CORE_ACTIVITY 0x0a /* The stream packet header type for activity counter sampling. */
+#define STREAM_HEADER_SIZE 5
+
+/**
+ * Define the packet header type of profiling control packet.
+ */
+#define PACKET_HEADER_ERROR 0x80 /* The response packet header type if error. */
+#define PACKET_HEADER_ACK 0x81 /* The response packet header type if OK. */
+#define PACKET_HEADER_COUNTERS_REQUEST 0x82 /* The control packet header type to request counter information from ddk. */
+#define PACKET_HEADER_COUNTERS_ACK 0x83 /* The response packet header type to send out counter information. */
+#define PACKET_HEADER_COUNTERS_ENABLE 0x84 /* The control packet header type to enable counters. */
+#define PACKET_HEADER_START_CAPTURE_VALUE 0x85 /* The control packet header type to start capture values. */
+
+#define PACKET_HEADER_SIZE 5
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters {
+ u32 source0;
+ u32 value0;
+ u32 source1;
+ u32 value1;
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values {
+ struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version {
+ u32 mali_product_id;
+ u32 mali_version_major;
+ u32 mali_version_minor;
+ u32 num_of_l2_cores;
+ u32 num_of_fp_cores;
+ u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/**
+ * Structure to define the mali profiling counter struct.
+ */
+typedef struct mali_profiling_counter {
+ char counter_name[40];
+ u32 counter_id;
+ u32 counter_event;
+ u32 prev_counter_value;
+ u32 current_counter_value;
+ u32 key;
+ int enabled;
+} mali_profiling_counter;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define MEM_COUNTER_ENABLE (5)
+#define ANNOTATE_PROFILING_ENABLE (6)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */
diff --git a/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_uk_types.h b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_uk_types.h
new file mode 100644
index 000000000000..a291dfba2c08
--- /dev/null
+++ b/drivers/gpu/arm/utgard/include/linux/mali/mali_utgard_uk_types.h
@@ -0,0 +1,1106 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ * Class Path Exception
+ * Linking this library statically or dynamically with other modules is making a combined work based on this library.
+ * Thus, the terms and conditions of the GNU General Public License cover the whole combination.
+ * As a special exception, the copyright holders of this library give you permission to link this library with independent modules
+ * to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting
+ * executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions
+ * of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify
+ * this library, you may extend this exception to your version of the library, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
+ */
+
+/**
+ * @file mali_utgard_uk_types.h
+ * Defines the types and constants used in the user-kernel interface
+ */
+
+#ifndef __MALI_UTGARD_UK_TYPES_H__
+#define __MALI_UTGARD_UK_TYPES_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Iteration functions depend on these values being consecutive. */
+#define MALI_UK_TIMELINE_GP 0
+#define MALI_UK_TIMELINE_PP 1
+#define MALI_UK_TIMELINE_SOFT 2
+#define MALI_UK_TIMELINE_MAX 3
+
+#define MALI_UK_BIG_VARYING_SIZE (1024*1024*2)
+
+typedef struct {
+ u32 points[MALI_UK_TIMELINE_MAX];
+ s32 sync_fd;
+} _mali_uk_fence_t;
+
+/**
+ * @addtogroup uddapi Unified Device Driver (UDD) APIs
+ *
+ * @{
+ */
+
+/**
+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs
+ *
+ * @{
+ */
+
+/** @defgroup _mali_uk_core U/K Core
+ * @{ */
+
+/** Definition of subsystem numbers, to assist in creating a unique identifier
+ * for each U/K call.
+ *
+ * @see _mali_uk_functions */
+typedef enum {
+ _MALI_UK_CORE_SUBSYSTEM, /**< Core Group of U/K calls */
+ _MALI_UK_MEMORY_SUBSYSTEM, /**< Memory Group of U/K calls */
+ _MALI_UK_PP_SUBSYSTEM, /**< Fragment Processor Group of U/K calls */
+ _MALI_UK_GP_SUBSYSTEM, /**< Vertex Processor Group of U/K calls */
+ _MALI_UK_PROFILING_SUBSYSTEM, /**< Profiling Group of U/K calls */
+ _MALI_UK_VSYNC_SUBSYSTEM, /**< VSYNC Group of U/K calls */
+} _mali_uk_subsystem_t;
+
+/** Within a function group each function has its unique sequence number
+ * to assist in creating a unique identifier for each U/K call.
+ *
+ * An ordered pair of numbers selected from
+ * ( \ref _mali_uk_subsystem_t,\ref _mali_uk_functions) will uniquely identify the
+ * U/K call across all groups of functions, and all functions. */
+typedef enum {
+ /** Core functions */
+
+ _MALI_UK_OPEN = 0, /**< _mali_ukk_open() */
+ _MALI_UK_CLOSE, /**< _mali_ukk_close() */
+ _MALI_UK_WAIT_FOR_NOTIFICATION, /**< _mali_ukk_wait_for_notification() */
+ _MALI_UK_GET_API_VERSION, /**< _mali_ukk_get_api_version() */
+ _MALI_UK_POST_NOTIFICATION, /**< _mali_ukk_post_notification() */
+ _MALI_UK_GET_USER_SETTING, /**< _mali_ukk_get_user_setting() */
+ _MALI_UK_GET_USER_SETTINGS, /**< _mali_ukk_get_user_settings() */
+ _MALI_UK_REQUEST_HIGH_PRIORITY, /**< _mali_ukk_request_high_priority() */
+ _MALI_UK_TIMELINE_GET_LATEST_POINT, /**< _mali_ukk_timeline_get_latest_point() */
+ _MALI_UK_TIMELINE_WAIT, /**< _mali_ukk_timeline_wait() */
+ _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, /**< _mali_ukk_timeline_create_sync_fence() */
+ _MALI_UK_SOFT_JOB_START, /**< _mali_ukk_soft_job_start() */
+ _MALI_UK_SOFT_JOB_SIGNAL, /**< _mali_ukk_soft_job_signal() */
+ _MALI_UK_PENDING_SUBMIT, /**< _mali_ukk_pending_submit() */
+
+ /** Memory functions */
+
+ _MALI_UK_ALLOC_MEM = 0, /**< _mali_ukk_alloc_mem() */
+ _MALI_UK_FREE_MEM, /**< _mali_ukk_free_mem() */
+ _MALI_UK_BIND_MEM, /**< _mali_ukk_mem_bind() */
+ _MALI_UK_UNBIND_MEM, /**< _mali_ukk_mem_unbind() */
+ _MALI_UK_COW_MEM, /**< _mali_ukk_mem_cow() */
+ _MALI_UK_COW_MODIFY_RANGE, /**< _mali_ukk_mem_cow_modify_range() */
+ _MALI_UK_RESIZE_MEM, /**< _mali_ukk_mem_resize() */
+ _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, /**< _mali_ukk_mem_get_mmu_page_table_dump_size() */
+ _MALI_UK_DUMP_MMU_PAGE_TABLE, /**< _mali_ukk_mem_dump_mmu_page_table() */
+ _MALI_UK_DMA_BUF_GET_SIZE, /**< _mali_ukk_dma_buf_get_size() */
+ _MALI_UK_MEM_WRITE_SAFE, /**< _mali_uku_mem_write_safe() */
+
+ /** Common functions for each core */
+
+ _MALI_UK_START_JOB = 0, /**< Start a Fragment/Vertex Processor Job on a core */
+ _MALI_UK_GET_NUMBER_OF_CORES, /**< Get the number of Fragment/Vertex Processor cores */
+ _MALI_UK_GET_CORE_VERSION, /**< Get the Fragment/Vertex Processor version compatible with all cores */
+
+ /** Fragment Processor Functions */
+
+ _MALI_UK_PP_START_JOB = _MALI_UK_START_JOB, /**< _mali_ukk_pp_start_job() */
+ _MALI_UK_GET_PP_NUMBER_OF_CORES = _MALI_UK_GET_NUMBER_OF_CORES, /**< _mali_ukk_get_pp_number_of_cores() */
+ _MALI_UK_GET_PP_CORE_VERSION = _MALI_UK_GET_CORE_VERSION, /**< _mali_ukk_get_pp_core_version() */
+ _MALI_UK_PP_DISABLE_WB, /**< _mali_ukk_pp_job_disable_wb() */
+ _MALI_UK_PP_AND_GP_START_JOB, /**< _mali_ukk_pp_and_gp_start_job() */
+
+ /** Vertex Processor Functions */
+
+ _MALI_UK_GP_START_JOB = _MALI_UK_START_JOB, /**< _mali_ukk_gp_start_job() */
+ _MALI_UK_GET_GP_NUMBER_OF_CORES = _MALI_UK_GET_NUMBER_OF_CORES, /**< _mali_ukk_get_gp_number_of_cores() */
+ _MALI_UK_GET_GP_CORE_VERSION = _MALI_UK_GET_CORE_VERSION, /**< _mali_ukk_get_gp_core_version() */
+ _MALI_UK_GP_SUSPEND_RESPONSE, /**< _mali_ukk_gp_suspend_response() */
+
+ /** Profiling functions */
+
+ _MALI_UK_PROFILING_ADD_EVENT = 0, /**< __mali_uku_profiling_add_event() */
+ _MALI_UK_PROFILING_REPORT_SW_COUNTERS,/**< __mali_uku_profiling_report_sw_counters() */
+ _MALI_UK_PROFILING_MEMORY_USAGE_GET, /**< __mali_uku_profiling_memory_usage_get() */
+ _MALI_UK_PROFILING_STREAM_FD_GET, /**< __mali_uku_profiling_stream_fd_get() */
+ _MALI_UK_PROFILING_CONTROL_SET, /**< __mali_uku_profiling_control_set() */
+
+ /** VSYNC reporting functions */
+ _MALI_UK_VSYNC_EVENT_REPORT = 0, /**< _mali_ukk_vsync_event_report() */
+} _mali_uk_functions;
+
+/** @defgroup _mali_uk_getsysteminfo U/K Get System Info
+ * @{ */
+
+/**
+ * Type definition for the core version number.
+ * Used when returning the version number read from a core
+ *
+ * Its format is that of the 32-bit Version register for a particular core.
+ * Refer to the "Mali200 and MaliGP2 3D Graphics Processor Technical Reference
+ * Manual", ARM DDI 0415C, for more information.
+ */
+typedef u32 _mali_core_version;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @defgroup _mali_uk_gp_suspend_response_s Vertex Processor Suspend Response
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_gp_suspend_response()
+ *
+ * When _mali_wait_for_notification() receives notification that a
+ * Vertex Processor job was suspended, you need to send a response to indicate
+ * what needs to happen with this job. You can either abort or resume the job.
+ *
+ * - set @c code to indicate response code. This is either @c _MALIGP_JOB_ABORT or
+ * @c _MALIGP_JOB_RESUME_WITH_NEW_HEAP to indicate you will provide a new heap
+ * for the job that will resolve the out of memory condition for the job.
+ * - copy the @c cookie value from the @c _mali_uk_gp_job_suspended_s notification;
+ * this is an identifier for the suspended job
+ * - set @c arguments[0] and @c arguments[1] to zero if you abort the job. If
+ * you resume it, @c argument[0] should specify the Mali start address for the new
+ * heap and @c argument[1] the Mali end address of the heap.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ */
+typedef enum _maligp_job_suspended_response_code {
+ _MALIGP_JOB_ABORT, /**< Abort the Vertex Processor job */
+ _MALIGP_JOB_RESUME_WITH_NEW_HEAP /**< Resume the Vertex Processor job with a new heap */
+} _maligp_job_suspended_response_code;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 cookie; /**< [in] cookie from the _mali_uk_gp_job_suspended_s notification */
+ _maligp_job_suspended_response_code code; /**< [in] abort or resume response code, see \ref _maligp_job_suspended_response_code */
+ u32 arguments[2]; /**< [in] 0 when aborting a job. When resuming a job, the Mali start and end address for a new heap to resume the job with */
+} _mali_uk_gp_suspend_response_s;
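Following the steps in the comment above, a sketch that fills in the response to resume an out-of-memory job; the function name and heap addresses are invented for illustration:

/* Sketch: resume a suspended GP job with a new heap. The cookie comes from
 * the _mali_uk_gp_job_suspended_s notification; addresses are placeholders. */
static void example_resume_with_new_heap(_mali_uk_gp_suspend_response_s *resp,
					 u64 ctx, u32 cookie)
{
	resp->ctx          = ctx;                /* from _mali_ukk_open()      */
	resp->cookie       = cookie;             /* identifies the stalled job */
	resp->code         = _MALIGP_JOB_RESUME_WITH_NEW_HEAP;
	resp->arguments[0] = 0x40000000;         /* Mali start address of heap */
	resp->arguments[1] = 0x40100000;         /* Mali end address of heap   */
}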
+
+/** @} */ /* end group _mali_uk_gp_suspend_response_s */
+
+/** @defgroup _mali_uk_gpstartjob_s Vertex Processor Start Job
+ * @{ */
+
+/** @brief Status indicating the result of the execution of a Vertex or Fragment processor job */
+typedef enum {
+ _MALI_UK_JOB_STATUS_END_SUCCESS = 1 << (16 + 0),
+ _MALI_UK_JOB_STATUS_END_OOM = 1 << (16 + 1),
+ _MALI_UK_JOB_STATUS_END_ABORT = 1 << (16 + 2),
+ _MALI_UK_JOB_STATUS_END_TIMEOUT_SW = 1 << (16 + 3),
+ _MALI_UK_JOB_STATUS_END_HANG = 1 << (16 + 4),
+ _MALI_UK_JOB_STATUS_END_SEG_FAULT = 1 << (16 + 5),
+ _MALI_UK_JOB_STATUS_END_ILLEGAL_JOB = 1 << (16 + 6),
+ _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR = 1 << (16 + 7),
+ _MALI_UK_JOB_STATUS_END_SHUTDOWN = 1 << (16 + 8),
+ _MALI_UK_JOB_STATUS_END_SYSTEM_UNUSABLE = 1 << (16 + 9)
+} _mali_uk_job_status;
+
+#define MALIGP2_NUM_REGS_FRAME (6)
+
+/** @brief Arguments for _mali_ukk_gp_start_job()
+ *
+ * To start a Vertex Processor job
+ * - associate the request with a reference to a @c mali_gp_job_info by setting
+ * user_job_ptr to the address of the @c mali_gp_job_info of the job.
+ * - set @c priority to the priority of the @c mali_gp_job_info
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_gp_job_info into @c frame_registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ *
+ * When @c _mali_ukk_gp_start_job() returns @c _MALI_OSK_ERR_OK, status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, @c _mali_wait_for_notification() will be notified
+ * that the job finished or got suspended. It may get suspended due to
+ * resource shortage. If it finished (see _mali_ukk_wait_for_notification())
+ * the notification will contain a @c _mali_uk_gp_job_finished_s result. If
+ * it got suspended the notification will contain a @c _mali_uk_gp_job_suspended_s
+ * result.
+ *
+ * The @c _mali_uk_gp_job_finished_s contains the job status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished successfully when its status is
+ * @c _MALI_UK_JOB_STATUS_END_SUCCESS. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_END_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, the status will indicate @c _MALI_UK_JOB_STATUS_END_SEG_FAULT.
+ * The status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. at driver
+ * shutdown.
+ *
+ * In case the job got suspended, @c _mali_uk_gp_job_suspended_s contains
+ * the @c user_job_ptr identifier used to start the job with, the @c reason
+ * why the job stalled (see \ref _maligp_job_suspended_reason) and a @c cookie
+ * to identify the core on which the job stalled. This @c cookie will be needed
+ * when responding to this notification by means of _mali_ukk_gp_suspend_response().
+ * (see _mali_ukk_gp_suspend_response()). The response is either to abort or
+ * resume the job. If the job got suspended due to an out of memory condition
+ * you may be able to resolve this by providing more memory and resuming the job.
+ *
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u64 user_job_ptr; /**< [in] identifier for the job in user space, a @c mali_gp_job_info* */
+ u32 priority; /**< [in] job priority. A lower number means higher priority */
+ u32 frame_registers[MALIGP2_NUM_REGS_FRAME]; /**< [in] core specific registers associated with this job */
+ u32 heap_grow_size; /**< [in] grow size of the PLBU heap when out of memory */
+ u32 perf_counter_flag; /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+ u32 perf_counter_src0; /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+ u32 perf_counter_src1; /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+ u32 frame_builder_id; /**< [in] id of the originating frame builder */
+ u32 flush_id; /**< [in] flush id within the originating frame builder */
+ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */
+ u64 timeline_point_ptr; /**< [in,out] pointer to u32: location where point on gp timeline for this job will be written */
+ u32 varying_memsize; /**< [in] size of varying memory to use for deferred bind */
+ u32 varying_alloc_num;
+ u64 varying_alloc_list; /**< [in] memory handle list of varying buffers to use for deferred bind */
+} _mali_uk_gp_start_job_s;
+
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE (1<<0) /**< Enable performance counter SRC0 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE (1<<1) /**< Enable performance counter SRC1 for a job */
+#define _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE (1<<2) /**< Enable per tile (aka heatmap) generation with for a job (using the enabled counter sources) */
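As the start-job comment above describes, an instrumented build enables the per-job counters by setting these bits and choosing counter sources. A sketch; the source id values are placeholders, the real ones come from ARM DDI0415A, Table 3-60:

/* Sketch: request both hardware counters for a GP job. */
static void example_enable_gp_counters(_mali_uk_gp_start_job_s *job)
{
	job->perf_counter_flag = _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE |
				 _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE;
	job->perf_counter_src0 = 0x07; /* placeholder source id */
	job->perf_counter_src1 = 0x0a; /* placeholder source id */
}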
+
+/** @} */ /* end group _mali_uk_gpstartjob_s */
+
+typedef struct {
+ u64 user_job_ptr; /**< [out] identifier for the job in user space */
+ _mali_uk_job_status status; /**< [out] status of finished job */
+ u32 heap_current_addr; /**< [out] value of the GP PLB PL heap start address register */
+ u32 perf_counter0; /**< [out] value of performance counter 0 (see ARM DDI0415A) */
+ u32 perf_counter1; /**< [out] value of performance counter 1 (see ARM DDI0415A) */
+ u32 pending_big_job_num;
+} _mali_uk_gp_job_finished_s;
+
+typedef struct {
+ u64 user_job_ptr; /**< [out] identifier for the job in user space */
+ u32 cookie; /**< [out] identifier for the core in kernel space on which the job stalled */
+ u32 heap_added_size;
+} _mali_uk_gp_job_suspended_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+
+/** @defgroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+#define _MALI_PP_MAX_SUB_JOBS 8
+
+#define _MALI_PP_MAX_FRAME_REGISTERS ((0x058/4)+1)
+
+#define _MALI_PP_MAX_WB_REGISTERS ((0x02C/4)+1)
+
+#define _MALI_DLBU_MAX_REGISTERS 4
+
+/** Flag for _mali_uk_pp_start_job_s */
+#define _MALI_PP_JOB_FLAG_NO_NOTIFICATION (1<<0)
+#define _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE (1<<1)
+
+/** @defgroup _mali_uk_ppstartjob_s Fragment Processor Start Job
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_pp_start_job()
+ *
+ * To start a Fragment Processor job
+ * - associate the request with a reference to a mali_pp_job by setting
+ * @c user_job_ptr to the address of the @c mali_pp_job of the job.
+ * - set @c priority to the priority of the mali_pp_job
+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of
+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the
+ * default timeout in use by the device driver.
+ * - copy the frame registers from the @c mali_pp_job into @c frame_registers.
+ * For MALI200 you also need to copy the write back 0,1 and 2 registers.
+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero
+ * for a non-instrumented build. For an instrumented build you can use up
+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag
+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify
+ * the source of what needs to get counted (e.g. number of vertex loader
+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60.
+ * - pass in the user-kernel context in @c ctx that was returned from _mali_ukk_open()
+ *
+ * When _mali_ukk_pp_start_job() returns @c _MALI_OSK_ERR_OK, @c status contains the
+ * result of the request (see \ref _mali_uk_start_job_status). If the job could
+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be
+ * tried again.
+ *
+ * After the job has started, _mali_wait_for_notification() will be notified
+ * when the job finished. The notification will contain a
+ * @c _mali_uk_pp_job_finished_s result. It contains the @c user_job_ptr
+ * identifier used to start the job with, the job @c status (see \ref _mali_uk_job_status),
+ * the number of milliseconds the job took to render, and values of core registers
+ * when the job finished (irq status, performance counters, renderer list
+ * address). A job has finished successfully when its status is
+ * @c _MALI_UK_JOB_STATUS_END_SUCCESS. If the hardware detected a timeout while rendering
+ * the job, or software detected the job is taking more than @c watchdog_msecs to
+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_END_HANG.
+ * If the hardware detected a bus error while accessing memory associated with the
+ * job, the status will indicate @c _MALI_UK_JOB_STATUS_END_SEG_FAULT.
+ * The status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to
+ * stop the job but the job didn't start on the hardware yet, e.g. at driver
+ * shutdown.
+ *
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u64 user_job_ptr; /**< [in] identifier for the job in user space */
+ u32 priority; /**< [in] job priority. A lower number means higher priority */
+ u32 frame_registers[_MALI_PP_MAX_FRAME_REGISTERS]; /**< [in] core specific registers associated with first sub job, see ARM DDI0415A */
+ u32 frame_registers_addr_frame[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_FRAME registers for sub job 1-7 */
+ u32 frame_registers_addr_stack[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_STACK registers for sub job 1-7 */
+ u32 wb0_registers[_MALI_PP_MAX_WB_REGISTERS];
+ u32 wb1_registers[_MALI_PP_MAX_WB_REGISTERS];
+ u32 wb2_registers[_MALI_PP_MAX_WB_REGISTERS];
+ u32 dlbu_registers[_MALI_DLBU_MAX_REGISTERS]; /**< [in] Dynamic load balancing unit registers */
+ u32 num_cores; /**< [in] Number of cores to set up (valid range: 1-8 on M450, 1-4 on M400) */
+ u32 perf_counter_flag; /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */
+ u32 perf_counter_src0; /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */
+ u32 perf_counter_src1; /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */
+ u32 frame_builder_id; /**< [in] id of the originating frame builder */
+ u32 flush_id; /**< [in] flush id within the originating frame builder */
+ u32 flags; /**< [in] See _MALI_PP_JOB_FLAG_* for a list of available flags */
+ u32 tilesx; /**< [in] number of tiles in the x direction (needed for heatmap generation) */
+ u32 tilesy; /**< [in] number of tiles in the y direction (needed for reading the heatmap memory) */
+ u32 heatmap_mem; /**< [in] memory address to store counter values per tile (aka heatmap) */
+ u32 num_memory_cookies; /**< [in] number of memory cookies attached to job */
+ u64 memory_cookies; /**< [in] pointer to array of u32 memory cookies attached to job */
+ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */
+ u64 timeline_point_ptr; /**< [in,out] pointer to location of u32 where point on pp timeline for this job will be written */
+} _mali_uk_pp_start_job_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u64 gp_args; /**< [in,out] GP uk arguments (see _mali_uk_gp_start_job_s) */
+ u64 pp_args; /**< [in,out] PP uk arguments (see _mali_uk_pp_start_job_s) */
+} _mali_uk_pp_and_gp_start_job_s;
+
+/** @} */ /* end group _mali_uk_ppstartjob_s */
+
+typedef struct {
+ u64 user_job_ptr; /**< [out] identifier for the job in user space */
+ _mali_uk_job_status status; /**< [out] status of finished job */
+ u32 perf_counter0[_MALI_PP_MAX_SUB_JOBS]; /**< [out] value of performance counter 0 (see ARM DDI0415A), one for each sub job */
+ u32 perf_counter1[_MALI_PP_MAX_SUB_JOBS]; /**< [out] value of performance counter 1 (see ARM DDI0415A), one for each sub job */
+ u32 perf_counter_src0;
+ u32 perf_counter_src1;
+} _mali_uk_pp_job_finished_s;
+
+typedef struct {
+ u32 number_of_enabled_cores; /**< [out] the new number of enabled cores */
+} _mali_uk_pp_num_cores_changed_s;
+
+
+
+/**
+ * Flags to indicate write-back units
+ */
+typedef enum {
+ _MALI_UK_PP_JOB_WB0 = 1,
+ _MALI_UK_PP_JOB_WB1 = 2,
+ _MALI_UK_PP_JOB_WB2 = 4,
+} _mali_uk_pp_job_wbx_flag;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 fb_id; /**< [in] Frame builder ID of job to disable WB units for */
+ u32 wb0_memory;
+ u32 wb1_memory;
+ u32 wb2_memory;
+} _mali_uk_pp_disable_wb_s;
+
+
+/** @} */ /* end group _mali_uk_pp */
+
+/** @defgroup _mali_uk_soft_job U/K Soft Job
+ * @{ */
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u64 user_job; /**< [in] identifier for the job in user space */
+ u64 job_id_ptr; /**< [in,out] pointer to location of u32 where job id will be written */
+ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */
+ u32 point; /**< [out] point on soft timeline for this job */
+ u32 type; /**< [in] type of soft job */
+} _mali_uk_soft_job_start_s;
+
+typedef struct {
+ u64 user_job; /**< [out] identifier for the job in user space */
+} _mali_uk_soft_job_activated_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 job_id; /**< [in] id for soft job */
+} _mali_uk_soft_job_signal_s;
+
+/** @} */ /* end group _mali_uk_soft_job */
+
+typedef struct {
+ u32 counter_id;
+ u32 key;
+ int enable;
+} _mali_uk_annotate_profiling_mem_counter_s;
+
+typedef struct {
+ u32 sampling_rate;
+ int enable;
+} _mali_uk_annotate_profiling_enable_s;
+
+
+/** @addtogroup _mali_uk_core U/K Core
+ * @{ */
+
+/** @defgroup _mali_uk_waitfornotification_s Wait For Notification
+ * @{ */
+
+/** @brief Notification type encodings
+ *
+ * Each Notification type is an ordered pair of (subsystem,id), and is unique.
+ *
+ * The encoding of subsystem,id into a 32-bit word is:
+ * encoding = (( subsystem << _MALI_NOTIFICATION_SUBSYSTEM_SHIFT ) & _MALI_NOTIFICATION_SUBSYSTEM_MASK)
+ * | (( id << _MALI_NOTIFICATION_ID_SHIFT ) & _MALI_NOTIFICATION_ID_MASK)
+ *
+ * @see _mali_uk_wait_for_notification_s
+ */
+typedef enum {
+ /** core notifications */
+
+ _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x20,
+ _MALI_NOTIFICATION_APPLICATION_QUIT = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x40,
+ _MALI_NOTIFICATION_SETTINGS_CHANGED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x80,
+ _MALI_NOTIFICATION_SOFT_ACTIVATED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x100,
+
+ /** Fragment Processor notifications */
+
+ _MALI_NOTIFICATION_PP_FINISHED = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x10,
+ _MALI_NOTIFICATION_PP_NUM_CORE_CHANGE = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x20,
+
+ /** Vertex Processor notifications */
+
+ _MALI_NOTIFICATION_GP_FINISHED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x10,
+ _MALI_NOTIFICATION_GP_STALLED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x20,
+
+ /** Profiling notifications */
+ _MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x10,
+ _MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x20,
+} _mali_uk_notification_type;
+
+/** helpers for splitting a 32-bit notification value into its subsystem and id parts */
+#define _MALI_NOTIFICATION_SUBSYSTEM_MASK 0xFFFF0000
+#define _MALI_NOTIFICATION_SUBSYSTEM_SHIFT 16
+#define _MALI_NOTIFICATION_ID_MASK 0x0000FFFF
+#define _MALI_NOTIFICATION_ID_SHIFT 0
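+
+/*
+ * Illustrative decode sketch (not part of the ABI; "value" is a hypothetical
+ * variable): the masks above split a notification value back into its
+ * (subsystem, id) pair:
+ *
+ *   u32 subsystem = (value & _MALI_NOTIFICATION_SUBSYSTEM_MASK) >> _MALI_NOTIFICATION_SUBSYSTEM_SHIFT;
+ *   u32 id        = (value & _MALI_NOTIFICATION_ID_MASK) >> _MALI_NOTIFICATION_ID_SHIFT;
+ *
+ * For example, _MALI_NOTIFICATION_PP_FINISHED decodes to subsystem
+ * _MALI_UK_PP_SUBSYSTEM and id 0x10.
+ */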
+
+
+/** @brief Enumeration of possible settings which match mali_setting_t in user space */
+typedef enum {
+ _MALI_UK_USER_SETTING_SW_EVENTS_ENABLE = 0,
+ _MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED,
+ _MALI_UK_USER_SETTING_DEPTHBUFFER_CAPTURE_ENABLED,
+ _MALI_UK_USER_SETTING_STENCILBUFFER_CAPTURE_ENABLED,
+ _MALI_UK_USER_SETTING_PER_TILE_COUNTERS_CAPTURE_ENABLED,
+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_COMPOSITOR,
+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_WINDOW,
+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_OTHER,
+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES,
+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR,
+ _MALI_UK_USER_SETTING_SW_COUNTER_ENABLED,
+ _MALI_UK_USER_SETTING_MAX,
+} _mali_uk_user_setting_t;
+
+/* See mali_user_settings_db.c */
+extern const char *_mali_uk_user_setting_descriptions[];
+#define _MALI_UK_USER_SETTING_DESCRIPTIONS \
+ { \
+ "sw_events_enable", \
+ "colorbuffer_capture_enable", \
+ "depthbuffer_capture_enable", \
+ "stencilbuffer_capture_enable", \
+ "per_tile_counters_enable", \
+ "buffer_capture_compositor", \
+ "buffer_capture_window", \
+ "buffer_capture_other", \
+ "buffer_capture_n_frames", \
+ "buffer_capture_resize_factor", \
+ "sw_counters_enable", \
+ };
+
+/** @brief struct to hold the value to a particular setting as seen in the kernel space
+ */
+typedef struct {
+ _mali_uk_user_setting_t setting;
+ u32 value;
+} _mali_uk_settings_changed_s;
+
+/** @brief Arguments for _mali_ukk_wait_for_notification()
+ *
+ * On successful return from _mali_ukk_wait_for_notification(), the members of
+ * this structure will indicate the reason for notification.
+ *
+ * Specifically, the source of the notification can be identified by the
+ * subsystem and id fields of the _mali_uk_notification_type in the type
+ * member. The type member is encoded in a way to divide up the types into a
+ * subsystem field, and a per-subsystem ID field. See
+ * _mali_uk_notification_type for more information.
+ *
+ * Interpreting the data union member depends on the notification type:
+ *
+ * - type == _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS
+ * - The kernel side is shutting down. No further
+ * _mali_uk_wait_for_notification() calls should be made.
+ * - In this case, the value of the data union member is undefined.
+ * - This is used to indicate to the user space client that it should close
+ * the connection to the Mali Device Driver.
+ * - type == _MALI_NOTIFICATION_PP_FINISHED
+ * - The notification data is of type _mali_uk_pp_job_finished_s. It contains the user_job_ptr
+ * identifier used to start the job with, the job status, the number of milliseconds the job took to render,
+ * and values of core registers when the job finished (irq status, performance counters, renderer list
+ * address).
+ * - A job has finished successfully when its status member is _MALI_UK_JOB_STATUS_FINISHED.
+ * - If the hardware detected a timeout while rendering the job, or software detected the job is
+ * taking more than watchdog_msecs (see _mali_ukk_pp_start_job()) to complete, the status member will
+ * indicate _MALI_UK_JOB_STATUS_HANG.
+ * - If the hardware detected a bus error while accessing memory associated with the job, status will
+ * indicate _MALI_UK_JOB_STATUS_SEG_FAULT.
+ * - Status will indicate _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to stop the job but the job
+ * had not yet started on the hardware, e.g. when the driver closes.
+ * - type == _MALI_NOTIFICATION_GP_FINISHED
+ * - The notification data is of type _mali_uk_gp_job_finished_s. The notification is similar to that of
+ * type == _MALI_NOTIFICATION_PP_FINISHED, except that several other GP core register values are returned.
+ * The status values have the same meaning as for type == _MALI_NOTIFICATION_PP_FINISHED.
+ * - type == _MALI_NOTIFICATION_GP_STALLED
+ * - The notification data is of type _mali_uk_gp_job_suspended_s. It contains the user_job_ptr
+ * identifier used to start the job with, the reason why the job stalled and a cookie to identify the core on
+ * which the job stalled.
+ * - The reason member of gp_job_suspended is set to _MALIGP_JOB_SUSPENDED_OUT_OF_MEMORY
+ * when the polygon list builder unit has run out of memory.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_uk_notification_type type; /**< [out] Type of notification available */
+ union {
+ _mali_uk_gp_job_suspended_s gp_job_suspended;/**< [out] Notification data for _MALI_NOTIFICATION_GP_STALLED notification type */
+ _mali_uk_gp_job_finished_s gp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_GP_FINISHED notification type */
+ _mali_uk_pp_job_finished_s pp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_PP_FINISHED notification type */
+ _mali_uk_settings_changed_s setting_changed;/**< [out] Notification data for _MALI_NOTIFICATION_SETTINGS_CHANGED notification type */
+ _mali_uk_soft_job_activated_s soft_job_activated; /**< [out] Notification data for _MALI_NOTIFICATION_SOFT_ACTIVATED notification type */
+ _mali_uk_annotate_profiling_mem_counter_s profiling_mem_counter;
+ _mali_uk_annotate_profiling_enable_s profiling_enable;
+ } data;
+} _mali_uk_wait_for_notification_s;
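+
+/*
+ * Caller-side sketch (assumed pattern; handle_pp() and ctx are hypothetical):
+ * after a successful wait, dispatch on the type member and read only the
+ * matching data union member. For _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS
+ * the data union is undefined and the caller should simply stop waiting:
+ *
+ *   _mali_uk_wait_for_notification_s args = { .ctx = ctx };
+ *   if (_MALI_OSK_ERR_OK == _mali_ukk_wait_for_notification(&args)) {
+ *       switch (args.type) {
+ *       case _MALI_NOTIFICATION_PP_FINISHED:
+ *           handle_pp(&args.data.pp_job_finished);
+ *           break;
+ *       case _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS:
+ *           return;
+ *       default:
+ *           break;
+ *       }
+ *   }
+ */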
+
+/** @brief Arguments for _mali_ukk_post_notification()
+ *
+ * Posts the specified notification to the notification queue for this application.
+ * This is used to send a quit message to the callback thread.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_uk_notification_type type; /**< [in] Type of notification to post */
+} _mali_uk_post_notification_s;
+
+/** @} */ /* end group _mali_uk_waitfornotification_s */
+
+/** @defgroup _mali_uk_getapiversion_s Get API Version
+ * @{ */
+
+/** helpers for Device Driver API version handling */
+
+/** @brief Encode a version ID from a 16-bit input
+ *
+ * @note the input must fit in 16 bits. */
+#define _MAKE_VERSION_ID(x) (((x) << 16UL) | (x))
+
+/** @brief Check whether a 32-bit value is likely to be Device Driver API
+ * version ID. */
+#define _IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF))
+
+/** @brief Decode a 16-bit version number from a 32-bit Device Driver API version
+ * ID */
+#define _GET_VERSION(x) (((x) >> 16UL) & 0xFFFF)
+
+/** @brief Determine whether two 32-bit encoded version IDs match */
+#define _IS_API_MATCH(x, y) (_IS_VERSION_ID((x)) && _IS_VERSION_ID((y)) && (_GET_VERSION((x)) == _GET_VERSION((y))))
+
+/**
+ * API version define.
+ * Indicates the version of the kernel API
+ * The version is a 16bit integer incremented on each API change.
+ * The 16bit integer is stored twice in a 32bit integer
+ * For example, for version 1 the value would be 0x00010001
+ */
+#define _MALI_API_VERSION 800
+#define _MALI_UK_API_VERSION _MAKE_VERSION_ID(_MALI_API_VERSION)
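+
+/*
+ * Worked example: with _MALI_API_VERSION == 800 (0x320), _MALI_UK_API_VERSION
+ * is _MAKE_VERSION_ID(800) == 0x03200320; _IS_VERSION_ID(0x03200320) holds
+ * and _GET_VERSION(0x03200320) == 800.
+ */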
+
+/**
+ * The API version is a 16-bit integer stored in both the lower and upper 16-bits
+ * of a 32-bit value. The 16-bit API version value is incremented on each API
+ * change. Version 1 would be 0x00010001. Used in _mali_uk_get_api_version_s.
+ */
+typedef u32 _mali_uk_api_version;
+
+/** @brief Arguments for _mali_uk_get_api_version()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+ u32 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */
+ int compatible; /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_s;
+
+/** @brief Arguments for _mali_uk_get_api_version_v2()
+ *
+ * The user-side interface version must be written into the version member,
+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of
+ * the kernel-side interface.
+ *
+ * On successful return, the version member will be the API version of the
+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version
+ * of the API.
+ *
+ * The compatible member must be checked to see if the version of the user-side
+ * interface is compatible with the kernel-side interface, since future versions
+ * of the interface may be backwards compatible.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */
+ int compatible; /**< [out] @c 1 when @version is compatible, @c 0 otherwise */
+} _mali_uk_get_api_version_v2_s;
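+
+/*
+ * Usage sketch (assumed caller-side flow; ctx is hypothetical): encode the
+ * caller's own version before the call and check compatible afterwards:
+ *
+ *   _mali_uk_get_api_version_v2_s args = { .ctx = ctx };
+ *   args.version = _MAKE_VERSION_ID(_MALI_API_VERSION);
+ *   if (_MALI_OSK_ERR_OK == _mali_ukk_get_api_version_v2(&args) && args.compatible) {
+ *       ... interfaces match; args.version now holds the kernel-side version ...
+ *   }
+ */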
+
+/** @} */ /* end group _mali_uk_getapiversion_s */
+
+/** @defgroup _mali_uk_get_user_settings_s Get user space settings */
+
+/** @brief struct to keep the matching values of the user space settings within a certain context
+ *
+ * Each member of the settings array corresponds to a matching setting in the user space and its value is the value
+ * of that particular setting.
+ *
+ * All settings are interpreted with reference to the context pointed to by the ctx member.
+ *
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 settings[_MALI_UK_USER_SETTING_MAX]; /**< [out] The values for all settings */
+} _mali_uk_get_user_settings_s;
+
+/** @brief struct to hold the value of a particular setting from the user space within a given context
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_uk_user_setting_t setting; /**< [in] setting to get */
+ u32 value; /**< [out] value of setting */
+} _mali_uk_get_user_setting_s;
+
+/** @brief Arguments for _mali_ukk_request_high_priority() */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_request_high_priority_s;
+
+/** @brief Arguments for _mali_ukk_pending_submit() */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+} _mali_uk_pending_submit_s;
+
+/** @} */ /* end group _mali_uk_core */
+
+
+/** @defgroup _mali_uk_memory U/K Memory
+ * @{ */
+
+#define _MALI_MEMORY_ALLOCATE_RESIZEABLE (1<<4) /* Buffer can trim down/grow */
+#define _MALI_MEMORY_ALLOCATE_NO_BIND_GPU (1<<5) /* Not mapped to GPU at allocation; bind must be called later */
+#define _MALI_MEMORY_ALLOCATE_SWAPPABLE (1<<6) /* Allocate swappable memory. */
+#define _MALI_MEMORY_ALLOCATE_DEFER_BIND (1<<7) /* Not mapped to GPU at allocation; binding is deferred */
+
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 gpu_vaddr; /**< [in] GPU virtual address */
+ u32 vsize; /**< [in] virtual size of the allocation */
+ u32 psize; /**< [in] physical size of the allocation */
+ u32 flags;
+ u64 backend_handle; /**< [out] backend handle */
+ struct {
+ /* buffer types*/
+ /* CPU read/write info*/
+ } buffer_info;
+} _mali_uk_alloc_mem_s;
+
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 gpu_vaddr; /**< [in] use as handle to free allocation */
+ u32 free_pages_nr; /**< [out] record the number of free pages */
+} _mali_uk_free_mem_s;
+
+
+#define _MALI_MEMORY_BIND_BACKEND_UMP (1<<8)
+#define _MALI_MEMORY_BIND_BACKEND_DMA_BUF (1<<9)
+#define _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY (1<<10)
+#define _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY (1<<11)
+#define _MALI_MEMORY_BIND_BACKEND_EXT_COW (1<<12)
+#define _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION (1<<13)
+
+
+#define _MALI_MEMORY_BIND_BACKEND_MASK (_MALI_MEMORY_BIND_BACKEND_UMP| \
+ _MALI_MEMORY_BIND_BACKEND_DMA_BUF |\
+ _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY |\
+ _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY |\
+ _MALI_MEMORY_BIND_BACKEND_EXT_COW |\
+ _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION)
+
+
+#define _MALI_MEMORY_GPU_READ_ALLOCATE (1<<16)
+
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 vaddr; /**< [in] mali address to map the physical memory to */
+ u32 size; /**< [in] size */
+ u32 flags; /**< [in] see _MALI_MEMORY_BIND_BACKEND_* */
+ u32 padding; /**< padding for 32/64-bit struct alignment */
+ union {
+ struct {
+ u32 secure_id; /**< [in] secure id */
+ u32 rights; /**< [in] rights necessary for accessing memory */
+ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+ } bind_ump;
+ struct {
+ u32 mem_fd; /**< [in] Memory descriptor */
+ u32 rights; /**< [in] rights necessary for accessing memory */
+ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+ } bind_dma_buf;
+ struct {
+ /**/
+ } bind_mali_memory;
+ struct {
+ u32 phys_addr; /**< [in] physical address */
+ u32 rights; /**< [in] rights necessary for accessing memory */
+ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */
+ } bind_ext_memory;
+ } mem_union;
+} _mali_uk_bind_mem_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 flags; /**< [in] see _MALI_MEMORY_BIND_BACKEND_* */
+ u32 vaddr; /**< [in] identifier for mapped memory object in kernel space */
+} _mali_uk_unbind_mem_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 target_handle; /**< [in] handle of the allocation to COW */
+ u32 target_offset; /**< [in] offset within the target allocation at which to COW (supports COW of memory allocated from a memory bank; PAGE_SIZE aligned) */
+ u32 target_size; /**< [in] size of the target allocation to COW (for memory bank support; PAGE_SIZE aligned, in bytes) */
+ u32 range_start; /**< [in] start offset of the range to reallocate, from the start of the allocation (PAGE_SIZE aligned) */
+ u32 range_size; /**< [in] size to reallocate (PAGE_SIZE aligned) */
+ u32 vaddr; /**< [in] mali address for the new allocation */
+ u32 backend_handle; /**< [out] backend handle */
+ u32 flags;
+} _mali_uk_cow_mem_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 range_start; /**< [in] start offset of the range to reallocate, from the start of the allocation */
+ u32 size; /**< [in] size to reallocate */
+ u32 vaddr; /**< [in] mali address for the new allocation */
+ s32 change_pages_nr; /**< [out] record the page number change for cow operation */
+} _mali_uk_cow_modify_range_s;
+
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 mem_fd; /**< [in] Memory descriptor */
+ u32 size; /**< [out] size */
+} _mali_uk_dma_buf_get_size_s;
+
+/** Flag for _mali_uk_map_external_mem_s, _mali_uk_attach_ump_mem_s and _mali_uk_attach_dma_buf_s */
+#define _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE (1<<0)
+
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u64 vaddr; /**< [in] the buffer to resize */
+ u32 psize; /**< [in] wanted physical size of this memory */
+} _mali_uk_mem_resize_s;
+
+/**
+ * @brief Arguments for _mali_uk[uk]_mem_write_safe()
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u64 src; /**< [in] Pointer to source data */
+ u64 dest; /**< [in] Destination Mali buffer */
+ u32 size; /**< [in,out] Number of bytes to write/copy on input, number of bytes actually written/copied on output */
+} _mali_uk_mem_write_safe_s;
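+
+/*
+ * Note (illustrative; args and nbytes are hypothetical): since size is
+ * [in,out], a caller can detect a partial copy by comparing the returned
+ * size against the requested one:
+ *
+ *   args.size = nbytes;
+ *   _mali_ukk_mem_write_safe(&args);
+ *   if (args.size != nbytes) { ... handle a partial write ... }
+ */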
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 size; /**< [out] size of MMU page table information (registers + page tables) */
+} _mali_uk_query_mmu_page_table_dump_size_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 size; /**< [in] size of buffer to receive mmu page table information */
+ u64 buffer; /**< [in,out] buffer to receive mmu page table information */
+ u32 register_writes_size; /**< [out] size of MMU register dump */
+ u64 register_writes; /**< [out] pointer within buffer where MMU register dump is stored */
+ u32 page_table_dump_size; /**< [out] size of MMU page table dump */
+ u64 page_table_dump; /**< [out] pointer within buffer where MMU page table dump is stored */
+} _mali_uk_dump_mmu_page_table_s;
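+
+/*
+ * Two-step sketch (assumed flow; buf and ctx are hypothetical): query the
+ * required size first, then pass a caller-allocated buffer of at least that
+ * size to the dump call:
+ *
+ *   _mali_uk_query_mmu_page_table_dump_size_s q = { .ctx = ctx };
+ *   _mali_ukk_query_mmu_page_table_dump_size(&q);
+ *
+ *   _mali_uk_dump_mmu_page_table_s d = { .ctx = ctx };
+ *   d.size = q.size;
+ *   d.buffer = (u64)(uintptr_t)buf;
+ *   _mali_ukk_dump_mmu_page_table(&d);
+ */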
+
+/** @} */ /* end group _mali_uk_memory */
+
+
+/** @addtogroup _mali_uk_pp U/K Fragment Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_pp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_number_of_cores(), @c number_of_total_cores
+ * will contain the total number of Fragment Processor cores in the system, and
+ * @c number_of_enabled_cores the number of cores currently enabled.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 number_of_total_cores; /**< [out] Total number of Fragment Processor cores in the system */
+ u32 number_of_enabled_cores; /**< [out] Number of enabled Fragment Processor cores */
+} _mali_uk_get_pp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_pp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_pp_core_version(), @c version contains
+ * the version that all Fragment Processor cores are compatible with.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_core_version version; /**< [out] version returned from core, see \ref _mali_core_version */
+ u32 padding;
+} _mali_uk_get_pp_core_version_s;
+
+/** @} */ /* end group _mali_uk_pp */
+
+
+/** @addtogroup _mali_uk_gp U/K Vertex Processor
+ * @{ */
+
+/** @brief Arguments for _mali_ukk_get_gp_number_of_cores()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_number_of_cores(), @c number_of_cores
+ * will contain the number of Vertex Processor cores in the system.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 number_of_cores; /**< [out] number of Vertex Processor cores in the system */
+} _mali_uk_get_gp_number_of_cores_s;
+
+/** @brief Arguments for _mali_ukk_get_gp_core_version()
+ *
+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open()
+ * - Upon successful return from _mali_ukk_get_gp_core_version(), @c version contains
+ * the version that all Vertex Processor cores are compatible with.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_core_version version; /**< [out] version returned from core, see \ref _mali_core_version */
+} _mali_uk_get_gp_core_version_s;
+
+/** @} */ /* end group _mali_uk_gp */
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 event_id; /**< [in] event id to register (see enum mali_profiling_events for values) */
+ u32 data[5]; /**< [in] event specific data */
+} _mali_uk_profiling_add_event_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 memory_usage; /**< [out] total memory usage */
+ u32 vaddr; /**< [in] mali address for the cow allocation */
+ s32 change_pages_nr; /**< [out] record the page number change for cow operation */
+} _mali_uk_profiling_memory_usage_get_s;
+
+
+/** @addtogroup _mali_uk_memory U/K Memory
+ * @{ */
+
+/** @brief Arguments to _mali_ukk_mem_mmap()
+ *
+ * Use of the phys_addr member depends on whether the driver is compiled for
+ * Mali-MMU or non-MMU:
+ * - in the nonMMU case, this is the physical address of the memory as seen by
+ * the CPU (which may be a constant offset from that used by Mali)
+ * - in the MMU case, this is the Mali Virtual base address of the memory to
+ * allocate, and the particular physical pages used to back the memory are
+ * entirely determined by _mali_ukk_mem_mmap(). The details of the physical pages
+ * are not reported to user-space for security reasons.
+ *
+ * The cookie member must be stored for use later when freeing the memory by
+ * calling _mali_ukk_mem_munmap(). In the Mali-MMU case, the cookie is secure.
+ *
+ * The ukk_private word must be set to zero when calling from user-space. On
+ * the kernel side, the OS implementation of the U/K interface can use it to
+ * communicate data to the OS implementation of the OSK layer. In particular,
+ * _mali_ukk_get_big_block() calls _mali_ukk_mem_mmap() directly, and
+ * will communicate its own ukk_private word through the ukk_private member
+ * here. The common code itself will not inspect or modify the ukk_private
+ * word, and so it may be safely used for whatever purposes necessary to
+ * integrate Mali Memory handling into the OS.
+ *
+ * The uku_private member is currently reserved for use by the user-side
+ * implementation of the U/K interface. Its value must be zero.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ void *mapping; /**< [out] Returns user-space virtual address for the mapping */
+ u32 size; /**< [in] Size of the requested mapping */
+ u32 phys_addr; /**< [in] Physical address - could be offset, depending on caller+callee convention */
+ mali_bool writeable;
+} _mali_uk_mem_mmap_s;
+
+/** @brief Arguments to _mali_ukk_mem_munmap()
+ *
+ * The mapping member must be the one returned from a previous call to
+ * _mali_ukk_mem_mmap(), and the size member must be the value originally
+ * supplied to the _mali_ukk_mem_mmap() call that returned that mapping.
+ *
+ * An error will be returned if an attempt is made to unmap only part of the
+ * originally obtained range, or to unmap more than was originally obtained.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ void *mapping; /**< [in] The mapping returned from mmap call */
+ u32 size; /**< [in] The size passed to mmap call */
+} _mali_uk_mem_munmap_s;
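+
+/*
+ * Pairing sketch (illustrative; mmap_args and ctx are hypothetical): a
+ * successful _mali_ukk_mem_mmap() is undone with one _mali_ukk_mem_munmap()
+ * carrying the very same mapping and size, since partial unmaps are rejected:
+ *
+ *   _mali_uk_mem_munmap_s unmap_args = { .ctx = ctx };
+ *   unmap_args.mapping = mmap_args.mapping;
+ *   unmap_args.size    = mmap_args.size;
+ *   _mali_ukk_mem_munmap(&unmap_args);
+ */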
+/** @} */ /* end group _mali_uk_memory */
+
+/** @defgroup _mali_uk_vsync U/K VSYNC Wait Reporting Module
+ * @{ */
+
+/** @brief VSYNC events
+ *
+ * These events are reported when the DDK starts to wait for vsync and when the
+ * vsync has occurred and the DDK can continue on the next frame.
+ */
+typedef enum _mali_uk_vsync_event {
+ _MALI_UK_VSYNC_EVENT_BEGIN_WAIT = 0,
+ _MALI_UK_VSYNC_EVENT_END_WAIT
+} _mali_uk_vsync_event;
+
+/** @brief Arguments to _mali_ukk_vsync_event()
+ *
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_uk_vsync_event event; /**< [in] VSYNC event type */
+} _mali_uk_vsync_event_report_s;
+
+/** @} */ /* end group _mali_uk_vsync */
+
+/** @defgroup _mali_uk_sw_counters_report U/K Software Counter Reporting
+ * @{ */
+
+/** @brief Software counter values
+ *
+ * Values recorded for each of the software counters during a single renderpass.
+ */
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u64 counters; /**< [in] Pointer to the array of u32 counter values */
+ u32 num_counters; /**< [in] The number of elements in counters array */
+} _mali_uk_sw_counters_report_s;
+
+/** @} */ /* end group _mali_uk_sw_counters_report */
+
+/** @defgroup _mali_uk_timeline U/K Mali Timeline
+ * @{ */
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u32 timeline; /**< [in] timeline id */
+ u32 point; /**< [out] latest point on timeline */
+} _mali_uk_timeline_get_latest_point_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_uk_fence_t fence; /**< [in] fence */
+ u32 timeout; /**< [in] timeout (0 for no wait, -1 for blocking) */
+ u32 status; /**< [out] status of fence (1 if signaled, 0 if timeout) */
+} _mali_uk_timeline_wait_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ _mali_uk_fence_t fence; /**< [in] mali fence to create linux sync fence from */
+ s32 sync_fd; /**< [out] file descriptor for new linux sync fence */
+} _mali_uk_timeline_create_sync_fence_s;
+
+/** @} */ /* end group _mali_uk_timeline */
+
+/** @} */ /* end group u_k_api */
+
+/** @} */ /* end group uddapi */
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ s32 stream_fd; /**< [in] The profiling kernel base stream fd handle */
+} _mali_uk_profiling_stream_fd_get_s;
+
+typedef struct {
+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */
+ u64 control_packet_data; /**< [in] the control packet data for control settings */
+ u32 control_packet_size; /**< [in] The control packet size */
+ u64 response_packet_data; /**< [out] The response packet data */
+ u32 response_packet_size; /**< [in,out] The response packet size */
+} _mali_uk_profiling_control_set_s;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_UK_TYPES_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/license/gpl/mali_kernel_license.h b/drivers/gpu/arm/utgard/linux/license/gpl/mali_kernel_license.h
new file mode 100644
index 000000000000..c88c992e859b
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/license/gpl/mali_kernel_license.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010, 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_license.h
+ * Defines for the macro MODULE_LICENSE.
+ */
+
+#ifndef __MALI_KERNEL_LICENSE_H__
+#define __MALI_KERNEL_LICENSE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MALI_KERNEL_LINUX_LICENSE "GPL"
+#define MALI_LICENSE_IS_GPL 1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LICENSE_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_device_pause_resume.c b/drivers/gpu/arm/utgard/linux/mali_device_pause_resume.c
new file mode 100644
index 000000000000..37076a2eaf67
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_device_pause_resume.c
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_device_pause_resume.c
+ * Implementation of the Mali pause/resume functionality
+ */
+
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+
+void mali_dev_pause(void)
+{
+ /*
+ * Deactivate all groups to prevent the hardware from being touched
+ * while the Mali device is paused
+ */
+ mali_pm_os_suspend(MALI_FALSE);
+}
+
+EXPORT_SYMBOL(mali_dev_pause);
+
+void mali_dev_resume(void)
+{
+ mali_pm_os_resume();
+}
+
+EXPORT_SYMBOL(mali_dev_resume);
diff --git a/drivers/gpu/arm/utgard/linux/mali_kernel_linux.c b/drivers/gpu/arm/utgard/linux/mali_kernel_linux.c
new file mode 100644
index 000000000000..34cb3d7ca95a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_kernel_linux.c
@@ -0,0 +1,941 @@
+/**
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_kernel_linux.c
+ * Implementation of the Linux device driver entrypoints
+ */
+#include <linux/module.h> /* kernel module definitions */
+#include <linux/fs.h> /* file system operations */
+#include <linux/cdev.h> /* character device definitions */
+#include <linux/mm.h> /* memory manager definitions */
+#include <linux/mali/mali_utgard_ioctl.h>
+#include <linux/version.h>
+#include <linux/device.h>
+#include "mali_kernel_license.h"
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/bug.h>
+#include <linux/of.h>
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_core.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_ukk.h"
+#include "mali_ukk_wrappers.h"
+#include "mali_kernel_sysfs.h"
+#include "mali_pm.h"
+#include "mali_kernel_license.h"
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_swap_alloc.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include "mali_profiling_internal.h"
+#endif
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+#include "mali_osk_profiling.h"
+#include "mali_dvfs_policy.h"
+
+static int is_first_resume = 1;
+/*Store the clk and vol for boot/insmod and mali_resume*/
+static struct mali_gpu_clk_item mali_gpu_clk[2];
+#endif
+
+/* Streamline support for the Mali driver */
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_MALI400_PROFILING)
+/* Ask Linux to create the tracepoints */
+#define CREATE_TRACE_POINTS
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_hw_counter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counters);
+#endif /* CONFIG_TRACEPOINTS */
+
+/* from the __malidrv_build_info.c file that is generated during build */
+extern const char *__malidrv_build_info(void);
+
+/* Module parameter to control log level */
+int mali_debug_level = 2;
+module_param(mali_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output");
+
+extern int mali_max_job_runtime;
+module_param(mali_max_job_runtime, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_job_runtime, "Maximum allowed job runtime in msecs.\nJobs will be killed after this no matter what");
+
+extern int mali_l2_max_reads;
+module_param(mali_l2_max_reads, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_l2_max_reads, "Maximum reads for Mali L2 cache");
+
+extern unsigned int mali_dedicated_mem_start;
+module_param(mali_dedicated_mem_start, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_start, "Physical start address of dedicated Mali GPU memory.");
+
+extern unsigned int mali_dedicated_mem_size;
+module_param(mali_dedicated_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_dedicated_mem_size, "Size of dedicated Mali GPU memory.");
+
+extern unsigned int mali_shared_mem_size;
+module_param(mali_shared_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_shared_mem_size, "Size of shared Mali GPU memory.");
+
+#if defined(CONFIG_MALI400_PROFILING)
+extern int mali_boot_profiling;
+module_param(mali_boot_profiling, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_boot_profiling, "Start profiling as a part of Mali driver initialization");
+#endif
+
+extern int mali_max_pp_cores_group_1;
+module_param(mali_max_pp_cores_group_1, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_1, "Limit the number of PP cores to use from first PP group.");
+
+extern int mali_max_pp_cores_group_2;
+module_param(mali_max_pp_cores_group_2, int, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_pp_cores_group_2, "Limit the number of PP cores to use from second PP group (Mali-450 only).");
+
+extern unsigned int mali_mem_swap_out_threshold_value;
+module_param(mali_mem_swap_out_threshold_value, uint, S_IRUSR | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_mem_swap_out_threshold_value, "Threshold value used to limit how much swappable memory cached in Mali driver.");
+
+#if defined(CONFIG_MALI_DVFS)
+/** The max FPS, the same as the display VSYNC rate (default 60); can be set as a module parameter */
+extern int mali_max_system_fps;
+module_param(mali_max_system_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_max_system_fps, "Max system fps the same as display VSYNC.");
+
+/** A lower limit on the desired FPS (default 58); can be set as a module parameter */
+extern int mali_desired_fps;
+module_param(mali_desired_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH);
+MODULE_PARM_DESC(mali_desired_fps, "User-desired FPS, a bit lower than mali_max_system_fps");
+#endif
+
+#if MALI_ENABLE_CPU_CYCLES
+#include <linux/cpumask.h>
+#include <linux/timer.h>
+#include <asm/smp.h>
+static struct timer_list mali_init_cpu_clock_timers[8];
+static u32 mali_cpu_clock_last_value[8] = {0,};
+#endif
+
+/* Export symbols from common code: mali_user_settings.c */
+#include "mali_user_settings_db.h"
+EXPORT_SYMBOL(mali_set_user_setting);
+EXPORT_SYMBOL(mali_get_user_setting);
+
+static char mali_dev_name[] = "mali"; /* should be const, but the functions we call require non-const */
+
+/* This driver only supports one Mali device, and this variable stores this single platform device */
+struct platform_device *mali_platform_device = NULL;
+
+/* This driver only supports one Mali device, and this variable stores the exposed misc device (/dev/mali) */
+static struct miscdevice mali_miscdevice = { 0, };
+
+static int mali_miscdevice_register(struct platform_device *pdev);
+static void mali_miscdevice_unregister(void);
+
+static int mali_open(struct inode *inode, struct file *filp);
+static int mali_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static int mali_probe(struct platform_device *pdev);
+static int mali_remove(struct platform_device *pdev);
+
+static int mali_driver_suspend_scheduler(struct device *dev);
+static int mali_driver_resume_scheduler(struct device *dev);
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev);
+static int mali_driver_runtime_resume(struct device *dev);
+static int mali_driver_runtime_idle(struct device *dev);
+#endif
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#if defined(CONFIG_MALI_DT)
+extern int mali_platform_device_init(struct platform_device *device);
+extern int mali_platform_device_deinit(struct platform_device *device);
+#else
+extern int mali_platform_device_register(void);
+extern int mali_platform_device_unregister(void);
+#endif
+#endif
+
+/* Linux power management operations provided by the Mali device driver */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+struct pm_ext_ops mali_dev_ext_pm_ops = {
+ .base =
+ {
+ .suspend = mali_driver_suspend_scheduler,
+ .resume = mali_driver_resume_scheduler,
+ .freeze = mali_driver_suspend_scheduler,
+ .thaw = mali_driver_resume_scheduler,
+ },
+};
+#else
+static const struct dev_pm_ops mali_dev_pm_ops = {
+#ifdef CONFIG_PM_RUNTIME
+ .runtime_suspend = mali_driver_runtime_suspend,
+ .runtime_resume = mali_driver_runtime_resume,
+ .runtime_idle = mali_driver_runtime_idle,
+#endif
+ .suspend = mali_driver_suspend_scheduler,
+ .resume = mali_driver_resume_scheduler,
+ .freeze = mali_driver_suspend_scheduler,
+ .thaw = mali_driver_resume_scheduler,
+ .poweroff = mali_driver_suspend_scheduler,
+};
+#endif
+
+#ifdef CONFIG_MALI_DT
+static struct of_device_id base_dt_ids[] = {
+ {.compatible = "arm,mali-300"},
+ {.compatible = "arm,mali-400"},
+ {.compatible = "arm,mali-450"},
+ {.compatible = "arm,mali-470"},
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, base_dt_ids);
+#endif
+
+/* The Mali device driver struct */
+static struct platform_driver mali_platform_driver = {
+ .probe = mali_probe,
+ .remove = mali_remove,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29))
+ .pm = &mali_dev_ext_pm_ops,
+#endif
+ .driver =
+ {
+ .name = MALI_GPU_NAME_UTGARD,
+ .owner = THIS_MODULE,
+ .bus = &platform_bus_type,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29))
+ .pm = &mali_dev_pm_ops,
+#endif
+#ifdef CONFIG_MALI_DT
+ .of_match_table = of_match_ptr(base_dt_ids),
+#endif
+ },
+};
+
+/* Linux misc device operations (/dev/mali) */
+struct file_operations mali_fops = {
+ .owner = THIS_MODULE,
+ .open = mali_open,
+ .release = mali_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+ .unlocked_ioctl = mali_ioctl,
+#else
+ .ioctl = mali_ioctl,
+#endif
+ .compat_ioctl = mali_ioctl,
+ .mmap = mali_mmap
+};
+
+#if MALI_ENABLE_CPU_CYCLES
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64)
+{
+ /* The CPU assembly reference used is: ARM Architecture Reference Manual ARMv7-AR C.b */
+ u32 write_value;
+
+ /* See B4.1.116 PMCNTENSET, Performance Monitors Count Enable Set register, VMSA */
+ /* setting p15 c9 c12 1 to 0x8000000f==CPU_CYCLE_ENABLE |EVENT_3_ENABLE|EVENT_2_ENABLE|EVENT_1_ENABLE|EVENT_0_ENABLE */
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
+
+
+ /* See B4.1.117 PMCR, Performance Monitors Control Register. Writing to p15, c9, c12, 0 */
+ write_value = 1 << 0; /* Bit 0 set. Enable counters */
+ if (reset) {
+ write_value |= 1 << 1; /* Reset event counters */
+ write_value |= 1 << 2; /* Reset cycle counter */
+ }
+ if (enable_divide_by_64) {
+ write_value |= 1 << 3; /* Enable the Clock divider by 64 */
+ }
+ write_value |= 1 << 4; /* Export enable. Not needed */
+ asm volatile("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(write_value));
+
+ /* PMOVSR Overflow Flag Status Register - Clear Clock and Event overflows */
+ asm volatile("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
+
+
+ /* See B4.1.124 PMUSERENR - setting p15 c9 c14 to 1" */
+ /* User mode access to the Performance Monitors enabled. */
+ /* Lets User space read cpu clock cycles */
+ asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
+}
+
+/** A timer function that configures the cycle clock counter on current CPU.
+ * The function \a mali_init_cpu_time_counters_on_all_cpus sets up this
+ * function to trigger on all CPUs during module load.
+ */
+static void mali_init_cpu_clock_timer_func(unsigned long data)
+{
+ int reset_counters, enable_divide_clock_counter_by_64;
+ int current_cpu = raw_smp_processor_id();
+ unsigned int sample0;
+ unsigned int sample1;
+
+ MALI_IGNORE(data);
+
+ reset_counters = 1;
+ enable_divide_clock_counter_by_64 = 0;
+ mali_init_cpu_time_counters(reset_counters, enable_divide_clock_counter_by_64);
+
+ sample0 = mali_get_cpu_cyclecount();
+ sample1 = mali_get_cpu_cyclecount();
+
+ MALI_DEBUG_PRINT(3, ("Init Cpu %d cycle counter- First two samples: %08x %08x \n", current_cpu, sample0, sample1));
+}
+
+/** A timer function for storing the current cycle count on all CPUs.
+ * Used for checking if the clocks have similar values or if they are drifting.
+ */
+static void mali_print_cpu_clock_timer_func(unsigned long data)
+{
+ int current_cpu = raw_smp_processor_id();
+ unsigned int sample0;
+
+ MALI_IGNORE(data);
+ sample0 = mali_get_cpu_cyclecount();
+ if (current_cpu < 8) {
+ mali_cpu_clock_last_value[current_cpu] = sample0;
+ }
+}
+
+/** Init the performance registers on all CPUs to count clock cycles.
+ * For init \a print_only should be 0.
+ * If \a print_only is 1, it will instead print the current clock value of all CPUs.
+ */
+void mali_init_cpu_time_counters_on_all_cpus(int print_only)
+{
+ int i = 0;
+ int cpu_number;
+ int jiffies_trigger;
+ int jiffies_wait;
+
+ jiffies_wait = 2;
+ jiffies_trigger = jiffies + jiffies_wait;
+
+ for (i = 0 ; i < 8 ; i++) {
+ init_timer(&mali_init_cpu_clock_timers[i]);
+ if (print_only) mali_init_cpu_clock_timers[i].function = mali_print_cpu_clock_timer_func;
+ else mali_init_cpu_clock_timers[i].function = mali_init_cpu_clock_timer_func;
+ mali_init_cpu_clock_timers[i].expires = jiffies_trigger ;
+ }
+ cpu_number = cpumask_first(cpu_online_mask);
+ for (i = 0 ; i < 8 ; i++) {
+ int next_cpu;
+ add_timer_on(&mali_init_cpu_clock_timers[i], cpu_number);
+ next_cpu = cpumask_next(cpu_number, cpu_online_mask);
+ if (next_cpu >= nr_cpu_ids) break;
+ cpu_number = next_cpu;
+ }
+
+ while (jiffies_wait) jiffies_wait = schedule_timeout_uninterruptible(jiffies_wait);
+
+ for (i = 0 ; i < 8 ; i++) {
+ del_timer_sync(&mali_init_cpu_clock_timers[i]);
+ }
+
+ if (print_only) {
+ if ((0 == mali_cpu_clock_last_value[2]) && (0 == mali_cpu_clock_last_value[3])) {
+ /* Diff can be printed if we want to check if the clocks are in sync
+ int diff = mali_cpu_clock_last_value[0] - mali_cpu_clock_last_value[1];*/
+ MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1]));
+ } else {
+ MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1], mali_cpu_clock_last_value[2], mali_cpu_clock_last_value[3]));
+ }
+ }
+}
+#endif
+
+int mali_module_init(void)
+{
+ int err = 0;
+
+ MALI_DEBUG_PRINT(2, ("Inserting Mali v%d device driver. \n", _MALI_API_VERSION));
+ MALI_DEBUG_PRINT(2, ("Compiled: %s, time: %s.\n", __DATE__, __TIME__));
+ MALI_DEBUG_PRINT(2, ("Driver revision: %s\n", SVN_REV_STRING));
+
+#if MALI_ENABLE_CPU_CYCLES
+ mali_init_cpu_time_counters_on_all_cpus(0);
+ MALI_DEBUG_PRINT(2, ("CPU cycle counter setup complete\n"));
+ /* Printing the current cpu counters */
+ mali_init_cpu_time_counters_on_all_cpus(1);
+#endif
+
+ /* Initialize module wide settings */
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+ MALI_DEBUG_PRINT(2, ("mali_module_init() registering device\n"));
+ err = mali_platform_device_register();
+ if (0 != err) {
+ return err;
+ }
+#endif
+#endif
+
+ MALI_DEBUG_PRINT(2, ("mali_module_init() registering driver\n"));
+
+ err = platform_driver_register(&mali_platform_driver);
+
+ if (0 != err) {
+ MALI_DEBUG_PRINT(2, ("mali_module_init() Failed to register driver (%d)\n", err));
+#ifdef MALI_FAKE_PLATFORM_DEVICE
+#ifndef CONFIG_MALI_DT
+ mali_platform_device_unregister();
+#endif
+#endif
+ mali_platform_device = NULL;
+ return err;
+ }
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+ err = _mali_internal_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE);
+ if (0 != err) {
+ /* No big deal if we weren't able to initialize the profiling */
+ MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n"));
+ }
+#endif
+
+ /* Tracing the current frequency and voltage from boot/insmod*/
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+ /* Just call mali_get_current_gpu_clk_item() to record current clk info. */
+ mali_get_current_gpu_clk_item(&mali_gpu_clk[0]);
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+ mali_gpu_clk[0].clock,
+ mali_gpu_clk[0].vol / 1000,
+ 0, 0, 0);
+#endif
+
+ MALI_PRINT(("Mali device driver loaded\n"));
+
+ return 0; /* Success */
+}
+
+void mali_module_exit(void)
+{
+ MALI_DEBUG_PRINT(2, ("Unloading Mali v%d device driver.\n", _MALI_API_VERSION));
+
+ MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering driver\n"));
+
+ platform_driver_unregister(&mali_platform_driver);
+
+#if defined(MALI_FAKE_PLATFORM_DEVICE)
+#ifndef CONFIG_MALI_DT
+ MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering device\n"));
+ mali_platform_device_unregister();
+#endif
+#endif
+
+ /* Tracing the current frequency and voltage from rmmod*/
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+ 0,
+ 0,
+ 0, 0, 0);
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+ _mali_internal_profiling_term();
+#endif
+
+ MALI_PRINT(("Mali device driver unloaded\n"));
+}
+
+static int mali_probe(struct platform_device *pdev)
+{
+ int err;
+
+ MALI_DEBUG_PRINT(2, ("mali_probe(): Called for platform device %s\n", pdev->name));
+
+ if (NULL != mali_platform_device) {
+ /* Already connected to a device, return error */
+ MALI_PRINT_ERROR(("mali_probe(): The Mali driver is already connected with a Mali device."));
+ return -EEXIST;
+ }
+
+ mali_platform_device = pdev;
+
+#ifdef CONFIG_MALI_DT
+ /* If we use DT to initialize our DDK, we have to do some preparation first. */
+ err = mali_platform_device_init(mali_platform_device);
+ if (0 != err) {
+ MALI_PRINT_ERROR(("mali_probe(): Failed to initialize platform device."));
+ return -EFAULT;
+ }
+#endif
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_wq_init()) {
+ /* Initialize the Mali GPU HW specified by pdev */
+ if (_MALI_OSK_ERR_OK == mali_initialize_subsystems()) {
+ /* Register a misc device (so we are accessible from user space) */
+ err = mali_miscdevice_register(pdev);
+ if (0 == err) {
+ /* Setup sysfs entries */
+ err = mali_sysfs_register(mali_dev_name);
+
+ if (0 == err) {
+ MALI_DEBUG_PRINT(2, ("mali_probe(): Successfully initialized driver for platform device %s\n", pdev->name));
+
+ return 0;
+ } else {
+ MALI_PRINT_ERROR(("mali_probe(): failed to register sysfs entries"));
+ }
+ mali_miscdevice_unregister();
+ } else {
+ MALI_PRINT_ERROR(("mali_probe(): failed to register Mali misc device."));
+ }
+ mali_terminate_subsystems();
+ } else {
+ MALI_PRINT_ERROR(("mali_probe(): Failed to initialize Mali device driver."));
+ }
+ _mali_osk_wq_term();
+ }
+
+ mali_platform_device = NULL;
+ return -EFAULT;
+}
+
+static int mali_remove(struct platform_device *pdev)
+{
+ MALI_DEBUG_PRINT(2, ("mali_remove() called for platform device %s\n", pdev->name));
+ mali_sysfs_unregister();
+ mali_miscdevice_unregister();
+ mali_terminate_subsystems();
+ _mali_osk_wq_term();
+#ifdef CONFIG_MALI_DT
+ mali_platform_device_deinit(mali_platform_device);
+#endif
+ mali_platform_device = NULL;
+ return 0;
+}
+
+static int mali_miscdevice_register(struct platform_device *pdev)
+{
+ int err;
+
+ mali_miscdevice.minor = MISC_DYNAMIC_MINOR;
+ mali_miscdevice.name = mali_dev_name;
+ mali_miscdevice.fops = &mali_fops;
+ mali_miscdevice.parent = get_device(&pdev->dev);
+
+ err = misc_register(&mali_miscdevice);
+ if (0 != err) {
+ MALI_PRINT_ERROR(("Failed to register misc device, misc_register() returned %d\n", err));
+ }
+
+ return err;
+}
+
+static void mali_miscdevice_unregister(void)
+{
+ misc_deregister(&mali_miscdevice);
+}
+
+static int mali_driver_suspend_scheduler(struct device *dev)
+{
+ mali_pm_os_suspend(MALI_TRUE);
+ /* Tracing the frequency and voltage after mali is suspended */
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+ 0,
+ 0,
+ 0, 0, 0);
+ return 0;
+}
+
+static int mali_driver_resume_scheduler(struct device *dev)
+{
+ /* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+ /* Just call mali_get_current_gpu_clk_item() once to record current clk info. */
+ if (is_first_resume == 1) {
+ mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+ is_first_resume = 0;
+ }
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+ mali_gpu_clk[1].clock,
+ mali_gpu_clk[1].vol / 1000,
+ 0, 0, 0);
+#endif
+ mali_pm_os_resume();
+ return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_driver_runtime_suspend(struct device *dev)
+{
+ if (MALI_TRUE == mali_pm_runtime_suspend()) {
+ /* Tracing the frequency and voltage after mali is suspended */
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+ 0,
+ 0,
+ 0, 0, 0);
+
+ return 0;
+ } else {
+ return -EBUSY;
+ }
+}
+
+static int mali_driver_runtime_resume(struct device *dev)
+{
+ /* Tracing the frequency and voltage after mali is resumed */
+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS)
+ /* Just call mali_get_current_gpu_clk_item() once to record current clk info. */
+ if (is_first_resume == 1) {
+ mali_get_current_gpu_clk_item(&mali_gpu_clk[1]);
+ is_first_resume = 0;
+ }
+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE |
+ MALI_PROFILING_EVENT_CHANNEL_GPU |
+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE,
+ mali_gpu_clk[1].clock,
+ mali_gpu_clk[1].vol / 1000,
+ 0, 0, 0);
+#endif
+
+ mali_pm_runtime_resume();
+ return 0;
+}
+
+static int mali_driver_runtime_idle(struct device *dev)
+{
+ /* Nothing to do */
+ return 0;
+}
+#endif
+
+static int mali_open(struct inode *inode, struct file *filp)
+{
+ struct mali_session_data *session_data;
+ _mali_osk_errcode_t err;
+
+ /* input validation */
+ if (mali_miscdevice.minor != iminor(inode)) {
+ MALI_PRINT_ERROR(("mali_open() Minor does not match\n"));
+ return -ENODEV;
+ }
+
+ /* allocated struct to track this session */
+ err = _mali_ukk_open((void **)&session_data);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ /* initialize file pointer */
+ filp->f_pos = 0;
+
+ /* link in our session data */
+ filp->private_data = (void *)session_data;
+
+ filp->f_mapping = mali_mem_swap_get_global_swap_file()->f_mapping;
+
+ return 0;
+}
+
+static int mali_release(struct inode *inode, struct file *filp)
+{
+ _mali_osk_errcode_t err;
+
+ /* input validation */
+ if (mali_miscdevice.minor != iminor(inode)) {
+ MALI_PRINT_ERROR(("mali_release() Minor does not match\n"));
+ return -ENODEV;
+ }
+
+ err = _mali_ukk_close((void **)&filp->private_data);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ return 0;
+}
+
+int map_errcode(_mali_osk_errcode_t err)
+{
+ switch (err) {
+ case _MALI_OSK_ERR_OK :
+ return 0;
+ case _MALI_OSK_ERR_FAULT:
+ return -EFAULT;
+ case _MALI_OSK_ERR_INVALID_FUNC:
+ return -ENOTTY;
+ case _MALI_OSK_ERR_INVALID_ARGS:
+ return -EINVAL;
+ case _MALI_OSK_ERR_NOMEM:
+ return -ENOMEM;
+ case _MALI_OSK_ERR_TIMEOUT:
+ return -ETIMEDOUT;
+ case _MALI_OSK_ERR_RESTARTSYSCALL:
+ return -ERESTARTSYS;
+ case _MALI_OSK_ERR_ITEM_NOT_FOUND:
+ return -ENOENT;
+ default:
+ return -EFAULT;
+ }
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int mali_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+ int err;
+ struct mali_session_data *session_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+ /* inode not used */
+ (void)inode;
+#endif
+
+ MALI_DEBUG_PRINT(7, ("Ioctl received 0x%08X 0x%08lX\n", cmd, arg));
+
+ session_data = (struct mali_session_data *)filp->private_data;
+ if (NULL == session_data) {
+ MALI_DEBUG_PRINT(7, ("filp->private_data was NULL\n"));
+ return -ENOTTY;
+ }
+
+ if (NULL == (void *)arg) {
+ MALI_DEBUG_PRINT(7, ("arg was NULL\n"));
+ return -ENOTTY;
+ }
+
+ switch (cmd) {
+ case MALI_IOC_WAIT_FOR_NOTIFICATION:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_wait_for_notification_s), sizeof(u64)));
+ err = wait_for_notification_wrapper(session_data, (_mali_uk_wait_for_notification_s __user *)arg);
+ break;
+
+ case MALI_IOC_GET_API_VERSION_V2:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_api_version_v2_s), sizeof(u64)));
+ err = get_api_version_v2_wrapper(session_data, (_mali_uk_get_api_version_v2_s __user *)arg);
+ break;
+
+ case MALI_IOC_GET_API_VERSION:
+ err = get_api_version_wrapper(session_data, (_mali_uk_get_api_version_s __user *)arg);
+ break;
+
+ case MALI_IOC_POST_NOTIFICATION:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_post_notification_s), sizeof(u64)));
+ err = post_notification_wrapper(session_data, (_mali_uk_post_notification_s __user *)arg);
+ break;
+
+ case MALI_IOC_GET_USER_SETTINGS:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_user_settings_s), sizeof(u64)));
+ err = get_user_settings_wrapper(session_data, (_mali_uk_get_user_settings_s __user *)arg);
+ break;
+
+ case MALI_IOC_REQUEST_HIGH_PRIORITY:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_request_high_priority_s), sizeof(u64)));
+ err = request_high_priority_wrapper(session_data, (_mali_uk_request_high_priority_s __user *)arg);
+ break;
+
+ case MALI_IOC_PENDING_SUBMIT:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pending_submit_s), sizeof(u64)));
+ err = pending_submit_wrapper(session_data, (_mali_uk_pending_submit_s __user *)arg);
+ break;
+
+#if defined(CONFIG_MALI400_PROFILING)
+ case MALI_IOC_PROFILING_ADD_EVENT:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_add_event_s), sizeof(u64)));
+ err = profiling_add_event_wrapper(session_data, (_mali_uk_profiling_add_event_s __user *)arg);
+ break;
+
+ case MALI_IOC_PROFILING_REPORT_SW_COUNTERS:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_sw_counters_report_s), sizeof(u64)));
+ err = profiling_report_sw_counters_wrapper(session_data, (_mali_uk_sw_counters_report_s __user *)arg);
+ break;
+
+ case MALI_IOC_PROFILING_STREAM_FD_GET:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_stream_fd_get_s), sizeof(u64)));
+ err = profiling_get_stream_fd_wrapper(session_data, (_mali_uk_profiling_stream_fd_get_s __user *)arg);
+ break;
+
+ case MALI_IOC_PROILING_CONTROL_SET:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_control_set_s), sizeof(u64)));
+ err = profiling_control_set_wrapper(session_data, (_mali_uk_profiling_control_set_s __user *)arg);
+ break;
+#else
+
+ case MALI_IOC_PROFILING_ADD_EVENT: /* FALL-THROUGH */
+ case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: /* FALL-THROUGH */
+ MALI_DEBUG_PRINT(2, ("Profiling not supported\n"));
+ err = -ENOTTY;
+ break;
+#endif
+
+ case MALI_IOC_PROFILING_MEMORY_USAGE_GET:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_memory_usage_get_s), sizeof(u64)));
+ err = mem_usage_get_wrapper(session_data, (_mali_uk_profiling_memory_usage_get_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_ALLOC:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_alloc_mem_s), sizeof(u64)));
+ err = mem_alloc_wrapper(session_data, (_mali_uk_alloc_mem_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_FREE:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_free_mem_s), sizeof(u64)));
+ err = mem_free_wrapper(session_data, (_mali_uk_free_mem_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_BIND:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_bind_mem_s), sizeof(u64)));
+ err = mem_bind_wrapper(session_data, (_mali_uk_bind_mem_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_UNBIND:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_unbind_mem_s), sizeof(u64)));
+ err = mem_unbind_wrapper(session_data, (_mali_uk_unbind_mem_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_COW:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_mem_s), sizeof(u64)));
+ err = mem_cow_wrapper(session_data, (_mali_uk_cow_mem_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_COW_MODIFY_RANGE:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_modify_range_s), sizeof(u64)));
+ err = mem_cow_modify_range_wrapper(session_data, (_mali_uk_cow_modify_range_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_RESIZE:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_resize_s), sizeof(u64)));
+ err = mem_resize_mem_wrapper(session_data, (_mali_uk_mem_resize_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_WRITE_SAFE:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_write_safe_s), sizeof(u64)));
+ err = mem_write_safe_wrapper(session_data, (_mali_uk_mem_write_safe_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_query_mmu_page_table_dump_size_s), sizeof(u64)));
+ err = mem_query_mmu_page_table_dump_size_wrapper(session_data, (_mali_uk_query_mmu_page_table_dump_size_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dump_mmu_page_table_s), sizeof(u64)));
+ err = mem_dump_mmu_page_table_wrapper(session_data, (_mali_uk_dump_mmu_page_table_s __user *)arg);
+ break;
+
+ case MALI_IOC_MEM_DMA_BUF_GET_SIZE:
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dma_buf_get_size_s), sizeof(u64)));
+ err = mali_dma_buf_get_size(session_data, (_mali_uk_dma_buf_get_size_s __user *)arg);
+#else
+ MALI_DEBUG_PRINT(2, ("DMA-BUF not supported\n"));
+ err = -ENOTTY;
+#endif
+ break;
+
+ case MALI_IOC_PP_START_JOB:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_start_job_s), sizeof(u64)));
+ err = pp_start_job_wrapper(session_data, (_mali_uk_pp_start_job_s __user *)arg);
+ break;
+
+ case MALI_IOC_PP_AND_GP_START_JOB:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_and_gp_start_job_s), sizeof(u64)));
+ err = pp_and_gp_start_job_wrapper(session_data, (_mali_uk_pp_and_gp_start_job_s __user *)arg);
+ break;
+
+ case MALI_IOC_PP_NUMBER_OF_CORES_GET:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_number_of_cores_s), sizeof(u64)));
+ err = pp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_pp_number_of_cores_s __user *)arg);
+ break;
+
+ case MALI_IOC_PP_CORE_VERSION_GET:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_core_version_s), sizeof(u64)));
+ err = pp_get_core_version_wrapper(session_data, (_mali_uk_get_pp_core_version_s __user *)arg);
+ break;
+
+ case MALI_IOC_PP_DISABLE_WB:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_disable_wb_s), sizeof(u64)));
+ err = pp_disable_wb_wrapper(session_data, (_mali_uk_pp_disable_wb_s __user *)arg);
+ break;
+
+ case MALI_IOC_GP2_START_JOB:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_start_job_s), sizeof(u64)));
+ err = gp_start_job_wrapper(session_data, (_mali_uk_gp_start_job_s __user *)arg);
+ break;
+
+ case MALI_IOC_GP2_NUMBER_OF_CORES_GET:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_number_of_cores_s), sizeof(u64)));
+ err = gp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_gp_number_of_cores_s __user *)arg);
+ break;
+
+ case MALI_IOC_GP2_CORE_VERSION_GET:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_core_version_s), sizeof(u64)));
+ err = gp_get_core_version_wrapper(session_data, (_mali_uk_get_gp_core_version_s __user *)arg);
+ break;
+
+ case MALI_IOC_GP2_SUSPEND_RESPONSE:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_suspend_response_s), sizeof(u64)));
+ err = gp_suspend_response_wrapper(session_data, (_mali_uk_gp_suspend_response_s __user *)arg);
+ break;
+
+ case MALI_IOC_VSYNC_EVENT_REPORT:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_vsync_event_report_s), sizeof(u64)));
+ err = vsync_event_report_wrapper(session_data, (_mali_uk_vsync_event_report_s __user *)arg);
+ break;
+
+ case MALI_IOC_TIMELINE_GET_LATEST_POINT:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_get_latest_point_s), sizeof(u64)));
+ err = timeline_get_latest_point_wrapper(session_data, (_mali_uk_timeline_get_latest_point_s __user *)arg);
+ break;
+ case MALI_IOC_TIMELINE_WAIT:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_wait_s), sizeof(u64)));
+ err = timeline_wait_wrapper(session_data, (_mali_uk_timeline_wait_s __user *)arg);
+ break;
+ case MALI_IOC_TIMELINE_CREATE_SYNC_FENCE:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_create_sync_fence_s), sizeof(u64)));
+ err = timeline_create_sync_fence_wrapper(session_data, (_mali_uk_timeline_create_sync_fence_s __user *)arg);
+ break;
+ case MALI_IOC_SOFT_JOB_START:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_start_s), sizeof(u64)));
+ err = soft_job_start_wrapper(session_data, (_mali_uk_soft_job_start_s __user *)arg);
+ break;
+ case MALI_IOC_SOFT_JOB_SIGNAL:
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_signal_s), sizeof(u64)));
+ err = soft_job_signal_wrapper(session_data, (_mali_uk_soft_job_signal_s __user *)arg);
+ break;
+
+ default:
+ MALI_DEBUG_PRINT(2, ("No handler for ioctl 0x%08X 0x%08lX\n", cmd, arg));
+ err = -ENOTTY;
+	}
+
+ return err;
+}
+
+
+module_init(mali_module_init);
+module_exit(mali_module_exit);
+
+MODULE_LICENSE(MALI_KERNEL_LINUX_LICENSE);
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(SVN_REV_STRING);
diff --git a/drivers/gpu/arm/utgard/linux/mali_kernel_linux.h b/drivers/gpu/arm/utgard/linux/mali_kernel_linux.h
new file mode 100644
index 000000000000..f6042712d1e3
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_kernel_linux.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_LINUX_H__
+#define __MALI_KERNEL_LINUX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/cdev.h> /* character device definitions */
+#include <linux/idr.h>
+#include <linux/rbtree.h>
+#include "mali_kernel_license.h"
+#include "mali_osk_types.h"
+
+extern struct platform_device *mali_platform_device;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_LINUX_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_kernel_sysfs.c b/drivers/gpu/arm/utgard/linux/mali_kernel_sysfs.c
new file mode 100644
index 000000000000..4f06ca14bf48
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_kernel_sysfs.c
@@ -0,0 +1,1410 @@
+/**
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+
+/**
+ * @file mali_kernel_sysfs.c
+ * Implementation of some sysfs data exports
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include "mali_kernel_license.h"
+#include "mali_kernel_common.h"
+#include "mali_ukk.h"
+
+#if MALI_LICENSE_IS_GPL
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_sysfs.h"
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+#include <linux/slab.h>
+#include "mali_osk_profiling.h"
+#endif
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_pm.h"
+#include "mali_pmu.h"
+#include "mali_group.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_hw_core.h"
+#include "mali_kernel_core.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+#include "mali_gp_job.h"
+#include "mali_pp_job.h"
+#include "mali_executor.h"
+
+#define PRIVATE_DATA_COUNTER_MAKE_GP(src) (src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP(src) ((1 << 24) | src)
+#define PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(src, sub_job) ((1 << 24) | (1 << 16) | (sub_job << 8) | src)
+#define PRIVATE_DATA_COUNTER_IS_PP(a) ((((a) >> 24) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SRC(a) (a & 0xFF)
+#define PRIVATE_DATA_COUNTER_IS_SUB_JOB(a) ((((a) >> 16) & 0xFF) ? MALI_TRUE : MALI_FALSE)
+#define PRIVATE_DATA_COUNTER_GET_SUB_JOB(a) (((a) >> 8) & 0xFF)
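+
+/*
+ * Illustrative note (not part of the original sources): the cookie packed
+ * by the macros above has the layout
+ *   bit 24      - set for PP counters, clear for GP counters
+ *   bit 16      - set when a specific PP sub job is targeted
+ *   bits 15..8  - sub job index
+ *   bits  7..0  - counter source id (0 or 1)
+ * It is stored as the debugfs file's private data and decoded on access.
+ */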
+
+#define POWER_BUFFER_SIZE 3
+
+static struct dentry *mali_debugfs_dir = NULL;
+
+typedef enum {
+ _MALI_DEVICE_SUSPEND,
+ _MALI_DEVICE_RESUME,
+ _MALI_DEVICE_DVFS_PAUSE,
+ _MALI_DEVICE_DVFS_RESUME,
+ _MALI_MAX_EVENTS
+} _mali_device_debug_power_events;
+
+static const char *const mali_power_events[_MALI_MAX_EVENTS] = {
+ [_MALI_DEVICE_SUSPEND] = "suspend",
+ [_MALI_DEVICE_RESUME] = "resume",
+ [_MALI_DEVICE_DVFS_PAUSE] = "dvfs_pause",
+ [_MALI_DEVICE_DVFS_RESUME] = "dvfs_resume",
+};
+
+static mali_bool power_always_on_enabled = MALI_FALSE;
+
+static int open_copy_private_data(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+ return 0;
+}
+
+static ssize_t group_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+ int r;
+ char buffer[64];
+ struct mali_group *group;
+
+ group = (struct mali_group *)filp->private_data;
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ r = snprintf(buffer, 64, "%u\n",
+ mali_executor_group_is_disabled(group) ? 0 : 1);
+
+ return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static ssize_t group_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+ int r;
+ char buffer[64];
+ unsigned long val;
+ struct mali_group *group;
+
+ group = (struct mali_group *)filp->private_data;
+ MALI_DEBUG_ASSERT_POINTER(group);
+
+ if (count >= sizeof(buffer)) {
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(&buffer[0], buf, count)) {
+ return -EFAULT;
+ }
+ buffer[count] = '\0';
+
+ r = kstrtoul(&buffer[0], 10, &val);
+ if (0 != r) {
+ return -EINVAL;
+ }
+
+ switch (val) {
+ case 1:
+ mali_executor_group_enable(group);
+ break;
+ case 0:
+ mali_executor_group_disable(group);
+ break;
+ default:
+ return -EINVAL;
+ break;
+ }
+
+ *offp += count;
+ return count;
+}
+
+static const struct file_operations group_enabled_fops = {
+ .owner = THIS_MODULE,
+ .open = open_copy_private_data,
+ .read = group_enabled_read,
+ .write = group_enabled_write,
+};
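+
+/*
+ * Usage sketch (illustrative; assumes the debugfs dir is named "mali" and
+ * debugfs is mounted at the default location):
+ *
+ *   echo 0 > /sys/kernel/debug/mali/pp/pp0/enabled   # disable the group
+ *   cat /sys/kernel/debug/mali/pp/pp0/enabled        # reads back 0 or 1
+ */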
+
+static ssize_t hw_core_base_addr_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+ int r;
+ char buffer[64];
+ struct mali_hw_core *hw_core;
+
+ hw_core = (struct mali_hw_core *)filp->private_data;
+ MALI_DEBUG_ASSERT_POINTER(hw_core);
+
+ r = snprintf(buffer, 64, "0x%lX\n", hw_core->phys_addr);
+
+ return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations hw_core_base_addr_fops = {
+ .owner = THIS_MODULE,
+ .open = open_copy_private_data,
+ .read = hw_core_base_addr_read,
+};
+
+static ssize_t profiling_counter_src_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+ u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+ mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+ u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+ char buf[64];
+ int r;
+ u32 val;
+
+ if (MALI_TRUE == is_pp) {
+ /* PP counter */
+ if (MALI_TRUE == is_sub_job) {
+ /* Get counter for a particular sub job */
+ if (0 == src_id) {
+ val = mali_pp_job_get_pp_counter_sub_job_src0(sub_job);
+ } else {
+ val = mali_pp_job_get_pp_counter_sub_job_src1(sub_job);
+ }
+ } else {
+ /* Get default counter for all PP sub jobs */
+ if (0 == src_id) {
+ val = mali_pp_job_get_pp_counter_global_src0();
+ } else {
+ val = mali_pp_job_get_pp_counter_global_src1();
+ }
+ }
+ } else {
+ /* GP counter */
+ if (0 == src_id) {
+ val = mali_gp_job_get_gp_counter_src0();
+ } else {
+ val = mali_gp_job_get_gp_counter_src1();
+ }
+ }
+
+ if (MALI_HW_CORE_NO_COUNTER == val) {
+ r = snprintf(buf, 64, "-1\n");
+ } else {
+ r = snprintf(buf, 64, "%u\n", val);
+ }
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_counter_src_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data);
+ u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data);
+ mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data);
+ u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data);
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf)) {
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&buf, ubuf, cnt)) {
+ return -EFAULT;
+ }
+
+ buf[cnt] = 0;
+
+ ret = kstrtol(buf, 10, &val);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (val < 0) {
+ /* any negative input will disable counter */
+ val = MALI_HW_CORE_NO_COUNTER;
+ }
+
+ if (MALI_TRUE == is_pp) {
+ /* PP counter */
+ if (MALI_TRUE == is_sub_job) {
+ /* Set counter for a particular sub job */
+ if (0 == src_id) {
+ mali_pp_job_set_pp_counter_sub_job_src0(sub_job, (u32)val);
+ } else {
+ mali_pp_job_set_pp_counter_sub_job_src1(sub_job, (u32)val);
+ }
+ } else {
+ /* Set default counter for all PP sub jobs */
+ if (0 == src_id) {
+ mali_pp_job_set_pp_counter_global_src0((u32)val);
+ } else {
+ mali_pp_job_set_pp_counter_global_src1((u32)val);
+ }
+ }
+ } else {
+ /* GP counter */
+ if (0 == src_id) {
+ mali_gp_job_set_gp_counter_src0((u32)val);
+ } else {
+ mali_gp_job_set_gp_counter_src1((u32)val);
+ }
+ }
+
+ *ppos += cnt;
+ return cnt;
+}
+
+static const struct file_operations profiling_counter_src_fops = {
+ .owner = THIS_MODULE,
+ .open = open_copy_private_data,
+ .read = profiling_counter_src_read,
+ .write = profiling_counter_src_write,
+};
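+
+/*
+ * Usage sketch (illustrative paths): counter sources are set per GP/PP or
+ * per PP sub job; any negative write disables the counter:
+ *
+ *   echo 42 > /sys/kernel/debug/mali/profiling/gp/counter_src0
+ *   echo -1 > /sys/kernel/debug/mali/profiling/pp/0/counter_src1
+ */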
+
+static ssize_t l2_l2x_counter_srcx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+ char buf[64];
+ int r;
+ u32 val;
+ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+ if (0 == src_id) {
+ val = mali_l2_cache_core_get_counter_src0(l2_core);
+ } else {
+ val = mali_l2_cache_core_get_counter_src1(l2_core);
+ }
+
+ if (MALI_HW_CORE_NO_COUNTER == val) {
+ r = snprintf(buf, 64, "-1\n");
+ } else {
+ r = snprintf(buf, 64, "%u\n", val);
+ }
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf)) {
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&buf, ubuf, cnt)) {
+ return -EFAULT;
+ }
+
+ buf[cnt] = 0;
+
+ ret = kstrtol(buf, 10, &val);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (val < 0) {
+ /* any negative input will disable counter */
+ val = MALI_HW_CORE_NO_COUNTER;
+ }
+
+ mali_l2_cache_core_set_counter_src(l2_core, src_id, (u32)val);
+
+ *ppos += cnt;
+ return cnt;
+}
+
+static ssize_t l2_all_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+ char buf[64];
+ long val;
+ int ret;
+ u32 l2_id;
+ struct mali_l2_cache_core *l2_cache;
+
+ if (cnt >= sizeof(buf)) {
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&buf, ubuf, cnt)) {
+ return -EFAULT;
+ }
+
+ buf[cnt] = 0;
+
+ ret = kstrtol(buf, 10, &val);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (val < 0) {
+ /* any negative input will disable counter */
+ val = MALI_HW_CORE_NO_COUNTER;
+ }
+
+ l2_id = 0;
+ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+ while (NULL != l2_cache) {
+ mali_l2_cache_core_set_counter_src(l2_cache, src_id, (u32)val);
+
+ /* try next L2 */
+ l2_id++;
+ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+ }
+
+ *ppos += cnt;
+ return cnt;
+}
+
+static ssize_t l2_l2x_counter_src0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_l2x_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t l2_all_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_all_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_src0_fops = {
+ .owner = THIS_MODULE,
+ .open = open_copy_private_data,
+ .read = l2_l2x_counter_src0_read,
+ .write = l2_l2x_counter_src0_write,
+};
+
+static const struct file_operations l2_l2x_counter_src1_fops = {
+ .owner = THIS_MODULE,
+ .open = open_copy_private_data,
+ .read = l2_l2x_counter_src1_read,
+ .write = l2_l2x_counter_src1_write,
+};
+
+static const struct file_operations l2_all_counter_src0_fops = {
+ .owner = THIS_MODULE,
+ .write = l2_all_counter_src0_write,
+};
+
+static const struct file_operations l2_all_counter_src1_fops = {
+ .owner = THIS_MODULE,
+ .write = l2_all_counter_src1_write,
+};
+
+static ssize_t l2_l2x_counter_valx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id)
+{
+ char buf[64];
+ int r;
+ u32 src0 = 0;
+ u32 val0 = 0;
+ u32 src1 = 0;
+ u32 val1 = 0;
+ u32 val = -1;
+ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data;
+
+ mali_l2_cache_core_get_counter_values(l2_core, &src0, &val0, &src1, &val1);
+
+ if (0 == src_id) {
+ if (MALI_HW_CORE_NO_COUNTER != val0) {
+ val = val0;
+ }
+ } else {
+ if (MALI_HW_CORE_NO_COUNTER != val1) {
+ val = val1;
+ }
+ }
+
+ r = snprintf(buf, 64, "%u\n", val);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t l2_l2x_counter_val0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 0);
+}
+
+static ssize_t l2_l2x_counter_val1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 1);
+}
+
+static const struct file_operations l2_l2x_counter_val0_fops = {
+ .owner = THIS_MODULE,
+ .open = open_copy_private_data,
+ .read = l2_l2x_counter_val0_read,
+};
+
+static const struct file_operations l2_l2x_counter_val1_fops = {
+ .owner = THIS_MODULE,
+ .open = open_copy_private_data,
+ .read = l2_l2x_counter_val1_read,
+};
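+
+/*
+ * Usage sketch (illustrative): per-L2 files configure and sample a single
+ * cache instance, while the "all" directory fans a source out to every L2:
+ *
+ *   echo 21 > /sys/kernel/debug/mali/l2/all/counter_src0
+ *   cat /sys/kernel/debug/mali/l2/l20/counter_val0
+ */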
+
+static ssize_t power_always_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ unsigned long val;
+ int ret;
+ char buf[32];
+
+ cnt = min(cnt, sizeof(buf) - 1);
+ if (copy_from_user(buf, ubuf, cnt)) {
+ return -EFAULT;
+ }
+ buf[cnt] = '\0';
+
+ ret = kstrtoul(buf, 10, &val);
+ if (0 != ret) {
+ return ret;
+ }
+
+ /* Update setting (not exactly thread safe) */
+ if (1 == val && MALI_FALSE == power_always_on_enabled) {
+ power_always_on_enabled = MALI_TRUE;
+ _mali_osk_pm_dev_ref_get_sync();
+ } else if (0 == val && MALI_TRUE == power_always_on_enabled) {
+ power_always_on_enabled = MALI_FALSE;
+ _mali_osk_pm_dev_ref_put();
+ }
+
+ *ppos += cnt;
+ return cnt;
+}
+
+static ssize_t power_always_on_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ if (MALI_TRUE == power_always_on_enabled) {
+ return simple_read_from_buffer(ubuf, cnt, ppos, "1\n", 2);
+ } else {
+ return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2);
+ }
+}
+
+static const struct file_operations power_always_on_fops = {
+ .owner = THIS_MODULE,
+ .read = power_always_on_read,
+ .write = power_always_on_write,
+};
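+
+/*
+ * Usage sketch (illustrative): writing 1 takes a runtime-PM reference so
+ * the GPU stays powered; writing 0 releases it again:
+ *
+ *   echo 1 > /sys/kernel/debug/mali/power/always_on
+ */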
+
+static ssize_t power_power_events_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	char buf[32];
+
+	/* ubuf is a user-space pointer; copy it into a kernel buffer before
+	 * comparing, rather than dereferencing it directly. */
+	cnt = min(cnt, sizeof(buf) - 1);
+	if (copy_from_user(buf, ubuf, cnt)) {
+		return -EFAULT;
+	}
+	buf[cnt] = '\0';
+
+	if (!strncmp(buf, mali_power_events[_MALI_DEVICE_SUSPEND], strlen(mali_power_events[_MALI_DEVICE_SUSPEND]) - 1)) {
+		mali_pm_os_suspend(MALI_TRUE);
+	} else if (!strncmp(buf, mali_power_events[_MALI_DEVICE_RESUME], strlen(mali_power_events[_MALI_DEVICE_RESUME]) - 1)) {
+		mali_pm_os_resume();
+	} else if (!strncmp(buf, mali_power_events[_MALI_DEVICE_DVFS_PAUSE], strlen(mali_power_events[_MALI_DEVICE_DVFS_PAUSE]) - 1)) {
+		mali_dev_pause();
+	} else if (!strncmp(buf, mali_power_events[_MALI_DEVICE_DVFS_RESUME], strlen(mali_power_events[_MALI_DEVICE_DVFS_RESUME]) - 1)) {
+		mali_dev_resume();
+	}
+ *ppos += cnt;
+ return cnt;
+}
+
+static loff_t power_power_events_seek(struct file *file, loff_t offset, int orig)
+{
+ file->f_pos = offset;
+	return offset;
+}
+
+static const struct file_operations power_power_events_fops = {
+ .owner = THIS_MODULE,
+ .write = power_power_events_write,
+ .llseek = power_power_events_seek,
+};
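+
+/*
+ * Usage sketch (illustrative): the write-only power_events file triggers
+ * the matching PM transition by name:
+ *
+ *   echo suspend    > /sys/kernel/debug/mali/power/power_events
+ *   echo dvfs_pause > /sys/kernel/debug/mali/power/power_events
+ */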
+
+#if MALI_STATE_TRACKING
+static int mali_seq_internal_state_show(struct seq_file *seq_file, void *v)
+{
+ u32 len = 0;
+ u32 size;
+ char *buf;
+
+ size = seq_get_buf(seq_file, &buf);
+
+ if (!size) {
+ return -ENOMEM;
+ }
+
+ /* Create the internal state dump. */
+ len = snprintf(buf + len, size - len, "Mali device driver %s\n", SVN_REV_STRING);
+ len += snprintf(buf + len, size - len, "License: %s\n\n", MALI_KERNEL_LINUX_LICENSE);
+
+ len += _mali_kernel_core_dump_state(buf + len, size - len);
+
+ seq_commit(seq_file, len);
+
+ return 0;
+}
+
+static int mali_seq_internal_state_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mali_seq_internal_state_show, NULL);
+}
+
+static const struct file_operations mali_seq_internal_state_fops = {
+ .owner = THIS_MODULE,
+ .open = mali_seq_internal_state_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif /* MALI_STATE_TRACKING */
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+static ssize_t profiling_record_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ int r;
+
+ r = snprintf(buf, 64, "%u\n", _mali_internal_profiling_is_recording() ? 1 : 0);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t profiling_record_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ unsigned long val;
+ int ret;
+
+ if (cnt >= sizeof(buf)) {
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&buf, ubuf, cnt)) {
+ return -EFAULT;
+ }
+
+ buf[cnt] = 0;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (val != 0) {
+ u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* This can be made configurable at a later stage if we need to */
+
+ /* check if we are already recording */
+ if (MALI_TRUE == _mali_internal_profiling_is_recording()) {
+ MALI_DEBUG_PRINT(3, ("Recording of profiling events already in progress\n"));
+ return -EFAULT;
+ }
+
+ /* check if we need to clear out an old recording first */
+ if (MALI_TRUE == _mali_internal_profiling_have_recording()) {
+ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_clear()) {
+ MALI_DEBUG_PRINT(3, ("Failed to clear existing recording of profiling events\n"));
+ return -EFAULT;
+ }
+ }
+
+ /* start recording profiling data */
+ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+ MALI_DEBUG_PRINT(3, ("Failed to start recording of profiling events\n"));
+ return -EFAULT;
+ }
+
+ MALI_DEBUG_PRINT(3, ("Profiling recording started (max %u events)\n", limit));
+ } else {
+ /* stop recording profiling data */
+ u32 count = 0;
+ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_stop(&count)) {
+ MALI_DEBUG_PRINT(2, ("Failed to stop recording of profiling events\n"));
+ return -EFAULT;
+ }
+
+ MALI_DEBUG_PRINT(2, ("Profiling recording stopped (recorded %u events)\n", count));
+ }
+
+ *ppos += cnt;
+ return cnt;
+}
+
+static const struct file_operations profiling_record_fops = {
+ .owner = THIS_MODULE,
+ .read = profiling_record_read,
+ .write = profiling_record_write,
+};
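+
+/*
+ * Usage sketch (illustrative): start and stop a recording through "record",
+ * then dump the captured events:
+ *
+ *   echo 1 > /sys/kernel/debug/mali/profiling/record
+ *   echo 0 > /sys/kernel/debug/mali/profiling/record
+ *   cat /sys/kernel/debug/mali/profiling/events_human_readable
+ */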
+
+static void *profiling_events_start(struct seq_file *s, loff_t *pos)
+{
+ loff_t *spos;
+
+	/* check if we have data available */
+ if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+ return NULL;
+ }
+
+ spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
+ if (NULL == spos) {
+ return NULL;
+ }
+
+ *spos = *pos;
+ return spos;
+}
+
+static void *profiling_events_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ loff_t *spos = v;
+
+	/* check if we have data available */
+ if (MALI_TRUE != _mali_internal_profiling_have_recording()) {
+ return NULL;
+ }
+
+	/* check if the next entry actually is available */
+ if (_mali_internal_profiling_get_count() <= (u32)(*spos + 1)) {
+ return NULL;
+ }
+
+ *pos = ++*spos;
+ return spos;
+}
+
+static void profiling_events_stop(struct seq_file *s, void *v)
+{
+ kfree(v);
+}
+
+static int profiling_events_show(struct seq_file *seq_file, void *v)
+{
+ loff_t *spos = v;
+ u32 index;
+ u64 timestamp;
+ u32 event_id;
+ u32 data[5];
+
+	index = (u32)*spos;
+
+ /* Retrieve all events */
+ if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+ seq_printf(seq_file, "%llu %u %u %u %u %u %u\n", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+ return 0;
+ }
+
+ return 0;
+}
+
+static int profiling_events_show_human_readable(struct seq_file *seq_file, void *v)
+{
+#define MALI_EVENT_ID_IS_HW(event_id) (((event_id & 0x00FF0000) >= MALI_PROFILING_EVENT_CHANNEL_GP0) && ((event_id & 0x00FF0000) <= MALI_PROFILING_EVENT_CHANNEL_PP7))
+
+ static u64 start_time = 0;
+ loff_t *spos = v;
+ u32 index;
+ u64 timestamp;
+ u32 event_id;
+ u32 data[5];
+
+	index = (u32)*spos;
+
+ /* Retrieve all events */
+ if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, &timestamp, &event_id, data)) {
+ seq_printf(seq_file, "%llu %u %u %u %u %u %u # ", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]);
+
+ if (0 == index) {
+ start_time = timestamp;
+ }
+
+ seq_printf(seq_file, "[%06u] ", index);
+
+ switch (event_id & 0x0F000000) {
+ case MALI_PROFILING_EVENT_TYPE_SINGLE:
+ seq_printf(seq_file, "SINGLE | ");
+ break;
+ case MALI_PROFILING_EVENT_TYPE_START:
+ seq_printf(seq_file, "START | ");
+ break;
+ case MALI_PROFILING_EVENT_TYPE_STOP:
+ seq_printf(seq_file, "STOP | ");
+ break;
+ case MALI_PROFILING_EVENT_TYPE_SUSPEND:
+ seq_printf(seq_file, "SUSPEND | ");
+ break;
+ case MALI_PROFILING_EVENT_TYPE_RESUME:
+ seq_printf(seq_file, "RESUME | ");
+ break;
+ default:
+ seq_printf(seq_file, "0x%01X | ", (event_id & 0x0F000000) >> 24);
+ break;
+ }
+
+ switch (event_id & 0x00FF0000) {
+ case MALI_PROFILING_EVENT_CHANNEL_SOFTWARE:
+ seq_printf(seq_file, "SW | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_GP0:
+ seq_printf(seq_file, "GP0 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_PP0:
+ seq_printf(seq_file, "PP0 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_PP1:
+ seq_printf(seq_file, "PP1 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_PP2:
+ seq_printf(seq_file, "PP2 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_PP3:
+ seq_printf(seq_file, "PP3 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_PP4:
+ seq_printf(seq_file, "PP4 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_PP5:
+ seq_printf(seq_file, "PP5 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_PP6:
+ seq_printf(seq_file, "PP6 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_PP7:
+ seq_printf(seq_file, "PP7 | ");
+ break;
+ case MALI_PROFILING_EVENT_CHANNEL_GPU:
+ seq_printf(seq_file, "GPU | ");
+ break;
+ default:
+ seq_printf(seq_file, "0x%02X | ", (event_id & 0x00FF0000) >> 16);
+ break;
+ }
+
+ if (MALI_EVENT_ID_IS_HW(event_id)) {
+ if (((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_START) || ((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_STOP)) {
+ switch (event_id & 0x0000FFFF) {
+ case MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL:
+ seq_printf(seq_file, "PHYSICAL | ");
+ break;
+ case MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL:
+ seq_printf(seq_file, "VIRTUAL | ");
+ break;
+ default:
+ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+ break;
+ }
+ } else {
+ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+ }
+ } else {
+ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF);
+ }
+
+ seq_printf(seq_file, "T0 + 0x%016llX\n", timestamp - start_time);
+
+ return 0;
+ }
+
+ return 0;
+}
+
+static const struct seq_operations profiling_events_seq_ops = {
+ .start = profiling_events_start,
+ .next = profiling_events_next,
+ .stop = profiling_events_stop,
+ .show = profiling_events_show
+};
+
+static int profiling_events_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &profiling_events_seq_ops);
+}
+
+static const struct file_operations profiling_events_fops = {
+ .owner = THIS_MODULE,
+ .open = profiling_events_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static const struct seq_operations profiling_events_human_readable_seq_ops = {
+ .start = profiling_events_start,
+ .next = profiling_events_next,
+ .stop = profiling_events_stop,
+ .show = profiling_events_show_human_readable
+};
+
+static int profiling_events_human_readable_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &profiling_events_human_readable_seq_ops);
+}
+
+static const struct file_operations profiling_events_human_readable_fops = {
+ .owner = THIS_MODULE,
+ .open = profiling_events_human_readable_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+#endif
+
+static int memory_debugfs_show(struct seq_file *s, void *private_data)
+{
+#ifdef MALI_MEM_SWAP_TRACKING
+ seq_printf(s, " %-25s %-10s %-10s %-15s %-15s %-10s %-10s %-10s \n"\
+ "=================================================================================================================================\n",
+ "Name (:bytes)", "pid", "mali_mem", "max_mali_mem",
+ "external_mem", "ump_mem", "dma_mem", "swap_mem");
+#else
+ seq_printf(s, " %-25s %-10s %-10s %-15s %-15s %-10s %-10s \n"\
+ "========================================================================================================================\n",
+ "Name (:bytes)", "pid", "mali_mem", "max_mali_mem",
+ "external_mem", "ump_mem", "dma_mem");
+#endif
+ mali_session_memory_tracking(s);
+ return 0;
+}
+
+static int memory_debugfs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, memory_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations memory_usage_fops = {
+ .owner = THIS_MODULE,
+ .open = memory_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
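+
+/*
+ * Usage sketch (illustrative): per-session memory accounting is printed as
+ * a table, one row per session:
+ *
+ *   cat /sys/kernel/debug/mali/gpu_memory
+ */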
+
+static ssize_t utilization_gp_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ size_t r;
+ u32 uval = _mali_ukk_utilization_gp_pp();
+
+ r = snprintf(buf, 64, "%u\n", uval);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_gp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ size_t r;
+ u32 uval = _mali_ukk_utilization_gp();
+
+ r = snprintf(buf, 64, "%u\n", uval);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t utilization_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ size_t r;
+ u32 uval = _mali_ukk_utilization_pp();
+
+ r = snprintf(buf, 64, "%u\n", uval);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+
+static const struct file_operations utilization_gp_pp_fops = {
+ .owner = THIS_MODULE,
+ .read = utilization_gp_pp_read,
+};
+
+static const struct file_operations utilization_gp_fops = {
+ .owner = THIS_MODULE,
+ .read = utilization_gp_read,
+};
+
+static const struct file_operations utilization_pp_fops = {
+ .owner = THIS_MODULE,
+ .read = utilization_pp_read,
+};
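+
+/*
+ * Usage sketch (illustrative): each utilization file returns a single
+ * unsigned integer per read:
+ *
+ *   cat /sys/kernel/debug/mali/utilization_gp_pp
+ */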
+
+static ssize_t user_settings_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ unsigned long val;
+ int ret;
+ _mali_uk_user_setting_t setting;
+ char buf[32];
+
+ cnt = min(cnt, sizeof(buf) - 1);
+ if (copy_from_user(buf, ubuf, cnt)) {
+ return -EFAULT;
+ }
+ buf[cnt] = '\0';
+
+ ret = kstrtoul(buf, 10, &val);
+ if (0 != ret) {
+ return ret;
+ }
+
+ /* Update setting */
+ setting = (_mali_uk_user_setting_t)(filp->private_data);
+ mali_set_user_setting(setting, val);
+
+ *ppos += cnt;
+ return cnt;
+}
+
+static ssize_t user_settings_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ size_t r;
+ u32 value;
+ _mali_uk_user_setting_t setting;
+
+ setting = (_mali_uk_user_setting_t)(filp->private_data);
+ value = mali_get_user_setting(setting);
+
+ r = snprintf(buf, 64, "%u\n", value);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static const struct file_operations user_settings_fops = {
+ .owner = THIS_MODULE,
+ .open = open_copy_private_data,
+ .read = user_settings_read,
+ .write = user_settings_write,
+};
+
+static int mali_sysfs_user_settings_register(void)
+{
+ struct dentry *mali_user_settings_dir = debugfs_create_dir("userspace_settings", mali_debugfs_dir);
+
+ if (mali_user_settings_dir != NULL) {
+ long i;
+ for (i = 0; i < _MALI_UK_USER_SETTING_MAX; i++) {
+ debugfs_create_file(_mali_uk_user_setting_descriptions[i],
+ 0600, mali_user_settings_dir, (void *)i,
+ &user_settings_fops);
+ }
+ }
+
+ return 0;
+}
+
+static ssize_t pp_num_cores_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+ int ret;
+ char buffer[32];
+ unsigned long val;
+
+ if (count >= sizeof(buffer)) {
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(&buffer[0], buf, count)) {
+ return -EFAULT;
+ }
+ buffer[count] = '\0';
+
+ ret = kstrtoul(&buffer[0], 10, &val);
+ if (0 != ret) {
+ return -EINVAL;
+ }
+
+ ret = mali_executor_set_perf_level(val, MALI_TRUE); /* override even if core scaling is disabled */
+ if (ret) {
+ return ret;
+ }
+
+ *offp += count;
+ return count;
+}
+
+static ssize_t pp_num_cores_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+ int r;
+ char buffer[64];
+
+ r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_enabled());
+
+ return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_enabled_fops = {
+ .owner = THIS_MODULE,
+ .write = pp_num_cores_enabled_write,
+ .read = pp_num_cores_enabled_read,
+ .llseek = default_llseek,
+};
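+
+/*
+ * Usage sketch (illustrative): the write path overrides the core count even
+ * while core scaling is disabled (see mali_executor_set_perf_level above):
+ *
+ *   echo 2 > /sys/kernel/debug/mali/pp/num_cores_enabled
+ */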
+
+static ssize_t pp_num_cores_total_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+ int r;
+ char buffer[64];
+
+ r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_total());
+
+ return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations pp_num_cores_total_fops = {
+ .owner = THIS_MODULE,
+ .read = pp_num_cores_total_read,
+};
+
+static ssize_t pp_core_scaling_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp)
+{
+ int ret;
+ char buffer[32];
+ unsigned long val;
+
+ if (count >= sizeof(buffer)) {
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(&buffer[0], buf, count)) {
+ return -EFAULT;
+ }
+ buffer[count] = '\0';
+
+ ret = kstrtoul(&buffer[0], 10, &val);
+ if (0 != ret) {
+ return -EINVAL;
+ }
+
+ switch (val) {
+ case 1:
+ mali_executor_core_scaling_enable();
+ break;
+ case 0:
+ mali_executor_core_scaling_disable();
+ break;
+ default:
+ return -EINVAL;
+ break;
+ }
+
+ *offp += count;
+ return count;
+}
+
+static ssize_t pp_core_scaling_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+ return simple_read_from_buffer(buf, count, offp, mali_executor_core_scaling_is_enabled() ? "1\n" : "0\n", 2);
+}
+static const struct file_operations pp_core_scaling_enabled_fops = {
+ .owner = THIS_MODULE,
+ .write = pp_core_scaling_enabled_write,
+ .read = pp_core_scaling_enabled_read,
+ .llseek = default_llseek,
+};
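+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   echo 0 > /sys/kernel/debug/mali/pp/core_scaling_enabled  # pin core count
+ */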
+
+static ssize_t version_read(struct file *filp, char __user *buf, size_t count, loff_t *offp)
+{
+ int r = 0;
+ char buffer[64];
+
+ switch (mali_kernel_core_get_product_id()) {
+ case _MALI_PRODUCT_ID_MALI200:
+ r = snprintf(buffer, 64, "Mali-200\n");
+ break;
+ case _MALI_PRODUCT_ID_MALI300:
+ r = snprintf(buffer, 64, "Mali-300\n");
+ break;
+ case _MALI_PRODUCT_ID_MALI400:
+ r = snprintf(buffer, 64, "Mali-400 MP\n");
+ break;
+ case _MALI_PRODUCT_ID_MALI450:
+ r = snprintf(buffer, 64, "Mali-450 MP\n");
+ break;
+ case _MALI_PRODUCT_ID_MALI470:
+ r = snprintf(buffer, 64, "Mali-470 MP\n");
+ break;
+ case _MALI_PRODUCT_ID_UNKNOWN:
+ return -EINVAL;
+ break;
+	}
+
+ return simple_read_from_buffer(buf, count, offp, buffer, r);
+}
+
+static const struct file_operations version_fops = {
+ .owner = THIS_MODULE,
+ .read = version_read,
+};
+
+#if defined(DEBUG)
+static int timeline_debugfs_show(struct seq_file *s, void *private_data)
+{
+ struct mali_session_data *session, *tmp;
+ u32 session_seq = 1;
+
+ seq_printf(s, "timeline system info: \n=================\n\n");
+
+ mali_session_lock();
+ MALI_SESSION_FOREACH(session, tmp, link) {
+ seq_printf(s, "session %d <%p> start:\n", session_seq, session);
+ mali_timeline_debug_print_system(session->timeline_system, s);
+ seq_printf(s, "session %d end\n\n\n", session_seq++);
+ }
+ mali_session_unlock();
+
+ return 0;
+}
+
+static int timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, timeline_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations timeline_dump_fops = {
+ .owner = THIS_MODULE,
+ .open = timeline_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release
+};
+#endif
+
+int mali_sysfs_register(const char *mali_dev_name)
+{
+ mali_debugfs_dir = debugfs_create_dir(mali_dev_name, NULL);
+ if (ERR_PTR(-ENODEV) == mali_debugfs_dir) {
+ /* Debugfs not supported. */
+ mali_debugfs_dir = NULL;
+ } else {
+ if (NULL != mali_debugfs_dir) {
+ /* Debugfs directory created successfully; create files now */
+ struct dentry *mali_power_dir;
+ struct dentry *mali_gp_dir;
+ struct dentry *mali_pp_dir;
+ struct dentry *mali_l2_dir;
+ struct dentry *mali_profiling_dir;
+
+ debugfs_create_file("version", 0400, mali_debugfs_dir, NULL, &version_fops);
+
+ mali_power_dir = debugfs_create_dir("power", mali_debugfs_dir);
+ if (mali_power_dir != NULL) {
+ debugfs_create_file("always_on", 0600, mali_power_dir, NULL, &power_always_on_fops);
+ debugfs_create_file("power_events", 0200, mali_power_dir, NULL, &power_power_events_fops);
+ }
+
+ mali_gp_dir = debugfs_create_dir("gp", mali_debugfs_dir);
+ if (mali_gp_dir != NULL) {
+ u32 num_groups;
+ long i;
+
+ num_groups = mali_group_get_glob_num_groups();
+ for (i = 0; i < num_groups; i++) {
+ struct mali_group *group = mali_group_get_glob_group(i);
+
+ struct mali_gp_core *gp_core = mali_group_get_gp_core(group);
+ if (NULL != gp_core) {
+ struct dentry *mali_gp_gpx_dir;
+ mali_gp_gpx_dir = debugfs_create_dir("gp0", mali_gp_dir);
+ if (NULL != mali_gp_gpx_dir) {
+ debugfs_create_file("base_addr", 0400, mali_gp_gpx_dir, &gp_core->hw_core, &hw_core_base_addr_fops);
+ debugfs_create_file("enabled", 0600, mali_gp_gpx_dir, group, &group_enabled_fops);
+ }
+ break; /* no need to look for any other GP cores */
+ }
+
+ }
+ }
+
+ mali_pp_dir = debugfs_create_dir("pp", mali_debugfs_dir);
+ if (mali_pp_dir != NULL) {
+ u32 num_groups;
+ long i;
+
+ debugfs_create_file("num_cores_total", 0400, mali_pp_dir, NULL, &pp_num_cores_total_fops);
+ debugfs_create_file("num_cores_enabled", 0600, mali_pp_dir, NULL, &pp_num_cores_enabled_fops);
+ debugfs_create_file("core_scaling_enabled", 0600, mali_pp_dir, NULL, &pp_core_scaling_enabled_fops);
+
+ num_groups = mali_group_get_glob_num_groups();
+ for (i = 0; i < num_groups; i++) {
+ struct mali_group *group = mali_group_get_glob_group(i);
+
+ struct mali_pp_core *pp_core = mali_group_get_pp_core(group);
+ if (NULL != pp_core) {
+ char buf[16];
+ struct dentry *mali_pp_ppx_dir;
+ _mali_osk_snprintf(buf, sizeof(buf), "pp%u", mali_pp_core_get_id(pp_core));
+ mali_pp_ppx_dir = debugfs_create_dir(buf, mali_pp_dir);
+ if (NULL != mali_pp_ppx_dir) {
+ debugfs_create_file("base_addr", 0400, mali_pp_ppx_dir, &pp_core->hw_core, &hw_core_base_addr_fops);
+ if (!mali_group_is_virtual(group)) {
+ debugfs_create_file("enabled", 0600, mali_pp_ppx_dir, group, &group_enabled_fops);
+ }
+ }
+ }
+ }
+ }
+
+ mali_l2_dir = debugfs_create_dir("l2", mali_debugfs_dir);
+ if (mali_l2_dir != NULL) {
+ struct dentry *mali_l2_all_dir;
+ u32 l2_id;
+ struct mali_l2_cache_core *l2_cache;
+
+ mali_l2_all_dir = debugfs_create_dir("all", mali_l2_dir);
+ if (mali_l2_all_dir != NULL) {
+ debugfs_create_file("counter_src0", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src0_fops);
+ debugfs_create_file("counter_src1", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src1_fops);
+ }
+
+ l2_id = 0;
+ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+ while (NULL != l2_cache) {
+ char buf[16];
+ struct dentry *mali_l2_l2x_dir;
+ _mali_osk_snprintf(buf, sizeof(buf), "l2%u", l2_id);
+ mali_l2_l2x_dir = debugfs_create_dir(buf, mali_l2_dir);
+ if (NULL != mali_l2_l2x_dir) {
+ debugfs_create_file("counter_src0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src0_fops);
+ debugfs_create_file("counter_src1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src1_fops);
+ debugfs_create_file("counter_val0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val0_fops);
+ debugfs_create_file("counter_val1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val1_fops);
+ debugfs_create_file("base_addr", 0400, mali_l2_l2x_dir, &l2_cache->hw_core, &hw_core_base_addr_fops);
+ }
+
+ /* try next L2 */
+ l2_id++;
+ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id);
+ }
+ }
+
+ debugfs_create_file("gpu_memory", 0444, mali_debugfs_dir, NULL, &memory_usage_fops);
+
+ debugfs_create_file("utilization_gp_pp", 0400, mali_debugfs_dir, NULL, &utilization_gp_pp_fops);
+ debugfs_create_file("utilization_gp", 0400, mali_debugfs_dir, NULL, &utilization_gp_fops);
+ debugfs_create_file("utilization_pp", 0400, mali_debugfs_dir, NULL, &utilization_pp_fops);
+
+ mali_profiling_dir = debugfs_create_dir("profiling", mali_debugfs_dir);
+ if (mali_profiling_dir != NULL) {
+ u32 max_sub_jobs;
+ long i;
+ struct dentry *mali_profiling_gp_dir;
+ struct dentry *mali_profiling_pp_dir;
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+ struct dentry *mali_profiling_proc_dir;
+#endif
+ /*
+ * Create directory where we can set GP HW counters.
+ */
+ mali_profiling_gp_dir = debugfs_create_dir("gp", mali_profiling_dir);
+ if (mali_profiling_gp_dir != NULL) {
+ debugfs_create_file("counter_src0", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(0), &profiling_counter_src_fops);
+ debugfs_create_file("counter_src1", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(1), &profiling_counter_src_fops);
+ }
+
+ /*
+ * Create directory where we can set PP HW counters.
+ * Possible override with specific HW counters for a particular sub job
+ * (Disable core scaling before using the override!)
+ */
+ mali_profiling_pp_dir = debugfs_create_dir("pp", mali_profiling_dir);
+ if (mali_profiling_pp_dir != NULL) {
+ debugfs_create_file("counter_src0", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(0), &profiling_counter_src_fops);
+ debugfs_create_file("counter_src1", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(1), &profiling_counter_src_fops);
+ }
+
+ max_sub_jobs = mali_executor_get_num_cores_total();
+ for (i = 0; i < max_sub_jobs; i++) {
+ char buf[16];
+ struct dentry *mali_profiling_pp_x_dir;
+ _mali_osk_snprintf(buf, sizeof(buf), "%u", i);
+ mali_profiling_pp_x_dir = debugfs_create_dir(buf, mali_profiling_pp_dir);
+ if (NULL != mali_profiling_pp_x_dir) {
+ debugfs_create_file("counter_src0",
+ 0600, mali_profiling_pp_x_dir,
+ (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(0, i),
+ &profiling_counter_src_fops);
+ debugfs_create_file("counter_src1",
+ 0600, mali_profiling_pp_x_dir,
+ (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(1, i),
+ &profiling_counter_src_fops);
+ }
+ }
+
+#if defined(CONFIG_MALI400_INTERNAL_PROFILING)
+ mali_profiling_proc_dir = debugfs_create_dir("proc", mali_profiling_dir);
+ if (mali_profiling_proc_dir != NULL) {
+ struct dentry *mali_profiling_proc_default_dir = debugfs_create_dir("default", mali_profiling_proc_dir);
+ if (mali_profiling_proc_default_dir != NULL) {
+ debugfs_create_file("enable", 0600, mali_profiling_proc_default_dir, (void *)_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, &user_settings_fops);
+ }
+ }
+ debugfs_create_file("record", 0600, mali_profiling_dir, NULL, &profiling_record_fops);
+ debugfs_create_file("events", 0400, mali_profiling_dir, NULL, &profiling_events_fops);
+ debugfs_create_file("events_human_readable", 0400, mali_profiling_dir, NULL, &profiling_events_human_readable_fops);
+#endif
+ }
+
+#if MALI_STATE_TRACKING
+ debugfs_create_file("state_dump", 0400, mali_debugfs_dir, NULL, &mali_seq_internal_state_fops);
+#endif
+
+#if defined(DEBUG)
+ debugfs_create_file("timeline_dump", 0400, mali_debugfs_dir, NULL, &timeline_dump_fops);
+#endif
+ if (mali_sysfs_user_settings_register()) {
+ /* Failed to create the debugfs entries for the user settings DB. */
+ MALI_DEBUG_PRINT(2, ("Failed to create user setting debugfs files. Ignoring...\n"));
+ }
+ }
+ }
+
+ /* Success! */
+ return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+ if (NULL != mali_debugfs_dir) {
+ debugfs_remove_recursive(mali_debugfs_dir);
+ }
+ return 0;
+}
+
+#else /* MALI_LICENSE_IS_GPL */
+
+/* Dummy implementations for non-GPL */
+
+int mali_sysfs_register(const char *mali_dev_name)
+{
+ return 0;
+}
+
+int mali_sysfs_unregister(void)
+{
+ return 0;
+}
+
+#endif /* MALI_LICENSE_IS_GPL */
diff --git a/drivers/gpu/arm/utgard/linux/mali_kernel_sysfs.h b/drivers/gpu/arm/utgard/linux/mali_kernel_sysfs.h
new file mode 100644
index 000000000000..a36a0cea9972
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_kernel_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_KERNEL_SYSFS_H__
+#define __MALI_KERNEL_SYSFS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/device.h>
+
+#define MALI_PROC_DIR "driver/mali"
+
+int mali_sysfs_register(const char *mali_dev_name);
+int mali_sysfs_unregister(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_KERNEL_SYSFS_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_linux_trace.h b/drivers/gpu/arm/utgard/linux/mali_linux_trace.h
new file mode 100644
index 000000000000..c6cd2bfb7217
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_linux_trace.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#if !defined (MALI_LINUX_TRACE_H) || defined (TRACE_HEADER_MULTI_READ)
+#define MALI_LINUX_TRACE_H
+
+#include <linux/types.h>
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+/**
+ * Define the tracepoint used to communicate the status of a GPU. Called
+ * when a GPU turns on or turns off.
+ *
+ * @param event_id The type of the event. This parameter is a bitfield
+ * encoding the type of the event.
+ *
+ * @param d0 First data parameter.
+ * @param d1 Second data parameter.
+ * @param d2 Third data parameter.
+ * @param d3 Fourth data parameter.
+ * @param d4 Fifth data parameter.
+ */
+TRACE_EVENT(mali_timeline_event,
+
+ TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1,
+ unsigned int d2, unsigned int d3, unsigned int d4),
+
+ TP_ARGS(event_id, d0, d1, d2, d3, d4),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned int, d0)
+ __field(unsigned int, d1)
+ __field(unsigned int, d2)
+ __field(unsigned int, d3)
+ __field(unsigned int, d4)
+ ),
+
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->d0 = d0;
+ __entry->d1 = d1;
+ __entry->d2 = d2;
+ __entry->d3 = d3;
+ __entry->d4 = d4;
+ ),
+
+ TP_printk("event=%d", __entry->event_id)
+ );
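+
+/*
+ * Illustrative call site (not from the original sources): the TRACE_EVENT
+ * macro above generates a trace_mali_timeline_event() helper which the
+ * driver calls with its event id and five data words, e.g.
+ *
+ *   trace_mali_timeline_event(event_id, d0, d1, d2, d3, d4);
+ */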
+
+/**
+ * Define a tracepoint used to register the value of a hardware counter.
+ * Hardware counters belonging to the vertex or fragment processor are
+ * reported via this tracepoint each frame, whilst L2 cache hardware
+ * counters are reported continuously.
+ *
+ * @param counter_id The counter ID.
+ * @param value The value of the counter.
+ */
+TRACE_EVENT(mali_hw_counter,
+
+ TP_PROTO(unsigned int counter_id, unsigned int value),
+
+ TP_ARGS(counter_id, value),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, counter_id)
+ __field(unsigned int, value)
+ ),
+
+	TP_fast_assign(
+		__entry->counter_id = counter_id;
+		__entry->value = value;
+	),
+
+ TP_printk("event %d = %d", __entry->counter_id, __entry->value)
+ );
+
+/**
+ * Define a tracepoint used to send a bundle of software counters.
+ *
+ * @param counters The bundle of counters.
+ */
+TRACE_EVENT(mali_sw_counters,
+
+ TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters),
+
+ TP_ARGS(pid, tid, surface_id, counters),
+
+ TP_STRUCT__entry(
+ __field(pid_t, pid)
+ __field(pid_t, tid)
+ __field(void *, surface_id)
+ __field(unsigned int *, counters)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = pid;
+ __entry->tid = tid;
+ __entry->surface_id = surface_id;
+ __entry->counters = counters;
+ ),
+
+ TP_printk("counters were %s", __entry->counters == NULL ? "NULL" : "not NULL")
+ );
+
+/**
+ * Define a tracepoint used to gather core activity for systrace
+ * @param pid The process id for which the core activity originates from
+ * @param active If the core is active (1) or not (0)
+ * @param core_type The type of core active, either GP (1) or PP (0)
+ * @param core_id The core id that is active for the core_type
+ * @param frame_builder_id The frame builder id associated with this core activity
+ * @param flush_id The flush id associated with this core activity
+ */
+TRACE_EVENT(mali_core_active,
+
+ TP_PROTO(pid_t pid, unsigned int active, unsigned int core_type, unsigned int core_id, unsigned int frame_builder_id, unsigned int flush_id),
+
+ TP_ARGS(pid, active, core_type, core_id, frame_builder_id, flush_id),
+
+ TP_STRUCT__entry(
+ __field(pid_t, pid)
+ __field(unsigned int, active)
+ __field(unsigned int, core_type)
+ __field(unsigned int, core_id)
+ __field(unsigned int, frame_builder_id)
+ __field(unsigned int, flush_id)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = pid;
+ __entry->active = active;
+ __entry->core_type = core_type;
+ __entry->core_id = core_id;
+ __entry->frame_builder_id = frame_builder_id;
+ __entry->flush_id = flush_id;
+ ),
+
+ TP_printk("%s|%d|%s%i:%x|%d", __entry->active ? "S" : "F", __entry->pid, __entry->core_type ? "GP" : "PP", __entry->core_id, __entry->flush_id, __entry->frame_builder_id)
+ );
+
+#endif /* MALI_LINUX_TRACE_H */
+
+/* This part must exist outside the header guard. */
+#include <trace/define_trace.h>
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory.c b/drivers/gpu/arm/utgard/linux/mali_memory.c
new file mode 100644
index 000000000000..c45f6ee25887
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_executor.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_util.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_swap_alloc.h"
+#include "mali_memory_defer_bind.h"
+
+extern unsigned int mali_dedicated_mem_size;
+extern unsigned int mali_shared_mem_size;
+
+#define MALI_VM_NUM_FAULT_PREFETCH (0x8)
+
+static void mali_mem_vma_open(struct vm_area_struct *vma)
+{
+ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+ MALI_DEBUG_PRINT(4, ("Open called on vma %p\n", vma));
+
+	/* Take a reference so the allocation stays alive while the mapping is shared */
+ mali_allocation_ref(alloc);
+ return;
+}
+static void mali_mem_vma_close(struct vm_area_struct *vma)
+{
+	/* Drop the reference taken in mali_mem_vma_open */
+ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+
+ mali_allocation_unref(&alloc);
+ vma->vm_private_data = NULL;
+}
+
+static int mali_mem_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data;
+ mali_mem_backend *mem_bkend = NULL;
+ int ret;
+ int prefetch_num = MALI_VM_NUM_FAULT_PREFETCH;
+
+ unsigned long address = (unsigned long)vmf->virtual_address;
+ MALI_DEBUG_ASSERT(alloc->backend_handle);
+ MALI_DEBUG_ASSERT((unsigned long)alloc->cpu_mapping.addr <= address);
+
+ /* Get backend memory & Map on CPU */
+ mutex_lock(&mali_idr_mutex);
+ if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+ MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+ mutex_unlock(&mali_idr_mutex);
+ return VM_FAULT_SIGBUS;
+ }
+ mutex_unlock(&mali_idr_mutex);
+ MALI_DEBUG_ASSERT(mem_bkend->type == alloc->type);
+
+ if ((mem_bkend->type == MALI_MEM_COW && (MALI_MEM_BACKEND_FLAG_SWAP_COWED !=
+ (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) &&
+ (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE)) {
+		/* Use the page fault to perform the COW allocation on demand */
+		MALI_DEBUG_PRINT(4, ("mali_vma_fault: do cow allocate on demand, address=0x%lx\n", address));
+ mutex_lock(&mem_bkend->mutex);
+ ret = mali_mem_cow_allocate_on_demand(mem_bkend,
+ (address - vma->vm_start) / PAGE_SIZE);
+ mutex_unlock(&mem_bkend->mutex);
+
+ if (ret != _MALI_OSK_ERR_OK) {
+ return VM_FAULT_OOM;
+ }
+ prefetch_num = 1;
+
+		/* Handle the CPU mapping of a COW-modified range: the mapping was
+		 * zapped in cow_modify_range, so CPU accesses fault and we map the
+		 * pages back to the CPU here. */
+ mutex_lock(&mem_bkend->mutex);
+ ret = mali_mem_cow_cpu_map_pages_locked(mem_bkend, vma, address, prefetch_num);
+ mutex_unlock(&mem_bkend->mutex);
+
+ if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+ return VM_FAULT_SIGBUS;
+ }
+ } else if ((mem_bkend->type == MALI_MEM_SWAP) ||
+ (mem_bkend->type == MALI_MEM_COW && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+ u32 offset_in_bkend = (address - vma->vm_start) / PAGE_SIZE;
+ int ret = _MALI_OSK_ERR_OK;
+
+ mutex_lock(&mem_bkend->mutex);
+ if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE) {
+ ret = mali_mem_swap_cow_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+ } else {
+ ret = mali_mem_swap_allocate_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page);
+ }
+ mutex_unlock(&mem_bkend->mutex);
+
+ if (ret != _MALI_OSK_ERR_OK) {
+			MALI_DEBUG_PRINT(2, ("Mali swap memory page fault handling failed, address=0x%lx\n", address));
+ return VM_FAULT_OOM;
+ } else {
+ return VM_FAULT_LOCKED;
+ }
+ } else {
+ MALI_DEBUG_ASSERT(0);
+		/* Not supported yet */
+ }
+ return VM_FAULT_NOPAGE;
+}
+
+static struct vm_operations_struct mali_kernel_vm_ops = {
+ .open = mali_mem_vma_open,
+ .close = mali_mem_vma_close,
+ .fault = mali_mem_vma_fault,
+};
+
+
+/**
+ * Map a mali allocation to a CPU address.
+ *
+ * Supported backend types:
+ * - MALI_MEM_OS
+ * - (COW support may still need to be added here)
+ *
+ * Unsupported backend types:
+ * - _MALI_MEMORY_BIND_BACKEND_UMP
+ * - _MALI_MEMORY_BIND_BACKEND_DMA_BUF
+ * - _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+ */
+int mali_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct mali_session_data *session;
+ mali_mem_allocation *mali_alloc = NULL;
+ u32 mali_addr = vma->vm_pgoff << PAGE_SHIFT;
+ struct mali_vma_node *mali_vma_node = NULL;
+ mali_mem_backend *mem_bkend = NULL;
+ int ret = -EFAULT;
+
+ session = (struct mali_session_data *)filp->private_data;
+ if (NULL == session) {
+ MALI_PRINT_ERROR(("mmap called without any session data available\n"));
+ return -EFAULT;
+ }
+
+ MALI_DEBUG_PRINT(4, ("MMap() handler: start=0x%08X, mali_addr=0x%08X, size=0x%08X vma->flags 0x%08x\n",
+ (unsigned int)vma->vm_start, (unsigned int)(vma->vm_pgoff << PAGE_SHIFT),
+ (unsigned int)(vma->vm_end - vma->vm_start), vma->vm_flags));
+
+ /* Operations used on any memory system */
+ /* nothing needs to be done in vm open/close for now */
+
+ /* find the mali allocation structure by Mali virtual address */
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+ if (likely(mali_vma_node)) {
+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+ MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+ if (unlikely(mali_addr != mali_vma_node->vm_node.start)) {
+ /* only allow to use start address for mmap */
+ MALI_DEBUG_PRINT(1, ("mali_addr != mali_vma_node->vm_node.start\n"));
+ return -EFAULT;
+ }
+ } else {
+ MALI_DEBUG_ASSERT(NULL == mali_vma_node);
+ return -EFAULT;
+ }
+
+ mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start;
+
+ if (mali_alloc->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+ MALI_DEBUG_PRINT(1, ("ERROR : trying to access varying memory by CPU!\n"));
+ return -EFAULT;
+ }
+
+ /* Get backend memory & Map on CPU */
+ mutex_lock(&mali_idr_mutex);
+ if (!(mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle))) {
+ MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n"));
+ mutex_unlock(&mali_idr_mutex);
+ return -EFAULT;
+ }
+ mutex_unlock(&mali_idr_mutex);
+
+ if (!(MALI_MEM_SWAP == mali_alloc->type ||
+ (MALI_MEM_COW == mali_alloc->type && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+ /* Set bits which indicate that the memory is IO memory, meaning
+ * that no paging is to be performed and the memory should not be
+ * included in crash dumps, and that the memory is reserved, meaning
+ * that it is present and can never be paged out (see also the
+ * previous entry).
+ */
+ vma->vm_flags |= VM_IO;
+ vma->vm_flags |= VM_DONTCOPY;
+ vma->vm_flags |= VM_PFNMAP;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+ vma->vm_flags |= VM_RESERVED;
+#else
+ vma->vm_flags |= VM_DONTDUMP;
+ vma->vm_flags |= VM_DONTEXPAND;
+#endif
+ } else if (MALI_MEM_SWAP == mali_alloc->type) {
+ vma->vm_pgoff = mem_bkend->start_idx;
+ }
+
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ vma->vm_ops = &mali_kernel_vm_ops;
+
+ /* If the mapping is read-only, treat it as copy-on-write and map it read only */
+ if (!(vma->vm_flags & VM_WRITE)) {
+ MALI_DEBUG_PRINT(4, ("mmap allocation with read only!\n"));
+ /* Add VM_WRITE so that do_page_fault accepts a later write fault;
+ * our fault handler then resolves it with COW */
+ vma->vm_flags |= VM_WRITE | VM_READ;
+ vma->vm_page_prot = PAGE_READONLY;
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE;
+ goto out;
+ }
+
+ if (mem_bkend->type == MALI_MEM_OS) {
+ ret = mali_mem_os_cpu_map(mem_bkend, vma);
+ } else if (mem_bkend->type == MALI_MEM_COW &&
+ (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+ ret = mali_mem_cow_cpu_map(mem_bkend, vma);
+ } else if (mem_bkend->type == MALI_MEM_BLOCK) {
+ ret = mali_mem_block_cpu_map(mem_bkend, vma);
+ } else if ((mem_bkend->type == MALI_MEM_SWAP) || (mem_bkend->type == MALI_MEM_COW &&
+ (MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) {
+ /* For swappable memory, the CPU page tables are created by the page fault handler. */
+ ret = 0;
+ } else {
+ /* Not supported yet */
+ MALI_DEBUG_ASSERT(0);
+ }
+
+ if (ret != 0) {
+ MALI_DEBUG_PRINT(1, ("ret != 0\n"));
+ return -EFAULT;
+ }
+out:
+ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == mali_alloc->magic);
+
+ vma->vm_private_data = (void *)mali_alloc;
+ mali_alloc->cpu_mapping.vma = vma;
+
+ mali_allocation_ref(mali_alloc);
+
+ return 0;
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor)
+{
+ u32 size = descriptor->psize;
+ struct mali_session_data *session = descriptor->session;
+
+ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+ /* Map the allocation into this session's Mali page tables */
+
+ if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+ size += MALI_MMU_PAGE_SIZE;
+ }
+
+ return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start, size);
+}
+
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size)
+{
+ u32 old_size = descriptor->psize;
+ struct mali_session_data *session = descriptor->session;
+
+ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic);
+
+ if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+ new_size += MALI_MMU_PAGE_SIZE;
+ }
+
+ if (new_size > old_size) {
+ MALI_DEBUG_ASSERT(new_size <= descriptor->mali_vma_node.vm_node.size);
+ return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start + old_size, new_size - old_size);
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags)
+{
+ if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+ size += MALI_MMU_PAGE_SIZE;
+ }
+
+ /* Unmap and flush L2 */
+ mali_mmu_pagedir_unmap(session->page_directory, vaddr, size);
+ mali_executor_zap_all_active(session);
+}
+
+u32 _mali_ukk_report_memory_usage(void)
+{
+ u32 sum = 0;
+
+ if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+ sum += mali_mem_block_allocator_stat();
+ }
+
+ sum += mali_mem_os_stat();
+
+ return sum;
+}
+
+u32 _mali_ukk_report_total_memory_size(void)
+{
+ return mali_dedicated_mem_size + mali_shared_mem_size;
+}
+
+
+/**
+ * Per-session memory descriptor mapping table sizes
+ */
+#define MALI_MEM_DESCRIPTORS_INIT 64
+#define MALI_MEM_DESCRIPTORS_MAX 65536
+
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session_data)
+{
+ MALI_DEBUG_PRINT(5, ("Memory session begin\n"));
+
+ session_data->memory_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED,
+ _MALI_OSK_LOCK_ORDER_MEM_SESSION);
+
+ if (NULL == session_data->memory_lock) {
+ MALI_ERROR(_MALI_OSK_ERR_FAULT);
+ }
+
+ mali_memory_manager_init(&session_data->allocation_mgr);
+
+ MALI_DEBUG_PRINT(5, ("MMU session begin: success\n"));
+ MALI_SUCCESS;
+}
+
+void mali_memory_session_end(struct mali_session_data *session)
+{
+ MALI_DEBUG_PRINT(3, ("MMU session end\n"));
+
+ if (NULL == session) {
+ MALI_DEBUG_PRINT(1, ("No session data found during session end\n"));
+ return;
+ }
+ /* free allocation */
+ mali_free_session_allocations(session);
+ /* sanity checks are done in uninit */
+ mali_memory_manager_uninit(&session->allocation_mgr);
+
+ /* Free the lock */
+ _mali_osk_mutex_term(session->memory_lock);
+
+ return;
+}
+
+_mali_osk_errcode_t mali_memory_initialize(void)
+{
+ _mali_osk_errcode_t err;
+
+ idr_init(&mali_backend_idr);
+ mutex_init(&mali_idr_mutex);
+
+ err = mali_mem_swap_init();
+ if (err != _MALI_OSK_ERR_OK) {
+ return err;
+ }
+ err = mali_mem_os_init();
+ if (_MALI_OSK_ERR_OK == err) {
+ err = mali_mem_defer_bind_manager_init();
+ }
+
+ return err;
+}
+
+void mali_memory_terminate(void)
+{
+ mali_mem_swap_term();
+ mali_mem_defer_bind_manager_destory();
+ mali_mem_os_term();
+ if (mali_memory_have_dedicated_memory()) {
+ mali_mem_block_allocator_destroy();
+ }
+}
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type)
+{
+ mali_page_node *page_node = NULL;
+
+ page_node = kzalloc(sizeof(mali_page_node), GFP_KERNEL);
+ MALI_DEBUG_ASSERT(NULL != page_node);
+
+ if (page_node) {
+ page_node->type = type;
+ INIT_LIST_HEAD(&page_node->list);
+ }
+
+ return page_node;
+}
+
+void _mali_page_node_ref(struct mali_page_node *node)
+{
+ if (node->type == MALI_PAGE_NODE_OS) {
+ /* add ref to this page */
+ get_page(node->page);
+ } else if (node->type == MALI_PAGE_NODE_BLOCK) {
+ mali_mem_block_add_ref(node);
+ } else if (node->type == MALI_PAGE_NODE_SWAP) {
+ atomic_inc(&node->swap_it->ref_count);
+ } else
+ MALI_DEBUG_ASSERT(0);
+}
+
+void _mali_page_node_unref(struct mali_page_node *node)
+{
+ if (node->type == MALI_PAGE_NODE_OS) {
+ /* unref to this page */
+ put_page(node->page);
+ } else if (node->type == MALI_PAGE_NODE_BLOCK) {
+ mali_mem_block_dec_ref(node);
+ } else
+ MALI_DEBUG_ASSERT(0);
+}
+
+
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page)
+{
+ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_OS == node->type);
+ node->page = page;
+}
+
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item)
+{
+ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_SWAP == node->type);
+ node->swap_it = item;
+}
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item)
+{
+ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_BLOCK == node->type);
+ node->blk_it = item;
+}
+
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node)
+{
+ if (node->type == MALI_PAGE_NODE_OS) {
+ /* get ref count of this page */
+ return page_count(node->page);
+ } else if (node->type == MALI_PAGE_NODE_BLOCK) {
+ return mali_mem_block_get_ref_count(node);
+ } else if (node->type == MALI_PAGE_NODE_SWAP) {
+ return atomic_read(&node->swap_it->ref_count);
+ } else {
+ MALI_DEBUG_ASSERT(0);
+ }
+ return -1;
+}
+
+
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node)
+{
+ if (node->type == MALI_PAGE_NODE_OS) {
+ return page_private(node->page);
+ } else if (node->type == MALI_PAGE_NODE_BLOCK) {
+ return _mali_blk_item_get_phy_addr(node->blk_it);
+ } else if (node->type == MALI_PAGE_NODE_SWAP) {
+ return node->swap_it->dma_addr;
+ } else {
+ MALI_DEBUG_ASSERT(0);
+ }
+ return 0;
+}
+
+
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node)
+{
+ if (node->type == MALI_PAGE_NODE_OS) {
+ return page_to_pfn(node->page);
+ } else if (node->type == MALI_PAGE_NODE_BLOCK) {
+ /* get phy addr for BLOCK page*/
+ return _mali_blk_item_get_pfn(node->blk_it);
+ } else if (node->type == MALI_PAGE_NODE_SWAP) {
+ return page_to_pfn(node->swap_it->page);
+ } else {
+ MALI_DEBUG_ASSERT(0);
+ }
+ return 0;
+}
+
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory.h b/drivers/gpu/arm/utgard/linux/mali_memory.h
new file mode 100644
index 000000000000..3140c6c98d2c
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_H__
+#define __MALI_MEMORY_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+
+_mali_osk_errcode_t mali_memory_initialize(void);
+void mali_memory_terminate(void);
+
+/** @brief Allocate a page table page
+ *
+ * Allocate a page for use as a page directory or page table. The page is
+ * mapped into kernel space.
+ *
+ * @return _MALI_OSK_ERR_OK on success, otherwise an error code
+ * @param table_page GPU pointer to the allocated page
+ * @param mapping CPU pointer to the mapping of the allocated page
+ */
+MALI_STATIC_INLINE _mali_osk_errcode_t
+mali_mmu_get_table_page(mali_dma_addr *table_page, mali_io_address *mapping)
+{
+ return mali_mem_os_get_table_page(table_page, mapping);
+}
+
+/** @brief Release a page table page
+ *
+ * Release a page table page allocated through \a mali_mmu_get_table_page
+ *
+ * @param phys the GPU (physical) address of the page to release
+ * @param virt the CPU (kernel) mapping of the page to release
+ */
+MALI_STATIC_INLINE void
+mali_mmu_release_table_page(mali_dma_addr phys, void *virt)
+{
+ mali_mem_os_release_table_page(phys, virt);
+}
+
+/** @brief mmap function
+ *
+ * mmap syscalls on the Mali device node will end up here.
+ *
+ * This function allocates Mali memory and maps it on CPU and Mali.
+ */
+int mali_mmap(struct file *filp, struct vm_area_struct *vma);
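+/*
+ * Illustrative userspace usage (variable names hypothetical): the mmap
+ * offset is the Mali virtual address of an already-created allocation:
+ *
+ * void *cpu_ptr = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
+ * MAP_SHARED, mali_fd, (off_t)mali_vaddr);
+ */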
+
+/** @brief Start a new memory session
+ *
+ * Called when a process opens the Mali device node.
+ *
+ * @param session Pointer to session to initialize
+ */
+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session);
+
+/** @brief Close a memory session
+ *
+ * Called when a process closes the Mali device node.
+ *
+ * Memory allocated by the session will be freed
+ *
+ * @param session Pointer to the session to terminate
+ */
+void mali_memory_session_end(struct mali_session_data *session);
+
+/** @brief Prepare Mali page tables for mapping
+ *
+ * This function will prepare the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor for the mapping
+ */
+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor);
+
+/** @brief Resize Mali page tables for mapping
+ *
+ * This function will resize the Mali page tables for mapping the memory
+ * described by \a descriptor.
+ *
+ * Page tables will be reference counted and allocated, if not yet present.
+ *
+ * @param descriptor Pointer to the memory descriptor for the mapping
+ * @param new_size The new size of descriptor
+ */
+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size);
+
+/** @brief Free Mali page tables for mapping
+ *
+ * This function will unmap pages from Mali memory and free the page tables
+ * that are now unused.
+ *
+ * The updated pages in the Mali L2 cache will be invalidated, and the MMU TLBs will be zapped if necessary.
+ *
+ * @param session Session owning the mapping
+ * @param size Size of the mapping, in bytes
+ * @param vaddr Mali virtual start address of the mapping
+ * @param flags Allocation flags (MALI_MEM_FLAG_MALI_GUARD_PAGE is honoured)
+ */
+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags);
+
+/** @brief Parse resource and prepare the OS memory allocator
+ *
+ * @param size Maximum size to allocate for Mali GPU.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size);
+
+/** @brief Parse resource and prepare the dedicated memory allocator
+ *
+ * @param start Physical start address of dedicated Mali GPU memory.
+ * @param size Size of dedicated Mali GPU memory.
+ * @return _MALI_OSK_ERR_OK on success, otherwise failure.
+ */
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+
+
+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type);
+
+void _mali_page_node_ref(struct mali_page_node *node);
+void _mali_page_node_unref(struct mali_page_node *node);
+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page);
+
+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item);
+
+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item);
+
+int _mali_page_node_get_ref_count(struct mali_page_node *node);
+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node);
+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node);
+
+#endif /* __MALI_MEMORY_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_block_alloc.c b/drivers/gpu/arm/utgard/linux/mali_memory_block_alloc.c
new file mode 100644
index 000000000000..453ddda3080f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_block_alloc.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_memory.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_osk.h"
+#include <linux/mutex.h>
+
+
+static mali_block_allocator *mali_mem_block_gobal_allocator = NULL;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item)
+{
+ return (item->phy_addr & ~(MALI_BLOCK_REF_MASK));
+}
+
+
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item)
+{
+ return (item->phy_addr / MALI_BLOCK_SIZE);
+}
+
+
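+/* Block items are MALI_BLOCK_SIZE (page) aligned, so the low 12 bits of
+ * mali_block_item::phy_addr are free and are used to store the block's
+ * reference count (see MALI_BLOCK_REF_MASK). */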
+u32 mali_mem_block_get_ref_count(mali_page_node *node)
+{
+ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+ return (node->blk_it->phy_addr & MALI_BLOCK_REF_MASK);
+}
+
+
+/* Increase the reference count.
+* Not atomic: the caller must hold sp_lock around this call.
+*/
+
+u32 mali_mem_block_add_ref(mali_page_node *node)
+{
+ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) < MALI_BLOCK_MAX_REF_COUNT);
+ return (node->blk_it->phy_addr++ & MALI_BLOCK_REF_MASK);
+}
+
+/* Decrease the reference count.
+* Not atomic: the caller must hold sp_lock around this call.
+*/
+u32 mali_mem_block_dec_ref(mali_page_node *node)
+{
+ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK);
+ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) > 0);
+ return (node->blk_it->phy_addr-- & MALI_BLOCK_REF_MASK);
+}
+
+
+static mali_block_allocator *mali_mem_block_allocator_create(u32 base_address, u32 size)
+{
+ mali_block_allocator *info;
+ u32 usable_size;
+ u32 num_blocks;
+ mali_page_node *m_node;
+ mali_block_item *mali_blk_items = NULL;
+ int i = 0;
+
+ usable_size = size & ~(MALI_BLOCK_SIZE - 1);
+ MALI_DEBUG_PRINT(3, ("Mali block allocator create for region starting at 0x%08X length 0x%08X\n", base_address, size));
+ MALI_DEBUG_PRINT(4, ("%d usable bytes\n", usable_size));
+ num_blocks = usable_size / MALI_BLOCK_SIZE;
+ MALI_DEBUG_PRINT(4, ("which becomes %d blocks\n", num_blocks));
+
+ if (usable_size == 0) {
+ MALI_DEBUG_PRINT(1, ("Memory block of size %d is unusable\n", size));
+ return NULL;
+ }
+
+ info = _mali_osk_calloc(1, sizeof(mali_block_allocator));
+ if (NULL != info) {
+ INIT_LIST_HEAD(&info->free);
+ spin_lock_init(&info->sp_lock);
+ info->total_num = num_blocks;
+ mali_blk_items = _mali_osk_calloc(1, sizeof(mali_block_item) * num_blocks);
+
+ if (mali_blk_items) {
+ info->items = mali_blk_items;
+ /* add blocks(4k size) to free list*/
+ for (i = 0 ; i < num_blocks ; i++) {
+ /* add block information*/
+ mali_blk_items[i].phy_addr = base_address + (i * MALI_BLOCK_SIZE);
+ /* add to free list */
+ m_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+ if (m_node == NULL)
+ goto fail;
+ _mali_page_node_add_block_item(m_node, &(mali_blk_items[i]));
+ list_add_tail(&m_node->list, &info->free);
+ atomic_add(1, &info->free_num);
+ }
+ return info;
+ }
+ }
+fail:
+ /* The global allocator pointer is not set yet at this point, so
+ * mali_mem_block_allocator_destroy() cannot be used here; undo the
+ * partial construction by hand instead. */
+ if (NULL != info) {
+ struct mali_page_node *m_page, *m_tmp;
+
+ list_for_each_entry_safe(m_page, m_tmp, &info->free, list) {
+ list_del(&m_page->list);
+ kfree(m_page);
+ }
+ _mali_osk_free(info->items);
+ _mali_osk_free(info);
+ }
+ return NULL;
+}
+
+void mali_mem_block_allocator_destroy(void)
+{
+ struct mali_page_node *m_page, *m_tmp;
+ mali_block_allocator *info = mali_mem_block_gobal_allocator;
+ MALI_DEBUG_ASSERT_POINTER(info);
+ MALI_DEBUG_PRINT(4, ("Memory block destroy !\n"));
+
+ if (NULL == info)
+ return;
+
+ list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+ list_del(&m_page->list);
+ kfree(m_page);
+ }
+
+ _mali_osk_free(info->items);
+ _mali_osk_free(info);
+}
+
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend)
+{
+ mali_mem_allocation *alloc = mem_bkend->mali_allocation;
+ u32 free_pages_nr = 0;
+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+ /* Unmap the memory from the mali virtual address space. */
+ mali_mem_block_mali_unmap(alloc);
+ mutex_lock(&mem_bkend->mutex);
+ free_pages_nr = mali_mem_block_free(&mem_bkend->block_mem);
+ mutex_unlock(&mem_bkend->mutex);
+ return free_pages_nr;
+}
+
+
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size)
+{
+ struct mali_page_node *m_page, *m_tmp;
+ size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+ mali_block_allocator *info = mali_mem_block_gobal_allocator;
+ MALI_DEBUG_ASSERT_POINTER(info);
+
+ MALI_DEBUG_PRINT(4, ("BLOCK Mem: Allocate size = 0x%x\n", size));
+ /*do some init */
+ INIT_LIST_HEAD(&block_mem->pfns);
+
+ spin_lock(&info->sp_lock);
+ /* check if there are enough free pages (need at least page_count) */
+ if (atomic_read(&info->free_num) >= page_count) {
+ list_for_each_entry_safe(m_page, m_tmp , &info->free, list) {
+ if (page_count > 0) {
+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(m_page) == 0);
+ list_move(&m_page->list, &block_mem->pfns);
+ block_mem->count++;
+ atomic_dec(&info->free_num);
+ _mali_page_node_ref(m_page);
+ } else {
+ break;
+ }
+ page_count--;
+ }
+ } else {
+ /* can't allocate from BLOCK memory*/
+ spin_unlock(&info->sp_lock);
+ return -1;
+ }
+
+ spin_unlock(&info->sp_lock);
+ return 0;
+}
+
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem)
+{
+ u32 free_pages_nr = 0;
+
+ free_pages_nr = mali_mem_block_free_list(&block_mem->pfns);
+ MALI_DEBUG_PRINT(4, ("BLOCK Mem free : allocated size = 0x%x, free size = 0x%x\n", block_mem->count * _MALI_OSK_MALI_PAGE_SIZE,
+ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+ block_mem->count = 0;
+ MALI_DEBUG_ASSERT(list_empty(&block_mem->pfns));
+
+ return free_pages_nr;
+}
+
+
+u32 mali_mem_block_free_list(struct list_head *list)
+{
+ struct mali_page_node *m_page, *m_tmp;
+ mali_block_allocator *info = mali_mem_block_gobal_allocator;
+ u32 free_pages_nr = 0;
+
+ if (info) {
+ spin_lock(&info->sp_lock);
+ list_for_each_entry_safe(m_page, m_tmp , list, list) {
+ if (1 == _mali_page_node_get_ref_count(m_page)) {
+ free_pages_nr++;
+ }
+ mali_mem_block_free_node(m_page);
+ }
+ spin_unlock(&info->sp_lock);
+ }
+ return free_pages_nr;
+}
+
+/* Free the node: on the last reference the block goes back to the free list;
+ * otherwise the reference is dropped and the node struct is freed. */
+void mali_mem_block_free_node(struct mali_page_node *node)
+{
+ mali_block_allocator *info = mali_mem_block_gobal_allocator;
+
+ /* only handle BLOCK node */
+ if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+ /*Need to make this atomic?*/
+ if (1 == _mali_page_node_get_ref_count(node)) {
+ /*Move to free list*/
+ _mali_page_node_unref(node);
+ list_move_tail(&node->list, &info->free);
+ atomic_add(1, &info->free_num);
+ } else {
+ _mali_page_node_unref(node);
+ list_del(&node->list);
+ kfree(node);
+ }
+ }
+}
+
+/* Unref the node without freeing it */
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node)
+{
+ mali_block_allocator *info = mali_mem_block_gobal_allocator;
+ mali_page_node *new_node;
+
+ /* only handle BLOCK node */
+ if (node->type == MALI_PAGE_NODE_BLOCK && info) {
+ /*Need to make this atomic?*/
+ if (1 == _mali_page_node_get_ref_count(node)) {
+ /* allocate a new node, add it to the free list, and keep the old node */
+ _mali_page_node_unref(node);
+ new_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK);
+ if (new_node) {
+ memcpy(new_node, node, sizeof(mali_page_node));
+ list_add(&new_node->list, &info->free);
+ atomic_add(1, &info->free_num);
+ } else
+ return _MALI_OSK_ERR_FAULT;
+
+ } else {
+ _mali_page_node_unref(node);
+ }
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+ struct mali_page_directory *pagedir = session->page_directory;
+ struct mali_page_node *m_page;
+ dma_addr_t phys;
+ u32 virt = vaddr;
+ u32 prop = props;
+
+ list_for_each_entry(m_page, &block_mem->pfns, list) {
+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+ phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+ /* Verify that the "physical" address is 32-bit and
+ * usable for Mali, when on a system with bus addresses
+ * wider than 32-bit. */
+ MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+ virt += MALI_MMU_PAGE_SIZE;
+ }
+
+ return 0;
+}
+
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc)
+{
+ struct mali_session_data *session;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ session = alloc->session;
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ mali_session_memory_lock(session);
+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+ alloc->flags);
+ mali_session_memory_unlock(session);
+}
+
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+ int ret;
+ mali_mem_block_mem *block_mem = &mem_bkend->block_mem;
+ unsigned long addr = vma->vm_start;
+ struct mali_page_node *m_page;
+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK);
+
+ list_for_each_entry(m_page, &block_mem->pfns, list) {
+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK);
+ ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+ if (unlikely(0 != ret)) {
+ return -EFAULT;
+ }
+ addr += _MALI_OSK_MALI_PAGE_SIZE;
+
+ }
+
+ return 0;
+}
+
+
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size)
+{
+ mali_block_allocator *allocator;
+
+ /* Do the low level linux operation first */
+
+ /* Request ownership of the memory */
+ if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(start, size, "Dedicated Mali GPU memory")) {
+ MALI_DEBUG_PRINT(1, ("Failed to request memory region for frame buffer (0x%08X - 0x%08X)\n", start, start + size - 1));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Create generic block allocator object to handle it */
+ allocator = mali_mem_block_allocator_create(start, size);
+
+ if (NULL == allocator) {
+ MALI_DEBUG_PRINT(1, ("Memory bank registration failed\n"));
+ _mali_osk_mem_unreqregion(start, size);
+ MALI_ERROR(_MALI_OSK_ERR_FAULT);
+ }
+
+ mali_mem_block_gobal_allocator = (mali_block_allocator *)allocator;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+mali_bool mali_memory_have_dedicated_memory(void)
+{
+ return mali_mem_block_gobal_allocator ? MALI_TRUE : MALI_FALSE;
+}
+
+u32 mali_mem_block_allocator_stat(void)
+{
+ mali_block_allocator *allocator = mali_mem_block_gobal_allocator;
+ MALI_DEBUG_ASSERT_POINTER(allocator);
+
+ return (allocator->total_num - atomic_read(&allocator->free_num)) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_block_alloc.h b/drivers/gpu/arm/utgard/linux/mali_memory_block_alloc.h
new file mode 100644
index 000000000000..129434de67df
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_block_alloc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2010, 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_BLOCK_ALLOCATOR_H__
+#define __MALI_BLOCK_ALLOCATOR_H__
+
+#include "mali_session.h"
+#include "mali_memory.h"
+#include <linux/spinlock.h>
+
+#include "mali_memory_types.h"
+
+#define MALI_BLOCK_SIZE (PAGE_SIZE) /* 4 kB, manage BLOCK memory as page size */
+#define MALI_BLOCK_REF_MASK (0xFFF)
+#define MALI_BLOCK_MAX_REF_COUNT (0xFFF)
+
+
+
+typedef struct mali_block_allocator {
+ /*
+ * In free list, each node's ref_count is 0,
+ * ref_count added when allocated or referenced in COW
+ */
+ mali_block_item *items; /* information for each block item*/
+ struct list_head free; /* free list of mali_page_node */
+ spinlock_t sp_lock; /* lock for reference count & free list operations */
+ u32 total_num; /* Number of total pages*/
+ atomic_t free_num; /*number of free pages*/
+} mali_block_allocator;
+
+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item);
+unsigned long _mali_blk_item_get_pfn(mali_block_item *item);
+u32 mali_mem_block_get_ref_count(mali_page_node *node);
+u32 mali_mem_block_add_ref(mali_page_node *node);
+u32 mali_mem_block_dec_ref(mali_page_node *node);
+u32 mali_mem_block_release(mali_mem_backend *mem_bkend);
+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size);
+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size);
+mali_bool mali_memory_have_dedicated_memory(void);
+u32 mali_mem_block_free(mali_mem_block_mem *block_mem);
+u32 mali_mem_block_free_list(struct list_head *list);
+void mali_mem_block_free_node(struct mali_page_node *node);
+void mali_mem_block_allocator_destroy(void);
+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node);
+u32 mali_mem_block_allocator_stat(void);
+
+#endif /* __MALI_BLOCK_ALLOCATOR_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_cow.c b/drivers/gpu/arm/utgard/linux/mali_memory_cow.c
new file mode 100644
index 000000000000..bcd0f8713771
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_cow.c
@@ -0,0 +1,754 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+/**
+* Allocate a single page for the COW backend, from OS memory (which also
+* flushes the CPU cache for the new page).
+*/
+static struct page *mali_mem_cow_alloc_page(void)
+{
+ mali_mem_os_mem os_mem;
+ struct mali_page_node *node;
+ struct page *new_page;
+
+ int ret = 0;
+ /* allocate pages from os mem */
+ ret = mali_mem_os_alloc_pages(&os_mem, _MALI_OSK_MALI_PAGE_SIZE);
+
+ if (ret) {
+ return NULL;
+ }
+
+ MALI_DEBUG_ASSERT(1 == os_mem.count);
+
+ node = _MALI_OSK_CONTAINER_OF(os_mem.pages.next, struct mali_page_node, list);
+ new_page = node->page;
+ node->page = NULL;
+ list_del(&node->list);
+ kfree(node);
+
+ return new_page;
+}
+
+
+static struct list_head *_mali_memory_cow_get_node_list(mali_mem_backend *target_bk,
+ u32 target_offset,
+ u32 target_size)
+{
+ MALI_DEBUG_ASSERT(MALI_MEM_OS == target_bk->type || MALI_MEM_COW == target_bk->type ||
+ MALI_MEM_BLOCK == target_bk->type || MALI_MEM_SWAP == target_bk->type);
+
+ if (MALI_MEM_OS == target_bk->type) {
+ MALI_DEBUG_ASSERT(&target_bk->os_mem);
+ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->os_mem.count);
+ return &target_bk->os_mem.pages;
+ } else if (MALI_MEM_COW == target_bk->type) {
+ MALI_DEBUG_ASSERT(&target_bk->cow_mem);
+ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->cow_mem.count);
+ return &target_bk->cow_mem.pages;
+ } else if (MALI_MEM_BLOCK == target_bk->type) {
+ MALI_DEBUG_ASSERT(&target_bk->block_mem);
+ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->block_mem.count);
+ return &target_bk->block_mem.pfns;
+ } else if (MALI_MEM_SWAP == target_bk->type) {
+ MALI_DEBUG_ASSERT(&target_bk->swap_mem);
+ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->swap_mem.count);
+ return &target_bk->swap_mem.pages;
+ }
+
+ return NULL;
+}
+
+/**
+* Do COW for OS memory (memory from the block/bank allocator is supported too).
+* range_start/range_size may be zero, in which case cow_modify_range is
+* expected to be called later.
+* This function allocates new pages from OS memory for the COW backend for a
+* modified range; pages outside the modified range are kept and an extra
+* reference is taken on them.
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range
+*/
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+ u32 target_offset,
+ u32 target_size,
+ mali_mem_backend *backend,
+ u32 range_start,
+ u32 range_size)
+{
+ mali_mem_cow *cow = &backend->cow_mem;
+ struct mali_page_node *m_page, *m_tmp, *page_node;
+ int target_page = 0;
+ struct page *new_page;
+ struct list_head *pages = NULL;
+
+ pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+
+ if (NULL == pages) {
+ MALI_DEBUG_ASSERT(0);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ MALI_DEBUG_ASSERT(0 == cow->count);
+
+ INIT_LIST_HEAD(&cow->pages);
+ mutex_lock(&target_bk->mutex);
+ list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+ /* add page from (target_offset,target_offset+size) to cow backend */
+ if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+ (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+ /* allocate a new page node; always use OS memory for COW */
+ page_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+
+ if (NULL == page_node) {
+ mutex_unlock(&target_bk->mutex);
+ goto error;
+ }
+
+ INIT_LIST_HEAD(&page_node->list);
+
+ /* check if in the modified range*/
+ if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+ (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+ /* inside the modified range: allocate a fresh page
+ * (to keep things simple, all COW pages come from OS memory) */
+ new_page = mali_mem_cow_alloc_page();
+
+ if (NULL == new_page) {
+ kfree(page_node);
+ mutex_unlock(&target_bk->mutex);
+ goto error;
+ }
+
+ _mali_page_node_add_page(page_node, new_page);
+ } else {
+ /*Add Block memory case*/
+ if (m_page->type != MALI_PAGE_NODE_BLOCK) {
+ _mali_page_node_add_page(page_node, m_page->page);
+ } else {
+ page_node->type = MALI_PAGE_NODE_BLOCK;
+ _mali_page_node_add_block_item(page_node, m_page->blk_it);
+ }
+
+ /* add ref to this page */
+ _mali_page_node_ref(m_page);
+ }
+
+ /* add it to COW backend page list */
+ list_add_tail(&page_node->list, &cow->pages);
+ cow->count++;
+ }
+ target_page++;
+ }
+ mutex_unlock(&target_bk->mutex);
+ return _MALI_OSK_ERR_OK;
+error:
+ mali_mem_cow_release(backend, MALI_FALSE);
+ return _MALI_OSK_ERR_FAULT;
+}
+
+_mali_osk_errcode_t mali_memory_cow_swap_memory(mali_mem_backend *target_bk,
+ u32 target_offset,
+ u32 target_size,
+ mali_mem_backend *backend,
+ u32 range_start,
+ u32 range_size)
+{
+ mali_mem_cow *cow = &backend->cow_mem;
+ struct mali_page_node *m_page, *m_tmp, *page_node;
+ int target_page = 0;
+ struct mali_swap_item *swap_item;
+ struct list_head *pages = NULL;
+
+ pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size);
+ if (NULL == pages) {
+ MALI_DEBUG_ASSERT(0);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ MALI_DEBUG_ASSERT(0 == cow->count);
+
+ INIT_LIST_HEAD(&cow->pages);
+ mutex_lock(&target_bk->mutex);
+
+ backend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+ list_for_each_entry_safe(m_page, m_tmp, pages, list) {
+ /* add page from (target_offset,target_offset+size) to cow backend */
+ if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) &&
+ (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) {
+
+ /* allocate a new page node; swap-cowed COW backends use swap memory */
+ page_node = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+ if (NULL == page_node) {
+ mutex_unlock(&target_bk->mutex);
+ goto error;
+ }
+
+ /* check if in the modified range*/
+ if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+ (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+ /* inside the modified range: allocate a fresh swap item */
+ swap_item = mali_mem_swap_alloc_swap_item();
+
+ if (NULL == swap_item) {
+ kfree(page_node);
+ mutex_unlock(&target_bk->mutex);
+ goto error;
+ }
+
+ swap_item->idx = mali_mem_swap_idx_alloc();
+
+ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) {
+ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW.\n"));
+ kfree(page_node);
+ kfree(swap_item);
+ mutex_unlock(&target_bk->mutex);
+ goto error;
+ }
+
+ _mali_page_node_add_swap_item(page_node, swap_item);
+ } else {
+ _mali_page_node_add_swap_item(page_node, m_page->swap_it);
+
+ /* add ref to this page */
+ _mali_page_node_ref(m_page);
+ }
+
+ list_add_tail(&page_node->list, &cow->pages);
+ cow->count++;
+ }
+ target_page++;
+ }
+ mutex_unlock(&target_bk->mutex);
+
+ return _MALI_OSK_ERR_OK;
+error:
+ mali_mem_swap_release(backend, MALI_FALSE);
+ return _MALI_OSK_ERR_FAULT;
+
+}
+
+
+_mali_osk_errcode_t _mali_mem_put_page_node(mali_page_node *node)
+{
+ if (node->type == MALI_PAGE_NODE_OS) {
+ return mali_mem_os_put_page(node->page);
+ } else if (node->type == MALI_PAGE_NODE_BLOCK) {
+ return mali_mem_block_unref_node(node);
+ } else if (node->type == MALI_PAGE_NODE_SWAP) {
+ return _mali_mem_swap_put_page_node(node);
+ } else
+ MALI_DEBUG_ASSERT(0);
+ return _MALI_OSK_ERR_FAULT;
+}
+
+
+/**
+* Modify a range of an existing COW backend
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+ u32 range_start,
+ u32 range_size)
+{
+ mali_mem_allocation *alloc = NULL;
+ mali_mem_cow *cow = &backend->cow_mem;
+ struct mali_page_node *m_page, *m_tmp;
+ LIST_HEAD(pages);
+ struct page *new_page;
+ u32 count = 0;
+ s32 change_pages_nr = 0;
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+
+ if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+ if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+ alloc = backend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type);
+ MALI_DEBUG_ASSERT(((range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE) <= cow->count);
+
+ mutex_lock(&backend->mutex);
+
+ /* free pages*/
+ list_for_each_entry_safe(m_page, m_tmp, &cow->pages, list) {
+
+ /* check if in the modified range*/
+ if ((count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) &&
+ (count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) {
+ if (MALI_PAGE_NODE_SWAP != m_page->type) {
+ new_page = mali_mem_cow_alloc_page();
+
+ if (NULL == new_page) {
+ goto error;
+ }
+ if (1 != _mali_page_node_get_ref_count(m_page))
+ change_pages_nr++;
+ /* unref old page*/
+ if (_mali_mem_put_page_node(m_page)) {
+ __free_page(new_page);
+ goto error;
+ }
+ /* add new page*/
+ /* always use OS for COW*/
+ m_page->type = MALI_PAGE_NODE_OS;
+ _mali_page_node_add_page(m_page, new_page);
+ } else {
+ struct mali_swap_item *swap_item;
+
+ swap_item = mali_mem_swap_alloc_swap_item();
+
+ if (NULL == swap_item) {
+ goto error;
+ }
+
+ swap_item->idx = mali_mem_swap_idx_alloc();
+
+ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) {
+ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW modify range.\n"));
+ kfree(swap_item);
+ goto error;
+ }
+
+ if (1 != _mali_page_node_get_ref_count(m_page)) {
+ change_pages_nr++;
+ }
+
+ if (_mali_mem_put_page_node(m_page)) {
+ mali_mem_swap_free_swap_item(swap_item);
+ goto error;
+ }
+
+ _mali_page_node_add_swap_item(m_page, swap_item);
+ }
+ }
+ count++;
+ }
+ cow->change_pages_nr = change_pages_nr;
+
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == alloc->type);
+
+ /* Zap the CPU mapping of the modified range, and redo the CPU mapping here if needed */
+ if (NULL != alloc->cpu_mapping.vma) {
+ MALI_DEBUG_ASSERT(0 != alloc->backend_handle);
+ MALI_DEBUG_ASSERT(NULL != alloc->cpu_mapping.vma);
+ MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma->vm_end - alloc->cpu_mapping.vma->vm_start >= range_size);
+
+ if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+ zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+
+ ret = mali_mem_cow_cpu_map_pages_locked(backend, alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size / _MALI_OSK_MALI_PAGE_SIZE);
+
+ if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+ MALI_DEBUG_PRINT(2, ("mali_memory_cow_modify_range: cpu mapping failed !\n"));
+ ret = _MALI_OSK_ERR_FAULT;
+ }
+ } else {
+ /* set temporarily so the zap below triggers page faults for swappable cowed memory. */
+ alloc->cpu_mapping.vma->vm_flags |= VM_PFNMAP;
+ alloc->cpu_mapping.vma->vm_flags |= VM_MIXEDMAP;
+
+ zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size);
+ /* clear the flags again so the swappable memory is unmapped in terms of struct page rather than raw page frames. */
+ alloc->cpu_mapping.vma->vm_flags &= ~VM_PFNMAP;
+ alloc->cpu_mapping.vma->vm_flags &= ~VM_MIXEDMAP;
+ }
+ }
+
+error:
+ mutex_unlock(&backend->mutex);
+ return ret;
+
+}
+
+
+/**
+* Allocate pages for COW backend
+* @target_bk - target allocation's backend(the allocation need to do COW)
+* @target_offset - the offset in target allocation to do COW(for support COW a memory allocated from memory_bank, 4K align)
+* @target_size - size of target allocation to do COW (for support memory bank)(in byte)
+* @backend -COW backend
+* @range_start - offset of modified range (4K align)
+* @range_size - size of modified range(in byte)
+*/
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+ u32 target_offset,
+ u32 target_size,
+ mali_mem_backend *backend,
+ u32 range_start,
+ u32 range_size)
+{
+ struct mali_session_data *session = backend->mali_allocation->session;
+
+ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+ /* size & offset must be a multiple of the system page size */
+ if (target_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+ if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+ if (target_offset % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+ if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+ /* check backend type */
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type);
+
+ switch (target_bk->type) {
+ case MALI_MEM_OS:
+ case MALI_MEM_BLOCK:
+ return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+ break;
+ case MALI_MEM_COW:
+ if (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+ return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+ } else {
+ return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+ }
+ break;
+ case MALI_MEM_SWAP:
+ return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size);
+ break;
+ case MALI_MEM_EXTERNAL:
+ /*NOT support yet*/
+ MALI_DEBUG_ASSERT(0);
+ break;
+ case MALI_MEM_DMA_BUF:
+ /*NOT support yet*/
+ MALI_DEBUG_ASSERT(0);
+ break;
+ case MALI_MEM_UMP:
+ /*NOT support yet*/
+ MALI_DEBUG_ASSERT(0);
+ break;
+ default:
+ /*Not support yet*/
+ MALI_DEBUG_ASSERT(0);
+ break;
+ }
+ return _MALI_OSK_ERR_OK;
+}
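+/*
+ * Illustrative call (variable names hypothetical): COW an entire OS-memory
+ * backend while immediately privatizing its first page:
+ *
+ * mali_memory_do_cow(src_bkend, 0, src_bkend->size,
+ * cow_bkend, 0, _MALI_OSK_MALI_PAGE_SIZE);
+ */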
+
+
+/**
+* Map COW backend memory to Mali.
+* Supports OS and BLOCK mali_page_node types.
+*/
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size)
+{
+ mali_mem_allocation *cow_alloc;
+ struct mali_page_node *m_page;
+ struct mali_session_data *session;
+ struct mali_page_directory *pagedir;
+ u32 virt, start;
+
+ cow_alloc = mem_bkend->mali_allocation;
+ virt = cow_alloc->mali_vma_node.vm_node.start;
+ start = virt;
+
+ MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+ MALI_DEBUG_ASSERT_POINTER(cow_alloc);
+
+ session = cow_alloc->session;
+ pagedir = session->page_directory;
+ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+ list_for_each_entry(m_page, &mem_bkend->cow_mem.pages, list) {
+ if ((virt - start >= range_start) && (virt - start < range_start + range_size)) {
+ dma_addr_t phys = _mali_page_node_get_dma_addr(m_page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+ MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys,
+ MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+ }
+ virt += MALI_MMU_PAGE_SIZE;
+ }
+ return 0;
+}
+
+/**
+* Map COW backend to cpu
+* support OS/BLOCK memory
+*/
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+ mali_mem_cow *cow = &mem_bkend->cow_mem;
+ struct mali_page_node *m_page;
+ int ret;
+ unsigned long addr = vma->vm_start;
+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+
+ list_for_each_entry(m_page, &cow->pages, list) {
+ /* We should use vm_insert_page, but it does a dcache
+ * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+ ret = vm_insert_page(vma, addr, page);
+ */
+ ret = vm_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page));
+
+ if (unlikely(0 != ret)) {
+ return ret;
+ }
+ addr += _MALI_OSK_MALI_PAGE_SIZE;
+ }
+
+ return 0;
+}
+
+/**
+* Map some pages(COW backend) to CPU vma@vaddr
+*@ mem_bkend - COW backend
+*@ vma
+*@ vaddr -start CPU vaddr mapped to
+*@ num - max number of pages to map to CPU vaddr
+*/
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+ struct vm_area_struct *vma,
+ unsigned long vaddr,
+ int num)
+{
+ mali_mem_cow *cow = &mem_bkend->cow_mem;
+ struct mali_page_node *m_page;
+ int ret;
+ int offset;
+ int count = 0;
+ unsigned long vstart = vma->vm_start;
+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW);
+ MALI_DEBUG_ASSERT(0 == vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+ MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+ offset = (vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+
+ list_for_each_entry(m_page, &cow->pages, list) {
+ if ((count >= offset) && (count < offset + num)) {
+ ret = vm_insert_pfn(vma, vaddr, _mali_page_node_get_pfn(m_page));
+
+ if (unlikely(0 != ret)) {
+ if (count == offset) {
+ return _MALI_OSK_ERR_FAULT;
+ } else {
+ /* vm_insert_pfn returns -EBUSY when the PTE is already present
+ * (the page was not part of the zapped range); that is fine here */
+ return _MALI_OSK_ERR_OK;
+ }
+ }
+ vaddr += _MALI_OSK_MALI_PAGE_SIZE;
+ }
+ count++;
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+/**
+* Release COW backend memory.
+* Pages are freed directly (put_page / unref), not returned to a pool.
+*/
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+ mali_mem_allocation *alloc;
+ u32 free_pages_nr = 0;
+ MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+ alloc = mem_bkend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+
+ if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)) {
+ /* Unmap the memory from the mali virtual address space. */
+ if (MALI_TRUE == is_mali_mapped)
+ mali_mem_os_mali_unmap(alloc);
+ /* free cow backend list*/
+ free_pages_nr = mali_mem_os_free(&mem_bkend->cow_mem.pages, mem_bkend->cow_mem.count, MALI_TRUE);
+ free_pages_nr += mali_mem_block_free_list(&mem_bkend->cow_mem.pages);
+
+ MALI_DEBUG_ASSERT(list_empty(&mem_bkend->cow_mem.pages));
+ } else {
+ free_pages_nr = mali_mem_swap_release(mem_bkend, is_mali_mapped);
+ }
+
+
+ MALI_DEBUG_PRINT(4, ("COW Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->cow_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+ mem_bkend->cow_mem.count = 0;
+ return free_pages_nr;
+}
+
+
+/* The destination node can be an OS node or a swap node. */
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node)
+{
+ void *dst, *src;
+ struct page *dst_page;
+ dma_addr_t dma_addr;
+
+ MALI_DEBUG_ASSERT(src_node != NULL);
+ MALI_DEBUG_ASSERT(dst_node != NULL);
+ MALI_DEBUG_ASSERT(dst_node->type == MALI_PAGE_NODE_OS
+ || dst_node->type == MALI_PAGE_NODE_SWAP);
+
+ if (dst_node->type == MALI_PAGE_NODE_OS) {
+ dst_page = dst_node->page;
+ } else {
+ dst_page = dst_node->swap_it->page;
+ }
+
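+ /* Unmapping hands ownership of the destination page back to the CPU,
+ * so the memcpy below is coherent with any prior device DMA. */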
+ dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(dst_node),
+ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ /* map it and copy the content */
+ dst = kmap_atomic(dst_page);
+
+ if (src_node->type == MALI_PAGE_NODE_OS ||
+ src_node->type == MALI_PAGE_NODE_SWAP) {
+ struct page *src_page;
+
+ if (src_node->type == MALI_PAGE_NODE_OS) {
+ src_page = src_node->page;
+ } else {
+ src_page = src_node->swap_it->page;
+ }
+
+ /* Clean and invalidate the cache */
+ /* In ARM architecture, speculative read may pull stale data into L1 cache
+ * for kernel linear mapping page table. DMA_BIDIRECTIONAL could
+ * invalidate the L1 cache so that following read get the latest data
+ */
+ dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(src_node),
+ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ src = kmap_atomic(src_page);
+ memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE);
+ kunmap_atomic(src);
+ dma_addr = dma_map_page(&mali_platform_device->dev, src_page,
+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ if (src_node->type == MALI_PAGE_NODE_SWAP) {
+ src_node->swap_it->dma_addr = dma_addr;
+ }
+ } else if (src_node->type == MALI_PAGE_NODE_BLOCK) {
+ /*
+ * use ioremap to map src for BLOCK memory
+ */
+ src = ioremap_nocache(_mali_page_node_get_dma_addr(src_node), _MALI_OSK_MALI_PAGE_SIZE);
+ memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE);
+ iounmap(src);
+ }
+ kunmap_atomic(dst);
+ dma_addr = dma_map_page(&mali_platform_device->dev, dst_page,
+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+ if (dst_node->type == MALI_PAGE_NODE_SWAP) {
+ dst_node->swap_it->dma_addr = dma_addr;
+ }
+}
+
+
+/*
+* Allocate a page on demand when the CPU accesses it.
+* This is used in the page fault handler.
+*/
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page)
+{
+ struct page *new_page = NULL;
+ struct mali_page_node *new_node = NULL;
+ int i = 0;
+ struct mali_page_node *m_page, *found_node = NULL;
+ struct mali_session_data *session = NULL;
+ mali_mem_cow *cow = &mem_bkend->cow_mem;
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+ MALI_DEBUG_ASSERT(offset_page < mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE);
+ MALI_DEBUG_PRINT(4, ("mali_mem_cow_allocate_on_demand !, offset_page =0x%x\n", offset_page));
+
+ /* allocate new page here */
+ new_page = mali_mem_cow_alloc_page();
+ if (!new_page)
+ return _MALI_OSK_ERR_NOMEM;
+
+ new_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+ if (!new_node) {
+ __free_page(new_page);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ /* find the page in backend*/
+ list_for_each_entry(m_page, &cow->pages, list) {
+ if (i == offset_page) {
+ found_node = m_page;
+ break;
+ }
+ i++;
+ }
+ MALI_DEBUG_ASSERT(found_node);
+ if (NULL == found_node) {
+ __free_page(new_page);
+ kfree(new_node);
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ }
+
+ _mali_page_node_add_page(new_node, new_page);
+
+ /* Copy the src page's content to new page */
+ _mali_mem_cow_copy_page(found_node, new_node);
+
+ MALI_DEBUG_ASSERT_POINTER(mem_bkend->mali_allocation);
+ session = mem_bkend->mali_allocation->session;
+ MALI_DEBUG_ASSERT_POINTER(session);
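+ /* If the old page was shared (ref count > 1), the COW copy consumes a
+ * new physical page, so charge it to the session's usage statistics. */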
+ if (1 != _mali_page_node_get_ref_count(found_node)) {
+ atomic_add(1, &session->mali_mem_allocated_pages);
+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+ }
+ mem_bkend->cow_mem.change_pages_nr++;
+ }
+ if (_mali_mem_put_page_node(found_node)) {
+ __free_page(new_page);
+ kfree(new_node);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ list_replace(&found_node->list, &new_node->list);
+
+ kfree(found_node);
+
+ /* map to GPU side*/
+ _mali_osk_mutex_wait(session->memory_lock);
+ mali_mem_cow_mali_map(mem_bkend, offset_page * _MALI_OSK_MALI_PAGE_SIZE, _MALI_OSK_MALI_PAGE_SIZE);
+ _mali_osk_mutex_signal(session->memory_lock);
+ return _MALI_OSK_ERR_OK;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_cow.h b/drivers/gpu/arm/utgard/linux/mali_memory_cow.h
new file mode 100644
index 000000000000..c16ec7633c0a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_cow.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_COW_H__
+#define __MALI_MEMORY_COW_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include "mali_memory_types.h"
+
+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend,
+ struct vm_area_struct *vma,
+ unsigned long vaddr,
+ int num);
+
+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk,
+ u32 target_offset,
+ u32 target_size,
+ mali_mem_backend *backend,
+ u32 range_start,
+ u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend,
+ u32 range_start,
+ u32 range_size);
+
+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk,
+ u32 target_offset,
+ u32 target_size,
+ mali_mem_backend *backend,
+ u32 range_start,
+ u32 range_size);
+
+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node);
+
+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size);
+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page);
+#endif
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_defer_bind.c b/drivers/gpu/arm/utgard/linux/mali_memory_defer_bind.c
new file mode 100644
index 000000000000..e51902f1bf65
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_defer_bind.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#ifdef CONFIG_ARM
+#include <asm/outercache.h>
+#endif
+#include <asm/dma-mapping.h>
+
+#include "mali_memory.h"
+#include "mali_kernel_common.h"
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory_defer_bind.h"
+#include "mali_executor.h"
+#include "mali_osk.h"
+#include "mali_scheduler.h"
+#include "mali_gp_job.h"
+
+mali_defer_bind_manager *mali_dmem_man = NULL;
+
+static u32 mali_dmem_get_gp_varying_size(struct mali_gp_job *gp_job)
+{
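+ /* varying_memsize is in bytes; convert it to a number of pages */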
+ return gp_job->uargs.varying_memsize / _MALI_OSK_MALI_PAGE_SIZE;
+}
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void)
+{
+ mali_dmem_man = _mali_osk_calloc(1, sizeof(struct mali_defer_bind_manager));
+ if (!mali_dmem_man)
+ return _MALI_OSK_ERR_NOMEM;
+
+ atomic_set(&mali_dmem_man->num_used_pages, 0);
+ atomic_set(&mali_dmem_man->num_dmem, 0);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_defer_bind_manager_destory(void)
+{
+ if (mali_dmem_man) {
+ MALI_DEBUG_ASSERT(0 == atomic_read(&mali_dmem_man->num_dmem));
+ kfree(mali_dmem_man);
+ }
+ mali_dmem_man = NULL;
+}
+
+
+/* Allocate pages from OS memory */
+_mali_osk_errcode_t mali_mem_defer_alloc_mem(u32 require, struct mali_session_data *session, mali_defer_mem_block *dblock)
+{
+ int retval = 0;
+ u32 num_pages = require;
+ mali_mem_os_mem os_mem;
+
+ retval = mali_mem_os_alloc_pages(&os_mem, num_pages * _MALI_OSK_MALI_PAGE_SIZE);
+
+ /* add to free pages list */
+ if (0 == retval) {
+		MALI_DEBUG_PRINT(4, ("mali_mem_defer_alloc_mem: pages allocated = 0x%x\n", num_pages));
+ list_splice(&os_mem.pages, &dblock->free_pages);
+ atomic_add(os_mem.count, &dblock->num_free_pages);
+ atomic_add(os_mem.count, &session->mali_mem_allocated_pages);
+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+ }
+ return _MALI_OSK_ERR_OK;
+ } else
+ return _MALI_OSK_ERR_FAULT;
+}
+
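+/* Pre-allocate the varying memory a GP job will need, so that the later
+ * defer-bind step in the job-start path only has to splice pages out of
+ * dblock's free list instead of hitting the OS allocator. */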
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock)
+{
+ u32 require_page;
+
+ if (!next_gp_job)
+ return _MALI_OSK_ERR_FAULT;
+
+ require_page = mali_dmem_get_gp_varying_size(next_gp_job);
+
+	MALI_DEBUG_PRINT(4, ("mali_mem_prepare_mem_for_job: require alloc page 0x%x\n",
+			     require_page));
+ /* allocate more pages from OS */
+ if (_MALI_OSK_ERR_OK != mali_mem_defer_alloc_mem(require_page, next_gp_job->session, dblock)) {
+		MALI_DEBUG_PRINT(1, ("ERROR: mali_mem_prepare_mem_for_job: page allocation failed!\n"));
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ next_gp_job->bind_flag = MALI_DEFER_BIND_MEMORY_PREPARED;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+
+/* Do preparation for the allocation before defer bind */
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list)
+{
+ mali_mem_backend *mem_bkend = NULL;
+ struct mali_backend_bind_list *bk_list = _mali_osk_calloc(1, sizeof(struct mali_backend_bind_list));
+ if (NULL == bk_list)
+ return _MALI_OSK_ERR_FAULT;
+
+ INIT_LIST_HEAD(&bk_list->node);
+ /* Get backend memory */
+ mutex_lock(&mali_idr_mutex);
+ if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
+ MALI_DEBUG_PRINT(1, ("Can't find memory backend in defer bind!\n"));
+ mutex_unlock(&mali_idr_mutex);
+ kfree(bk_list);
+ return _MALI_OSK_ERR_FAULT;
+ }
+ mutex_unlock(&mali_idr_mutex);
+	MALI_DEBUG_PRINT(4, ("mali_mem_defer_bind_allocation_prepare: allocation = %x, vaddr = 0x%x\n", alloc, alloc->mali_vma_node.vm_node.start));
+
+ INIT_LIST_HEAD(&mem_bkend->os_mem.pages);
+
+ bk_list->bkend = mem_bkend;
+ bk_list->vaddr = alloc->mali_vma_node.vm_node.start;
+ bk_list->session = alloc->session;
+ bk_list->page_num = mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE;
+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+
+	/* Add to the job's to-do list */
+ list_add(&bk_list->node, list);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+
+
+/* Bind physical memory to an allocation.
+ * This function will be called in the IRQ handler. */
+static _mali_osk_errcode_t mali_mem_defer_bind_allocation(struct mali_backend_bind_list *bk_node,
+ struct list_head *pages)
+{
+ struct mali_session_data *session = bk_node->session;
+ mali_mem_backend *mem_bkend = bk_node->bkend;
+ MALI_DEBUG_PRINT(4, ("mali_mem_defer_bind_allocation, bind bkend = %x page num=0x%x vaddr=%x session=%x\n", mem_bkend, bk_node->page_num, bk_node->vaddr, session));
+
+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+ list_splice(pages, &mem_bkend->os_mem.pages);
+ mem_bkend->os_mem.count = bk_node->page_num;
+
+ if (mem_bkend->type == MALI_MEM_OS) {
+ mali_mem_os_mali_map(&mem_bkend->os_mem, session, bk_node->vaddr, 0,
+ mem_bkend->os_mem.count, MALI_MMU_FLAGS_DEFAULT);
+ }
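+	/* Make sure the Mali MMU mapping above is visible to other CPUs
+	 * before the "binded" flags below are published. */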
+ smp_wmb();
+ bk_node->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+ mem_bkend->flags &= ~MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_BINDED;
+ return _MALI_OSK_ERR_OK;
+}
+
+
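+/* Move @count pages from @dblock's free-page pool onto @pages. Returns
+ * @pages on success, or NULL if the pool holds fewer than @count pages. */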
+static struct list_head *mali_mem_defer_get_free_page_list(u32 count, struct list_head *pages, mali_defer_mem_block *dblock)
+{
+ int i = 0;
+ struct mali_page_node *m_page, *m_tmp;
+
+ if (atomic_read(&dblock->num_free_pages) < count) {
+ return NULL;
+ } else {
+ list_for_each_entry_safe(m_page, m_tmp, &dblock->free_pages, list) {
+ if (i < count) {
+ list_move_tail(&m_page->list, pages);
+ } else {
+ break;
+ }
+ i++;
+ }
+ MALI_DEBUG_ASSERT(i == count);
+ atomic_sub(count, &dblock->num_free_pages);
+ return pages;
+ }
+}
+
+
+/* Called from the job-start IOCTL to bind physical memory for each allocation.
+ * @count       number of pages to bind
+ * @gp          GP job whose pending backend list (vary_todo) is bound
+ * @dmem_block  block holding the free pages used for the bind
+ */
+_mali_osk_errcode_t mali_mem_defer_bind(u32 count, struct mali_gp_job *gp,
+ struct mali_defer_mem_block *dmem_block)
+{
+ struct mali_defer_mem *dmem = NULL;
+ struct mali_backend_bind_list *bkn, *bkn_tmp;
+ LIST_HEAD(pages);
+
+	MALI_DEBUG_PRINT(4, ("#BIND: GP job = %x\n", gp));
+ dmem = (mali_defer_mem *)_mali_osk_calloc(1, sizeof(struct mali_defer_mem));
+ if (dmem) {
+ INIT_LIST_HEAD(&dmem->node);
+ gp->dmem = dmem;
+ } else {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ atomic_add(1, &mali_dmem_man->num_dmem);
+ /* for each bk_list backend, do bind */
+ list_for_each_entry_safe(bkn, bkn_tmp , &gp->vary_todo, node) {
+ INIT_LIST_HEAD(&pages);
+ if (likely(mali_mem_defer_get_free_page_list(bkn->page_num, &pages, dmem_block))) {
+ list_del(&bkn->node);
+ mali_mem_defer_bind_allocation(bkn, &pages);
+ _mali_osk_free(bkn);
+ } else {
+			/* Running out of pages here should never happen:
+			 * they were pre-allocated for this job. */
+			MALI_DEBUG_PRINT(1, ("#BIND: not enough memory when binding!\n"));
+ MALI_DEBUG_ASSERT(0);
+ }
+ }
+
+ if (!list_empty(&gp->vary_todo)) {
+ MALI_DEBUG_ASSERT(0);
+ }
+
+ dmem->flag = MALI_DEFER_BIND_MEMORY_BINDED;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp)
+{
+ if (gp->dmem) {
+ atomic_dec(&mali_dmem_man->num_dmem);
+ _mali_osk_free(gp->dmem);
+ }
+}
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_defer_bind.h b/drivers/gpu/arm/utgard/linux/mali_memory_defer_bind.h
new file mode 100644
index 000000000000..ef67540434f5
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_defer_bind.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __MALI_MEMORY_DEFER_BIND_H_
+#define __MALI_MEMORY_DEFER_BIND_H_
+
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_gp_job;
+
+typedef struct mali_defer_mem {
+ struct list_head node; /*dlist node in bind manager */
+ u32 flag;
+} mali_defer_mem;
+
+
+typedef struct mali_defer_mem_block {
+ struct list_head free_pages; /* page pool */
+ atomic_t num_free_pages;
+} mali_defer_mem_block;
+
+/* List of varying memory backends that need to be bound */
+typedef struct mali_backend_bind_list {
+ struct list_head node;
+ struct mali_mem_backend *bkend;
+ u32 vaddr;
+ u32 page_num;
+ struct mali_session_data *session;
+ u32 flag;
+} mali_backend_bind_lists;
+
+
+typedef struct mali_defer_bind_manager {
+ atomic_t num_used_pages;
+ atomic_t num_dmem;
+} mali_defer_bind_manager;
+
+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void);
+void mali_mem_defer_bind_manager_destory(void);
+_mali_osk_errcode_t mali_mem_defer_bind(u32 count, struct mali_gp_job *gp,
+ struct mali_defer_mem_block *dmem_block);
+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list);
+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock);
+void mali_mem_defer_dmem_free(struct mali_gp_job *gp);
+
+#endif
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_dma_buf.c b/drivers/gpu/arm/utgard/linux/mali_memory_dma_buf.c
new file mode 100644
index 000000000000..2fa5028d8342
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_dma_buf.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/fs.h> /* file system operations */
+#include <asm/uaccess.h> /* user space access */
+#include <linux/dma-buf.h>
+#include <linux/scatterlist.h>
+#include <linux/rbtree.h>
+#include <linux/platform_device.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+
+#include "mali_memory.h"
+#include "mali_memory_dma_buf.h"
+#include "mali_memory_virtual.h"
+#include "mali_pp_job.h"
+
+/*
+ * Map the dma-buf attachment of \a mem_backend into the owning session's
+ * Mali virtual address space.
+ */
+static int mali_dma_buf_map(mali_mem_backend *mem_backend)
+{
+ mali_mem_allocation *alloc;
+ struct mali_dma_buf_attachment *mem;
+ struct mali_session_data *session;
+ struct mali_page_directory *pagedir;
+ _mali_osk_errcode_t err;
+ struct scatterlist *sg;
+ u32 virt, flags;
+ int i;
+
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+
+ alloc = mem_backend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+
+ mem = mem_backend->dma_buf.attachment;
+ MALI_DEBUG_ASSERT_POINTER(mem);
+
+ session = alloc->session;
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT(mem->session == session);
+
+ virt = alloc->mali_vma_node.vm_node.start;
+ flags = alloc->flags;
+
+ mali_session_memory_lock(session);
+ mem->map_ref++;
+
+ MALI_DEBUG_PRINT(5, ("Mali DMA-buf: map attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+ if (1 == mem->map_ref) {
+
+ /* First reference taken, so we need to map the dma buf */
+ MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+ mem->sgt = dma_buf_map_attachment(mem->attachment, DMA_BIDIRECTIONAL);
+ if (IS_ERR_OR_NULL(mem->sgt)) {
+ MALI_DEBUG_PRINT_ERROR(("Failed to map dma-buf attachment\n"));
+ mem->map_ref--;
+ mali_session_memory_unlock(session);
+ return -EFAULT;
+ }
+
+ err = mali_mem_mali_map_prepare(alloc);
+ if (_MALI_OSK_ERR_OK != err) {
+ MALI_DEBUG_PRINT(1, ("Mapping of DMA memory failed\n"));
+ mem->map_ref--;
+ mali_session_memory_unlock(session);
+ return -ENOMEM;
+ }
+
+ pagedir = mali_session_get_page_directory(session);
+ MALI_DEBUG_ASSERT_POINTER(pagedir);
+
+ for_each_sg(mem->sgt->sgl, sg, mem->sgt->nents, i) {
+ u32 size = sg_dma_len(sg);
+ dma_addr_t phys = sg_dma_address(sg);
+
+ /* sg must be page aligned. */
+ MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE);
+ MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF));
+
+ mali_mmu_pagedir_update(pagedir, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
+ virt += size;
+ }
+
+ if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+ u32 guard_phys;
+ MALI_DEBUG_PRINT(7, ("Mapping in extra guard page\n"));
+
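+			/* Re-use the buffer's first physical page as the extra
+			 * guard page mapped directly after the buffer's range. */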
+ guard_phys = sg_dma_address(mem->sgt->sgl);
+ mali_mmu_pagedir_update(pagedir, virt, guard_phys, MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+ }
+
+ mem->is_mapped = MALI_TRUE;
+ mali_session_memory_unlock(session);
+ /* Wake up any thread waiting for buffer to become mapped */
+ wake_up_all(&mem->wait_queue);
+ } else {
+ MALI_DEBUG_ASSERT(mem->is_mapped);
+ mali_session_memory_unlock(session);
+ }
+
+ return 0;
+}
+
+static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem)
+{
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ MALI_DEBUG_ASSERT_POINTER(mem);
+ MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+ MALI_DEBUG_ASSERT_POINTER(mem->buf);
+ MALI_DEBUG_ASSERT_POINTER(alloc->session);
+
+ mali_session_memory_lock(alloc->session);
+ mem->map_ref--;
+
+ MALI_DEBUG_PRINT(5, ("Mali DMA-buf: unmap attachment %p, new map_ref = %d\n", mem, mem->map_ref));
+
+ if (0 == mem->map_ref) {
+ dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL);
+ if (MALI_TRUE == mem->is_mapped) {
+ mali_mem_mali_map_free(alloc->session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+ alloc->flags);
+ }
+ mem->is_mapped = MALI_FALSE;
+ }
+ mali_session_memory_unlock(alloc->session);
+ /* Wake up any thread waiting for buffer to become unmapped */
+ wake_up_all(&mem->wait_queue);
+}
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job)
+{
+ struct mali_dma_buf_attachment *mem;
+ _mali_osk_errcode_t err;
+ int i;
+ int ret = 0;
+ u32 num_memory_cookies;
+ struct mali_session_data *session;
+ struct mali_vma_node *mali_vma_node = NULL;
+ mali_mem_allocation *mali_alloc = NULL;
+ mali_mem_backend *mem_bkend = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+ session = mali_pp_job_get_session(job);
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
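+	/* For every memory cookie attached to the job, resolve the Mali
+	 * address to its allocation and backend; only dma-buf backed
+	 * allocations are mapped here, everything else is skipped. */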
+ for (i = 0; i < num_memory_cookies; i++) {
+ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i);
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+ MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+ MALI_DEBUG_ASSERT(NULL != mali_alloc);
+ if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+ continue;
+ }
+
+		/* Look up the backend memory for this cookie */
+ mutex_lock(&mali_idr_mutex);
+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+ mutex_unlock(&mali_idr_mutex);
+ MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+ mem = mem_bkend->dma_buf.attachment;
+
+ MALI_DEBUG_ASSERT_POINTER(mem);
+ MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+
+ err = mali_dma_buf_map(mem_bkend);
+ if (0 != err) {
+ MALI_DEBUG_PRINT_ERROR(("Mali DMA-buf: Failed to map dma-buf for mali address %x\n", mali_addr));
+ ret = -EFAULT;
+ continue;
+ }
+ }
+ return ret;
+}
+
+void mali_dma_buf_unmap_job(struct mali_pp_job *job)
+{
+ struct mali_dma_buf_attachment *mem;
+ int i;
+ u32 num_memory_cookies;
+ struct mali_session_data *session;
+ struct mali_vma_node *mali_vma_node = NULL;
+ mali_mem_allocation *mali_alloc = NULL;
+ mali_mem_backend *mem_bkend = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+
+ session = mali_pp_job_get_session(job);
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ for (i = 0; i < num_memory_cookies; i++) {
+ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i);
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+ MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+ MALI_DEBUG_ASSERT(NULL != mali_alloc);
+ if (MALI_MEM_DMA_BUF != mali_alloc->type) {
+ continue;
+ }
+
+		/* Look up the backend memory for this cookie */
+ mutex_lock(&mali_idr_mutex);
+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+ mutex_unlock(&mali_idr_mutex);
+ MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+ mem = mem_bkend->dma_buf.attachment;
+
+ MALI_DEBUG_ASSERT_POINTER(mem);
+ MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job));
+ mali_dma_buf_unmap(mem_bkend->mali_allocation, mem);
+ }
+}
+#endif /* !CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH */
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *user_arg)
+{
+ _mali_uk_dma_buf_get_size_s args;
+ int fd;
+ struct dma_buf *buf;
+
+	/* Get call arguments from user space. copy_from_user returns the number of bytes that were NOT copied */
+ if (0 != copy_from_user(&args, (void __user *)user_arg, sizeof(_mali_uk_dma_buf_get_size_s))) {
+ return -EFAULT;
+ }
+
+	/* Look up the dma-buf from the file descriptor */
+ fd = args.mem_fd;
+
+ buf = dma_buf_get(fd);
+ if (IS_ERR_OR_NULL(buf)) {
+ MALI_DEBUG_PRINT_ERROR(("Failed to get dma-buf from fd: %d\n", fd));
+ return PTR_RET(buf);
+ }
+
+ if (0 != put_user(buf->size, &user_arg->size)) {
+ dma_buf_put(buf);
+ return -EFAULT;
+ }
+
+ dma_buf_put(buf);
+
+ return 0;
+}
+
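+/* Attach a dma-buf (by fd) to a Mali allocation: take a reference on the
+ * buffer, create an attachment for the Mali platform device, and, when
+ * CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH is set, map it into the session's Mali
+ * address space immediately; otherwise mapping is deferred until job start
+ * (see mali_dma_buf_map_job()). */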
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+ mali_mem_backend *mem_backend,
+ int fd, u32 flags)
+{
+ struct dma_buf *buf;
+ struct mali_dma_buf_attachment *dma_mem;
+ struct mali_session_data *session = alloc->session;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+
+ /* get dma buffer */
+ buf = dma_buf_get(fd);
+ if (IS_ERR_OR_NULL(buf)) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+	/* Currently, only mappings of the full buffer are supported. */
+ if (alloc->psize != buf->size) {
+ goto failed_alloc_mem;
+ }
+
+ dma_mem = _mali_osk_calloc(1, sizeof(struct mali_dma_buf_attachment));
+ if (NULL == dma_mem) {
+ goto failed_alloc_mem;
+ }
+
+ dma_mem->buf = buf;
+ dma_mem->session = session;
+ dma_mem->map_ref = 0;
+ init_waitqueue_head(&dma_mem->wait_queue);
+
+ dma_mem->attachment = dma_buf_attach(dma_mem->buf, &mali_platform_device->dev);
+ if (NULL == dma_mem->attachment) {
+ goto failed_dma_attach;
+ }
+
+ mem_backend->dma_buf.attachment = dma_mem;
+
+ alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+ if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+ }
+
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+ /* Map memory into session's Mali virtual address space. */
+ if (0 != mali_dma_buf_map(mem_backend)) {
+ goto Failed_dma_map;
+ }
+#endif
+
+ return _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+Failed_dma_map:
+ mali_dma_buf_unmap(alloc, dma_mem);
+#endif
+ /* Wait for buffer to become unmapped */
+ wait_event(dma_mem->wait_queue, !dma_mem->is_mapped);
+ MALI_DEBUG_ASSERT(!dma_mem->is_mapped);
+ dma_buf_detach(dma_mem->buf, dma_mem->attachment);
+failed_dma_attach:
+ _mali_osk_free(dma_mem);
+failed_alloc_mem:
+ dma_buf_put(buf);
+ return _MALI_OSK_ERR_FAULT;
+}
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend)
+{
+ struct mali_dma_buf_attachment *mem;
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ MALI_DEBUG_ASSERT(MALI_MEM_DMA_BUF == mem_backend->type);
+
+ mem = mem_backend->dma_buf.attachment;
+ MALI_DEBUG_ASSERT_POINTER(mem);
+ MALI_DEBUG_ASSERT_POINTER(mem->attachment);
+ MALI_DEBUG_ASSERT_POINTER(mem->buf);
+ MALI_DEBUG_PRINT(3, ("Mali DMA-buf: release attachment %p\n", mem));
+
+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+ MALI_DEBUG_ASSERT_POINTER(mem_backend->mali_allocation);
+ /* We mapped implicitly on attach, so we need to unmap on release */
+ mali_dma_buf_unmap(mem_backend->mali_allocation, mem);
+#endif
+ /* Wait for buffer to become unmapped */
+ wait_event(mem->wait_queue, !mem->is_mapped);
+ MALI_DEBUG_ASSERT(!mem->is_mapped);
+
+ dma_buf_detach(mem->buf, mem->attachment);
+ dma_buf_put(mem->buf);
+
+ _mali_osk_free(mem);
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_dma_buf.h b/drivers/gpu/arm/utgard/linux/mali_memory_dma_buf.h
new file mode 100644
index 000000000000..859d3849e6b3
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_dma_buf.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_DMA_BUF_H__
+#define __MALI_MEMORY_DMA_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+struct mali_pp_job;
+
+struct mali_dma_buf_attachment;
+struct mali_dma_buf_attachment {
+ struct dma_buf *buf;
+ struct dma_buf_attachment *attachment;
+ struct sg_table *sgt;
+ struct mali_session_data *session;
+ int map_ref;
+ struct mutex map_lock;
+ mali_bool is_mapped;
+ wait_queue_head_t wait_queue;
+};
+
+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *arg);
+
+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend);
+
+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc,
+ mali_mem_backend *mem_backend,
+ int fd, u32 flags);
+
+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH)
+int mali_dma_buf_map_job(struct mali_pp_job *job);
+void mali_dma_buf_unmap_job(struct mali_pp_job *job);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_DMA_BUF_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_external.c b/drivers/gpu/arm/utgard/linux/mali_memory_external.c
new file mode 100644
index 000000000000..3733f2e0c80a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_external.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_memory.h"
+#include "mali_mem_validation.h"
+#include "mali_uk_types.h"
+
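+/* Unbind external memory: tear down the Mali MMU mapping for the whole
+ * allocation under the session memory lock. The underlying physical memory
+ * is owned elsewhere and is not freed here. */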
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend)
+{
+ mali_mem_allocation *alloc;
+ struct mali_session_data *session;
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ alloc = mem_backend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ MALI_DEBUG_ASSERT(MALI_MEM_EXTERNAL == mem_backend->type);
+
+ session = alloc->session;
+ MALI_DEBUG_ASSERT_POINTER(session);
+ mali_session_memory_lock(session);
+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+ alloc->flags);
+ mali_session_memory_unlock(session);
+}
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+ mali_mem_backend *mem_backend,
+ u32 phys_addr,
+ u32 flag)
+{
+ struct mali_session_data *session;
+ _mali_osk_errcode_t err;
+ u32 virt, phys, size;
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ size = alloc->psize;
+ session = (struct mali_session_data *)(uintptr_t)alloc->session;
+ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS);
+
+ /* check arguments */
+ /* NULL might be a valid Mali address */
+ if (!size) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+ /* size must be a multiple of the system page size */
+ if (size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS);
+
+#if 0
+ /* Validate the mali physical range */
+ if (_MALI_OSK_ERR_OK != mali_mem_validation_check(phys_addr, size)) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+
+ if (flag & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+ }
+
+ mali_session_memory_lock(session);
+
+ virt = alloc->mali_vma_node.vm_node.start;
+ phys = phys_addr;
+
+ err = mali_mem_mali_map_prepare(alloc);
+ if (_MALI_OSK_ERR_OK != err) {
+ mali_session_memory_unlock(session);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ mali_mmu_pagedir_update(session->page_directory, virt, phys, size, MALI_MMU_FLAGS_DEFAULT);
+
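+	/* When requested, map one extra page directly after the buffer,
+	 * re-using the base physical address, as a guard page. */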
+ if (alloc->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) {
+ mali_mmu_pagedir_update(session->page_directory, virt + size, phys, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+ }
+ MALI_DEBUG_PRINT(3,
+ ("Requested to map physical memory 0x%x-0x%x into virtual memory 0x%x\n",
+ phys_addr, (phys_addr + size - 1),
+ virt));
+ mali_session_memory_unlock(session);
+
+ MALI_SUCCESS;
+}
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_external.h b/drivers/gpu/arm/utgard/linux/mali_memory_external.h
new file mode 100644
index 000000000000..645580b51fc9
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_external.h
@@ -0,0 +1,29 @@
+
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_EXTERNAL_H__
+#define __MALI_MEMORY_EXTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc,
+ mali_mem_backend *mem_backend,
+ u32 phys_addr,
+ u32 flag);
+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_manager.c b/drivers/gpu/arm/utgard/linux/mali_memory_manager.c
new file mode 100644
index 000000000000..55e2c092d597
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_manager.c
@@ -0,0 +1,965 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include <linux/platform_device.h>
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif
+#include <linux/idr.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_util.h"
+#include "mali_memory_external.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_ukk.h"
+#include "mali_memory_swap_alloc.h"
+
+/*
+* New memory system interface
+*/
+
+/* IDR for backend memory handles */
+struct idr mali_backend_idr;
+struct mutex mali_idr_mutex;
+
+/* init allocation manager */
+int mali_memory_manager_init(struct mali_allocation_manager *mgr)
+{
+ /* init Locks */
+ rwlock_init(&mgr->vm_lock);
+ mutex_init(&mgr->list_mutex);
+
+ /* init link */
+ INIT_LIST_HEAD(&mgr->head);
+
+ /* init RB tree */
+ mgr->allocation_mgr_rb = RB_ROOT;
+ mgr->mali_allocation_num = 0;
+ return 0;
+}
+
+/* Deinit allocation manager.
+ * Performs some consistency checks for debugging.
+ */
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr)
+{
+ /* check RB tree is empty */
+ MALI_DEBUG_ASSERT(((void *)(mgr->allocation_mgr_rb.rb_node) == (void *)rb_last(&mgr->allocation_mgr_rb)));
+ /* check allocation List */
+ MALI_DEBUG_ASSERT(list_empty(&mgr->head));
+}
+
+/* Prepare memory descriptor */
+static mali_mem_allocation *mali_mem_allocation_struct_create(struct mali_session_data *session)
+{
+ mali_mem_allocation *mali_allocation;
+
+ /* Allocate memory */
+ mali_allocation = (mali_mem_allocation *)kzalloc(sizeof(mali_mem_allocation), GFP_KERNEL);
+ if (NULL == mali_allocation) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_allocation_struct_create: failed to allocate descriptor\n"));
+ return NULL;
+ }
+
+ MALI_DEBUG_CODE(mali_allocation->magic = MALI_MEM_ALLOCATION_VALID_MAGIC);
+
+ /* do init */
+ mali_allocation->flags = 0;
+ mali_allocation->session = session;
+
+ INIT_LIST_HEAD(&mali_allocation->list);
+ _mali_osk_atomic_init(&mali_allocation->mem_alloc_refcount, 1);
+
+	/* Add to session list */
+ mutex_lock(&session->allocation_mgr.list_mutex);
+ list_add_tail(&mali_allocation->list, &session->allocation_mgr.head);
+ session->allocation_mgr.mali_allocation_num++;
+ mutex_unlock(&session->allocation_mgr.list_mutex);
+
+ return mali_allocation;
+}
+
+void mali_mem_allocation_struct_destory(mali_mem_allocation *alloc)
+{
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ MALI_DEBUG_ASSERT_POINTER(alloc->session);
+ mutex_lock(&alloc->session->allocation_mgr.list_mutex);
+ list_del(&alloc->list);
+ alloc->session->allocation_mgr.mali_allocation_num--;
+ mutex_unlock(&alloc->session->allocation_mgr.list_mutex);
+
+ kfree(alloc);
+}
+
+int mali_mem_backend_struct_create(mali_mem_backend **backend, u32 psize)
+{
+ mali_mem_backend *mem_backend = NULL;
+ s32 ret = -ENOSPC;
+ s32 index = -1;
+ *backend = (mali_mem_backend *)kzalloc(sizeof(mali_mem_backend), GFP_KERNEL);
+ if (NULL == *backend) {
+		MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: failed to allocate backend descriptor\n"));
+ return -1;
+ }
+ mem_backend = *backend;
+ mem_backend->size = psize;
+ mutex_init(&mem_backend->mutex);
+ INIT_LIST_HEAD(&mem_backend->list);
+ mem_backend->using_count = 0;
+
+
+ /* link backend with id */
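+	/* Kernels before 3.9 use the two-step idr_pre_get()/idr_get_new_above()
+	 * API, which may ask the caller to retry with -EAGAIN after refilling
+	 * the IDR's preload cache; 3.9+ kernels provide the simpler idr_alloc().
+	 * Both paths return a handle >= 1 on success. */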
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
+again:
+ if (!idr_pre_get(&mali_backend_idr, GFP_KERNEL)) {
+ kfree(mem_backend);
+ return -ENOMEM;
+ }
+ mutex_lock(&mali_idr_mutex);
+ ret = idr_get_new_above(&mali_backend_idr, mem_backend, 1, &index);
+ mutex_unlock(&mali_idr_mutex);
+
+ if (-ENOSPC == ret) {
+ kfree(mem_backend);
+ return -ENOSPC;
+ }
+ if (-EAGAIN == ret)
+ goto again;
+#else
+ mutex_lock(&mali_idr_mutex);
+ ret = idr_alloc(&mali_backend_idr, mem_backend, 1, MALI_S32_MAX, GFP_KERNEL);
+ mutex_unlock(&mali_idr_mutex);
+ index = ret;
+ if (ret < 0) {
+ MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: Can't allocate idr for backend! \n"));
+ kfree(mem_backend);
+ return -ENOSPC;
+ }
+#endif
+ return index;
+}
+
+
+static void mali_mem_backend_struct_destory(mali_mem_backend **backend, s32 backend_handle)
+{
+ mali_mem_backend *mem_backend = *backend;
+
+ mutex_lock(&mali_idr_mutex);
+ idr_remove(&mali_backend_idr, backend_handle);
+ mutex_unlock(&mali_idr_mutex);
+ kfree(mem_backend);
+ *backend = NULL;
+}
+
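+/* Resolve a Mali virtual address to its memory backend: first find the
+ * vma node in the session's allocation manager, then look the backend up
+ * in the global IDR under mali_idr_mutex. */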
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address)
+{
+ struct mali_vma_node *mali_vma_node = NULL;
+ mali_mem_backend *mem_bkend = NULL;
+ mali_mem_allocation *mali_alloc = NULL;
+ MALI_DEBUG_ASSERT_POINTER(session);
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_address, 0);
+ if (NULL == mali_vma_node) {
+ MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_search:vma node was NULL\n"));
+ return NULL;
+ }
+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+	/* Look up the backend by its handle */
+ mutex_lock(&mali_idr_mutex);
+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+ mutex_unlock(&mali_idr_mutex);
+ MALI_DEBUG_ASSERT(NULL != mem_bkend);
+ return mem_bkend;
+}
+
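+/* Resize an OS-memory backend to @physical_size bytes: grow by allocating
+ * and mapping extra pages, or shrink by unmapping, zapping any CPU mapping,
+ * and freeing the surplus pages. Session accounting is updated to match. */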
+static _mali_osk_errcode_t mali_mem_resize(struct mali_session_data *session, mali_mem_backend *mem_backend, u32 physical_size)
+{
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+ int retval = 0;
+ mali_mem_allocation *mali_allocation = NULL;
+ mali_mem_os_mem tmp_os_mem;
+ s32 change_page_count;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+	MALI_DEBUG_PRINT(4, ("mali_mem_resize called!\n"));
+ MALI_DEBUG_ASSERT(0 == physical_size % MALI_MMU_PAGE_SIZE);
+
+ mali_allocation = mem_backend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+ MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE & mali_allocation->flags);
+ MALI_DEBUG_ASSERT(MALI_MEM_OS == mali_allocation->type);
+
+ mutex_lock(&mem_backend->mutex);
+
+ /* Do resize*/
+ if (physical_size > mem_backend->size) {
+ u32 add_size = physical_size - mem_backend->size;
+
+ MALI_DEBUG_ASSERT(0 == add_size % MALI_MMU_PAGE_SIZE);
+
+ /* Allocate new pages from os mem */
+ retval = mali_mem_os_alloc_pages(&tmp_os_mem, add_size);
+
+ if (retval) {
+ if (-ENOMEM == retval) {
+ ret = _MALI_OSK_ERR_NOMEM;
+ } else {
+ ret = _MALI_OSK_ERR_FAULT;
+ }
+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory allocation failed !\n"));
+ goto failed_alloc_memory;
+ }
+
+ MALI_DEBUG_ASSERT(tmp_os_mem.count == add_size / MALI_MMU_PAGE_SIZE);
+
+ /* Resize the memory of the backend */
+ ret = mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+
+ if (ret) {
+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory resizing failed !\n"));
+ goto failed_resize_pages;
+ }
+
+		/* Resize CPU mapping */
+ if (NULL != mali_allocation->cpu_mapping.vma) {
+ ret = mali_mem_os_resize_cpu_map_locked(mem_backend, mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + mem_backend->size, add_size);
+ if (unlikely(ret != _MALI_OSK_ERR_OK)) {
+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: cpu mapping failed !\n"));
+ goto failed_cpu_map;
+ }
+ }
+
+ /* Resize mali mapping */
+ _mali_osk_mutex_wait(session->memory_lock);
+ ret = mali_mem_mali_map_resize(mali_allocation, physical_size);
+
+ if (ret) {
+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_resize: mali map resize fail !\n"));
+ goto failed_gpu_map;
+ }
+
+ ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, mali_allocation->mali_vma_node.vm_node.start,
+ mali_allocation->psize / MALI_MMU_PAGE_SIZE, add_size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+ if (ret) {
+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: mali mapping failed !\n"));
+ goto failed_gpu_map;
+ }
+
+ _mali_osk_mutex_signal(session->memory_lock);
+ } else {
+ u32 dec_size, page_count;
+ u32 vaddr = 0;
+ INIT_LIST_HEAD(&tmp_os_mem.pages);
+ tmp_os_mem.count = 0;
+
+ dec_size = mem_backend->size - physical_size;
+ MALI_DEBUG_ASSERT(0 == dec_size % MALI_MMU_PAGE_SIZE);
+
+ page_count = dec_size / MALI_MMU_PAGE_SIZE;
+ vaddr = mali_allocation->mali_vma_node.vm_node.start + physical_size;
+
+ /* Resize the memory of the backend */
+ ret = mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, physical_size / MALI_MMU_PAGE_SIZE, page_count);
+
+ if (ret) {
+ MALI_DEBUG_PRINT(4, ("_mali_ukk_mem_resize: mali map resize failed!\n"));
+ goto failed_resize_pages;
+ }
+
+ /* Resize mali map */
+ _mali_osk_mutex_wait(session->memory_lock);
+ mali_mem_mali_map_free(session, dec_size, vaddr, mali_allocation->flags);
+ _mali_osk_mutex_signal(session->memory_lock);
+
+ /* Zap cpu mapping */
+ if (0 != mali_allocation->cpu_mapping.addr) {
+ MALI_DEBUG_ASSERT(NULL != mali_allocation->cpu_mapping.vma);
+ zap_vma_ptes(mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + physical_size, dec_size);
+ }
+
+ /* Free those extra pages */
+ mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+ }
+
+ /* Resize memory allocation and memory backend */
+ change_page_count = (s32)(physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE;
+ mali_allocation->psize = physical_size;
+ mem_backend->size = physical_size;
+ mutex_unlock(&mem_backend->mutex);
+
+ if (change_page_count > 0) {
+ atomic_add(change_page_count, &session->mali_mem_allocated_pages);
+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+ }
+
+ } else {
+ atomic_sub((s32)(-change_page_count), &session->mali_mem_allocated_pages);
+ }
+
+ return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+ _mali_osk_mutex_signal(session->memory_lock);
+failed_cpu_map:
+ if (physical_size > mem_backend->size) {
+ mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, mem_backend->size / MALI_MMU_PAGE_SIZE,
+ (physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE);
+ } else {
+ mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count);
+ }
+failed_resize_pages:
+ if (0 != tmp_os_mem.count)
+ mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE);
+failed_alloc_memory:
+
+ mutex_unlock(&mem_backend->mutex);
+ return ret;
+}
+
+
+/* Set GPU MMU properties */
+static void _mali_memory_gpu_map_property_set(u32 *properties, u32 flags)
+{
+ if (_MALI_MEMORY_GPU_READ_ALLOCATE & flags) {
+ *properties = MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE;
+ } else {
+ *properties = MALI_MMU_FLAGS_DEFAULT;
+ }
+}
+
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size)
+{
+ mali_mem_backend *mem_backend = NULL;
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+ mali_mem_allocation *mali_allocation = NULL;
+ u32 new_physical_size;
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT(0 == add_size % MALI_MMU_PAGE_SIZE);
+
+	/* Get the memory backend that needs to be resized. */
+ mem_backend = mali_mem_backend_struct_search(session, mali_addr);
+
+ if (NULL == mem_backend) {
+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+ return ret;
+ }
+
+ mali_allocation = mem_backend->mali_allocation;
+
+ MALI_DEBUG_ASSERT_POINTER(mali_allocation);
+
+ new_physical_size = add_size + mem_backend->size;
+
+ if (new_physical_size > (mali_allocation->mali_vma_node.vm_node.size))
+ return ret;
+
+ MALI_DEBUG_ASSERT(new_physical_size != mem_backend->size);
+
+ ret = mali_mem_resize(session, mem_backend, new_physical_size);
+
+ return ret;
+}
+
+/**
+ * _mali_ukk_mem_allocate - allocate mali memory
+ */
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args)
+{
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ mali_mem_backend *mem_backend = NULL;
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+ int retval = 0;
+ mali_mem_allocation *mali_allocation = NULL;
+ struct mali_vma_node *mali_vma_node = NULL;
+
+ MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_allocate, vaddr=0x%x, size =0x%x! \n", args->gpu_vaddr, args->psize));
+
+	/* Check whether the address is already allocated */
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->gpu_vaddr, 0);
+
+ if (unlikely(mali_vma_node)) {
+ MALI_DEBUG_ASSERT(0);
+ return _MALI_OSK_ERR_FAULT;
+ }
+	/* Create mali memory allocation */
+
+ mali_allocation = mali_mem_allocation_struct_create(session);
+
+ if (mali_allocation == NULL) {
+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_allocate: Failed to create allocation struct! \n"));
+ return _MALI_OSK_ERR_NOMEM;
+ }
+ mali_allocation->psize = args->psize;
+ mali_allocation->vsize = args->vsize;
+
+	/* Pick the backend type:
+	 * MALI_MEM_SWAP if the allocation must be swappable,
+	 * MALI_MEM_OS if it needs to support resizing,
+	 * MALI_MEM_BLOCK if dedicated memory is available,
+	 * plain MALI_MEM_OS otherwise.
+	 */
+ if (args->flags & _MALI_MEMORY_ALLOCATE_SWAPPABLE) {
+ mali_allocation->type = MALI_MEM_SWAP;
+ } else if (args->flags & _MALI_MEMORY_ALLOCATE_RESIZEABLE) {
+ mali_allocation->type = MALI_MEM_OS;
+ mali_allocation->flags |= MALI_MEM_FLAG_CAN_RESIZE;
+ } else if (MALI_TRUE == mali_memory_have_dedicated_memory()) {
+ mali_allocation->type = MALI_MEM_BLOCK;
+ } else {
+ mali_allocation->type = MALI_MEM_OS;
+ }
+
+	/* Add allocation node to the RB tree for indexing */
+ mali_allocation->mali_vma_node.vm_node.start = args->gpu_vaddr;
+ mali_allocation->mali_vma_node.vm_node.size = args->vsize;
+
+ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, args->psize);
+ if (mali_allocation->backend_handle < 0) {
+ ret = _MALI_OSK_ERR_NOMEM;
+ MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+ goto failed_alloc_backend;
+ }
+
+
+ mem_backend->mali_allocation = mali_allocation;
+ mem_backend->type = mali_allocation->type;
+
+ mali_allocation->mali_mapping.addr = args->gpu_vaddr;
+
+	/* Set GPU MMU property */
+	_mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+	/* Prepare the Mali mapping */
+ if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+ _mali_osk_mutex_wait(session->memory_lock);
+
+ ret = mali_mem_mali_map_prepare(mali_allocation);
+ if (0 != ret) {
+ _mali_osk_mutex_signal(session->memory_lock);
+ goto failed_prepare_map;
+ }
+ _mali_osk_mutex_signal(session->memory_lock);
+ }
+
+ if (mali_allocation->psize == 0) {
+ mem_backend->os_mem.count = 0;
+ INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+ goto done;
+ }
+
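+	/* Defer-bind allocations get no physical pages here; they are marked
+	 * NOT_BINDED and the pages are attached later, at job start, by
+	 * mali_mem_defer_bind(). */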
+ if (args->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) {
+ mali_allocation->flags |= _MALI_MEMORY_ALLOCATE_DEFER_BIND;
+ mem_backend->flags |= MALI_MEM_BACKEND_FLAG_NOT_BINDED;
+		/* Init for defer-bind backend */
+ mem_backend->os_mem.count = 0;
+ INIT_LIST_HEAD(&mem_backend->os_mem.pages);
+
+ goto done;
+ }
+	/* Allocate physical memory */
+ if (likely(mali_allocation->psize > 0)) {
+
+ if (mem_backend->type == MALI_MEM_OS) {
+ retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+ } else if (mem_backend->type == MALI_MEM_BLOCK) {
+			/* Try to allocate from BLOCK memory first, then fall back to OS memory on failure. */
+ if (mali_mem_block_alloc(&mem_backend->block_mem, mem_backend->size)) {
+ retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size);
+ mem_backend->type = MALI_MEM_OS;
+ mali_allocation->type = MALI_MEM_OS;
+ }
+ } else if (MALI_MEM_SWAP == mem_backend->type) {
+ retval = mali_mem_swap_alloc_pages(&mem_backend->swap_mem, mali_allocation->mali_vma_node.vm_node.size, &mem_backend->start_idx);
+ } else {
+			/* Unsupported backend type */
+ MALI_DEBUG_ASSERT(0);
+ }
+
+ if (retval) {
+ ret = _MALI_OSK_ERR_NOMEM;
+ MALI_DEBUG_PRINT(1, (" can't allocate enough pages! \n"));
+ goto failed_alloc_pages;
+ }
+ }
+
+	/* Map to GPU side */
+ if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) {
+ _mali_osk_mutex_wait(session->memory_lock);
+ /* Map on Mali */
+
+ if (mem_backend->type == MALI_MEM_OS) {
+ ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, args->gpu_vaddr, 0,
+ mem_backend->size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties);
+
+ } else if (mem_backend->type == MALI_MEM_BLOCK) {
+ mali_mem_block_mali_map(&mem_backend->block_mem, session, args->gpu_vaddr,
+ mali_allocation->mali_mapping.properties);
+ } else if (mem_backend->type == MALI_MEM_SWAP) {
+ ret = mali_mem_swap_mali_map(&mem_backend->swap_mem, session, args->gpu_vaddr,
+ mali_allocation->mali_mapping.properties);
+		} else { /* unsupported type */
+ MALI_DEBUG_ASSERT(0);
+ }
+
+ _mali_osk_mutex_signal(session->memory_lock);
+ }
+done:
+ if (MALI_MEM_OS == mem_backend->type) {
+ atomic_add(mem_backend->os_mem.count, &session->mali_mem_allocated_pages);
+ } else if (MALI_MEM_BLOCK == mem_backend->type) {
+ atomic_add(mem_backend->block_mem.count, &session->mali_mem_allocated_pages);
+ } else {
+ MALI_DEBUG_ASSERT(MALI_MEM_SWAP == mem_backend->type);
+ atomic_add(mem_backend->swap_mem.count, &session->mali_mem_allocated_pages);
+ atomic_add(mem_backend->swap_mem.count, &session->mali_mem_array[mem_backend->type]);
+ }
+
+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+ }
+ return _MALI_OSK_ERR_OK;
+
+failed_alloc_pages:
+ mali_mem_mali_map_free(session, mali_allocation->psize, mali_allocation->mali_vma_node.vm_node.start, mali_allocation->flags);
+failed_prepare_map:
+ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+
+ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+ mali_mem_allocation_struct_destory(mali_allocation);
+
+ return ret;
+}
+
+
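+/* Free a Mali memory allocation by GPU virtual address: look the allocation
+ * up in the session's allocation manager and drop one reference; the caller
+ * learns how many pages were actually freed via args->free_pages_nr. */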
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args)
+{
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ u32 vaddr = args->gpu_vaddr;
+ mali_mem_allocation *mali_alloc = NULL;
+ struct mali_vma_node *mali_vma_node = NULL;
+
+ /* find mali allocation structure by vaddress*/
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, vaddr, 0);
+ if (NULL == mali_vma_node) {
+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_free: invalid addr: 0x%x\n", vaddr));
+ return _MALI_OSK_ERR_INVALID_ARGS;
+ }
+ MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+
+ if (mali_alloc)
+ /* check ref_count */
+ args->free_pages_nr = mali_allocation_unref(&mali_alloc);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+
+/**
+ * Function _mali_ukk_mem_bind -- bind external memory to a new GPU address.
+ * It will allocate a new mem allocation and bind the external memory to it.
+ * Supported backend types are:
+ * _MALI_MEMORY_BIND_BACKEND_UMP
+ * _MALI_MEMORY_BIND_BACKEND_DMA_BUF
+ * _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY
+ * CPU access is not supported yet.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args)
+{
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ mali_mem_backend *mem_backend = NULL;
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+ mali_mem_allocation *mali_allocation = NULL;
+ MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_bind, vaddr=0x%x, size =0x%x! \n", args->vaddr, args->size));
+
+	/* Allocate the mali allocation. */
+ mali_allocation = mali_mem_allocation_struct_create(session);
+
+ if (mali_allocation == NULL) {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+ mali_allocation->psize = args->size;
+ mali_allocation->vsize = args->size;
+ mali_allocation->mali_mapping.addr = args->vaddr;
+
+ /* add allocation node to RB tree for index */
+ mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+ mali_allocation->mali_vma_node.vm_node.size = args->size;
+ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+	/* Allocate backend */
+ if (mali_allocation->psize > 0) {
+ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+ if (mali_allocation->backend_handle < 0) {
+ goto Failed_alloc_backend;
+ }
+
+ } else {
+ goto Failed_alloc_backend;
+ }
+
+ mem_backend->size = mali_allocation->psize;
+ mem_backend->mali_allocation = mali_allocation;
+
+ switch (args->flags & _MALI_MEMORY_BIND_BACKEND_MASK) {
+ case _MALI_MEMORY_BIND_BACKEND_UMP:
+#if defined(CONFIG_MALI400_UMP)
+ mali_allocation->type = MALI_MEM_UMP;
+ mem_backend->type = MALI_MEM_UMP;
+ ret = mali_mem_bind_ump_buf(mali_allocation, mem_backend,
+ args->mem_union.bind_ump.secure_id, args->mem_union.bind_ump.flags);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_DEBUG_PRINT(1, ("Bind ump buf failed\n"));
+ goto Failed_bind_backend;
+ }
+#else
+ MALI_DEBUG_PRINT(1, ("UMP not supported\n"));
+ goto Failed_bind_backend;
+#endif
+ break;
+ case _MALI_MEMORY_BIND_BACKEND_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+ mali_allocation->type = MALI_MEM_DMA_BUF;
+ mem_backend->type = MALI_MEM_DMA_BUF;
+ ret = mali_mem_bind_dma_buf(mali_allocation, mem_backend,
+ args->mem_union.bind_dma_buf.mem_fd, args->mem_union.bind_dma_buf.flags);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_DEBUG_PRINT(1, ("Bind dma buf failed\n"));
+ goto Failed_bind_backend;
+ }
+#else
+ MALI_DEBUG_PRINT(1, ("DMA not supported\n"));
+ goto Failed_bind_backend;
+#endif
+ break;
+ case _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY:
+ /* not allowed */
+ MALI_DEBUG_ASSERT(0);
+ break;
+
+ case _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY:
+ mali_allocation->type = MALI_MEM_EXTERNAL;
+ mem_backend->type = MALI_MEM_EXTERNAL;
+ ret = mali_mem_bind_ext_buf(mali_allocation, mem_backend, args->mem_union.bind_ext_memory.phys_addr,
+ args->mem_union.bind_ext_memory.flags);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_DEBUG_PRINT(1, ("Bind external buf failed\n"));
+ goto Failed_bind_backend;
+ }
+ break;
+
+ case _MALI_MEMORY_BIND_BACKEND_EXT_COW:
+ /* not allowed */
+ MALI_DEBUG_ASSERT(0);
+ break;
+
+ default:
+ MALI_DEBUG_ASSERT(0);
+ break;
+ }
+ MALI_DEBUG_ASSERT(0 == mem_backend->size % MALI_MMU_PAGE_SIZE);
+ atomic_add(mem_backend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_backend->type]);
+ return _MALI_OSK_ERR_OK;
+
+Failed_bind_backend:
+ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+
+Failed_alloc_backend:
+ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+ mali_mem_allocation_struct_destory(mali_allocation);
+
+ MALI_DEBUG_PRINT(1, (" _mali_ukk_mem_bind, return ERROR! \n"));
+ return ret;
+}
+
+
+/*
+ * Function _mali_ukk_mem_unbind -- unbind external memory from a GPU address.
+ * This function unbinds the backend memory and frees the allocation;
+ * there is no ref_count for this type of memory.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args)
+{
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ mali_mem_allocation *mali_allocation = NULL;
+ struct mali_vma_node *mali_vma_node = NULL;
+ u32 mali_addr = args->vaddr;
+ MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_unbind, vaddr=0x%x! \n", args->vaddr));
+
+ /* find the allocation by vaddr */
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+ if (likely(mali_vma_node)) {
+ MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start);
+ mali_allocation = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+ } else {
+ MALI_DEBUG_ASSERT(NULL != mali_vma_node);
+ return _MALI_OSK_ERR_INVALID_ARGS;
+ }
+
+ if (NULL != mali_allocation)
+ /* check ref_count */
+ mali_allocation_unref(&mali_allocation);
+ return _MALI_OSK_ERR_OK;
+}
+
+/*
+ * Function _mali_ukk_mem_cow -- COW for an allocation.
+ * This function allocates new pages for a range (range_start, range_start + range_size)
+ * of the target allocation, keeps using the target's pages outside that range,
+ * and maps the result to a new GPU vaddr.
+ */
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args)
+{
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+ mali_mem_backend *target_backend = NULL;
+ mali_mem_backend *mem_backend = NULL;
+ struct mali_vma_node *mali_vma_node = NULL;
+ mali_mem_allocation *mali_allocation = NULL;
+
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ /* Get the target backend for cow */
+ target_backend = mali_mem_backend_struct_search(session, args->target_handle);
+
+ if (NULL == target_backend || 0 == target_backend->size) {
+ MALI_DEBUG_ASSERT_POINTER(target_backend);
+ MALI_DEBUG_ASSERT(0 != target_backend->size);
+ return ret;
+ }
+
+	/* COW does not support resizable memory */
+ MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE != (MALI_MEM_FLAG_CAN_RESIZE & target_backend->mali_allocation->flags));
+
+ /* Check if the new mali address is allocated */
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->vaddr, 0);
+
+ if (unlikely(mali_vma_node)) {
+ MALI_DEBUG_ASSERT(0);
+ return ret;
+ }
+
+	/* Create a new allocation for COW */
+ mali_allocation = mali_mem_allocation_struct_create(session);
+ if (mali_allocation == NULL) {
+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to create allocation struct!\n"));
+ return _MALI_OSK_ERR_NOMEM;
+ }
+ mali_allocation->psize = args->target_size;
+ mali_allocation->vsize = args->target_size;
+ mali_allocation->type = MALI_MEM_COW;
+
+	/* Add allocation node to the RB tree for indexing */
+ mali_allocation->mali_vma_node.vm_node.start = args->vaddr;
+ mali_allocation->mali_vma_node.vm_node.size = mali_allocation->vsize;
+ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+
+ /* create new backend for COW memory */
+ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize);
+ if (mali_allocation->backend_handle < 0) {
+ ret = _MALI_OSK_ERR_NOMEM;
+ MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n"));
+ goto failed_alloc_backend;
+ }
+ mem_backend->mali_allocation = mali_allocation;
+ mem_backend->type = mali_allocation->type;
+
+ if (target_backend->type == MALI_MEM_SWAP ||
+ (MALI_MEM_COW == target_backend->type && (MALI_MEM_BACKEND_FLAG_SWAP_COWED & target_backend->flags))) {
+ mem_backend->flags |= MALI_MEM_BACKEND_FLAG_SWAP_COWED;
+		/**
+		 * CoWed swap backends cannot be mapped as a non-linear vma: when a
+		 * vma carries the VM_NONLINEAR flag, the kernel takes over
+		 * vma->vm_private_data, which the Mali driver already uses to store
+		 * the mali_allocation pointer, so the two uses would conflict.
+		 * To work around this, roughly 64MB of index space starting at 0 is
+		 * reserved; no real page index is ever assigned in the range
+		 * 0..(64MB >> PAGE_SHIFT_NUM). Every CoWed swap backend gets
+		 * start_idx = 0 and is mapped as linear, added to the priority tree
+		 * of the global swap file. Such vmas can never be found through a
+		 * normal page->index lookup, so their pages can never be swapped out.
+		 */
+ mem_backend->start_idx = 0;
+ }
+
+	/* Bump the target backend's COW count; allocate new pages from OS memory
+	 * for the modified range, and keep (taking a reference on) the pages
+	 * that fall outside the modified range.
+	 */
+ MALI_DEBUG_PRINT(3, ("Cow mapping: target_addr: 0x%x; cow_addr: 0x%x, size: %u\n", target_backend->mali_allocation->mali_vma_node.vm_node.start,
+ mali_allocation->mali_vma_node.vm_node.start, mali_allocation->mali_vma_node.vm_node.size));
+
+ ret = mali_memory_do_cow(target_backend, args->target_offset, args->target_size, mem_backend, args->range_start, args->range_size);
+ if (_MALI_OSK_ERR_OK != ret) {
+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to cow!\n"));
+ goto failed_do_cow;
+ }
+
+	/* Map to GPU side */
+ mali_allocation->mali_mapping.addr = args->vaddr;
+	/* Set GPU MMU property */
+ _mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags);
+
+ _mali_osk_mutex_wait(session->memory_lock);
+ /* Map on Mali */
+ ret = mali_mem_mali_map_prepare(mali_allocation);
+ if (0 != ret) {
+ MALI_DEBUG_PRINT(1, (" prepare map fail! \n"));
+ goto failed_gpu_map;
+ }
+
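+	/* Swap-CoWed backends are not inserted into the Mali page tables here;
+	 * only regular COW memory is mapped at this point. */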
+ if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+ mali_mem_cow_mali_map(mem_backend, 0, mem_backend->size);
+ }
+
+ _mali_osk_mutex_signal(session->memory_lock);
+
+ mutex_lock(&target_backend->mutex);
+ target_backend->flags |= MALI_MEM_BACKEND_FLAG_COWED;
+ mutex_unlock(&target_backend->mutex);
+
+ atomic_add(args->range_size / MALI_MMU_PAGE_SIZE, &session->mali_mem_allocated_pages);
+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+ }
+ return _MALI_OSK_ERR_OK;
+
+failed_gpu_map:
+ _mali_osk_mutex_signal(session->memory_lock);
+ mali_mem_cow_release(mem_backend, MALI_FALSE);
+ mem_backend->cow_mem.count = 0;
+failed_do_cow:
+ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle);
+failed_alloc_backend:
+ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node);
+ mali_mem_allocation_struct_destory(mali_allocation);
+
+ return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args)
+{
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+ mali_mem_backend *mem_backend = NULL;
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+ MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_cow_modify_range called! \n"));
+	/* Get the backend that needs to be modified. */
+ mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+ if (NULL == mem_backend || 0 == mem_backend->size) {
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ MALI_DEBUG_ASSERT(0 != mem_backend->size);
+ return ret;
+ }
+
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_backend->type);
+
+ ret = mali_memory_cow_modify_range(mem_backend, args->range_start, args->size);
+ args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+ if (_MALI_OSK_ERR_OK != ret)
+ return ret;
+ _mali_osk_mutex_wait(session->memory_lock);
+ if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) {
+ mali_mem_cow_mali_map(mem_backend, args->range_start, args->size);
+ }
+ _mali_osk_mutex_signal(session->memory_lock);
+
+ atomic_add(args->change_pages_nr, &session->mali_mem_allocated_pages);
+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
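+
+/*
+ * Editor's illustrative sketch (not part of the original driver): both
+ * _mali_ukk_mem_cow() and _mali_ukk_mem_cow_modify_range() account memory
+ * with the same high-watermark pattern -- add the new pages to an atomic
+ * counter, then raise the recorded per-session maximum if the new total
+ * exceeds it. The hypothetical helper below isolates that pattern.
+ */
+static inline void example_account_pages(struct mali_session_data *session, u32 nr_pages)
+{
+ atomic_add(nr_pages, &session->mali_mem_allocated_pages);
+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) {
+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+ }
+}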
+
+
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args)
+{
+ mali_mem_backend *mem_backend = NULL;
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT;
+
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n"));
+ MALI_DEBUG_ASSERT(0 == args->psize % MALI_MMU_PAGE_SIZE);
+
+ /* Get the memory backend that needs to be resized. */
+ mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+
+ if (NULL == mem_backend) {
+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n"));
+ return ret;
+ }
+
+ MALI_DEBUG_ASSERT(args->psize != mem_backend->size);
+
+ ret = mali_mem_resize(session, mem_backend, args->psize);
+
+ return ret;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args)
+{
+ args->memory_usage = _mali_ukk_report_memory_usage();
+ if (0 != args->vaddr) {
+ mali_mem_backend *mem_backend = NULL;
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ /* Get the backend that needs to be modified. */
+ mem_backend = mali_mem_backend_struct_search(session, args->vaddr);
+ if (NULL == mem_backend) {
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ if (MALI_MEM_COW == mem_backend->type)
+ args->change_pages_nr = mem_backend->cow_mem.change_pages_nr;
+ }
+ return _MALI_OSK_ERR_OK;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_manager.h b/drivers/gpu/arm/utgard/linux/mali_memory_manager.h
new file mode 100644
index 000000000000..c454b9354676
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_manager.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_MANAGER_H__
+#define __MALI_MEMORY_MANAGER_H__
+
+#include "mali_osk.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_uk_types.h"
+
+struct mali_allocation_manager {
+ rwlock_t vm_lock;
+ struct rb_root allocation_mgr_rb;
+ struct list_head head;
+ struct mutex list_mutex;
+ u32 mali_allocation_num;
+};
+
+extern struct idr mali_backend_idr;
+extern struct mutex mali_idr_mutex;
+
+int mali_memory_manager_init(struct mali_allocation_manager *mgr);
+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr);
+
+void mali_mem_allocation_struct_destory(mali_mem_allocation *alloc);
+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size);
+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address);
+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args);
+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args);
+
+#endif
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_os_alloc.c b/drivers/gpu/arm/utgard/linux/mali_memory_os_alloc.c
new file mode 100644
index 000000000000..1a6cc0649421
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_os_alloc.c
@@ -0,0 +1,805 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/version.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#include "mali_osk.h"
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_kernel_linux.h"
+
+/* Minimum size of allocator page pool */
+#define MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB * 256)
+#define MALI_OS_MEMORY_POOL_TRIM_JIFFIES (10 * CONFIG_HZ) /* Default to 10s */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+/* Write combine dma_attrs */
+static DEFINE_DMA_ATTRS(dma_attrs_wc);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask);
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask);
+#endif
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc);
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc);
+#endif
+#endif
+static void mali_mem_os_trim_pool(struct work_struct *work);
+
+struct mali_mem_os_allocator mali_mem_os_allocator = {
+ .pool_lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+ .pool_pages = LIST_HEAD_INIT(mali_mem_os_allocator.pool_pages),
+ .pool_count = 0,
+
+ .allocated_pages = ATOMIC_INIT(0),
+ .allocation_limit = 0,
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+ .shrinker.shrink = mali_mem_os_shrink,
+#else
+ .shrinker.count_objects = mali_mem_os_shrink_count,
+ .shrinker.scan_objects = mali_mem_os_shrink,
+#endif
+ .shrinker.seeks = DEFAULT_SEEKS,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+ .timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool, TIMER_DEFERRABLE),
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
+ .timed_shrinker = __DEFERRED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#else
+ .timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool),
+#endif
+};
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag)
+{
+ LIST_HEAD(pages);
+ struct mali_page_node *m_page, *m_tmp;
+ u32 free_pages_nr = 0;
+
+ if (MALI_TRUE == cow_flag) {
+ list_for_each_entry_safe(m_page, m_tmp, os_pages, list) {
+ /* Only handle OS nodes here. */
+ if (m_page->type == MALI_PAGE_NODE_OS) {
+ if (1 == _mali_page_node_get_ref_count(m_page)) {
+ list_move(&m_page->list, &pages);
+ atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+ free_pages_nr++;
+ } else {
+ _mali_page_node_unref(m_page);
+ m_page->page = NULL;
+ list_del(&m_page->list);
+ kfree(m_page);
+ }
+ }
+ }
+ } else {
+ list_cut_position(&pages, os_pages, os_pages->prev);
+ atomic_sub(pages_count, &mali_mem_os_allocator.allocated_pages);
+ free_pages_nr = pages_count;
+ }
+
+ /* Put pages on pool. */
+ spin_lock(&mali_mem_os_allocator.pool_lock);
+ list_splice(&pages, &mali_mem_os_allocator.pool_pages);
+ mali_mem_os_allocator.pool_count += free_pages_nr;
+ spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+ MALI_DEBUG_PRINT(5, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+ queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+ }
+ return free_pages_nr;
+}
+
+/**
+ * Put a page without returning it to the page pool.
+ */
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page)
+{
+ MALI_DEBUG_ASSERT_POINTER(page);
+ if (1 == page_count(page)) {
+ atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+ dma_unmap_page(&mali_platform_device->dev, page_private(page),
+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+ ClearPagePrivate(page);
+ }
+ put_page(page);
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count)
+{
+ struct mali_page_node *m_page, *m_tmp;
+ u32 i = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(mem_from);
+ MALI_DEBUG_ASSERT_POINTER(mem_to);
+
+ if (mem_from->count < start_page + page_count) {
+ return _MALI_OSK_ERR_INVALID_ARGS;
+ }
+
+ list_for_each_entry_safe(m_page, m_tmp, &mem_from->pages, list) {
+ if (i >= start_page && i < start_page + page_count) {
+ list_move_tail(&m_page->list, &mem_to->pages);
+ mem_from->count--;
+ mem_to->count++;
+ }
+ i++;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size)
+{
+ struct page *new_page;
+ LIST_HEAD(pages_list);
+ size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE;
+ size_t remaining = page_count;
+ struct mali_page_node *m_page, *m_tmp;
+ u32 i;
+
+ MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+ if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+ MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+ size,
+ atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+ mali_mem_os_allocator.allocation_limit));
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&os_mem->pages);
+ os_mem->count = page_count;
+
+ /* Grab pages from pool. */
+ {
+ size_t pool_pages;
+ spin_lock(&mali_mem_os_allocator.pool_lock);
+ pool_pages = min(remaining, mali_mem_os_allocator.pool_count);
+ for (i = pool_pages; i > 0; i--) {
+ BUG_ON(list_empty(&mali_mem_os_allocator.pool_pages));
+ list_move(mali_mem_os_allocator.pool_pages.next, &pages_list);
+ }
+ mali_mem_os_allocator.pool_count -= pool_pages;
+ remaining -= pool_pages;
+ spin_unlock(&mali_mem_os_allocator.pool_lock);
+ }
+
+ /* Process pages from pool. */
+ i = 0;
+ list_for_each_entry_safe(m_page, m_tmp, &pages_list, list) {
+ BUG_ON(NULL == m_page);
+
+ list_move_tail(&m_page->list, &os_mem->pages);
+ }
+
+ /* Allocate new pages, if needed. */
+ for (i = 0; i < remaining; i++) {
+ dma_addr_t dma_addr;
+ gfp_t flags = __GFP_ZERO | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD;
+ int err;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+ flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+ flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+ flags |= GFP_DMA;
+#else
+ /* arm64 Utgard only works with memory below 4G, but the kernel
+ * provides no way to allocate memory guaranteed to be below 4G.
+ */
+ MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+ new_page = alloc_page(flags);
+
+ if (unlikely(NULL == new_page)) {
+ /* Calculate the number of pages actually allocated, and free them. */
+ os_mem->count = (page_count - remaining) + i;
+ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+ return -ENOMEM;
+ }
+
+ /* Ensure page is flushed from CPU caches. */
+ dma_addr = dma_map_page(&mali_platform_device->dev, new_page,
+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+ err = dma_mapping_error(&mali_platform_device->dev, dma_addr);
+ if (unlikely(err)) {
+ MALI_DEBUG_PRINT_ERROR(("OS Mem: Failed to DMA map page %p: %u",
+ new_page, err));
+ __free_page(new_page);
+ os_mem->count = (page_count - remaining) + i;
+ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+ return -EFAULT;
+ }
+
+ /* Store page phys addr */
+ SetPagePrivate(new_page);
+ set_page_private(new_page, dma_addr);
+
+ m_page = _mali_page_node_allocate(MALI_PAGE_NODE_OS);
+ if (unlikely(NULL == m_page)) {
+ MALI_PRINT_ERROR(("OS Mem: Can't allocate mali_page node! \n"));
+ dma_unmap_page(&mali_platform_device->dev, page_private(new_page),
+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+ ClearPagePrivate(new_page);
+ __free_page(new_page);
+ os_mem->count = (page_count - remaining) + i;
+ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages);
+ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE);
+ return -EFAULT;
+ }
+ m_page->page = new_page;
+
+ list_add_tail(&m_page->list, &os_mem->pages);
+ }
+
+ atomic_add(page_count, &mali_mem_os_allocator.allocated_pages);
+
+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+ MALI_DEBUG_PRINT(4, ("OS Mem: Stopping pool trim timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+ cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+ }
+
+ return 0;
+}
+
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_page_num, u32 props)
+{
+ struct mali_page_directory *pagedir = session->page_directory;
+ struct mali_page_node *m_page;
+ u32 virt;
+ u32 prop = props;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+ MALI_DEBUG_ASSERT_POINTER(os_mem);
+
+ MALI_DEBUG_ASSERT(start_page <= os_mem->count);
+ MALI_DEBUG_ASSERT((start_page + mapping_page_num) <= os_mem->count);
+
+ if ((start_page + mapping_page_num) == os_mem->count) {
+
+ virt = vaddr + MALI_MMU_PAGE_SIZE * (start_page + mapping_page_num);
+
+ list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+ virt -= MALI_MMU_PAGE_SIZE;
+ if (mapping_page_num > 0) {
+ dma_addr_t phys = page_private(m_page->page);
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+ /* Verify that the "physical" address is 32-bit and
+ * usable for Mali, when on a system with bus addresses
+ * wider than 32-bit. */
+ MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+ } else {
+ break;
+ }
+ mapping_page_num--;
+ }
+
+ } else {
+ u32 i = 0;
+ virt = vaddr;
+ list_for_each_entry(m_page, &os_mem->pages, list) {
+
+ if (i >= start_page) {
+ dma_addr_t phys = page_private(m_page->page);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+ /* Verify that the "physical" address is 32-bit and
+ * usable for Mali, when on a system with bus addresses
+ * wider than 32-bit. */
+ MALI_DEBUG_ASSERT(0 == (phys >> 32));
+#endif
+ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop);
+ }
+ i++;
+ virt += MALI_MMU_PAGE_SIZE;
+ }
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc)
+{
+ struct mali_session_data *session;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ session = alloc->session;
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ mali_session_memory_lock(session);
+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+ alloc->flags);
+ mali_session_memory_unlock(session);
+}
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma)
+{
+ mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+ struct mali_page_node *m_page;
+ struct page *page;
+ int ret;
+ unsigned long addr = vma->vm_start;
+ MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+ list_for_each_entry(m_page, &os_mem->pages, list) {
+ /* We should use vm_insert_page, but it does a dcache
+ * flush which makes it way slower than remap_pfn_range or vm_insert_pfn.
+ ret = vm_insert_page(vma, addr, page);
+ */
+ page = m_page->page;
+ ret = vm_insert_pfn(vma, addr, page_to_pfn(page));
+
+ if (unlikely(0 != ret)) {
+ return -EFAULT;
+ }
+ addr += _MALI_OSK_MALI_PAGE_SIZE;
+ }
+
+ return 0;
+}
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mapping_size)
+{
+ mali_mem_os_mem *os_mem = &mem_bkend->os_mem;
+ struct mali_page_node *m_page;
+ int ret;
+ int offset;
+ int mapping_page_num;
+ int count;
+
+ unsigned long vstart = vma->vm_start;
+ count = 0;
+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS);
+ MALI_DEBUG_ASSERT(0 == start_vaddr % _MALI_OSK_MALI_PAGE_SIZE);
+ MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE);
+ offset = (start_vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE;
+ MALI_DEBUG_ASSERT(offset <= os_mem->count);
+ mapping_page_num = mapping_size / _MALI_OSK_MALI_PAGE_SIZE;
+ MALI_DEBUG_ASSERT((offset + mapping_page_num) <= os_mem->count);
+
+ if ((offset + mapping_page_num) == os_mem->count) {
+
+ unsigned long vm_end = start_vaddr + mapping_size;
+
+ list_for_each_entry_reverse(m_page, &os_mem->pages, list) {
+
+ vm_end -= _MALI_OSK_MALI_PAGE_SIZE;
+ if (mapping_page_num > 0) {
+ ret = vm_insert_pfn(vma, vm_end, page_to_pfn(m_page->page));
+
+ if (unlikely(0 != ret)) {
+ /* vm_insert_pfn() returns -EBUSY if the page has already been mapped into the page table, which is OK. */
+ if (-EBUSY == ret) {
+ break;
+ } else {
+ MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, offset is %d,page_count is %d\n",
+ ret, offset + mapping_page_num, os_mem->count));
+ }
+ return _MALI_OSK_ERR_FAULT;
+ }
+ } else {
+ break;
+ }
+ mapping_page_num--;
+
+ }
+ } else {
+
+ list_for_each_entry(m_page, &os_mem->pages, list) {
+ if (count >= offset) {
+
+ ret = vm_insert_pfn(vma, vstart, page_to_pfn(m_page->page));
+
+ if (unlikely(0 != ret)) {
+ /* vm_insert_pfn() returns -EBUSY if the page has already been mapped into the page table, which is OK. */
+ if (-EBUSY == ret) {
+ break;
+ } else {
+ MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, count is %d, offset is %d,page_count is %d\n",
+ ret, count, offset, os_mem->count));
+ }
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+ count++;
+ vstart += _MALI_OSK_MALI_PAGE_SIZE;
+ }
+ }
+ return _MALI_OSK_ERR_OK;
+}
+
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend)
+{
+
+ mali_mem_allocation *alloc;
+ u32 free_pages_nr = 0;
+ MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+ MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type);
+
+ alloc = mem_bkend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+
+ /* Unmap the memory from the mali virtual address space. */
+ mali_mem_os_mali_unmap(alloc);
+ mutex_lock(&mem_bkend->mutex);
+ /* Free pages */
+ if (MALI_MEM_BACKEND_FLAG_COWED & mem_bkend->flags) {
+ free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_TRUE);
+ } else {
+ free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_FALSE);
+ }
+ mutex_unlock(&mem_bkend->mutex);
+
+ MALI_DEBUG_PRINT(4, ("OS Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->os_mem.count * _MALI_OSK_MALI_PAGE_SIZE,
+ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE));
+
+ mem_bkend->os_mem.count = 0;
+ return free_pages_nr;
+}
+
+
+#define MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE 128
+static struct {
+ struct {
+ mali_dma_addr phys;
+ mali_io_address mapping;
+ } page[MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE];
+ size_t count;
+ spinlock_t lock;
+} mali_mem_page_table_page_pool = {
+ .count = 0,
+ .lock = __SPIN_LOCK_UNLOCKED(pool_lock),
+};
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping)
+{
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_NOMEM;
+ dma_addr_t tmp_phys;
+
+ spin_lock(&mali_mem_page_table_page_pool.lock);
+ if (0 < mali_mem_page_table_page_pool.count) {
+ u32 i = --mali_mem_page_table_page_pool.count;
+ *phys = mali_mem_page_table_page_pool.page[i].phys;
+ *mapping = mali_mem_page_table_page_pool.page[i].mapping;
+
+ ret = _MALI_OSK_ERR_OK;
+ }
+ spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+ if (_MALI_OSK_ERR_OK != ret) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+ *mapping = dma_alloc_attrs(&mali_platform_device->dev,
+ _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys,
+ GFP_KERNEL, &dma_attrs_wc);
+#else
+ *mapping = dma_alloc_writecombine(&mali_platform_device->dev,
+ _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, GFP_KERNEL);
+#endif
+ if (NULL != *mapping) {
+ ret = _MALI_OSK_ERR_OK;
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+ /* Verify that the "physical" address is 32-bit and
+ * usable for Mali, when on a system with bus addresses
+ * wider than 32-bit. */
+ MALI_DEBUG_ASSERT(0 == (tmp_phys >> 32));
+#endif
+
+ *phys = (mali_dma_addr)tmp_phys;
+ }
+ }
+
+ return ret;
+}
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt)
+{
+ spin_lock(&mali_mem_page_table_page_pool.lock);
+ if (MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE > mali_mem_page_table_page_pool.count) {
+ u32 i = mali_mem_page_table_page_pool.count;
+ mali_mem_page_table_page_pool.page[i].phys = phys;
+ mali_mem_page_table_page_pool.page[i].mapping = virt;
+
+ ++mali_mem_page_table_page_pool.count;
+
+ spin_unlock(&mali_mem_page_table_page_pool.lock);
+ } else {
+ spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+ dma_free_attrs(&mali_platform_device->dev,
+ _MALI_OSK_MALI_PAGE_SIZE, virt, phys,
+ &dma_attrs_wc);
+#else
+ dma_free_writecombine(&mali_platform_device->dev,
+ _MALI_OSK_MALI_PAGE_SIZE, virt, phys);
+#endif
+ }
+}
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page)
+{
+ struct page *page = m_page->page;
+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_OS);
+
+ if (1 == page_count(page)) {
+ dma_unmap_page(&mali_platform_device->dev, page_private(page),
+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+ ClearPagePrivate(page);
+ }
+ __free_page(page);
+ m_page->page = NULL;
+ list_del(&m_page->list);
+ kfree(m_page);
+}
+
+/* The maximum number of page table pool pages to free in one go. */
+#define MALI_MEM_OS_CHUNK_TO_FREE 64UL
+
+/* Free a certain number of pages from the page table page pool.
+ * The pool lock must be held when calling the function, and the lock will be
+ * released before returning.
+ */
+static void mali_mem_os_page_table_pool_free(size_t nr_to_free)
+{
+ mali_dma_addr phys_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+ void *virt_arr[MALI_MEM_OS_CHUNK_TO_FREE];
+ u32 i;
+
+ MALI_DEBUG_ASSERT(nr_to_free <= MALI_MEM_OS_CHUNK_TO_FREE);
+
+ /* Remove nr_to_free pages from the pool and store them locally on stack. */
+ for (i = 0; i < nr_to_free; i++) {
+ u32 pool_index = mali_mem_page_table_page_pool.count - i - 1;
+
+ phys_arr[i] = mali_mem_page_table_page_pool.page[pool_index].phys;
+ virt_arr[i] = mali_mem_page_table_page_pool.page[pool_index].mapping;
+ }
+
+ mali_mem_page_table_page_pool.count -= nr_to_free;
+
+ spin_unlock(&mali_mem_page_table_page_pool.lock);
+
+ /* After releasing the spinlock: free the pages we removed from the pool. */
+ for (i = 0; i < nr_to_free; i++) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+ dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE,
+ virt_arr[i], (dma_addr_t)phys_arr[i], &dma_attrs_wc);
+#else
+ dma_free_writecombine(&mali_platform_device->dev,
+ _MALI_OSK_MALI_PAGE_SIZE,
+ virt_arr[i], (dma_addr_t)phys_arr[i]);
+#endif
+ }
+}
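+
+/*
+ * Editor's illustrative sketch (not part of the original driver): the
+ * convention above hands a held spinlock over to the callee, which updates
+ * the pool under the lock, drops it, and only then calls the DMA free
+ * routines. Every caller therefore follows this shape (hypothetical helper):
+ */
+static void example_pool_free_caller(size_t nr_to_free)
+{
+ spin_lock(&mali_mem_page_table_page_pool.lock);
+ /* nr_to_free must not exceed the pool count or MALI_MEM_OS_CHUNK_TO_FREE. */
+ mali_mem_os_page_table_pool_free(nr_to_free);
+ /* The lock has been released by the callee; do not unlock it here. */
+}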
+
+static void mali_mem_os_trim_page_table_page_pool(void)
+{
+ size_t nr_to_free = 0;
+ size_t nr_to_keep;
+
+ /* Keep 2 page table pages for each 1024 pages in the page cache. */
+ nr_to_keep = mali_mem_os_allocator.pool_count / 512;
+ /* And a minimum of eight pages, to accommodate new sessions. */
+ nr_to_keep += 8;
+
+ if (0 == spin_trylock(&mali_mem_page_table_page_pool.lock)) return;
+
+ if (nr_to_keep < mali_mem_page_table_page_pool.count) {
+ nr_to_free = mali_mem_page_table_page_pool.count - nr_to_keep;
+ nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, nr_to_free);
+ }
+
+ /* Pool lock will be released by the callee. */
+ mali_mem_os_page_table_pool_free(nr_to_free);
+}
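+
+/*
+ * Editor's illustrative sketch (not part of the original driver): a worked
+ * example of the keep heuristic above -- with 4096 pages in the page cache
+ * pool, 4096 / 512 + 8 == 16 page table pages are kept; with an empty pool,
+ * the 8-page floor still remains for new sessions. Hypothetical helper:
+ */
+static inline size_t example_page_table_pages_to_keep(size_t pool_count)
+{
+ return pool_count / 512 + 8;
+}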
+
+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+ return mali_mem_os_allocator.pool_count;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask)
+#else
+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask)
+#endif /* Linux < 2.6.35 */
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#else
+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+#endif /* Linux < 3.12.0 */
+#endif /* Linux < 3.0.0 */
+{
+ struct mali_page_node *m_page, *m_tmp;
+ unsigned long flags;
+ struct list_head *le, pages;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+ int nr = nr_to_scan;
+#else
+ int nr = sc->nr_to_scan;
+#endif
+
+ if (0 == nr) {
+ return mali_mem_os_shrink_count(shrinker, sc);
+ }
+
+ if (0 == spin_trylock_irqsave(&mali_mem_os_allocator.pool_lock, flags)) {
+ /* Not able to lock. */
+ return -1;
+ }
+
+ if (0 == mali_mem_os_allocator.pool_count) {
+ /* No pages available */
+ spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+ return 0;
+ }
+
+ /* Release from general page pool */
+ nr = min((size_t)nr, mali_mem_os_allocator.pool_count);
+ mali_mem_os_allocator.pool_count -= nr;
+ list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+ --nr;
+ if (0 == nr) break;
+ }
+ list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+ spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags);
+
+ list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+ mali_mem_os_free_page_node(m_page);
+ }
+
+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) {
+ /* Pool is below the static limit, stop the trim timer */
+ MALI_DEBUG_PRINT(5, ("Stopping timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count));
+ cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker);
+ }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+ return mali_mem_os_shrink_count(shrinker, sc);
+#else
+ return nr;
+#endif
+}
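+
+/*
+ * Editor's illustrative sketch (not part of the original driver): kernels
+ * from 3.12 onwards split the single shrink() callback into a
+ * count_objects()/scan_objects() pair, which is why the initializer of
+ * mali_mem_os_allocator selects different shrinker members by version. A
+ * minimal modern shrinker built from the two functions above (registration
+ * omitted) would be:
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
+static struct shrinker example_pool_shrinker = {
+ .count_objects = mali_mem_os_shrink_count, /* how many pages are reclaimable */
+ .scan_objects = mali_mem_os_shrink, /* reclaim up to sc->nr_to_scan of them */
+ .seeks = DEFAULT_SEEKS,
+};
+#endif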
+
+static void mali_mem_os_trim_pool(struct work_struct *data)
+{
+ struct mali_page_node *m_page, *m_tmp;
+ struct list_head *le;
+ LIST_HEAD(pages);
+ size_t nr_to_free;
+
+ MALI_IGNORE(data);
+
+ MALI_DEBUG_PRINT(3, ("OS Mem: Trimming pool %u\n", mali_mem_os_allocator.pool_count));
+
+ /* Release from general page pool */
+ spin_lock(&mali_mem_os_allocator.pool_lock);
+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+ size_t count = mali_mem_os_allocator.pool_count - MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES;
+ const size_t min_to_free = min(64, MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES);
+
+ /* Free half of the pool pages above the static limit, but at least 64 pages (256KB). */
+ nr_to_free = max(count / 2, min_to_free);
+
+ mali_mem_os_allocator.pool_count -= nr_to_free;
+ list_for_each(le, &mali_mem_os_allocator.pool_pages) {
+ --nr_to_free;
+ if (0 == nr_to_free) break;
+ }
+ list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le);
+ }
+ spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+ list_for_each_entry_safe(m_page, m_tmp, &pages, list) {
+ mali_mem_os_free_page_node(m_page);
+ }
+
+ /* Release some pages from page table page pool */
+ mali_mem_os_trim_page_table_page_pool();
+
+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) {
+ MALI_DEBUG_PRINT(4, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count));
+ queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES);
+ }
+}
+
+_mali_osk_errcode_t mali_mem_os_init(void)
+{
+ mali_mem_os_allocator.wq = alloc_workqueue("mali-mem", WQ_UNBOUND, 1);
+ if (NULL == mali_mem_os_allocator.wq) {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &dma_attrs_wc);
+#endif
+
+ register_shrinker(&mali_mem_os_allocator.shrinker);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_os_term(void)
+{
+ struct mali_page_node *m_page, *m_tmp;
+ unregister_shrinker(&mali_mem_os_allocator.shrinker);
+ cancel_delayed_work_sync(&mali_mem_os_allocator.timed_shrinker);
+
+ if (NULL != mali_mem_os_allocator.wq) {
+ destroy_workqueue(mali_mem_os_allocator.wq);
+ mali_mem_os_allocator.wq = NULL;
+ }
+
+ spin_lock(&mali_mem_os_allocator.pool_lock);
+ list_for_each_entry_safe(m_page, m_tmp, &mali_mem_os_allocator.pool_pages, list) {
+ mali_mem_os_free_page_node(m_page);
+
+ --mali_mem_os_allocator.pool_count;
+ }
+ BUG_ON(mali_mem_os_allocator.pool_count);
+ spin_unlock(&mali_mem_os_allocator.pool_lock);
+
+ /* Release from page table page pool */
+ do {
+ u32 nr_to_free;
+
+ spin_lock(&mali_mem_page_table_page_pool.lock);
+
+ nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, mali_mem_page_table_page_pool.count);
+
+ /* Pool lock will be released by the callee. */
+ mali_mem_os_page_table_pool_free(nr_to_free);
+ } while (0 != mali_mem_page_table_page_pool.count);
+}
+
+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size)
+{
+ mali_mem_os_allocator.allocation_limit = size;
+
+ MALI_SUCCESS;
+}
+
+u32 mali_mem_os_stat(void)
+{
+ return atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_os_alloc.h b/drivers/gpu/arm/utgard/linux/mali_memory_os_alloc.h
new file mode 100644
index 000000000000..f9ead166c455
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_os_alloc.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_OS_ALLOC_H__
+#define __MALI_MEMORY_OS_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_memory_types.h"
+
+
+/** @brief Release Mali OS memory
+ *
+ * The session memory_lock must be held when calling this function.
+ *
+ * @param mem_bkend Pointer to the mali_mem_backend to release
+ */
+u32 mali_mem_os_release(mali_mem_backend *mem_bkend);
+
+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping);
+
+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt);
+
+_mali_osk_errcode_t mali_mem_os_init(void);
+
+void mali_mem_os_term(void);
+
+u32 mali_mem_os_stat(void);
+
+void mali_mem_os_free_page_node(struct mali_page_node *m_page);
+
+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size);
+
+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag);
+
+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page);
+
+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count);
+
+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_page_num, u32 props);
+
+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc);
+
+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma);
+
+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mapping_size);
+
+#endif /* __MALI_MEMORY_OS_ALLOC_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_swap_alloc.c b/drivers/gpu/arm/utgard/linux/mali_memory_swap_alloc.c
new file mode 100644
index 000000000000..a46eb198c98f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_swap_alloc.c
@@ -0,0 +1,942 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+#include <linux/shmem_fs.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_memory.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_ukk.h"
+#include "mali_kernel_utilization.h"
+#include "mali_memory_swap_alloc.h"
+
+
+static struct _mali_osk_bitmap idx_mgr;
+static struct file *global_swap_file;
+static struct address_space *global_swap_space;
+static _mali_osk_wq_work_t *mali_mem_swap_out_workq = NULL;
+static u32 mem_backend_swapped_pool_size;
+#ifdef MALI_MEM_SWAP_TRACKING
+static u32 mem_backend_swapped_unlock_size;
+#endif
+/* Lock order: mem_backend_swapped_pool_lock > each memory backend's mutex lock.
+ * This lock is used to protect mem_backend_swapped_pool_size and mem_backend_swapped_pool. */
+static struct mutex mem_backend_swapped_pool_lock;
+static struct list_head mem_backend_swapped_pool;
+
+extern struct mali_mem_os_allocator mali_mem_os_allocator;
+
+#define MALI_SWAP_LOW_MEM_DEFAULT_VALUE (60*1024*1024)
+#define MALI_SWAP_INVALIDATE_MALI_ADDRESS (0) /* Used to mark the given memory cookie as invalid. */
+#define MALI_SWAP_GLOBAL_SWAP_FILE_SIZE (0xFFFFFFFF)
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_CACHE_SHIFT)
+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE (1 << 15) /* Reserved for CoW nonlinear swap backend memory, the space size is 128MB. */
+
+unsigned int mali_mem_swap_out_threshold_value = MALI_SWAP_LOW_MEM_DEFAULT_VALUE;
+
+/**
+ * The swapped pool is shrunk in two situations: either GPU utilization is
+ * low, which indicates the GPU will not touch the swappable backends in the
+ * short term, or new swappable backends have just been added and the total
+ * pool size exceeds the swapped pool threshold. The enum values below are
+ * reused directly as the GPU-utilization threshold in the shrink routine.
+ */
+typedef enum {
+ MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION = 100,
+ MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS = 257,
+} _mali_mem_swap_pool_shrink_type_t;
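+
+/*
+ * Editor's illustrative sketch (not part of the original driver): the
+ * lock-order rule stated above (mem_backend_swapped_pool_lock before each
+ * backend's mutex) means the pool lock is always taken first and released
+ * last, never the other way around. Hypothetical helper:
+ */
+static inline void example_swapped_pool_lock_order(mali_mem_backend *bkend)
+{
+ mutex_lock(&mem_backend_swapped_pool_lock); /* outer lock first */
+ mutex_lock(&bkend->mutex); /* then the backend lock */
+ /* ... operate on the backend and the pool ... */
+ mutex_unlock(&bkend->mutex);
+ mutex_unlock(&mem_backend_swapped_pool_lock);
+}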
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg);
+
+_mali_osk_errcode_t mali_mem_swap_init(void)
+{
+ gfp_t flags = __GFP_NORETRY | __GFP_NOWARN;
+
+ if (_MALI_OSK_ERR_OK != _mali_osk_bitmap_init(&idx_mgr, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE)) {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ global_swap_file = shmem_file_setup("mali_swap", MALI_SWAP_GLOBAL_SWAP_FILE_SIZE, VM_NORESERVE);
+ if (IS_ERR(global_swap_file)) {
+ _mali_osk_bitmap_term(&idx_mgr);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ global_swap_space = global_swap_file->f_path.dentry->d_inode->i_mapping;
+
+ mali_mem_swap_out_workq = _mali_osk_wq_create_work(mali_mem_swap_swapped_bkend_pool_check_for_low_utilization, NULL);
+ if (NULL == mali_mem_swap_out_workq) {
+ _mali_osk_bitmap_term(&idx_mgr);
+ fput(global_swap_file);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE)
+ flags |= GFP_HIGHUSER;
+#else
+#ifdef CONFIG_ZONE_DMA32
+ flags |= GFP_DMA32;
+#else
+#ifdef CONFIG_ZONE_DMA
+ flags |= GFP_DMA;
+#else
+ /* arm64 Utgard only works with memory below 4G, but the kernel
+ * provides no way to allocate memory guaranteed to be below 4G.
+ */
+ MALI_DEBUG_ASSERT(0);
+#endif
+#endif
+#endif
+
+ /* When we use shmem_read_mapping_page to allocate or swap in, it will
+ * use these flags to allocate a new page if needed. */
+ mapping_set_gfp_mask(global_swap_space, flags);
+
+ mem_backend_swapped_pool_size = 0;
+#ifdef MALI_MEM_SWAP_TRACKING
+ mem_backend_swapped_unlock_size = 0;
+#endif
+ mutex_init(&mem_backend_swapped_pool_lock);
+ INIT_LIST_HEAD(&mem_backend_swapped_pool);
+
+ MALI_DEBUG_PRINT(2, ("Mali SWAP: Swap out threshold vaule is %uM\n", mali_mem_swap_out_threshold_value >> 20));
+
+ return _MALI_OSK_ERR_OK;
+}
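+
+/*
+ * Editor's illustrative sketch (not part of the original driver): the swap
+ * backend treats the shmem file created above as a sparse page store.
+ * Pages are materialized on demand with shmem_read_mapping_page() and
+ * dropped again with shmem_truncate_range(); a minimal round trip over a
+ * single page index looks like this (hypothetical helper):
+ */
+static int example_swap_page_round_trip(pgoff_t idx)
+{
+ struct page *page = shmem_read_mapping_page(global_swap_space, idx);
+
+ if (IS_ERR(page)) {
+ return PTR_ERR(page);
+ }
+
+ /* ... use the page ... */
+ set_page_dirty(page);
+ page_cache_release(page);
+
+ /* Punch the page back out of the file to release its memory. */
+ shmem_truncate_range(global_swap_file->f_path.dentry->d_inode,
+ (loff_t)idx << PAGE_SHIFT, ((loff_t)idx << PAGE_SHIFT) + PAGE_SIZE - 1);
+ return 0;
+}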
+
+void mali_mem_swap_term(void)
+{
+ _mali_osk_bitmap_term(&idx_mgr);
+
+ fput(global_swap_file);
+
+ _mali_osk_wq_delete_work(mali_mem_swap_out_workq);
+
+ MALI_DEBUG_ASSERT(list_empty(&mem_backend_swapped_pool));
+ MALI_DEBUG_ASSERT(0 == mem_backend_swapped_pool_size);
+
+ return;
+}
+
+struct file *mali_mem_swap_get_global_swap_file(void)
+{
+ return global_swap_file;
+}
+
+/* Check whether a swappable backend is in the swapped pool. */
+static mali_bool mali_memory_swap_backend_in_swapped_pool(mali_mem_backend *mem_bkend)
+{
+ MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+ return !list_empty(&mem_bkend->list);
+}
+
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend)
+{
+ MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+
+ mutex_lock(&mem_backend_swapped_pool_lock);
+ mutex_lock(&mem_bkend->mutex);
+
+ if (MALI_FALSE == mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+ mutex_unlock(&mem_bkend->mutex);
+ mutex_unlock(&mem_backend_swapped_pool_lock);
+ return;
+ }
+
+ MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+ list_del_init(&mem_bkend->list);
+
+ mutex_unlock(&mem_bkend->mutex);
+
+ mem_backend_swapped_pool_size -= mem_bkend->size;
+
+ mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+static void mali_mem_swap_out_page_node(mali_page_node *page_node)
+{
+ MALI_DEBUG_ASSERT(page_node);
+
+ dma_unmap_page(&mali_platform_device->dev, page_node->swap_it->dma_addr,
+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+ set_page_dirty(page_node->swap_it->page);
+ page_cache_release(page_node->swap_it->page);
+}
+
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend)
+{
+ mali_page_node *m_page;
+
+ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex));
+
+ if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN)) {
+ return;
+ }
+
+ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN;
+
+ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+ mali_mem_swap_out_page_node(m_page);
+ }
+
+ return;
+}
+
+static void mali_mem_swap_unlock_partial_locked_mem_backend(mali_mem_backend *mem_bkend, mali_page_node *page_node)
+{
+ mali_page_node *m_page;
+
+ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex));
+
+ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+ if (m_page == page_node) {
+ break;
+ }
+ mali_mem_swap_out_page_node(m_page);
+ }
+}
+
+static void mali_mem_swap_swapped_bkend_pool_shrink(_mali_mem_swap_pool_shrink_type_t shrink_type)
+{
+ mali_mem_backend *bkend, *tmp_bkend;
+ long system_free_size;
+ u32 last_gpu_utilization, gpu_utilization_threshold_value, temp_swap_out_threshold_value;
+
+ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_backend_swapped_pool_lock));
+
+ if (MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION == shrink_type) {
+ /**
+ * When system memory is very low, the swappable memory held by Mali is
+ * above a quarter of the threshold value, and at the same time GPU load
+ * is low enough that high performance is not needed, we can unlock more
+ * swap memory backends from the swapped backends pool.
+ */
+ gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION;
+ temp_swap_out_threshold_value = (mali_mem_swap_out_threshold_value >> 2);
+ } else {
+ /* When adding swappable memory backends to the swapped pool, the driver
+ * must not hold on to too many of them, but performance also has to be
+ * considered. Swapping out a memory backend is therefore a balance,
+ * governed by the following conditions:
+ * 1. The total memory size in the global swapped backend pool exceeds the
+ * defined threshold value.
+ * 2. The system-level free memory size is below the defined threshold value.
+ * 3. Note that GPU utilization is not considered in this case.
+ */
+ gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS;
+ temp_swap_out_threshold_value = mali_mem_swap_out_threshold_value;
+ }
+
+ /* Get system free pages number. */
+ system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+ last_gpu_utilization = _mali_ukk_utilization_gp_pp();
+
+ if ((last_gpu_utilization < gpu_utilization_threshold_value)
+ && (system_free_size < mali_mem_swap_out_threshold_value)
+ && (mem_backend_swapped_pool_size > temp_swap_out_threshold_value)) {
+ list_for_each_entry_safe(bkend, tmp_bkend, &mem_backend_swapped_pool, list) {
+ if (mem_backend_swapped_pool_size <= temp_swap_out_threshold_value) {
+ break;
+ }
+
+ mutex_lock(&bkend->mutex);
+
+ /* check if backend is in use. */
+ if (0 < bkend->using_count) {
+ mutex_unlock(&bkend->mutex);
+ continue;
+ }
+
+ mali_mem_swap_unlock_single_mem_backend(bkend);
+ list_del_init(&bkend->list);
+ mem_backend_swapped_pool_size -= bkend->size;
+#ifdef MALI_MEM_SWAP_TRACKING
+ mem_backend_swapped_unlock_size += bkend->size;
+#endif
+ mutex_unlock(&bkend->mutex);
+ }
+ }
+
+ return;
+}
+
+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg)
+{
+ MALI_IGNORE(arg);
+
+ mutex_lock(&mem_backend_swapped_pool_lock);
+
+ mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION);
+
+ mutex_unlock(&mem_backend_swapped_pool_lock);
+}
+
+/**
+ * After a PP job finishes, add every swappable memory backend used by the
+ * job to the tail of the global swapped pool; if the total size of swappable
+ * memory then exceeds the threshold value, shrink the swapped pool starting
+ * from the head of the list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend)
+{
+ mutex_lock(&mem_backend_swapped_pool_lock);
+ mutex_lock(&mem_bkend->mutex);
+
+ if (mali_memory_swap_backend_in_swapped_pool(mem_bkend)) {
+ MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list));
+
+ list_del_init(&mem_bkend->list);
+ list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+ mutex_unlock(&mem_bkend->mutex);
+ mutex_unlock(&mem_backend_swapped_pool_lock);
+ return;
+ }
+
+ list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool);
+
+ mutex_unlock(&mem_bkend->mutex);
+ mem_backend_swapped_pool_size += mem_bkend->size;
+
+ mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS);
+
+ mutex_unlock(&mem_backend_swapped_pool_lock);
+ return;
+}
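+
+/*
+ * Editor's illustrative sketch (not part of the original driver): re-adding
+ * an already pooled backend above is effectively an LRU "touch" -- the
+ * list_del_init()/list_add_tail() pair is equivalent to a single
+ * list_move_tail(), so recently used backends gather at the tail while the
+ * shrinker picks its victims from the head. Hypothetical helper:
+ */
+static inline void example_swapped_pool_lru_touch(mali_mem_backend *bkend)
+{
+ /* Caller holds mem_backend_swapped_pool_lock and bkend->mutex. */
+ list_move_tail(&bkend->list, &mem_backend_swapped_pool);
+}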
+
+
+u32 mali_mem_swap_idx_alloc(void)
+{
+ return _mali_osk_bitmap_alloc(&idx_mgr);
+}
+
+void mali_mem_swap_idx_free(u32 idx)
+{
+ _mali_osk_bitmap_free(&idx_mgr, idx);
+}
+
+static u32 mali_mem_swap_idx_range_alloc(u32 count)
+{
+ u32 index;
+
+ index = _mali_osk_bitmap_alloc_range(&idx_mgr, count);
+
+ return index;
+}
+
+static void mali_mem_swap_idx_range_free(u32 idx, int num)
+{
+ _mali_osk_bitmap_free_range(&idx_mgr, idx, num);
+}
+
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void)
+{
+ mali_swap_item *swap_item;
+
+ swap_item = kzalloc(sizeof(mali_swap_item), GFP_KERNEL);
+
+ if (NULL == swap_item) {
+ return NULL;
+ }
+
+ atomic_set(&swap_item->ref_count, 1);
+ swap_item->page = NULL;
+ atomic_add(1, &mali_mem_os_allocator.allocated_pages);
+
+ return swap_item;
+}
+
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item)
+{
+ struct inode *file_node;
+ long long start, end;
+
+ /* If this swap item is shared, we just reduce the reference counter. */
+ if (0 == atomic_dec_return(&swap_item->ref_count)) {
+ file_node = global_swap_file->f_path.dentry->d_inode;
+ start = swap_item->idx;
+ start = start << 12;
+ end = start + PAGE_SIZE;
+
+ shmem_truncate_range(file_node, start, (end - 1));
+
+ mali_mem_swap_idx_free(swap_item->idx);
+
+ atomic_sub(1, &mali_mem_os_allocator.allocated_pages);
+
+ kfree(swap_item);
+ }
+}
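+
+/*
+ * Editor's illustrative sketch (not part of the original driver): the
+ * "idx << 12" above converts a 4KB page index into a byte offset inside the
+ * global swap file, so the truncated range covers exactly one page.
+ * Hypothetical helper:
+ */
+static inline loff_t example_swap_byte_offset(u32 idx)
+{
+ /* e.g. idx == 3 -> byte 12288; the freed range is [12288, 16383] */
+ return (loff_t)idx << PAGE_SHIFT;
+}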
+
+/* Used to allocate new swap item for new memory allocation and cow page for write. */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void)
+{
+ struct mali_page_node *m_page;
+
+ m_page = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP);
+
+ if (NULL == m_page) {
+ return NULL;
+ }
+
+ m_page->swap_it = mali_mem_swap_alloc_swap_item();
+
+ if (NULL == m_page->swap_it) {
+ kfree(m_page);
+ return NULL;
+ }
+
+ return m_page;
+}
+
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page)
+{
+
+ mali_mem_swap_free_swap_item(m_page->swap_it);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page)
+{
+ _mali_mem_swap_put_page_node(m_page);
+
+ kfree(m_page);
+
+ return;
+}
+
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem)
+{
+ struct mali_page_node *m_page, *m_tmp;
+ u32 free_pages_nr = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(swap_mem);
+
+ list_for_each_entry_safe(m_page, m_tmp, &swap_mem->pages, list) {
+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+ /* Free the page node and release the swap item; if the ref count is 1,
+ * the swap item itself must be freed as well. */
+ list_del(&m_page->list);
+ if (1 == _mali_page_node_get_ref_count(m_page)) {
+ free_pages_nr++;
+ }
+
+ _mali_mem_swap_page_node_free(m_page);
+ }
+
+ return free_pages_nr;
+}
+
+static u32 mali_mem_swap_cow_free(mali_mem_cow *cow_mem)
+{
+ struct mali_page_node *m_page, *m_tmp;
+ u32 free_pages_nr = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(cow_mem);
+
+ list_for_each_entry_safe(m_page, m_tmp, &cow_mem->pages, list) {
+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP);
+
+ /* Free the page node and release the swap item; if the ref count is 1,
+ * the swap item itself must be freed as well. */
+ list_del(&m_page->list);
+ if (1 == _mali_page_node_get_ref_count(m_page)) {
+ free_pages_nr++;
+ }
+
+ _mali_mem_swap_page_node_free(m_page);
+ }
+
+ return free_pages_nr;
+}
+
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped)
+{
+ mali_mem_allocation *alloc;
+ u32 free_pages_nr = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(mem_bkend);
+ alloc = mem_bkend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+
+ if (is_mali_mapped) {
+ mali_mem_swap_mali_unmap(alloc);
+ }
+
+ mali_memory_swap_list_backend_delete(mem_bkend);
+
+ mutex_lock(&mem_bkend->mutex);
+ /* Make sure the given memory backend has been unlocked on the Mali side
+ * before freeing this memory block. */
+ mali_mem_swap_unlock_single_mem_backend(mem_bkend);
+ mutex_unlock(&mem_bkend->mutex);
+
+ if (MALI_MEM_SWAP == mem_bkend->type) {
+ free_pages_nr = mali_mem_swap_free(&mem_bkend->swap_mem);
+ } else {
+ free_pages_nr = mali_mem_swap_cow_free(&mem_bkend->cow_mem);
+ }
+
+ return free_pages_nr;
+}
+
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node)
+{
+ MALI_DEBUG_ASSERT(NULL != page_node);
+
+ page_node->swap_it->page = shmem_read_mapping_page(global_swap_space, page_node->swap_it->idx);
+
+ if (IS_ERR(page_node->swap_it->page)) {
+ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: failed to swap in page with index: %d.\n", page_node->swap_it->idx));
+ return MALI_FALSE;
+ }
+
+ /* Ensure page is flushed from CPU caches. */
+ page_node->swap_it->dma_addr = dma_map_page(&mali_platform_device->dev, page_node->swap_it->page,
+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+ return MALI_TRUE;
+}
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx)
+{
+ size_t page_count = PAGE_ALIGN(size) / PAGE_SIZE;
+ struct mali_page_node *m_page;
+ long system_free_size;
+ u32 i, index;
+ mali_bool ret;
+
+ MALI_DEBUG_ASSERT(NULL != swap_mem);
+ MALI_DEBUG_ASSERT(NULL != bkend_idx);
+ MALI_DEBUG_ASSERT(page_count <= MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE);
+
+ if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) {
+ MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n",
+ size,
+ atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE,
+ mali_mem_os_allocator.allocation_limit));
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ INIT_LIST_HEAD(&swap_mem->pages);
+ swap_mem->count = page_count;
+ index = mali_mem_swap_idx_range_alloc(page_count);
+
+ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == index) {
+ MALI_PRINT_ERROR(("Mali Swap: Failed to allocate continuous index for swappable Mali memory."));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ for (i = 0; i < page_count; i++) {
+ m_page = _mali_mem_swap_page_node_allocate();
+
+ if (NULL == m_page) {
+ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Failed to allocate mali page node."));
+ swap_mem->count = i;
+
+ mali_mem_swap_free(swap_mem);
+ mali_mem_swap_idx_range_free(index + i, page_count - i);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ m_page->swap_it->idx = index + i;
+
+ ret = mali_mem_swap_in_page_node(m_page);
+
+ if (MALI_FALSE == ret) {
+ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Allocate new page from SHMEM file failed."));
+ _mali_mem_swap_page_node_free(m_page);
+ mali_mem_swap_idx_range_free(index + i + 1, page_count - i - 1);
+
+ swap_mem->count = i;
+ mali_mem_swap_free(swap_mem);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ list_add_tail(&m_page->list, &swap_mem->pages);
+ }
+
+ system_free_size = global_page_state(NR_FREE_PAGES) * PAGE_SIZE;
+
+ if ((system_free_size < mali_mem_swap_out_threshold_value)
+ && (mem_backend_swapped_pool_size > (mali_mem_swap_out_threshold_value >> 2))
+ && mali_utilization_enabled()) {
+ _mali_osk_wq_schedule_work(mali_mem_swap_out_workq);
+ }
+
+ *bkend_idx = index;
+ return 0;
+}
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc)
+{
+ struct mali_session_data *session;
+
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ session = alloc->session;
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ mali_session_memory_lock(session);
+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+ alloc->flags);
+ mali_session_memory_unlock(session);
+}
+
+
+/* Insert these pages from shmem into the Mali page table. */
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props)
+{
+ struct mali_page_directory *pagedir = session->page_directory;
+ struct mali_page_node *m_page;
+ dma_addr_t phys;
+ u32 virt = vaddr;
+ u32 prop = props;
+
+ list_for_each_entry(m_page, &swap_mem->pages, list) {
+ MALI_DEBUG_ASSERT(NULL != m_page->swap_it->page);
+ phys = m_page->swap_it->dma_addr;
+
+ mali_mmu_pagedir_update(pagedir, virt, phys, MALI_MMU_PAGE_SIZE, prop);
+ virt += MALI_MMU_PAGE_SIZE;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_in_pages(struct mali_pp_job *job)
+{
+ u32 num_memory_cookies;
+ struct mali_session_data *session;
+ struct mali_vma_node *mali_vma_node = NULL;
+ mali_mem_allocation *mali_alloc = NULL;
+ mali_mem_backend *mem_bkend = NULL;
+ struct mali_page_node *m_page;
+ mali_bool swap_in_success = MALI_TRUE;
+ int i;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+ session = mali_pp_job_get_session(job);
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ for (i = 0; i < num_memory_cookies; i++) {
+
+ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i);
+
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+ if (NULL == mali_vma_node) {
+ job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+ swap_in_success = MALI_FALSE;
+ MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+ continue;
+ }
+
+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+ MALI_DEBUG_ASSERT(NULL != mali_alloc);
+
+ if (MALI_MEM_SWAP != mali_alloc->type &&
+ MALI_MEM_COW != mali_alloc->type) {
+ continue;
+ }
+
+ /* Get backend memory & Map on GPU */
+ mutex_lock(&mali_idr_mutex);
+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+ mutex_unlock(&mali_idr_mutex);
+ MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+ /* We need not hold the backend's lock here; this is race safe. */
+ if ((MALI_MEM_COW == mem_bkend->type) &&
+ (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+ continue;
+ }
+
+ mutex_lock(&mem_bkend->mutex);
+
+ /* When swap_in_success is MALI_FALSE, this job has a memory backend that
+ * could not be swapped in, and the job will be aborted by the Mali
+ * scheduler; here we just mark as invalid those memory cookies that must
+ * not be swapped out when the job is deleted. */
+ if (MALI_FALSE == swap_in_success) {
+ job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+ mutex_unlock(&mem_bkend->mutex);
+ continue;
+ }
+
+ /* Before swapping in, check whether this memory backend has already been swapped in by recently flushed jobs. */
+ ++mem_bkend->using_count;
+
+ if (1 < mem_bkend->using_count) {
+ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+ mutex_unlock(&mem_bkend->mutex);
+ continue;
+ }
+
+ if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)) {
+ mutex_unlock(&mem_bkend->mutex);
+ continue;
+ }
+
+
+ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) {
+ if (MALI_FALSE == mali_mem_swap_in_page_node(m_page)) {
+ /* Not enough memory to swap in this page, so release the pages that
+ * have already been swapped in and mark this PP job as failed. */
+ mali_mem_swap_unlock_partial_locked_mem_backend(mem_bkend, m_page);
+ swap_in_success = MALI_FALSE;
+ break;
+ }
+ }
+
+ if (swap_in_success) {
+#ifdef MALI_MEM_SWAP_TRACKING
+ mem_backend_swapped_unlock_size -= mem_bkend->size;
+#endif
+ _mali_osk_mutex_wait(session->memory_lock);
+ mali_mem_swap_mali_map(&mem_bkend->swap_mem, session, mali_alloc->mali_mapping.addr, mali_alloc->mali_mapping.properties);
+ _mali_osk_mutex_signal(session->memory_lock);
+
+ /* Clear the unlocked flag in the backend flags to mark this backend as swapped in. */
+ mem_bkend->flags &= ~(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN);
+ mutex_unlock(&mem_bkend->mutex);
+ } else {
+ --mem_bkend->using_count;
+ /* Mark that this backend is not swapped in and need not be processed any further. */
+ job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS;
+ mutex_unlock(&mem_bkend->mutex);
+ }
+ }
+
+ job->swap_status = swap_in_success ? MALI_SWAP_IN_SUCC : MALI_SWAP_IN_FAIL;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_out_pages(struct mali_pp_job *job)
+{
+ u32 num_memory_cookies;
+ struct mali_session_data *session;
+ struct mali_vma_node *mali_vma_node = NULL;
+ mali_mem_allocation *mali_alloc = NULL;
+ mali_mem_backend *mem_bkend = NULL;
+ int i;
+
+ MALI_DEBUG_ASSERT_POINTER(job);
+
+ num_memory_cookies = mali_pp_job_num_memory_cookies(job);
+ session = mali_pp_job_get_session(job);
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+
+ for (i = 0; i < num_memory_cookies; i++) {
+ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i);
+
+ if (MALI_SWAP_INVALIDATE_MALI_ADDRESS == mali_addr) {
+ continue;
+ }
+
+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+
+ if (NULL == mali_vma_node) {
+ MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr));
+ continue;
+ }
+
+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node);
+ MALI_DEBUG_ASSERT(NULL != mali_alloc);
+
+ if (MALI_MEM_SWAP != mali_alloc->type &&
+ MALI_MEM_COW != mali_alloc->type) {
+ continue;
+ }
+
+ mutex_lock(&mali_idr_mutex);
+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+ mutex_unlock(&mali_idr_mutex);
+ MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+ /* We need not hold the backend's lock here; this check is race safe. */
+ if ((MALI_MEM_COW == mem_bkend->type) &&
+ (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) {
+ continue;
+ }
+
+ mutex_lock(&mem_bkend->mutex);
+
+ MALI_DEBUG_ASSERT(0 < mem_bkend->using_count);
+
+ /* Decrementing using_count of the mem backend means fewer PP jobs are using this memory backend;
+ * once the count reaches zero, no PP job is using it and it can be put on the swap-out list. */
+ --mem_bkend->using_count;
+
+ if (0 < mem_bkend->using_count) {
+ mutex_unlock(&mem_bkend->mutex);
+ continue;
+ }
+ mutex_unlock(&mem_bkend->mutex);
+
+ mali_memory_swap_list_backend_add(mem_bkend);
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+ struct mali_page_node *m_page, *found_node = NULL;
+ struct page *found_page;
+ mali_mem_swap *swap = NULL;
+ mali_mem_cow *cow = NULL;
+ dma_addr_t dma_addr;
+ u32 i = 0;
+
+ if (MALI_MEM_SWAP == mem_bkend->type) {
+ swap = &mem_bkend->swap_mem;
+ list_for_each_entry(m_page, &swap->pages, list) {
+ if (i == offset) {
+ found_node = m_page;
+ break;
+ }
+ i++;
+ }
+ } else {
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags));
+
+ cow = &mem_bkend->cow_mem;
+ list_for_each_entry(m_page, &cow->pages, list) {
+ if (i == offset) {
+ found_node = m_page;
+ break;
+ }
+ i++;
+ }
+ }
+
+ if (NULL == found_node) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ found_page = shmem_read_mapping_page(global_swap_space, found_node->swap_it->idx);
+
+ if (!IS_ERR(found_page)) {
+ lock_page(found_page);
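+ /* The dma_map_page()/dma_unmap_page() pair below establishes no lasting
+ * mapping; it is used as a cache-maintenance step so the page contents
+ * are visible to the GPU before it reads the page. */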
+ dma_addr = dma_map_page(&mali_platform_device->dev, found_page,
+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+ dma_unmap_page(&mali_platform_device->dev, dma_addr,
+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+ *pagep = found_page;
+ } else {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep)
+{
+ struct mali_page_node *m_page, *found_node = NULL, *new_node = NULL;
+ mali_mem_cow *cow = NULL;
+ u32 i = 0;
+
+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type);
+ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED));
+ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags));
+ MALI_DEBUG_ASSERT(!mali_memory_swap_backend_in_swapped_pool(mem_bkend));
+
+ cow = &mem_bkend->cow_mem;
+ list_for_each_entry(m_page, &cow->pages, list) {
+ if (i == offset) {
+ found_node = m_page;
+ break;
+ }
+ i++;
+ }
+
+ if (NULL == found_node) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ new_node = _mali_mem_swap_page_node_allocate();
+
+ if (NULL == new_node) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ new_node->swap_it->idx = mali_mem_swap_idx_alloc();
+
+ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == new_node->swap_it->idx) {
+ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW on demand.\n"));
+ kfree(new_node->swap_it);
+ kfree(new_node);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ if (MALI_FALSE == mali_mem_swap_in_page_node(new_node)) {
+ _mali_mem_swap_page_node_free(new_node);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Swap in the found node so its contents can be copied in the kernel. */
+ if (MALI_FALSE == mali_mem_swap_in_page_node(found_node)) {
+ mali_mem_swap_out_page_node(new_node);
+ _mali_mem_swap_page_node_free(new_node);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ _mali_mem_cow_copy_page(found_node, new_node);
+
+ list_replace(&found_node->list, &new_node->list);
+
+ if (1 != _mali_page_node_get_ref_count(found_node)) {
+ atomic_add(1, &mem_bkend->mali_allocation->session->mali_mem_allocated_pages);
+ if (atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > mem_bkend->mali_allocation->session->max_mali_mem_allocated_size) {
+ mem_bkend->mali_allocation->session->max_mali_mem_allocated_size = atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE;
+ }
+ mem_bkend->cow_mem.change_pages_nr++;
+ }
+
+ mali_mem_swap_out_page_node(found_node);
+ _mali_mem_swap_page_node_free(found_node);
+
+ /* dma_map_page() was called for this page when the new page node was swapped in, so unmap it here. */
+ dma_unmap_page(&mali_platform_device->dev, new_node->swap_it->dma_addr,
+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE);
+
+ lock_page(new_node->swap_it->page);
+
+ *pagep = new_node->swap_it->page;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size)
+{
+ *swap_pool_size = mem_backend_swapped_pool_size;
+ *unlock_size = mem_backend_swapped_unlock_size;
+}
+#endif
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_swap_alloc.h b/drivers/gpu/arm/utgard/linux/mali_memory_swap_alloc.h
new file mode 100644
index 000000000000..a393ecce3a00
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_swap_alloc.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_SWAP_ALLOC_H__
+#define __MALI_MEMORY_SWAP_ALLOC_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+
+#include "mali_memory_types.h"
+#include "mali_pp_job.h"
+
+/**
+ * Initialize memory swapping module.
+ */
+_mali_osk_errcode_t mali_mem_swap_init(void);
+
+void mali_mem_swap_term(void);
+
+/**
+ * Return global share memory file to other modules.
+ */
+struct file *mali_mem_swap_get_global_swap_file(void);
+
+/**
+ * Unlock the given memory backend so that its pages can be swapped out by the kernel.
+ */
+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend);
+
+/**
+ * Remove the given memory backend from global swap list.
+ */
+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend);
+
+/**
+ * Add the given memory backend to global swap list.
+ */
+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend);
+
+/**
+ * Allocate one index from the bitmap, used as a page index into the global swap file.
+ */
+u32 mali_mem_swap_idx_alloc(void);
+
+void mali_mem_swap_idx_free(u32 idx);
+
+/**
+ * Allocate a new swap item without page index.
+ */
+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void);
+
+/**
+ * Free a swap item, truncate the corresponding space in the page cache, and free the page index.
+ */
+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item);
+
+/**
+ * Allocate a page node with swap item.
+ */
+struct mali_page_node *_mali_mem_swap_page_node_allocate(void);
+
+/**
+ * Reduce the reference count of the given page node; when it reaches 0, free the page node.
+ */
+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page);
+
+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page);
+
+/**
+ * Free a swappable memory backend.
+ */
+u32 mali_mem_swap_free(mali_mem_swap *swap_mem);
+
+/**
+ * Unmap and free.
+ */
+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped);
+
+/**
+ * Read in a page from the global swap file with the pre-allocated page index.
+ */
+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node);
+
+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx);
+
+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props);
+
+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc);
+
+/**
+ * When a PP job is created, we need to swap in all memory backends needed by that job.
+ */
+int mali_mem_swap_in_pages(struct mali_pp_job *job);
+
+/**
+ * Put all memory backends used by this PP job on the global swap list.
+ */
+int mali_mem_swap_out_pages(struct mali_pp_job *job);
+
+/**
+ * Called from the page-fault handler to service CPU reads and writes.
+ */
+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep);
+
+/**
+ * Used to process CoW on demand for a swappable memory backend.
+ */
+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep);
+
+#ifdef MALI_MEM_SWAP_TRACKING
+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size);
+#endif
+#endif /* __MALI_MEMORY_SWAP_ALLOC_H__ */
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_types.h b/drivers/gpu/arm/utgard/linux/mali_memory_types.h
new file mode 100644
index 000000000000..82af8fed66c9
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_types.h
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_TYPES_H__
+#define __MALI_MEMORY_TYPES_H__
+
+#include <linux/mm.h>
+
+#if defined(CONFIG_MALI400_UMP)
+#include "ump_kernel_interface.h"
+#endif
+
+typedef u32 mali_address_t;
+
+typedef enum mali_mem_type {
+ MALI_MEM_OS,
+ MALI_MEM_EXTERNAL,
+ MALI_MEM_SWAP,
+ MALI_MEM_DMA_BUF,
+ MALI_MEM_UMP,
+ MALI_MEM_BLOCK,
+ MALI_MEM_COW,
+ MALI_MEM_TYPE_MAX,
+} mali_mem_type;
+
+typedef struct mali_block_item {
+ /* For the block type, the physical address is always page-size aligned,
+ * so the low 12 bits are used for the ref count.
+ */
+ unsigned long phy_addr;
+} mali_block_item;
+
+/**
+ * idx is used to locate the given page in the address space of swap file.
+ * ref_count is used to mark how many memory backends are using this item.
+ */
+typedef struct mali_swap_item {
+ u32 idx;
+ atomic_t ref_count;
+ struct page *page;
+ dma_addr_t dma_addr;
+} mali_swap_item;
+
+typedef enum mali_page_node_type {
+ MALI_PAGE_NODE_OS,
+ MALI_PAGE_NODE_BLOCK,
+ MALI_PAGE_NODE_SWAP,
+} mali_page_node_type;
+
+typedef struct mali_page_node {
+ struct list_head list;
+ union {
+ struct page *page;
+ mali_block_item *blk_it; /*pointer to block item*/
+ mali_swap_item *swap_it;
+ };
+
+ u32 type;
+} mali_page_node;
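+
+/* Which union member is valid is selected by 'type'; illustratively:
+ *
+ *     switch (node->type) {
+ *     case MALI_PAGE_NODE_OS:    use node->page;
+ *     case MALI_PAGE_NODE_BLOCK: use node->blk_it;
+ *     case MALI_PAGE_NODE_SWAP:  use node->swap_it;
+ *     }
+ */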
+
+typedef struct mali_mem_os_mem {
+ struct list_head pages;
+ u32 count;
+} mali_mem_os_mem;
+
+typedef struct mali_mem_dma_buf {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+ struct mali_dma_buf_attachment *attachment;
+#endif
+} mali_mem_dma_buf;
+
+typedef struct mali_mem_external {
+ dma_addr_t phys;
+ u32 size;
+} mali_mem_external;
+
+typedef struct mali_mem_ump {
+#if defined(CONFIG_MALI400_UMP)
+ ump_dd_handle handle;
+#endif
+} mali_mem_ump;
+
+typedef struct block_allocator_allocation {
+ /* The list will be released in reverse order */
+ struct block_info *last_allocated;
+ u32 mapping_length;
+ struct block_allocator *info;
+} block_allocator_allocation;
+
+typedef struct mali_mem_block_mem {
+ struct list_head pfns;
+ u32 count;
+} mali_mem_block_mem;
+
+typedef struct mali_mem_virt_mali_mapping {
+ mali_address_t addr; /* Virtual Mali address */
+ u32 properties; /* MMU Permissions + cache, must match MMU HW */
+} mali_mem_virt_mali_mapping;
+
+typedef struct mali_mem_virt_cpu_mapping {
+ void __user *addr;
+ struct vm_area_struct *vma;
+} mali_mem_virt_cpu_mapping;
+
+#define MALI_MEM_ALLOCATION_VALID_MAGIC 0xdeda110c
+#define MALI_MEM_ALLOCATION_FREED_MAGIC 0x10101010
+
+typedef struct mali_mm_node {
+ /* MALI GPU vaddr start; use u32 since the MMU only supports 32-bit addresses */
+ uint32_t start; /* GPU vaddr */
+ uint32_t size; /* GPU allocation virtual size */
+ unsigned allocated : 1;
+} mali_mm_node;
+
+typedef struct mali_vma_node {
+ struct mali_mm_node vm_node;
+ struct rb_node vm_rb;
+} mali_vma_node;
+
+
+typedef struct mali_mem_allocation {
+ MALI_DEBUG_CODE(u32 magic);
+ mali_mem_type type; /**< Type of memory */
+ u32 flags; /**< Flags for this allocation */
+
+ struct mali_session_data *session; /**< Pointer to session that owns the allocation */
+
+ mali_mem_virt_cpu_mapping cpu_mapping; /**< CPU mapping */
+ mali_mem_virt_mali_mapping mali_mapping; /**< Mali mapping */
+
+ /* add for new memory system */
+ struct mali_vma_node mali_vma_node;
+ u32 vsize; /* virtual size*/
+ u32 psize; /* physical backend memory size*/
+ struct list_head list;
+ s32 backend_handle; /* idr for mem_backend */
+ _mali_osk_atomic_t mem_alloc_refcount;
+} mali_mem_allocation;
+
+struct mali_mem_os_allocator {
+ spinlock_t pool_lock;
+ struct list_head pool_pages;
+ size_t pool_count;
+
+ atomic_t allocated_pages;
+ size_t allocation_limit;
+
+ struct shrinker shrinker;
+ struct delayed_work timed_shrinker;
+ struct workqueue_struct *wq;
+};
+
+/* COW backend memory type */
+typedef struct mali_mem_cow {
+ struct list_head pages; /**< all pages for this cow backend allocation,
+ including newly allocated pages for the modified range */
+ u32 count; /**< number of pages */
+ s32 change_pages_nr;
+} mali_mem_cow;
+
+typedef struct mali_mem_swap {
+ struct list_head pages;
+ u32 count;
+} mali_mem_swap;
+
+#define MALI_MEM_BACKEND_FLAG_COWED (0x1) /* COW has happened on this backend */
+#define MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE (0x2) /* This is a COW backend, mapped so that the CPU is not allowed to write */
+#define MALI_MEM_BACKEND_FLAG_SWAP_COWED (0x4) /* Mark that the given backend is cowed from swappable memory. */
+/* Mark that this backend is not swapped in by the MALI driver; before using it,
+ * we must swap it in and set up the corresponding page table. */
+#define MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN (0x8)
+#define MALI_MEM_BACKEND_FLAG_NOT_BINDED (0x1 << 5) /* this backend is not backed with physical memory, used for deferred bind */
+#define MALI_MEM_BACKEND_FLAG_BINDED (0x1 << 6) /* this backend is backed with physical memory, used for deferred bind */
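+
+/* These flags form a bitmask in the backend's 'flags' field; for example, a
+ * COW backend cowed from swappable memory is recognized (as in
+ * mali_mem_swap_in_pages()) by:
+ *
+ *     (MALI_MEM_COW == bkend->type) &&
+ *     (bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)
+ */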
+
+typedef struct mali_mem_backend {
+ mali_mem_type type; /**< Type of backend memory */
+ u32 flags; /**< Flags for this allocation */
+ u32 size;
+ /* Union selected by type. */
+ union {
+ mali_mem_os_mem os_mem; /**< MALI_MEM_OS */
+ mali_mem_external ext_mem; /**< MALI_MEM_EXTERNAL */
+ mali_mem_dma_buf dma_buf; /**< MALI_MEM_DMA_BUF */
+ mali_mem_ump ump_mem; /**< MALI_MEM_UMP */
+ mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */
+ mali_mem_cow cow_mem;
+ mali_mem_swap swap_mem;
+ };
+ mali_mem_allocation *mali_allocation;
+ struct mutex mutex;
+ mali_mem_type cow_type;
+
+ struct list_head list; /**< Used to link swappable memory backend to the global swappable list */
+ int using_count; /**< Mark how many PP jobs are using this memory backend */
+ u32 start_idx; /**< If the corresponding vma of this backend is linear, this value will be used to set vma->vm_pgoff */
+} mali_mem_backend;
+
+#define MALI_MEM_FLAG_MALI_GUARD_PAGE (_MALI_MAP_EXTERNAL_MAP_GUARD_PAGE)
+#define MALI_MEM_FLAG_DONT_CPU_MAP (1 << 1)
+#define MALI_MEM_FLAG_CAN_RESIZE (_MALI_MEMORY_ALLOCATE_RESIZEABLE)
+#endif /* __MALI_MEMORY_TYPES_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_ump.c b/drivers/gpu/arm/utgard/linux/mali_memory_ump.c
new file mode 100644
index 000000000000..bacadc85e47e
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_ump.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_kernel_linux.h"
+#include "mali_memory.h"
+#include "ump_kernel_interface.h"
+
+static int mali_mem_ump_map(mali_mem_backend *mem_backend)
+{
+ ump_dd_handle ump_mem;
+ mali_mem_allocation *alloc;
+ struct mali_session_data *session;
+ u32 nr_blocks;
+ u32 i;
+ ump_dd_physical_block *ump_blocks;
+ struct mali_page_directory *pagedir;
+ u32 offset = 0;
+ _mali_osk_errcode_t err;
+
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+ alloc = mem_backend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+
+ session = alloc->session;
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ ump_mem = mem_backend->ump_mem.handle;
+ MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+ nr_blocks = ump_dd_phys_block_count_get(ump_mem);
+ if (nr_blocks == 0) {
+ MALI_DEBUG_PRINT(1, ("No block count\n"));
+ return -EINVAL;
+ }
+
+ ump_blocks = _mali_osk_malloc(sizeof(*ump_blocks) * nr_blocks);
+ if (NULL == ump_blocks) {
+ return -ENOMEM;
+ }
+
+ if (UMP_DD_INVALID == ump_dd_phys_blocks_get(ump_mem, ump_blocks, nr_blocks)) {
+ _mali_osk_free(ump_blocks);
+ return -EFAULT;
+ }
+
+ pagedir = session->page_directory;
+
+ mali_session_memory_lock(session);
+
+ err = mali_mem_mali_map_prepare(alloc);
+ if (_MALI_OSK_ERR_OK != err) {
+ MALI_DEBUG_PRINT(1, ("Mapping of UMP memory failed\n"));
+
+ _mali_osk_free(ump_blocks);
+ mali_session_memory_unlock(session);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nr_blocks; ++i) {
+ u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+ MALI_DEBUG_PRINT(7, ("Mapping in 0x%08x size %d\n", ump_blocks[i].addr , ump_blocks[i].size));
+
+ mali_mmu_pagedir_update(pagedir, virt, ump_blocks[i].addr,
+ ump_blocks[i].size, MALI_MMU_FLAGS_DEFAULT);
+
+ offset += ump_blocks[i].size;
+ }
+
+ if (alloc->flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+ u32 virt = alloc->mali_vma_node.vm_node.start + offset;
+
+ /* Map in an extra virtual guard page at the end of the VMA */
+ MALI_DEBUG_PRINT(6, ("Mapping in extra guard page\n"));
+
+ mali_mmu_pagedir_update(pagedir, virt, ump_blocks[0].addr, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT);
+
+ offset += _MALI_OSK_MALI_PAGE_SIZE;
+ }
+ mali_session_memory_unlock(session);
+ _mali_osk_free(ump_blocks);
+ return 0;
+}
+
+static void mali_mem_ump_unmap(mali_mem_allocation *alloc)
+{
+ struct mali_session_data *session;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ session = alloc->session;
+ MALI_DEBUG_ASSERT_POINTER(session);
+ mali_session_memory_lock(session);
+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start,
+ alloc->flags);
+ mali_session_memory_unlock(session);
+}
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32 secure_id, u32 flags)
+{
+ ump_dd_handle ump_mem;
+ int ret;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+
+ MALI_DEBUG_PRINT(3,
+ ("Requested to map ump memory with secure id %d into virtual memory 0x%08X, size 0x%08X\n",
+ secure_id, alloc->mali_vma_node.vm_node.start, alloc->mali_vma_node.vm_node.size));
+
+ ump_mem = ump_dd_handle_create_from_secure_id(secure_id);
+ if (UMP_DD_HANDLE_INVALID == ump_mem) MALI_ERROR(_MALI_OSK_ERR_FAULT);
+ alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP;
+ if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) {
+ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE;
+ }
+
+ mem_backend->ump_mem.handle = ump_mem;
+
+ ret = mali_mem_ump_map(mem_backend);
+ if (0 != ret) {
+ ump_dd_reference_release(ump_mem);
+ return _MALI_OSK_ERR_FAULT;
+ }
+ MALI_DEBUG_PRINT(3, ("Returning from UMP bind\n"));
+ return _MALI_OSK_ERR_OK;
+}
+
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend)
+{
+ ump_dd_handle ump_mem;
+ mali_mem_allocation *alloc;
+ MALI_DEBUG_ASSERT_POINTER(mem_backend);
+ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type);
+ ump_mem = mem_backend->ump_mem.handle;
+ MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem);
+
+ alloc = mem_backend->mali_allocation;
+ MALI_DEBUG_ASSERT_POINTER(alloc);
+ mali_mem_ump_unmap(alloc);
+ ump_dd_reference_release(ump_mem);
+}
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_ump.h b/drivers/gpu/arm/utgard/linux/mali_memory_ump.h
new file mode 100644
index 000000000000..4adb70df884e
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_ump.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_UMP_BUF_H__
+#define __MALI_MEMORY_UMP_BUF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+#include "mali_memory.h"
+
+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32 secure_id, u32 flags);
+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MEMORY_UMP_BUF_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_util.c b/drivers/gpu/arm/utgard/linux/mali_memory_util.c
new file mode 100644
index 000000000000..bcca43c09f0b
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_util.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+
+#include "mali_memory.h"
+#include "mali_memory_os_alloc.h"
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include "mali_memory_dma_buf.h"
+#endif
+#if defined(CONFIG_MALI400_UMP)
+#include "mali_memory_ump.h"
+#endif
+#include "mali_memory_external.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+#include "mali_memory_cow.h"
+#include "mali_memory_block_alloc.h"
+#include "mali_memory_swap_alloc.h"
+
+
+
+/**
+ * _mali_free_allocation_mem() - free a memory allocation
+ */
+static u32 _mali_free_allocation_mem(mali_mem_allocation *mali_alloc)
+{
+ mali_mem_backend *mem_bkend = NULL;
+ u32 free_pages_nr = 0;
+
+ struct mali_session_data *session = mali_alloc->session;
+ MALI_DEBUG_PRINT(4, (" _mali_free_allocation_mem, psize =0x%x! \n", mali_alloc->psize));
+ if (0 == mali_alloc->psize)
+ goto out;
+
+ /* Look up the backend memory for this allocation */
+ mutex_lock(&mali_idr_mutex);
+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle);
+ mutex_unlock(&mali_idr_mutex);
+ MALI_DEBUG_ASSERT(NULL != mem_bkend);
+
+ switch (mem_bkend->type) {
+ case MALI_MEM_OS:
+ free_pages_nr = mali_mem_os_release(mem_bkend);
+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+ break;
+ case MALI_MEM_UMP:
+#if defined(CONFIG_MALI400_UMP)
+ mali_mem_unbind_ump_buf(mem_bkend);
+ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+ MALI_DEBUG_PRINT(2, ("UMP not supported\n"));
+#endif
+ break;
+ case MALI_MEM_DMA_BUF:
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+ mali_mem_unbind_dma_buf(mem_bkend);
+ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+#else
+ MALI_DEBUG_PRINT(2, ("DMA not supported\n"));
+#endif
+ break;
+ case MALI_MEM_EXTERNAL:
+ mali_mem_unbind_ext_buf(mem_bkend);
+ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]);
+ break;
+
+ case MALI_MEM_BLOCK:
+ free_pages_nr = mali_mem_block_release(mem_bkend);
+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+ break;
+
+ case MALI_MEM_COW:
+ if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) {
+ free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+ } else {
+ free_pages_nr = mali_mem_cow_release(mem_bkend, MALI_TRUE);
+ }
+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+ break;
+ case MALI_MEM_SWAP:
+ free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE);
+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages);
+ atomic_sub(free_pages_nr, &session->mali_mem_array[mem_bkend->type]);
+ break;
+ default:
+ MALI_DEBUG_PRINT(1, ("mem type %d is not in the mali_mem_type enum.\n", mem_bkend->type));
+ break;
+ }
+
+ /* Remove the backend memory index */
+ mutex_lock(&mali_idr_mutex);
+ idr_remove(&mali_backend_idr, mali_alloc->backend_handle);
+ mutex_unlock(&mali_idr_mutex);
+ kfree(mem_bkend);
+out:
+ /* remove memory allocation */
+ mali_vma_offset_remove(&session->allocation_mgr, &mali_alloc->mali_vma_node);
+ mali_mem_allocation_struct_destory(mali_alloc);
+ return free_pages_nr;
+}
+
+/**
+ * Drop a reference on the allocation; when the count reaches zero, free it.
+ */
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc)
+{
+ u32 free_pages_nr = 0;
+ mali_mem_allocation *mali_alloc = *alloc;
+ *alloc = NULL;
+ if (0 == _mali_osk_atomic_dec_return(&mali_alloc->mem_alloc_refcount)) {
+ free_pages_nr = _mali_free_allocation_mem(mali_alloc);
+ }
+ return free_pages_nr;
+}
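+
+/* Illustrative ref/unref pairing (a sketch, not part of the driver):
+ *
+ *     mali_allocation_ref(alloc);     (take an extra reference)
+ *     mali_allocation_unref(&alloc);  (drop it; 'alloc' is set to NULL, and
+ *                                      the memory is freed once the count
+ *                                      reaches zero)
+ */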
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc)
+{
+ _mali_osk_atomic_inc(&alloc->mem_alloc_refcount);
+}
+
+void mali_free_session_allocations(struct mali_session_data *session)
+{
+ struct mali_mem_allocation *entry, *next;
+
+ MALI_DEBUG_PRINT(4, (" mali_free_session_allocations! \n"));
+
+ list_for_each_entry_safe(entry, next, &session->allocation_mgr.head, list) {
+ mali_allocation_unref(&entry);
+ }
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_util.h b/drivers/gpu/arm/utgard/linux/mali_memory_util.h
new file mode 100644
index 000000000000..0d686979d7a8
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_util.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_MEMORY_UTIL_H__
+#define __MALI_MEMORY_UTIL_H__
+
+u32 mali_allocation_unref(struct mali_mem_allocation **alloc);
+
+void mali_allocation_ref(struct mali_mem_allocation *alloc);
+
+void mali_free_session_allocations(struct mali_session_data *session);
+
+#endif
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_virtual.c b/drivers/gpu/arm/utgard/linux/mali_memory_virtual.c
new file mode 100644
index 000000000000..35f8f90a444d
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_virtual.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/fs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_kernel_linux.h"
+#include "mali_scheduler.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+#include "mali_memory_virtual.h"
+
+
+/**
+ * Internal helper to link a node into the rb-tree.
+ */
+static inline void _mali_vma_offset_add_rb(struct mali_allocation_manager *mgr,
+ struct mali_vma_node *node)
+{
+ struct rb_node **iter = &mgr->allocation_mgr_rb.rb_node;
+ struct rb_node *parent = NULL;
+ struct mali_vma_node *iter_node;
+
+ while (likely(*iter)) {
+ parent = *iter;
+ iter_node = rb_entry(*iter, struct mali_vma_node, vm_rb);
+
+ if (node->vm_node.start < iter_node->vm_node.start)
+ iter = &(*iter)->rb_left;
+ else if (node->vm_node.start > iter_node->vm_node.start)
+ iter = &(*iter)->rb_right;
+ else
+ MALI_DEBUG_ASSERT(0);
+ }
+
+ rb_link_node(&node->vm_rb, parent, iter);
+ rb_insert_color(&node->vm_rb, &mgr->allocation_mgr_rb);
+}
+
+/**
+ * mali_vma_offset_add() - Add offset node to RB Tree
+ */
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+ struct mali_vma_node *node)
+{
+ int ret = 0;
+ write_lock(&mgr->vm_lock);
+
+ if (node->vm_node.allocated) {
+ goto out;
+ }
+
+ _mali_vma_offset_add_rb(mgr, node);
+ /* set to allocated */
+ node->vm_node.allocated = 1;
+
+out:
+ write_unlock(&mgr->vm_lock);
+ return ret;
+}
+
+/**
+ * mali_vma_offset_remove() - Remove offset node from RB tree
+ */
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+ struct mali_vma_node *node)
+{
+ write_lock(&mgr->vm_lock);
+
+ if (node->vm_node.allocated) {
+ rb_erase(&node->vm_rb, &mgr->allocation_mgr_rb);
+ memset(&node->vm_node, 0, sizeof(node->vm_node));
+ }
+ write_unlock(&mgr->vm_lock);
+}
+
+/**
+ * mali_vma_offset_search() - Search for a node in the RB tree.
+ */
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+ unsigned long start, unsigned long pages)
+{
+ struct mali_vma_node *node, *best;
+ struct rb_node *iter;
+ unsigned long offset;
+ read_lock(&mgr->vm_lock);
+
+ iter = mgr->allocation_mgr_rb.rb_node;
+ best = NULL;
+
+ while (likely(iter)) {
+ node = rb_entry(iter, struct mali_vma_node, vm_rb);
+ offset = node->vm_node.start;
+ if (start >= offset) {
+ iter = iter->rb_right;
+ best = node;
+ if (start == offset)
+ break;
+ } else {
+ iter = iter->rb_left;
+ }
+ }
+
+ if (best) {
+ offset = best->vm_node.start + best->vm_node.size;
+ if (offset <= start + pages)
+ best = NULL;
+ }
+ read_unlock(&mgr->vm_lock);
+
+ return best;
+}
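+
+/* Illustrative lookup (as used by the swap code above): passing pages == 0
+ * returns the node whose GPU virtual range contains 'start', or NULL:
+ *
+ *     node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0);
+ */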
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_memory_virtual.h b/drivers/gpu/arm/utgard/linux/mali_memory_virtual.h
new file mode 100644
index 000000000000..3e2d48f44e28
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_memory_virtual.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __MALI_GPU_VMEM_H__
+#define __MALI_GPU_VMEM_H__
+
+#include "mali_osk.h"
+#include "mali_session.h"
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "mali_memory_types.h"
+#include "mali_memory_os_alloc.h"
+#include "mali_memory_manager.h"
+
+
+
+int mali_vma_offset_add(struct mali_allocation_manager *mgr,
+ struct mali_vma_node *node);
+
+void mali_vma_offset_remove(struct mali_allocation_manager *mgr,
+ struct mali_vma_node *node);
+
+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr,
+ unsigned long start, unsigned long pages);
+
+#endif
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_atomics.c b/drivers/gpu/arm/utgard/linux/mali_osk_atomics.c
new file mode 100644
index 000000000000..ba630e2730cf
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_atomics.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_atomics.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <asm/atomic.h>
+#include "mali_kernel_common.h"
+
+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom)
+{
+ atomic_dec((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom)
+{
+ return atomic_dec_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom)
+{
+ atomic_inc((atomic_t *)&atom->u.val);
+}
+
+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom)
+{
+ return atomic_inc_return((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val)
+{
+ MALI_DEBUG_ASSERT_POINTER(atom);
+ atomic_set((atomic_t *)&atom->u.val, val);
+}
+
+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom)
+{
+ return atomic_read((atomic_t *)&atom->u.val);
+}
+
+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom)
+{
+ MALI_IGNORE(atom);
+}
+
+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val)
+{
+ return atomic_xchg((atomic_t *)&atom->u.val, val);
+}
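+
+/* Illustrative refcount pattern built on these wrappers (cf.
+ * mali_allocation_ref()/mali_allocation_unref() in mali_memory_util.c);
+ * 'obj' and release() are hypothetical:
+ *
+ *     _mali_osk_atomic_init(&obj->refcount, 1);
+ *     _mali_osk_atomic_inc(&obj->refcount);
+ *     if (0 == _mali_osk_atomic_dec_return(&obj->refcount))
+ *             release(obj);
+ */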
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_bitmap.c b/drivers/gpu/arm/utgard/linux/mali_osk_bitmap.c
new file mode 100644
index 000000000000..01ca38235b20
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_bitmap.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_bitmap.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
+#include "common/mali_kernel_common.h"
+#include "mali_osk_types.h"
+#include "mali_osk.h"
+
+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap)
+{
+ u32 obj;
+
+ MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+ _mali_osk_spinlock_lock(bitmap->lock);
+
+ obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->reserve);
+
+ if (obj < bitmap->max) {
+ set_bit(obj, bitmap->table);
+ } else {
+ obj = -1;
+ }
+
+ if (obj != -1)
+ --bitmap->avail;
+ _mali_osk_spinlock_unlock(bitmap->lock);
+
+ return obj;
+}
+
+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj)
+{
+ MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+ _mali_osk_bitmap_free_range(bitmap, obj, 1);
+}
+
+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt)
+{
+ u32 obj;
+
+ MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+ if (0 >= cnt) {
+ return -1;
+ }
+
+ if (1 == cnt) {
+ return _mali_osk_bitmap_alloc(bitmap);
+ }
+
+ _mali_osk_spinlock_lock(bitmap->lock);
+ obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+ bitmap->last, cnt, 0);
+
+ if (obj >= bitmap->max) {
+ obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+ bitmap->reserve, cnt, 0);
+ }
+
+ if (obj < bitmap->max) {
+ bitmap_set(bitmap->table, obj, cnt);
+
+ bitmap->last = (obj + cnt);
+ if (bitmap->last >= bitmap->max) {
+ bitmap->last = bitmap->reserve;
+ }
+ } else {
+ obj = -1;
+ }
+
+ if (obj != -1) {
+ bitmap->avail -= cnt;
+ }
+
+ _mali_osk_spinlock_unlock(bitmap->lock);
+
+ return obj;
+}
+
+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap)
+{
+ MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+ return bitmap->avail;
+}
+
+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt)
+{
+ MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+ _mali_osk_spinlock_lock(bitmap->lock);
+ bitmap_clear(bitmap->table, obj, cnt);
+ bitmap->last = min(bitmap->last, obj);
+
+ bitmap->avail += cnt;
+ _mali_osk_spinlock_unlock(bitmap->lock);
+}
+
+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve)
+{
+ MALI_DEBUG_ASSERT_POINTER(bitmap);
+ MALI_DEBUG_ASSERT(reserve <= num);
+
+ bitmap->reserve = reserve;
+ bitmap->last = reserve;
+ bitmap->max = num;
+ bitmap->avail = num - reserve;
+ bitmap->lock = _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST);
+ if (!bitmap->lock) {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+ bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
+ sizeof(long), GFP_KERNEL);
+ if (!bitmap->table) {
+ _mali_osk_spinlock_term(bitmap->lock);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap)
+{
+ MALI_DEBUG_ASSERT_POINTER(bitmap);
+
+ if (NULL != bitmap->lock) {
+ _mali_osk_spinlock_term(bitmap->lock);
+ }
+
+ if (NULL != bitmap->table) {
+ kfree(bitmap->table);
+ }
+}
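+
+/* Illustrative life cycle (a sketch, not part of the driver); indices below
+ * 'reserve' are never handed out:
+ *
+ *     struct _mali_osk_bitmap bm;
+ *     if (_MALI_OSK_ERR_OK == _mali_osk_bitmap_init(&bm, 1024, 1)) {
+ *             u32 idx = _mali_osk_bitmap_alloc(&bm);
+ *             if ((u32)-1 != idx)
+ *                     _mali_osk_bitmap_free(&bm, idx);
+ *             _mali_osk_bitmap_term(&bm);
+ *     }
+ */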
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_irq.c b/drivers/gpu/arm/utgard/linux/mali_osk_irq.c
new file mode 100644
index 000000000000..539832d9125a
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_irq.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_irq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/slab.h> /* For memory allocation */
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+typedef struct _mali_osk_irq_t_struct {
+ u32 irqnum;
+ void *data;
+ _mali_osk_irq_uhandler_t uhandler;
+} mali_osk_irq_object_t;
+
+typedef irqreturn_t (*irq_handler_func_t)(int, void *, struct pt_regs *);
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id); /* , struct pt_regs *regs*/
+
+#if defined(DEBUG)
+
+struct test_interrupt_data {
+ _mali_osk_irq_ack_t ack_func;
+ void *probe_data;
+ mali_bool interrupt_received;
+ wait_queue_head_t wq;
+};
+
+static irqreturn_t test_interrupt_upper_half(int port_name, void *dev_id)
+{
+ irqreturn_t ret = IRQ_NONE;
+ struct test_interrupt_data *data = (struct test_interrupt_data *)dev_id;
+
+ if (_MALI_OSK_ERR_OK == data->ack_func(data->probe_data)) {
+ data->interrupt_received = MALI_TRUE;
+ wake_up(&data->wq);
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+static _mali_osk_errcode_t test_interrupt(u32 irqnum,
+ _mali_osk_irq_trigger_t trigger_func,
+ _mali_osk_irq_ack_t ack_func,
+ void *probe_data,
+ const char *description)
+{
+ unsigned long irq_flags = 0;
+ struct test_interrupt_data data = {
+ .ack_func = ack_func,
+ .probe_data = probe_data,
+ .interrupt_received = MALI_FALSE,
+ };
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+ /* The wait queue must be initialized before the IRQ handler can wake it. */
+ init_waitqueue_head(&data.wq);
+
+ if (0 != request_irq(irqnum, test_interrupt_upper_half, irq_flags, description, &data)) {
+ MALI_DEBUG_PRINT(2, ("Unable to install test IRQ handler for core '%s'\n", description));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ trigger_func(probe_data);
+ wait_event_timeout(data.wq, data.interrupt_received, 100);
+
+ free_irq(irqnum, &data);
+
+ if (data.interrupt_received) {
+ MALI_DEBUG_PRINT(3, ("%s: Interrupt test OK\n", description));
+ return _MALI_OSK_ERR_OK;
+ } else {
+ MALI_PRINT_ERROR(("%s: Failed interrupt test on %u\n", description, irqnum));
+ return _MALI_OSK_ERR_FAULT;
+ }
+}
+
+#endif /* defined(DEBUG) */
+
+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description)
+{
+ mali_osk_irq_object_t *irq_object;
+ unsigned long irq_flags = 0;
+
+#if defined(CONFIG_MALI_SHARED_INTERRUPTS)
+ irq_flags |= IRQF_SHARED;
+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */
+
+ irq_object = kmalloc(sizeof(mali_osk_irq_object_t), GFP_KERNEL);
+ if (NULL == irq_object) {
+ return NULL;
+ }
+
+ if (-1 == irqnum) {
+ /* Probe for IRQ */
+ if ((NULL != trigger_func) && (NULL != ack_func)) {
+ unsigned long probe_count = 3;
+ _mali_osk_errcode_t err;
+ int irq;
+
+ MALI_DEBUG_PRINT(2, ("Probing for irq\n"));
+
+ do {
+ unsigned long mask;
+
+ mask = probe_irq_on();
+ trigger_func(probe_data);
+
+ _mali_osk_time_ubusydelay(5);
+
+ irq = probe_irq_off(mask);
+ err = ack_func(probe_data);
+ } while (irq < 0 && (err == _MALI_OSK_ERR_OK) && probe_count--);
+
+ if (irq < 0 || (_MALI_OSK_ERR_OK != err)) irqnum = -1;
+ else irqnum = irq;
+ } else irqnum = -1; /* no probe functions, fault */
+
+ if (-1 != irqnum) {
+ /* found an irq */
+ MALI_DEBUG_PRINT(2, ("Found irq %d\n", irqnum));
+ } else {
+ MALI_DEBUG_PRINT(2, ("Probe for irq failed\n"));
+ }
+ }
+
+ irq_object->irqnum = irqnum;
+ irq_object->uhandler = uhandler;
+ irq_object->data = int_data;
+
+ if (-1 == irqnum) {
+ MALI_DEBUG_PRINT(2, ("No IRQ for core '%s' found during probe\n", description));
+ kfree(irq_object);
+ return NULL;
+ }
+
+#if defined(DEBUG)
+ /* Verify that the configured interrupt settings are working */
+ if (_MALI_OSK_ERR_OK != test_interrupt(irqnum, trigger_func, ack_func, probe_data, description)) {
+ MALI_DEBUG_PRINT(2, ("Test of IRQ(%d) handler for core '%s' failed\n", irqnum, description));
+ kfree(irq_object);
+ return NULL;
+ }
+#endif
+
+ if (0 != request_irq(irqnum, irq_handler_upper_half, irq_flags, description, irq_object)) {
+ MALI_DEBUG_PRINT(2, ("Unable to install IRQ handler for core '%s'\n", description));
+ kfree(irq_object);
+ return NULL;
+ }
+
+ return irq_object;
+}
+
+void _mali_osk_irq_term(_mali_osk_irq_t *irq)
+{
+ mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)irq;
+ free_irq(irq_object->irqnum, irq_object);
+ kfree(irq_object);
+}
+
+
+/** This function is called directly in interrupt context from the OS just after
+ * the CPU gets the hw-irq from mali, or from other devices on the same IRQ channel.
+ * One of these functions is registered for each mali core. When an interrupt
+ * arrives, this function is called as many times as there are registered mali cores.
+ * That means we only check one mali core per call, and the core checked on each
+ * turn is given by the \a dev_id variable.
+ * If we detect a pending interrupt on the given core, we mask the interrupt
+ * out by setting the core's IRQ_MASK register to zero.
+ * Then we schedule mali_core_irq_handler_bottom_half to run as a high-priority
+ * work queue job.
+ */
+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id) /* , struct pt_regs *regs*/
+{
+ irqreturn_t ret = IRQ_NONE;
+ mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)dev_id;
+
+ if (_MALI_OSK_ERR_OK == irq_object->uhandler(irq_object->data)) {
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_locks.c b/drivers/gpu/arm/utgard/linux/mali_osk_locks.c
new file mode 100644
index 000000000000..50c0a9d23819
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_locks.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk_locks.h"
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+
+
+#ifdef DEBUG
+#ifdef LOCK_ORDER_CHECKING
+static DEFINE_SPINLOCK(lock_tracking_lock);
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid);
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order);
+#endif /* LOCK_ORDER_CHECKING */
+
+void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+{
+ checker->orig_flags = flags;
+ checker->owner = 0;
+
+#ifdef LOCK_ORDER_CHECKING
+ checker->order = order;
+ checker->next = NULL;
+#endif
+}
+
+void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker)
+{
+ checker->owner = _mali_osk_get_tid();
+
+#ifdef LOCK_ORDER_CHECKING
+ if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+ if (!add_lock_to_log_and_check(checker, _mali_osk_get_tid())) {
+ printk(KERN_ERR "%d: ERROR lock %p taken while holding a lock of a higher order.\n",
+ _mali_osk_get_tid(), checker);
+ dump_stack();
+ }
+ }
+#endif
+}
+
+void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker)
+{
+
+#ifdef LOCK_ORDER_CHECKING
+ if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) {
+ remove_lock_from_log(checker, _mali_osk_get_tid());
+ }
+#endif
+ checker->owner = 0;
+}
+
+
+#ifdef LOCK_ORDER_CHECKING
+/* Lock order checking
+ * -------------------
+ *
+ * To assure that lock ordering scheme defined by _mali_osk_lock_order_t is strictly adhered to, the
+ * following function will, together with a linked list and some extra members in _mali_osk_lock_debug_s,
+ * make sure that a lock that is taken has a higher order than the current highest-order lock a
+ * thread holds.
+ *
+ * This is done in the following manner:
+ * - A linked list keeps track of locks held by a thread.
+ * - A `next' pointer is added to each lock. This is used to chain the locks together.
+ * - When taking a lock, `add_lock_to_log_and_check' makes sure that taking
+ * the given lock is legal. It follows the linked list to find the highest-order
+ * lock held by this thread. If that order is lower than the order of the lock
+ * that is to be taken, it appends the new lock to the list and returns
+ * true; if not, it returns false. This return value is assert()'ed on in
+ * _mali_osk_lock_wait().
+ */
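+/* For example, if a thread already holds a lock of order
+ * _MALI_OSK_LOCK_ORDER_SCHEDULER, taking a lock whose order is not strictly
+ * higher in _mali_osk_lock_order_t causes add_lock_to_log_and_check() to
+ * return MALI_FALSE and dump the tracking list. */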
+
+static struct _mali_osk_lock_debug_s *lock_lookup_list;
+
+static void dump_lock_tracking_list(void)
+{
+ struct _mali_osk_lock_debug_s *l;
+ u32 n = 1;
+
+ /* print list for debugging purposes */
+ l = lock_lookup_list;
+
+ while (NULL != l) {
+ printk(" [lock: %p, tid_owner: %d, order: %d] ->", l, l->owner, l->order);
+ l = l->next;
+ MALI_DEBUG_ASSERT(n++ < 100);
+ }
+ printk(" NULL\n");
+}
+
+static int tracking_list_length(void)
+{
+ struct _mali_osk_lock_debug_s *l;
+ u32 n = 0;
+ l = lock_lookup_list;
+
+ while (NULL != l) {
+ l = l->next;
+ n++;
+ MALI_DEBUG_ASSERT(n < 100);
+ }
+ return n;
+}
+
+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+ mali_bool ret = MALI_FALSE;
+ _mali_osk_lock_order_t highest_order_for_tid = _MALI_OSK_LOCK_ORDER_FIRST;
+ struct _mali_osk_lock_debug_s *highest_order_lock = (struct _mali_osk_lock_debug_s *)0xbeefbabe;
+ struct _mali_osk_lock_debug_s *l;
+ unsigned long local_lock_flag;
+ u32 len;
+
+ spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+ len = tracking_list_length();
+
+ l = lock_lookup_list;
+ if (NULL == l) { /* This is the first lock taken by this thread -- record and return true */
+ lock_lookup_list = lock;
+ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+ return MALI_TRUE;
+ } else {
+ /* Traverse the locks taken and find the lock of the highest order.
+ * Since several threads may hold locks, each lock's owner must be
+ * checked so that locks not owned by this thread can be ignored. */
+ for (;;) {
+ MALI_DEBUG_ASSERT_POINTER(l);
+ if (tid == l->owner && l->order >= highest_order_for_tid) {
+ highest_order_for_tid = l->order;
+ highest_order_lock = l;
+ }
+
+ if (NULL != l->next) {
+ l = l->next;
+ } else {
+ break;
+ }
+ }
+
+ l->next = lock;
+ lock->next = NULL;
+ }
+
+ /* We have now found the highest order lock currently held by this thread and can see if it is
+ * legal to take the requested lock. */
+ ret = highest_order_for_tid < lock->order;
+
+ if (!ret) {
+ printk(KERN_ERR "Took lock of order %d (%s) while holding lock of order %d (%s)\n",
+ lock->order, lock_order_to_string(lock->order),
+ highest_order_for_tid, lock_order_to_string(highest_order_for_tid));
+ dump_lock_tracking_list();
+ }
+
+ if (len + 1 != tracking_list_length()) {
+ printk(KERN_ERR "************ lock: %p\n", lock);
+ printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+ dump_lock_tracking_list();
+ MALI_DEBUG_ASSERT_POINTER(NULL);
+ }
+
+ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+ return ret;
+}
+
+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid)
+{
+ struct _mali_osk_lock_debug_s *curr;
+ struct _mali_osk_lock_debug_s *prev = NULL;
+ unsigned long local_lock_flag;
+ u32 len;
+ u32 n = 0;
+
+ spin_lock_irqsave(&lock_tracking_lock, local_lock_flag);
+ len = tracking_list_length();
+ curr = lock_lookup_list;
+
+ if (NULL == curr) {
+ printk(KERN_ERR "Error: Lock tracking list was empty on call to remove_lock_from_log\n");
+ dump_lock_tracking_list();
+ }
+
+ MALI_DEBUG_ASSERT_POINTER(curr);
+
+
+ while (lock != curr) {
+ prev = curr;
+
+ MALI_DEBUG_ASSERT_POINTER(curr);
+ curr = curr->next;
+ MALI_DEBUG_ASSERT(n++ < 100);
+ }
+
+ if (NULL == prev) {
+ lock_lookup_list = curr->next;
+ } else {
+ MALI_DEBUG_ASSERT_POINTER(curr);
+ MALI_DEBUG_ASSERT_POINTER(prev);
+ prev->next = curr->next;
+ }
+
+ lock->next = NULL;
+
+ if (len - 1 != tracking_list_length()) {
+ printk(KERN_ERR "************ lock: %p\n", lock);
+ printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length());
+ dump_lock_tracking_list();
+ MALI_DEBUG_ASSERT_POINTER(NULL);
+ }
+
+ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag);
+}
+
+static const char *const lock_order_to_string(_mali_osk_lock_order_t order)
+{
+ switch (order) {
+ case _MALI_OSK_LOCK_ORDER_SESSIONS:
+ return "_MALI_OSK_LOCK_ORDER_SESSIONS";
+ break;
+ case _MALI_OSK_LOCK_ORDER_MEM_SESSION:
+ return "_MALI_OSK_LOCK_ORDER_MEM_SESSION";
+ break;
+ case _MALI_OSK_LOCK_ORDER_MEM_INFO:
+ return "_MALI_OSK_LOCK_ORDER_MEM_INFO";
+ break;
+ case _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE:
+ return "_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE";
+ break;
+ case _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP:
+ return "_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP";
+ break;
+ case _MALI_OSK_LOCK_ORDER_PM_EXECUTION:
+ return "_MALI_OSK_LOCK_ORDER_PM_EXECUTION";
+ break;
+ case _MALI_OSK_LOCK_ORDER_EXECUTOR:
+ return "_MALI_OSK_LOCK_ORDER_EXECUTOR";
+ break;
+ case _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM:
+ return "_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM";
+ break;
+ case _MALI_OSK_LOCK_ORDER_SCHEDULER:
+ return "_MALI_OSK_LOCK_ORDER_SCHEDULER";
+ break;
+ case _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED:
+ return "_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED";
+ break;
+ case _MALI_OSK_LOCK_ORDER_DMA_COMMAND:
+ return "_MALI_OSK_LOCK_ORDER_DMA_COMMAND";
+ break;
+ case _MALI_OSK_LOCK_ORDER_PROFILING:
+ return "_MALI_OSK_LOCK_ORDER_PROFILING";
+ break;
+ case _MALI_OSK_LOCK_ORDER_L2:
+ return "_MALI_OSK_LOCK_ORDER_L2";
+ break;
+ case _MALI_OSK_LOCK_ORDER_L2_COMMAND:
+ return "_MALI_OSK_LOCK_ORDER_L2_COMMAND";
+ break;
+ case _MALI_OSK_LOCK_ORDER_UTILIZATION:
+ return "_MALI_OSK_LOCK_ORDER_UTILIZATION";
+ break;
+ case _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS:
+ return "_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS";
+ break;
+ case _MALI_OSK_LOCK_ORDER_PM_STATE:
+ return "_MALI_OSK_LOCK_ORDER_PM_STATE";
+ break;
+ default:
+ return "<UNKNOWN_LOCK_ORDER>";
+ }
+}
+#endif /* LOCK_ORDER_CHECKING */
+#endif /* DEBUG */
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_locks.h b/drivers/gpu/arm/utgard/linux/mali_osk_locks.h
new file mode 100644
index 000000000000..aa32a81e7496
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_locks.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_locks.h
+ * Defines OS abstraction of lock and mutex
+ */
+#ifndef _MALI_OSK_LOCKS_H
+#define _MALI_OSK_LOCKS_H
+
+#include <linux/spinlock.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+
+#include <linux/slab.h>
+
+#include "mali_osk_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* When DEBUG is enabled, this struct will be used to track owner, mode and order checking */
+#ifdef DEBUG
+ struct _mali_osk_lock_debug_s {
+ u32 owner;
+ _mali_osk_lock_flags_t orig_flags;
+ _mali_osk_lock_order_t order;
+ struct _mali_osk_lock_debug_s *next;
+ };
+#endif
+
+ /* Abstraction of spinlock_t */
+ struct _mali_osk_spinlock_s {
+#ifdef DEBUG
+ struct _mali_osk_lock_debug_s checker;
+#endif
+ spinlock_t spinlock;
+ };
+
+ /* Abstraction of spinlock_t plus a flags field used to store the IRQ state saved when locking */
+ struct _mali_osk_spinlock_irq_s {
+#ifdef DEBUG
+ struct _mali_osk_lock_debug_s checker;
+#endif
+
+ spinlock_t spinlock;
+ unsigned long flags;
+ };
+
+ /* Abstraction of rw_semaphore in OS */
+ struct _mali_osk_mutex_rw_s {
+#ifdef DEBUG
+ struct _mali_osk_lock_debug_s checker;
+ _mali_osk_lock_mode_t mode;
+#endif
+
+ struct rw_semaphore rw_sema;
+ };
+
+ /* Mutex and mutex_interruptible functions share the same osk mutex struct */
+ struct _mali_osk_mutex_s {
+#ifdef DEBUG
+ struct _mali_osk_lock_debug_s checker;
+#endif
+ struct mutex mutex;
+ };
+
+#ifdef DEBUG
+ /** @brief The _mali_osk_locks_debug_init/add/remove() functions are declared when DEBUG is enabled and
+ * defined in mali_osk_locks.c. When LOCK_ORDER_CHECKING is enabled, calling these functions whenever we
+ * init/lock/unlock a lock/mutex lets us track the lock order for a given tid. */
+ void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order);
+ void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker);
+ void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker);
+
+ /** @brief This function can return a given lock's owner when DEBUG is enabled. */
+ static inline u32 _mali_osk_lock_get_owner(struct _mali_osk_lock_debug_s *lock)
+ {
+ return lock->owner;
+ }
+#else
+#define _mali_osk_locks_debug_init(x, y, z) do {} while (0)
+#define _mali_osk_locks_debug_add(x) do {} while (0)
+#define _mali_osk_locks_debug_remove(x) do {} while (0)
+#endif
+
+ /** @brief Before _mali_osk_spinlock_lock() is used, this init function must be called to allocate memory and initialize the spinlock */
+ static inline _mali_osk_spinlock_t *_mali_osk_spinlock_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+ {
+ _mali_osk_spinlock_t *lock = NULL;
+
+ lock = kmalloc(sizeof(_mali_osk_spinlock_t), GFP_KERNEL);
+ if (NULL == lock) {
+ return NULL;
+ }
+ spin_lock_init(&lock->spinlock);
+ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+ return lock;
+ }
+
+ /** @brief Lock a spinlock */
+ static inline void _mali_osk_spinlock_lock(_mali_osk_spinlock_t *lock)
+ {
+ BUG_ON(NULL == lock);
+ spin_lock(&lock->spinlock);
+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+ }
+
+ /** @brief Unlock a spinlock */
+ static inline void _mali_osk_spinlock_unlock(_mali_osk_spinlock_t *lock)
+ {
+ BUG_ON(NULL == lock);
+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+ spin_unlock(&lock->spinlock);
+ }
+
+ /** @brief Free the memory block pointed to by the lock argument, which must be of type
+  * _mali_osk_spinlock_t *. */
+ static inline void _mali_osk_spinlock_term(_mali_osk_spinlock_t *lock)
+ {
+ /* Parameter validation */
+ BUG_ON(NULL == lock);
+
+ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+ kfree(lock);
+ }
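+
+ /* Usage sketch (illustrative only; the flag and order values shown are
+  * assumptions taken from mali_osk_types.h, not fixed by this header):
+  *
+  *     _mali_osk_spinlock_t *lock =
+  *             _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
+  *     if (NULL == lock)
+  *             return _MALI_OSK_ERR_NOMEM;
+  *     _mali_osk_spinlock_lock(lock);
+  *     ... critical section ...
+  *     _mali_osk_spinlock_unlock(lock);
+  *     _mali_osk_spinlock_term(lock);
+  */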
+
+ /** @brief Before _mali_osk_spinlock_irq_lock/unlock/term() is called, this init function must be
+  * called to initialize the spinlock and flags in the _mali_osk_spinlock_irq_t object. */
+ static inline _mali_osk_spinlock_irq_t *_mali_osk_spinlock_irq_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+ {
+ _mali_osk_spinlock_irq_t *lock = NULL;
+ lock = kmalloc(sizeof(_mali_osk_spinlock_irq_t), GFP_KERNEL);
+
+ if (NULL == lock) {
+ return NULL;
+ }
+
+ lock->flags = 0;
+ spin_lock_init(&lock->spinlock);
+ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+ return lock;
+ }
+
+ /** @brief Lock the spinlock and save the IRQ flags */
+ static inline void _mali_osk_spinlock_irq_lock(_mali_osk_spinlock_irq_t *lock)
+ {
+ unsigned long tmp_flags;
+
+ BUG_ON(NULL == lock);
+ spin_lock_irqsave(&lock->spinlock, tmp_flags);
+ lock->flags = tmp_flags;
+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+ }
+
+ /** @brief Unlock the spinlock and restore the saved IRQ flags */
+ static inline void _mali_osk_spinlock_irq_unlock(_mali_osk_spinlock_irq_t *lock)
+ {
+ BUG_ON(NULL == lock);
+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+ spin_unlock_irqrestore(&lock->spinlock, lock->flags);
+ }
+
+ /** @brief Free the memory block pointed to by the lock argument, which must be of type
+  * _mali_osk_spinlock_irq_t *. */
+ static inline void _mali_osk_spinlock_irq_term(_mali_osk_spinlock_irq_t *lock)
+ {
+ /* Parameter validation */
+ BUG_ON(NULL == lock);
+
+ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+ kfree(lock);
+ }
+
+ /** @brief Before _mali_osk_mutex_rw_wait/signal/term() is called, _mali_osk_mutex_rw_init()
+  * must be called to kmalloc a memory block and initialize its elements. */
+ static inline _mali_osk_mutex_rw_t *_mali_osk_mutex_rw_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+ {
+ _mali_osk_mutex_rw_t *lock = NULL;
+
+ lock = kmalloc(sizeof(_mali_osk_mutex_rw_t), GFP_KERNEL);
+
+ if (NULL == lock) {
+ return NULL;
+ }
+
+ init_rwsem(&lock->rw_sema);
+ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+ return lock;
+ }
+
+ /** @brief When calling _mali_osk_mutex_rw_wait/signal(), the second argument mode
+  * must be either _MALI_OSK_LOCKMODE_RO or _MALI_OSK_LOCKMODE_RW */
+ static inline void _mali_osk_mutex_rw_wait(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+ {
+ BUG_ON(NULL == lock);
+ BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+
+ if (mode == _MALI_OSK_LOCKMODE_RO) {
+ down_read(&lock->rw_sema);
+ } else {
+ down_write(&lock->rw_sema);
+ }
+
+#ifdef DEBUG
+ /* Record the mode (RO or RW) the semaphore was taken in */
+ lock->mode = mode;
+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+#endif
+ }
+
+ /** @brief Release lock->rw_sema with up_read()/up_write() according to the mode argument. */
+ static inline void _mali_osk_mutex_rw_signal(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode)
+ {
+ BUG_ON(NULL == lock);
+ BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode));
+#ifdef DEBUG
+ /* make sure the thread releasing the lock actually was the owner */
+ if (mode == _MALI_OSK_LOCKMODE_RW) {
+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+ /* This lock now has no owner */
+ lock->checker.owner = 0;
+ }
+#endif
+
+ if (mode == _MALI_OSK_LOCKMODE_RO) {
+ up_read(&lock->rw_sema);
+ } else {
+ up_write(&lock->rw_sema);
+ }
+ }
+
+ /** @brief Free the memory block pointed to by the lock argument, which must be of type
+  * _mali_osk_mutex_rw_t *. */
+ static inline void _mali_osk_mutex_rw_term(_mali_osk_mutex_rw_t *lock)
+ {
+ /* Parameter validation */
+ BUG_ON(NULL == lock);
+
+ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+ kfree(lock);
+ }
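+
+ /* Usage sketch (illustrative only; rw_lock is a hypothetical
+  * _mali_osk_mutex_rw_t created with _mali_osk_mutex_rw_init()):
+  *
+  *     _mali_osk_mutex_rw_wait(rw_lock, _MALI_OSK_LOCKMODE_RO);
+  *     ... read shared state, possibly concurrently with other readers ...
+  *     _mali_osk_mutex_rw_signal(rw_lock, _MALI_OSK_LOCKMODE_RO);
+  *
+  *     _mali_osk_mutex_rw_wait(rw_lock, _MALI_OSK_LOCKMODE_RW);
+  *     ... modify shared state exclusively ...
+  *     _mali_osk_mutex_rw_signal(rw_lock, _MALI_OSK_LOCKMODE_RW);
+  */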
+
+ /** @brief Mutex and mutex_interruptible share the same init and term functions, because they use the
+  * same osk mutex struct; the difference between them is which locking function they use */
+ static inline _mali_osk_mutex_t *_mali_osk_mutex_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order)
+ {
+ _mali_osk_mutex_t *lock = NULL;
+
+ lock = kmalloc(sizeof(_mali_osk_mutex_t), GFP_KERNEL);
+
+ if (NULL == lock) {
+ return NULL;
+ }
+ mutex_init(&lock->mutex);
+
+ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order);
+ return lock;
+ }
+
+ /** @brief Lock lock->mutex with the mutex_lock_interruptible() function */
+ static inline _mali_osk_errcode_t _mali_osk_mutex_wait_interruptible(_mali_osk_mutex_t *lock)
+ {
+ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK;
+
+ BUG_ON(NULL == lock);
+
+ if (mutex_lock_interruptible(&lock->mutex)) {
+ printk(KERN_WARNING "Mali: Cannot lock mutex\n");
+ err = _MALI_OSK_ERR_RESTARTSYSCALL;
+ }
+
+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+ return err;
+ }
+
+ /** @brief Unlock lock->mutex after it was locked with mutex_lock_interruptible(). */
+ static inline void _mali_osk_mutex_signal_interruptible(_mali_osk_mutex_t *lock)
+ {
+ BUG_ON(NULL == lock);
+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+ mutex_unlock(&lock->mutex);
+ }
+
+ /** @brief Lock lock->mutex with the plain mutex_lock() function, which cannot be interrupted. */
+ static inline void _mali_osk_mutex_wait(_mali_osk_mutex_t *lock)
+ {
+ BUG_ON(NULL == lock);
+ mutex_lock(&lock->mutex);
+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock);
+ }
+
+ /** @brief Unlock lock->mutex after it was locked with mutex_lock(). */
+ static inline void _mali_osk_mutex_signal(_mali_osk_mutex_t *lock)
+ {
+ BUG_ON(NULL == lock);
+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock);
+ mutex_unlock(&lock->mutex);
+ }
+
+ /** @brief Free the memory block that lock points to. */
+ static inline void _mali_osk_mutex_term(_mali_osk_mutex_t *lock)
+ {
+ /* Parameter validation */
+ BUG_ON(NULL == lock);
+
+ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */
+ kfree(lock);
+ }
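+
+ /* Usage sketch (illustrative only): the interruptible variant's return
+  * value must be checked, since a pending signal can abort the wait.
+  *
+  *     if (_MALI_OSK_ERR_OK != _mali_osk_mutex_wait_interruptible(lock))
+  *             return _MALI_OSK_ERR_RESTARTSYSCALL;  (interrupted by a signal)
+  *     ... critical section ...
+  *     _mali_osk_mutex_signal_interruptible(lock);
+  */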
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MALI_OSK_LOCKS_H */
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_low_level_mem.c b/drivers/gpu/arm/utgard/linux/mali_osk_low_level_mem.c
new file mode 100644
index 000000000000..c14e872a7d63
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_low_level_mem.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_low_level_mem.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <asm/io.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+
+void _mali_osk_mem_barrier(void)
+{
+ mb();
+}
+
+void _mali_osk_write_mem_barrier(void)
+{
+ wmb();
+}
+
+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description)
+{
+ return (mali_io_address)ioremap_nocache(phys, size);
+}
+
+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address virt)
+{
+ iounmap((void *)virt);
+}
+
+_mali_osk_errcode_t inline _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description)
+{
+#if MALI_LICENSE_IS_GPL
+ return _MALI_OSK_ERR_OK; /* GPL driver gets the mem region for the resources registered automatically */
+#else
+ return ((NULL == request_mem_region(phys, size, description)) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK);
+#endif
+}
+
+void inline _mali_osk_mem_unreqregion(uintptr_t phys, u32 size)
+{
+#if !MALI_LICENSE_IS_GPL
+ release_mem_region(phys, size);
+#endif
+}
+
+void inline _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val)
+{
+ __raw_writel(cpu_to_le32(val), ((u8 *)addr) + offset);
+}
+
+u32 inline _mali_osk_mem_ioread32(volatile mali_io_address addr, u32 offset)
+{
+ return ioread32(((u8 *)addr) + offset);
+}
+
+void inline _mali_osk_mem_iowrite32(volatile mali_io_address addr, u32 offset, u32 val)
+{
+ iowrite32(val, ((u8 *)addr) + offset);
+}
+
+void _mali_osk_cache_flushall(void)
+{
+ /** @note Cached memory is not currently supported in this implementation */
+}
+
+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size)
+{
+ _mali_osk_write_mem_barrier();
+}
+
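+/* Copy between two user-space buffers via a 4 KiB kernel bounce buffer,
+ * one block at a time, so a fault in either direction loses at most the
+ * current block. Returns the number of bytes actually copied, which may
+ * be less than 'size' if a fault occurs.
+ */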
+u32 _mali_osk_mem_write_safe(void __user *dest, const void __user *src, u32 size)
+{
+#define MALI_MEM_SAFE_COPY_BLOCK_SIZE 4096
+ u32 retval = 0;
+ void *temp_buf;
+
+ temp_buf = kmalloc(MALI_MEM_SAFE_COPY_BLOCK_SIZE, GFP_KERNEL);
+ if (NULL != temp_buf) {
+ u32 bytes_left_to_copy = size;
+ u32 i;
+ for (i = 0; i < size; i += MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+ u32 size_to_copy;
+ u32 size_copied;
+ u32 bytes_left;
+
+ if (bytes_left_to_copy > MALI_MEM_SAFE_COPY_BLOCK_SIZE) {
+ size_to_copy = MALI_MEM_SAFE_COPY_BLOCK_SIZE;
+ } else {
+ size_to_copy = bytes_left_to_copy;
+ }
+
+ bytes_left = copy_from_user(temp_buf, ((char *)src) + i, size_to_copy);
+ size_copied = size_to_copy - bytes_left;
+
+ bytes_left = copy_to_user(((char *)dest) + i, temp_buf, size_copied);
+ size_copied -= bytes_left;
+
+ bytes_left_to_copy -= size_copied;
+ retval += size_copied;
+
+ if (size_copied != size_to_copy) {
+ break; /* Early out, we were not able to copy this entire block */
+ }
+ }
+
+ kfree(temp_buf);
+ }
+
+ return retval;
+}
+
+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args)
+{
+ void __user *src;
+ void __user *dst;
+ struct mali_session_data *session;
+
+ MALI_DEBUG_ASSERT_POINTER(args);
+
+ session = (struct mali_session_data *)(uintptr_t)args->ctx;
+
+ if (NULL == session) {
+ return _MALI_OSK_ERR_INVALID_ARGS;
+ }
+
+ src = (void __user *)(uintptr_t)args->src;
+ dst = (void __user *)(uintptr_t)args->dest;
+
+ /* Return number of bytes actually copied */
+ args->size = _mali_osk_mem_write_safe(dst, src, args->size);
+ return _MALI_OSK_ERR_OK;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_mali.c b/drivers/gpu/arm/utgard/linux/mali_osk_mali.c
new file mode 100644
index 000000000000..a3749d8057b4
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_mali.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_mali.c
+ * Implementation of the OS abstraction layer which is specific for the Mali kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/mali/mali_utgard.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "mali_osk_mali.h"
+#include "mali_kernel_common.h" /* MALI_xxx macros */
+#include "mali_osk.h" /* kernel side OS functions */
+#include "mali_kernel_linux.h"
+
+
+#if defined(CONFIG_MALI_DT) && !defined(CONFIG_MALI_PLAT_SPECIFIC_DT)
+
+#define MALI_OSK_INVALID_RESOURCE_ADDRESS 0xFFFFFFFF
+
+/**
+ * Define the maximum number of resources we can have.
+ */
+#define MALI_OSK_MAX_RESOURCE_NUMBER 27
+
+/**
+ * Define the number of resources with interrupts; they are
+ * the first 20 elements of the mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_WITH_IRQ_NUMBER 20
+
+/**
+ * PP core start and end locations in the mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_PP_LOCATION_START 2
+#define MALI_OSK_RESOURCE_PP_LOCATION_END 17
+
+/**
+ * L2 cache start and end locations in the mali_osk_resource_bank array.
+ */
+#define MALI_OSK_RESOURCE_L2_LOCATION_START 20
+#define MALI_OSK_RESOURCE_L2_LOCATION_END 22
+
+/**
+ * DMA unit location.
+ */
+#define MALI_OSK_RESOURCE_DMA_LOCATION 26
+
+static _mali_osk_resource_t mali_osk_resource_bank[MALI_OSK_MAX_RESOURCE_NUMBER] = {
+ {.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "IRQGP",},
+ {.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "IRQGPMMU",},
+ {.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "IRQPP0",},
+ {.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "IRQPPMMU0",},
+ {.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "IRQPP1",},
+ {.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "IRQPPMMU1",},
+ {.description = "Mali_PP2", .base = MALI_OFFSET_PP2, .irq_name = "IRQPP2",},
+ {.description = "Mali_PP2_MMU", .base = MALI_OFFSET_PP2_MMU, .irq_name = "IRQPPMMU2",},
+ {.description = "Mali_PP3", .base = MALI_OFFSET_PP3, .irq_name = "IRQPP3",},
+ {.description = "Mali_PP3_MMU", .base = MALI_OFFSET_PP3_MMU, .irq_name = "IRQPPMMU3",},
+ {.description = "Mali_PP4", .base = MALI_OFFSET_PP4, .irq_name = "IRQPP4",},
+ {.description = "Mali_PP4_MMU", .base = MALI_OFFSET_PP4_MMU, .irq_name = "IRQPPMMU4",},
+ {.description = "Mali_PP5", .base = MALI_OFFSET_PP5, .irq_name = "IRQPP5",},
+ {.description = "Mali_PP5_MMU", .base = MALI_OFFSET_PP5_MMU, .irq_name = "IRQPPMMU5",},
+ {.description = "Mali_PP6", .base = MALI_OFFSET_PP6, .irq_name = "IRQPP6",},
+ {.description = "Mali_PP6_MMU", .base = MALI_OFFSET_PP6_MMU, .irq_name = "IRQPPMMU6",},
+ {.description = "Mali_PP7", .base = MALI_OFFSET_PP7, .irq_name = "IRQPP7",},
+ {.description = "Mali_PP7_MMU", .base = MALI_OFFSET_PP7_MMU, .irq_name = "IRQPPMMU",},
+ {.description = "Mali_PP_Broadcast", .base = MALI_OFFSET_PP_BCAST, .irq_name = "IRQPP",},
+ {.description = "Mali_PMU", .base = MALI_OFFSET_PMU, .irq_name = "IRQPMU",},
+ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE0,},
+ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE1,},
+ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE2,},
+ {.description = "Mali_PP_MMU_Broadcast", .base = MALI_OFFSET_PP_BCAST_MMU,},
+ {.description = "Mali_Broadcast", .base = MALI_OFFSET_BCAST,},
+ {.description = "Mali_DLBU", .base = MALI_OFFSET_DLBU,},
+ {.description = "Mali_DMA", .base = MALI_OFFSET_DMA,},
+};
+
+static int _mali_osk_get_compatible_name(const char **out_string)
+{
+ struct device_node *node = mali_platform_device->dev.of_node;
+
+ MALI_DEBUG_ASSERT(NULL != node);
+
+ return of_property_read_string(node, "compatible", out_string);
+}
+
+_mali_osk_errcode_t _mali_osk_resource_initialize(void)
+{
+ mali_bool mali_is_450 = MALI_FALSE, mali_is_470 = MALI_FALSE;
+ int i, pp_core_num = 0, l2_core_num = 0;
+ struct resource *res;
+ const char *compatible_name = NULL;
+
+ if (0 == _mali_osk_get_compatible_name(&compatible_name)) {
+ if (0 == strncmp(compatible_name, "arm,mali-450", strlen("arm,mali-450"))) {
+ mali_is_450 = MALI_TRUE;
+ MALI_DEBUG_PRINT(2, ("mali-450 device tree detected."));
+ } else if (0 == strncmp(compatible_name, "arm,mali-470", strlen("arm,mali-470"))) {
+ mali_is_470 = MALI_TRUE;
+ MALI_DEBUG_PRINT(2, ("mali-470 device tree detected."));
+ }
+ }
+
+ for (i = 0; i < MALI_OSK_RESOURCE_WITH_IRQ_NUMBER; i++) {
+ res = platform_get_resource_byname(mali_platform_device, IORESOURCE_IRQ, mali_osk_resource_bank[i].irq_name);
+ if (res) {
+ mali_osk_resource_bank[i].irq = res->start;
+ } else {
+ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+ }
+ }
+
+ for (i = MALI_OSK_RESOURCE_PP_LOCATION_START; i <= MALI_OSK_RESOURCE_PP_LOCATION_END; i++) {
+ if (MALI_OSK_INVALID_RESOURCE_ADDRESS != mali_osk_resource_bank[i].base) {
+ pp_core_num++;
+ }
+ }
+
+ /* We have to divide by 2, because each PP is counted twice (pp_core and pp_mmu_core). */
+ if (0 != pp_core_num % 2) {
+ MALI_DEBUG_PRINT(2, ("The value of pp core number isn't normal."));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ pp_core_num /= 2;
+
+ /**
+ * We can calculate the number of L2 cache cores from the number of PP cores
+ * and the device type (mali400/mali450/mali470).
+ */
+ l2_core_num = 1;
+ if (mali_is_450) {
+ if (pp_core_num > 4) {
+ l2_core_num = 3;
+ } else if (pp_core_num <= 4) {
+ l2_core_num = 2;
+ }
+ }
+
+ for (i = MALI_OSK_RESOURCE_L2_LOCATION_END; i > MALI_OSK_RESOURCE_L2_LOCATION_START + l2_core_num - 1; i--) {
+ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+ }
+
+ /* If the device is neither mali-450 nor mali-470, remove the related resources from the resource bank. */
+ if (!(mali_is_450 || mali_is_470)) {
+ for (i = MALI_OSK_RESOURCE_L2_LOCATION_END + 1; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+ }
+ }
+
+ if (mali_is_470)
+ mali_osk_resource_bank[MALI_OSK_RESOURCE_DMA_LOCATION].base = MALI_OSK_INVALID_RESOURCE_ADDRESS;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+ int i;
+
+ if (NULL == mali_platform_device) {
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ }
+
+ /* Traverse all resources in the resource bank to find the matching one. */
+ for (i = 0; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) {
+ if (mali_osk_resource_bank[i].base == addr) {
+ if (NULL != res) {
+ res->base = addr + _mali_osk_resource_base_address();
+ res->description = mali_osk_resource_bank[i].description;
+ res->irq = mali_osk_resource_bank[i].irq;
+ }
+ return _MALI_OSK_ERR_OK;
+ }
+ }
+
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+ struct resource *reg_res = NULL;
+ uintptr_t ret = 0;
+
+ reg_res = platform_get_resource(mali_platform_device, IORESOURCE_MEM, 0);
+
+ if (NULL != reg_res) {
+ ret = reg_res->start;
+ }
+
+ return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+ struct device_node *node = mali_platform_device->dev.of_node;
+ struct property *prop;
+ const __be32 *p;
+ int length = 0, i = 0;
+ u32 u;
+
+ MALI_DEBUG_PRINT(2, ("Get pmu config from device tree configuration.\n"));
+
+ MALI_DEBUG_ASSERT(NULL != node);
+
+ if (!of_get_property(node, "pmu_domain_config", &length)) {
+ return;
+ }
+
+ if (array_size != length / sizeof(u32)) {
+ MALI_PRINT_ERROR(("Wrong pmu domain config in device tree."));
+ return;
+ }
+
+ of_property_for_each_u32(node, "pmu_domain_config", prop, p, u) {
+ domain_config_array[i] = (u16)u;
+ i++;
+ }
+
+ return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+ struct device_node *node = mali_platform_device->dev.of_node;
+ u32 switch_delay;
+
+ MALI_DEBUG_ASSERT(NULL != node);
+
+ if (0 == of_property_read_u32(node, "pmu_switch_delay", &switch_delay)) {
+ return switch_delay;
+ } else {
+ MALI_DEBUG_PRINT(2, ("Couldn't find pmu_switch_delay in device tree configuration.\n"));
+ }
+
+ return 0;
+}
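+
+/* Illustrative device-tree fragment consumed by the two helpers above.
+ * The property names come from the code; the node name and the values are
+ * hypothetical:
+ *
+ *	gpu@f4080000 {
+ *		compatible = "arm,mali-450";
+ *		pmu_domain_config = <0x1 0x2 0x4 0x4 0x4 0x0 0x0 0x0 0x0 0x1 0x2 0x0>;
+ *		pmu_switch_delay = <0xff>;
+ *	};
+ */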
+
+#else /* CONFIG_MALI_DT && !CONFIG_MALI_PLAT_SPECIFIC_DT */
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+ int i;
+ uintptr_t phys_addr;
+
+ if (NULL == mali_platform_device) {
+ /* Not connected to a device */
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ }
+
+ phys_addr = addr + _mali_osk_resource_base_address();
+ for (i = 0; i < mali_platform_device->num_resources; i++) {
+ if (IORESOURCE_MEM == resource_type(&(mali_platform_device->resource[i])) &&
+ mali_platform_device->resource[i].start == phys_addr) {
+ if (NULL != res) {
+ res->base = phys_addr;
+ res->description = mali_platform_device->resource[i].name;
+
+ /* Any (optional) IRQ resource belonging to this resource will follow */
+ if ((i + 1) < mali_platform_device->num_resources &&
+ IORESOURCE_IRQ == resource_type(&(mali_platform_device->resource[i + 1]))) {
+ res->irq = mali_platform_device->resource[i + 1].start;
+ } else {
+ res->irq = -1;
+ }
+ }
+ return _MALI_OSK_ERR_OK;
+ }
+ }
+
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+ uintptr_t lowest_addr = (uintptr_t)(0 - 1);
+ uintptr_t ret = 0;
+
+ if (NULL != mali_platform_device) {
+ int i;
+ for (i = 0; i < mali_platform_device->num_resources; i++) {
+ if (mali_platform_device->resource[i].flags & IORESOURCE_MEM &&
+ mali_platform_device->resource[i].start < lowest_addr) {
+ lowest_addr = mali_platform_device->resource[i].start;
+ ret = lowest_addr;
+ }
+ }
+ }
+
+ return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+ _mali_osk_device_data data = { 0, };
+
+ MALI_DEBUG_PRINT(2, ("Get pmu config from platform device data.\n"));
+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+ /* Copy the custom customer power domain config */
+ _mali_osk_memcpy(domain_config_array, data.pmu_domain_config, sizeof(data.pmu_domain_config));
+ }
+
+ return;
+}
+
+u32 _mali_osk_get_pmu_switch_delay(void)
+{
+ _mali_osk_errcode_t err;
+ _mali_osk_device_data data = { 0, };
+
+ err = _mali_osk_device_data_get(&data);
+
+ if (_MALI_OSK_ERR_OK == err) {
+ return data.pmu_switch_delay;
+ }
+
+ return 0;
+}
+#endif /* CONFIG_MALI_DT */
+
+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data)
+{
+ MALI_DEBUG_ASSERT_POINTER(data);
+
+ if (NULL != mali_platform_device) {
+ struct mali_gpu_device_data *os_data = NULL;
+
+ os_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data;
+ if (NULL != os_data) {
+ /* Copy data from the OS-dependent struct to the Mali-neutral struct (identical!) */
+ BUILD_BUG_ON(sizeof(*os_data) != sizeof(*data));
+ _mali_osk_memcpy(data, os_data, sizeof(*os_data));
+
+ return _MALI_OSK_ERR_OK;
+ }
+ }
+
+ return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+u32 _mali_osk_identify_gpu_resource(void)
+{
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE1, NULL))
+ /* Mali 450 */
+ return 0x450;
+
+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_DLBU, NULL))
+ /* Mali 470 */
+ return 0x470;
+
+ /* Mali 400 */
+ return 0x400;
+}
+
+mali_bool _mali_osk_shared_interrupts(void)
+{
+ u32 irqs[128];
+ u32 i, j, irq, num_irqs_found = 0;
+
+ MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+ MALI_DEBUG_ASSERT(128 >= mali_platform_device->num_resources);
+
+ for (i = 0; i < mali_platform_device->num_resources; i++) {
+ if (IORESOURCE_IRQ & mali_platform_device->resource[i].flags) {
+ irq = mali_platform_device->resource[i].start;
+
+ for (j = 0; j < num_irqs_found; ++j) {
+ if (irq == irqs[j]) {
+ return MALI_TRUE;
+ }
+ }
+
+ irqs[num_irqs_found++] = irq;
+ }
+ }
+
+ return MALI_FALSE;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_math.c b/drivers/gpu/arm/utgard/linux/mali_osk_math.c
new file mode 100644
index 000000000000..085ce76f7665
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_math.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_math.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/bitops.h>
+
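+/* Examples (illustrative): fls(0x10) == 5, so _mali_osk_clz(0x10) == 27;
+ * fls(0) == 0, so _mali_osk_clz(0) == 32 and _mali_osk_fls(0) == 0.
+ */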
+u32 _mali_osk_clz(u32 input)
+{
+ return 32 - fls(input);
+}
+
+u32 _mali_osk_fls(u32 input)
+{
+ return fls(input);
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_memory.c b/drivers/gpu/arm/utgard/linux/mali_osk_memory.c
new file mode 100644
index 000000000000..390e613e186d
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_memory.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_memory.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+void inline *_mali_osk_calloc(u32 n, u32 size)
+{
+ return kcalloc(n, size, GFP_KERNEL);
+}
+
+void inline *_mali_osk_malloc(u32 size)
+{
+ return kmalloc(size, GFP_KERNEL);
+}
+
+void inline _mali_osk_free(void *ptr)
+{
+ kfree(ptr);
+}
+
+void inline *_mali_osk_valloc(u32 size)
+{
+ return vmalloc(size);
+}
+
+void inline _mali_osk_vfree(void *ptr)
+{
+ vfree(ptr);
+}
+
+void inline *_mali_osk_memcpy(void *dst, const void *src, u32 len)
+{
+ return memcpy(dst, src, len);
+}
+
+void inline *_mali_osk_memset(void *s, u32 c, u32 n)
+{
+ return memset(s, c, n);
+}
+
+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated)
+{
+ /* No need to prevent an out-of-memory dialogue appearing on Linux,
+ * so we always return MALI_TRUE.
+ */
+ return MALI_TRUE;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_misc.c b/drivers/gpu/arm/utgard/linux/mali_osk_misc.c
new file mode 100644
index 000000000000..0a619e3fc27e
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_misc.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_misc.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include "mali_osk.h"
+
+#if !defined(CONFIG_MALI_QUIET)
+void _mali_osk_dbgmsg(const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ vprintk(fmt, args);
+ va_end(args);
+}
+#endif /* !defined(CONFIG_MALI_QUIET) */
+
+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...)
+{
+ int res;
+ va_list args;
+ va_start(args, fmt);
+
+ res = vscnprintf(buf, (size_t)size, fmt, args);
+
+ va_end(args);
+ return res;
+}
+
+void _mali_osk_ctxprintf(_mali_osk_print_ctx *print_ctx, const char *fmt, ...)
+{
+ va_list args;
+ char buf[512];
+
+ va_start(args, fmt);
+ vscnprintf(buf, 512, fmt, args);
+ seq_printf(print_ctx, "%s", buf); /* buf may contain '%', so never use it as a format string */
+ va_end(args);
+}
+
+void _mali_osk_abort(void)
+{
+ /* make a simple fault by dereferencing a NULL pointer */
+ dump_stack();
+ *(int *)0 = 0;
+}
+
+void _mali_osk_break(void)
+{
+ _mali_osk_abort();
+}
+
+u32 _mali_osk_get_pid(void)
+{
+ /* Thread group ID is the process ID on Linux */
+ return (u32)current->tgid;
+}
+
+char *_mali_osk_get_comm(void)
+{
+ return (char *)current->comm;
+}
+
+
+u32 _mali_osk_get_tid(void)
+{
+ /* pid is actually identifying the thread on Linux */
+ u32 tid = current->pid;
+
+ /* If the pid is 0 the core was idle. Instead of returning 0 we return a special number
+ * identifying which core we are on. */
+ if (0 == tid) {
+ tid = -(1 + raw_smp_processor_id());
+ }
+
+ return tid;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_notification.c b/drivers/gpu/arm/utgard/linux/mali_osk_notification.c
new file mode 100644
index 000000000000..e66fe83f3557
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_notification.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_notification.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/**
+ * Declaration of the notification queue object type.
+ * Contains a linked list of notifications pending delivery to user space.
+ * It also contains a wait queue of exclusive waiters blocked in the ioctl.
+ * When a new notification is posted, a single thread is resumed.
+ */
+struct _mali_osk_notification_queue_t_struct {
+ spinlock_t mutex; /**< Mutex protecting the list */
+ wait_queue_head_t receive_queue; /**< Threads waiting for new entries to the queue */
+ struct list_head head; /**< List of notifications waiting to be picked up */
+};
+
+typedef struct _mali_osk_notification_wrapper_t_struct {
+ struct list_head list; /**< Internal linked list variable */
+ _mali_osk_notification_t data; /**< Notification data */
+} _mali_osk_notification_wrapper_t;
+
+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void)
+{
+ _mali_osk_notification_queue_t *result;
+
+ result = (_mali_osk_notification_queue_t *)kmalloc(sizeof(_mali_osk_notification_queue_t), GFP_KERNEL);
+ if (NULL == result) return NULL;
+
+ spin_lock_init(&result->mutex);
+ init_waitqueue_head(&result->receive_queue);
+ INIT_LIST_HEAD(&result->head);
+
+ return result;
+}
+
+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size)
+{
+ /* OPT Recycling of notification objects */
+ _mali_osk_notification_wrapper_t *notification;
+
+ notification = (_mali_osk_notification_wrapper_t *)kmalloc(sizeof(_mali_osk_notification_wrapper_t) + size,
+ GFP_KERNEL | __GFP_HIGH | __GFP_REPEAT);
+ if (NULL == notification) {
+ MALI_DEBUG_PRINT(1, ("Failed to create a notification object\n"));
+ return NULL;
+ }
+
+ /* Init the list */
+ INIT_LIST_HEAD(&notification->list);
+
+ if (0 != size) {
+ notification->data.result_buffer = ((u8 *)notification) + sizeof(_mali_osk_notification_wrapper_t);
+ } else {
+ notification->data.result_buffer = NULL;
+ }
+
+ /* set up the non-allocating fields */
+ notification->data.notification_type = type;
+ notification->data.result_buffer_size = size;
+
+ /* all ok */
+ return &(notification->data);
+}
+
+void _mali_osk_notification_delete(_mali_osk_notification_t *object)
+{
+ _mali_osk_notification_wrapper_t *notification;
+ MALI_DEBUG_ASSERT_POINTER(object);
+
+ notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+ /* Free the container */
+ kfree(notification);
+}
+
+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue)
+{
+ _mali_osk_notification_t *result;
+ MALI_DEBUG_ASSERT_POINTER(queue);
+
+ while (_MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, &result)) {
+ _mali_osk_notification_delete(result);
+ }
+
+ /* not much to do, just free the memory */
+ kfree(queue);
+}
+
+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+ unsigned long irq_flags;
+#endif
+
+ _mali_osk_notification_wrapper_t *notification;
+ MALI_DEBUG_ASSERT_POINTER(queue);
+ MALI_DEBUG_ASSERT_POINTER(object);
+
+ notification = container_of(object, _mali_osk_notification_wrapper_t, data);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+ spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+ spin_lock(&queue->mutex);
+#endif
+
+ list_add_tail(&notification->list, &queue->head);
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+ spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+ spin_unlock(&queue->mutex);
+#endif
+
+ /* and wake up one possible exclusive waiter */
+ wake_up(&queue->receive_queue);
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+ unsigned long irq_flags;
+#endif
+
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_ITEM_NOT_FOUND;
+ _mali_osk_notification_wrapper_t *wrapper_object;
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+ spin_lock_irqsave(&queue->mutex, irq_flags);
+#else
+ spin_lock(&queue->mutex);
+#endif
+
+ if (!list_empty(&queue->head)) {
+ wrapper_object = list_entry(queue->head.next, _mali_osk_notification_wrapper_t, list);
+ *result = &(wrapper_object->data);
+ list_del_init(&wrapper_object->list);
+ ret = _MALI_OSK_ERR_OK;
+ }
+
+#if defined(MALI_UPPER_HALF_SCHEDULING)
+ spin_unlock_irqrestore(&queue->mutex, irq_flags);
+#else
+ spin_unlock(&queue->mutex);
+#endif
+
+ return ret;
+}
+
+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result)
+{
+ /* check input */
+ MALI_DEBUG_ASSERT_POINTER(queue);
+ MALI_DEBUG_ASSERT_POINTER(result);
+
+ /* default result */
+ *result = NULL;
+
+ if (wait_event_interruptible(queue->receive_queue,
+ _MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, result))) {
+ return _MALI_OSK_ERR_RESTARTSYSCALL;
+ }
+
+ return _MALI_OSK_ERR_OK; /* all ok */
+}
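+
+/* Usage sketch (illustrative only; queue, type and payload are hypothetical).
+ * A producer posts a notification and the ioctl path blocks until one arrives:
+ *
+ *	_mali_osk_notification_t *n = _mali_osk_notification_create(type, sizeof(struct payload));
+ *	... fill n->result_buffer ...
+ *	_mali_osk_notification_queue_send(queue, n);
+ *
+ *	_mali_osk_notification_t *result;
+ *	if (_MALI_OSK_ERR_RESTARTSYSCALL == _mali_osk_notification_queue_receive(queue, &result))
+ *		... interrupted by a signal; let user space retry the syscall ...
+ */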
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_pm.c b/drivers/gpu/arm/utgard/linux/mali_osk_pm.c
new file mode 100644
index 000000000000..21180d33fe75
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_pm.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_pm.c
+ * Implementation of the callback functions from common power management
+ */
+
+#include <linux/sched.h>
+
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_linux.h"
+
+/* Can NOT run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+ int err;
+ MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+ err = pm_runtime_get_sync(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+ pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+ if (0 > err) {
+ MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get_sync() returned error code %d\n", err));
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+ return _MALI_OSK_ERR_OK;
+}
+
+/* Can run in atomic context */
+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+ int err;
+ MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+ err = pm_runtime_get(&(mali_platform_device->dev));
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+ pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+#endif
+ if (0 > err && -EINPROGRESS != err) {
+ MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get() returned error code %d\n", err));
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif
+ return _MALI_OSK_ERR_OK;
+}
+
+
+/* Can run in atomic context */
+void _mali_osk_pm_dev_ref_put(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+ MALI_DEBUG_ASSERT_POINTER(mali_platform_device);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+ pm_runtime_mark_last_busy(&(mali_platform_device->dev));
+ pm_runtime_put_autosuspend(&(mali_platform_device->dev));
+#else
+ pm_runtime_put(&(mali_platform_device->dev));
+#endif
+#endif
+}
+
+void _mali_osk_pm_dev_barrier(void)
+{
+#ifdef CONFIG_PM_RUNTIME
+ pm_runtime_barrier(&(mali_platform_device->dev));
+#endif
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_profiling.c b/drivers/gpu/arm/utgard/linux/mali_osk_profiling.c
new file mode 100644
index 000000000000..cc09748e7316
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_profiling.c
@@ -0,0 +1,1272 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/hrtimer.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched.h>
+
+#include <mali_profiling_gator_api.h>
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_ukk.h"
+#include "mali_uk_types.h"
+#include "mali_osk_profiling.h"
+#include "mali_linux_trace.h"
+#include "mali_gp.h"
+#include "mali_pp.h"
+#include "mali_l2_cache.h"
+#include "mali_user_settings_db.h"
+#include "mali_executor.h"
+#include "mali_memory_manager.h"
+
+#define MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE 100
+#define MALI_PROFILING_STREAM_HOLD_TIME 1000000 /* 1 ms */
+
+#define MALI_PROFILING_STREAM_BUFFER_SIZE (1 << 12)
+#define MALI_PROFILING_STREAM_BUFFER_NUM 100
+
+/**
+ * Define the mali profiling stream struct.
+ */
+typedef struct mali_profiling_stream {
+ u8 data[MALI_PROFILING_STREAM_BUFFER_SIZE];
+ u32 used_size;
+ struct list_head list;
+} mali_profiling_stream;
+
+typedef struct mali_profiling_stream_list {
+ spinlock_t spin_lock;
+ struct list_head free_list;
+ struct list_head queue_list;
+} mali_profiling_stream_list;
+
+static const char mali_name[] = "4xx";
+static const char utgard_setup_version[] = "ANNOTATE_SETUP 1\n";
+
+static u32 profiling_sample_rate = 0;
+static u32 first_sw_counter_index = 0;
+
+static mali_bool l2_cache_counter_if_enabled = MALI_FALSE;
+static u32 num_counters_enabled = 0;
+static u32 mem_counters_enabled = 0;
+
+static _mali_osk_atomic_t stream_fd_if_used;
+
+static wait_queue_head_t stream_fd_wait_queue;
+static mali_profiling_counter *global_mali_profiling_counters = NULL;
+static u32 num_global_mali_profiling_counters = 0;
+
+static mali_profiling_stream_list *global_mali_stream_list = NULL;
+static mali_profiling_stream *mali_counter_stream = NULL;
+static mali_profiling_stream *mali_core_activity_stream = NULL;
+static u64 mali_core_activity_stream_dequeue_time = 0;
+static spinlock_t mali_activity_lock;
+static u32 mali_activity_cores_num = 0;
+static struct hrtimer profiling_sampling_timer;
+
+const char *_mali_mem_counter_descriptions[] = _MALI_MEM_COUTNER_DESCRIPTIONS;
+const char *_mali_special_counter_descriptions[] = _MALI_SPCIAL_COUNTER_DESCRIPTIONS;
+
+static u32 current_profiling_pid = 0;
+
+static void _mali_profiling_stream_list_destory(mali_profiling_stream_list *profiling_stream_list)
+{
+ mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+ MALI_DEBUG_ASSERT_POINTER(profiling_stream_list);
+
+ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->free_list, list) {
+ list_del(&profiling_stream->list);
+ kfree(profiling_stream);
+ }
+
+ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->queue_list, list) {
+ list_del(&profiling_stream->list);
+ kfree(profiling_stream);
+ }
+
+ kfree(profiling_stream_list);
+}
+
+static void _mali_profiling_global_stream_list_free(void)
+{
+ mali_profiling_stream *profiling_stream, *tmp_profiling_stream;
+ unsigned long irq_flags;
+
+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &global_mali_stream_list->queue_list, list) {
+ profiling_stream->used_size = 0;
+ list_move(&profiling_stream->list, &global_mali_stream_list->free_list);
+ }
+ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static _mali_osk_errcode_t _mali_profiling_global_stream_list_dequeue(struct list_head *stream_list, mali_profiling_stream **new_mali_profiling_stream)
+{
+ unsigned long irq_flags;
+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK;
+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+ MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+
+ if (!list_empty(stream_list)) {
+ *new_mali_profiling_stream = list_entry(stream_list->next, mali_profiling_stream, list);
+ list_del_init(&(*new_mali_profiling_stream)->list);
+ } else {
+ ret = _MALI_OSK_ERR_NOMEM;
+ }
+
+ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+
+ return ret;
+}
+
+static void _mali_profiling_global_stream_list_queue(struct list_head *stream_list, mali_profiling_stream *current_mali_profiling_stream)
+{
+ unsigned long irq_flags;
+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+ MALI_DEBUG_ASSERT_POINTER(stream_list);
+
+ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+ list_add_tail(&current_mali_profiling_stream->list, stream_list);
+ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+}
+
+static mali_bool _mali_profiling_global_stream_queue_list_if_empty(void)
+{
+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+ return list_empty(&global_mali_stream_list->queue_list);
+}
+
+static u32 _mali_profiling_global_stream_queue_list_next_size(void)
+{
+ unsigned long irq_flags;
+ u32 size = 0;
+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags);
+ if (!list_empty(&global_mali_stream_list->queue_list)) {
+ mali_profiling_stream *next_mali_profiling_stream =
+ list_entry(global_mali_stream_list->queue_list.next, mali_profiling_stream, list);
+ size = next_mali_profiling_stream->used_size;
+ }
+ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags);
+ return size;
+}
+
+/* The mali profiling stream file operations functions. */
+static ssize_t _mali_profiling_stream_read(
+ struct file *filp,
+ char __user *buffer,
+ size_t size,
+ loff_t *f_pos);
+
+static unsigned int _mali_profiling_stream_poll(struct file *filp, poll_table *wait);
+
+static int _mali_profiling_stream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations mali_profiling_stream_fops = {
+ .release = _mali_profiling_stream_release,
+ .read = _mali_profiling_stream_read,
+ .poll = _mali_profiling_stream_poll,
+};
+
+static ssize_t _mali_profiling_stream_read(
+ struct file *filp,
+ char __user *buffer,
+ size_t size,
+ loff_t *f_pos)
+{
+ u32 copy_len = 0;
+ mali_profiling_stream *current_mali_profiling_stream;
+ u32 used_size;
+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+ while (!_mali_profiling_global_stream_queue_list_if_empty()) {
+ used_size = _mali_profiling_global_stream_queue_list_next_size();
+ if (used_size <= ((u32)size - copy_len)) {
+ current_mali_profiling_stream = NULL;
+ _mali_profiling_global_stream_list_dequeue(&global_mali_stream_list->queue_list,
+ &current_mali_profiling_stream);
+ MALI_DEBUG_ASSERT_POINTER(current_mali_profiling_stream);
+ if (copy_to_user(&buffer[copy_len], current_mali_profiling_stream->data, current_mali_profiling_stream->used_size)) {
+ current_mali_profiling_stream->used_size = 0;
+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+ return -EFAULT;
+ }
+ copy_len += current_mali_profiling_stream->used_size;
+ current_mali_profiling_stream->used_size = 0;
+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream);
+ } else {
+ break;
+ }
+ }
+ return (ssize_t)copy_len;
+}
+
+static unsigned int _mali_profiling_stream_poll(struct file *filp, poll_table *wait)
+{
+ poll_wait(filp, &stream_fd_wait_queue, wait);
+ if (!_mali_profiling_global_stream_queue_list_if_empty())
+ return POLLIN;
+ return 0;
+}
+
+static int _mali_profiling_stream_release(struct inode *inode, struct file *filp)
+{
+ _mali_osk_atomic_init(&stream_fd_if_used, 0);
+ return 0;
+}
+
+/* Functions for handling control packets and stream data. */
+static void _mali_profiling_set_packet_size(unsigned char *const buf, const u32 size)
+{
+ u32 i;
+
+ for (i = 0; i < sizeof(size); ++i)
+ buf[i] = (size >> 8 * i) & 0xFF;
+}
+
+static u32 _mali_profiling_get_packet_size(unsigned char *const buf)
+{
+ u32 i;
+ u32 size = 0;
+ for (i = 0; i < sizeof(size); ++i)
+ size |= (u32)buf[i] << 8 * i;
+ return size;
+}
+
+static u32 _mali_profiling_read_packet_int(unsigned char *const buf, u32 *const pos, u32 const packet_size)
+{
+ u64 int_value = 0;
+ u8 shift = 0;
+ u8 byte_value = ~0;
+
+ while ((byte_value & 0x80) != 0) {
+ MALI_DEBUG_ASSERT((*pos) < packet_size);
+ byte_value = buf[*pos];
+ *pos += 1;
+ int_value |= (u32)(byte_value & 0x7f) << shift;
+ shift += 7;
+ }
+
+ if (shift < 8 * sizeof(int_value) && (byte_value & 0x40) != 0) {
+ int_value |= -(1 << shift);
+ }
+
+ return int_value;
+}
+
+static u32 _mali_profiling_pack_int(u8 *const buf, u32 const buf_size, u32 const pos, s32 value)
+{
+ u32 add_bytes = 0;
+ int more = 1;
+ while (more) {
+ /* low order 7 bits of val */
+ char byte_value = value & 0x7f;
+ value >>= 7;
+
+ if ((value == 0 && (byte_value & 0x40) == 0) || (value == -1 && (byte_value & 0x40) != 0)) {
+ more = 0;
+ } else {
+ byte_value |= 0x80;
+ }
+
+ MALI_DEBUG_ASSERT((pos + add_bytes) < buf_size);
+ buf[pos + add_bytes] = byte_value;
+ add_bytes++;
+ }
+
+ return add_bytes;
+}
+
+static int _mali_profiling_pack_long(uint8_t *const buf, u32 const buf_size, u32 const pos, s64 val)
+{
+ int add_bytes = 0;
+ int more = 1;
+ while (more) {
+ /* low order 7 bits of x */
+ char byte_value = val & 0x7f;
+ val >>= 7;
+
+ if ((val == 0 && (byte_value & 0x40) == 0) || (val == -1 && (byte_value & 0x40) != 0)) {
+ more = 0;
+ } else {
+ byte_value |= 0x80;
+ }
+
+ MALI_DEBUG_ASSERT((pos + add_bytes) < buf_size);
+ buf[pos + add_bytes] = byte_value;
+ add_bytes++;
+ }
+
+ return add_bytes;
+}
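+
+/* The two packers above emit signed LEB128 variable-length integers: seven
+ * payload bits per byte, bit 7 set on every byte except the last, and bit 6
+ * of the last byte serving as the sign bit. For example, 200 packs to
+ * 0xC8 0x01 and -1 packs to 0x7F; _mali_profiling_read_packet_int()
+ * performs the matching decode.
+ */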
+
+static void _mali_profiling_stream_add_counter(mali_profiling_stream *profiling_stream, s64 current_time, u32 key, u32 counter_value)
+{
+ u32 add_size = STREAM_HEADER_SIZE;
+ MALI_DEBUG_ASSERT_POINTER(profiling_stream);
+ MALI_DEBUG_ASSERT((profiling_stream->used_size) < MALI_PROFILING_STREAM_BUFFER_SIZE);
+
+ profiling_stream->data[profiling_stream->used_size] = STREAM_HEADER_COUNTER_VALUE;
+
+ add_size += _mali_profiling_pack_long(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+ profiling_stream->used_size + add_size, current_time);
+ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+ profiling_stream->used_size + add_size, (s32)0);
+ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+ profiling_stream->used_size + add_size, (s32)key);
+ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE,
+ profiling_stream->used_size + add_size, (s32)counter_value);
+
+ _mali_profiling_set_packet_size(profiling_stream->data + profiling_stream->used_size + 1,
+ add_size - STREAM_HEADER_SIZE);
+
+ profiling_stream->used_size += add_size;
+}
+
+/* The callback function for sampling timer.*/
+static enum hrtimer_restart _mali_profiling_sampling_counters(struct hrtimer *timer)
+{
+ u32 counter_index;
+ s64 current_time;
+ MALI_DEBUG_ASSERT_POINTER(global_mali_profiling_counters);
+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list);
+
+ MALI_DEBUG_ASSERT(NULL == mali_counter_stream);
+ if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+ &global_mali_stream_list->free_list, &mali_counter_stream)) {
+
+ MALI_DEBUG_ASSERT_POINTER(mali_counter_stream);
+ MALI_DEBUG_ASSERT(0 == mali_counter_stream->used_size);
+
+ /* Capture l2 cache counter values if enabled */
+ if (MALI_TRUE == l2_cache_counter_if_enabled) {
+ int i, j = 0;
+ _mali_profiling_l2_counter_values l2_counters_values;
+ _mali_profiling_get_l2_counters(&l2_counters_values);
+
+ for (i = COUNTER_L2_0_C0; i <= COUNTER_L2_2_C1; i++) {
+ if (0 == (j % 2))
+ _mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value0);
+ else
+ _mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value1);
+ j++;
+ }
+ }
+
+ current_time = (s64)_mali_osk_boot_time_get_ns();
+
+ /* Add all enabled counter values into stream */
+ for (counter_index = 0; counter_index < num_global_mali_profiling_counters; counter_index++) {
+ /* No need to sample these counters here. */
+ if (global_mali_profiling_counters[counter_index].enabled) {
+ if ((global_mali_profiling_counters[counter_index].counter_id >= FIRST_MEM_COUNTER &&
+ global_mali_profiling_counters[counter_index].counter_id <= LAST_MEM_COUNTER)
+ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_VP_ACTIVITY)
+ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FP_ACTIVITY)
+ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FILMSTRIP)) {
+
+ continue;
+ }
+
+ if (global_mali_profiling_counters[counter_index].counter_id >= COUNTER_L2_0_C0 &&
+ global_mali_profiling_counters[counter_index].counter_id <= COUNTER_L2_2_C1) {
+
+ u32 prev_val = global_mali_profiling_counters[counter_index].prev_counter_value;
+
+ _mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+ global_mali_profiling_counters[counter_index].current_counter_value - prev_val);
+
+ prev_val = global_mali_profiling_counters[counter_index].current_counter_value;
+
+ global_mali_profiling_counters[counter_index].prev_counter_value = prev_val;
+ } else {
+
+ if (global_mali_profiling_counters[counter_index].counter_id == COUNTER_TOTAL_ALLOC_PAGES) {
+ u32 total_alloc_mem = _mali_ukk_report_memory_usage();
+ global_mali_profiling_counters[counter_index].current_counter_value = total_alloc_mem / _MALI_OSK_MALI_PAGE_SIZE;
+ }
+ _mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key,
+ global_mali_profiling_counters[counter_index].current_counter_value);
+ if (global_mali_profiling_counters[counter_index].counter_id < FIRST_SPECIAL_COUNTER)
+ global_mali_profiling_counters[counter_index].current_counter_value = 0;
+ }
+ }
+ }
+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_counter_stream);
+ mali_counter_stream = NULL;
+ } else {
+ MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+ }
+
+ wake_up_interruptible(&stream_fd_wait_queue);
+
+ /* Enable the sampling timer again */
+ if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+ hrtimer_forward_now(&profiling_sampling_timer, ns_to_ktime(profiling_sample_rate));
+ return HRTIMER_RESTART;
+ }
+ return HRTIMER_NORESTART;
+}
+
+static void _mali_profiling_sampling_core_activity_switch(int counter_id, int core, u32 activity, u32 pid)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&mali_activity_lock, irq_flags);
+ if (activity == 0)
+ mali_activity_cores_num--;
+ else
+ mali_activity_cores_num++;
+ spin_unlock_irqrestore(&mali_activity_lock, irq_flags);
+
+ if (NULL != global_mali_profiling_counters) {
+ int i;
+ for (i = 0; i < num_global_mali_profiling_counters; i++) {
+ if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+ u64 current_time = _mali_osk_boot_time_get_ns();
+ u32 add_size = STREAM_HEADER_SIZE;
+
+ if (NULL != mali_core_activity_stream) {
+ if ((mali_core_activity_stream_dequeue_time + MALI_PROFILING_STREAM_HOLD_TIME < current_time) ||
+ (MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE > MALI_PROFILING_STREAM_BUFFER_SIZE
+ - mali_core_activity_stream->used_size)) {
+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+ mali_core_activity_stream = NULL;
+ wake_up_interruptible(&stream_fd_wait_queue);
+ }
+ }
+
+ if (NULL == mali_core_activity_stream) {
+ if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue(
+ &global_mali_stream_list->free_list, &mali_core_activity_stream)) {
+ mali_core_activity_stream_dequeue_time = current_time;
+ } else {
+ MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n"));
+ wake_up_interruptible(&stream_fd_wait_queue);
+ break;
+ }
+
+ }
+
+ mali_core_activity_stream->data[mali_core_activity_stream->used_size] = STREAM_HEADER_CORE_ACTIVITY;
+
+ add_size += _mali_profiling_pack_long(mali_core_activity_stream->data,
+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s64)current_time);
+ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, core);
+ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s32)global_mali_profiling_counters[i].key);
+ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, activity);
+ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data,
+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, pid);
+
+ _mali_profiling_set_packet_size(mali_core_activity_stream->data + mali_core_activity_stream->used_size + 1,
+ add_size - STREAM_HEADER_SIZE);
+
+ mali_core_activity_stream->used_size += add_size;
+
+ if (0 == mali_activity_cores_num) {
+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream);
+ mali_core_activity_stream = NULL;
+ wake_up_interruptible(&stream_fd_wait_queue);
+ }
+
+ break;
+ }
+ }
+ }
+}
+
+static mali_bool _mali_profiling_global_counters_init(void)
+{
+ int core_id, counter_index, counter_number, counter_id;
+ u32 num_l2_cache_cores;
+ u32 num_pp_cores;
+ u32 num_gp_cores = 1;
+
+ MALI_DEBUG_ASSERT(NULL == global_mali_profiling_counters);
+ num_pp_cores = mali_pp_get_glob_num_pp_cores();
+ num_l2_cache_cores = mali_l2_cache_core_get_glob_num_l2_cores();
+
+ num_global_mali_profiling_counters = 3 * (num_gp_cores + num_pp_cores) + 2 * num_l2_cache_cores
+ + MALI_PROFILING_SW_COUNTERS_NUM
+ + MALI_PROFILING_SPECIAL_COUNTERS_NUM
+ + MALI_PROFILING_MEM_COUNTERS_NUM;
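+ /* Worked example (illustrative): with 1 GP core, 4 PP cores and 2 L2
+ * caches this gives 3 * (1 + 4) + 2 * 2 = 19 hardware counter entries,
+ * plus the SW, special and memory counter blocks appended below. */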
+ global_mali_profiling_counters = _mali_osk_calloc(num_global_mali_profiling_counters, sizeof(mali_profiling_counter));
+
+ if (NULL == global_mali_profiling_counters)
+ return MALI_FALSE;
+
+ counter_index = 0;
+ /* Vertex processor counters */
+ for (core_id = 0; core_id < num_gp_cores; core_id++) {
+ global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_VP_0 + core_id;
+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_active", mali_name, core_id);
+
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ counter_index++;
+ global_mali_profiling_counters[counter_index].counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+ }
+ }
+
+ /* Fragment processors' counters */
+ for (core_id = 0; core_id < num_pp_cores; core_id++) {
+ counter_index++;
+ global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_FP_0 + core_id;
+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_active", mali_name, core_id);
+
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ counter_index++;
+ global_mali_profiling_counters[counter_index].counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+ }
+ }
+
+ /* L2 Cache counters */
+ for (core_id = 0; core_id < num_l2_cache_cores; core_id++) {
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ counter_index++;
+ global_mali_profiling_counters[counter_index].counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+ }
+ }
+
+ /* Now set up the software counter entries */
+ for (counter_id = FIRST_SW_COUNTER; counter_id <= LAST_SW_COUNTER; counter_id++) {
+ counter_index++;
+
+ if (0 == first_sw_counter_index)
+ first_sw_counter_index = counter_index;
+
+ global_mali_profiling_counters[counter_index].counter_id = counter_id;
+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_SW_%d", mali_name, counter_id - FIRST_SW_COUNTER);
+ }
+
+ /* Now set up the special counter entries */
+ for (counter_id = FIRST_SPECIAL_COUNTER; counter_id <= LAST_SPECIAL_COUNTER; counter_id++) {
+
+ counter_index++;
+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+ mali_name, _mali_special_counter_descriptions[counter_id - FIRST_SPECIAL_COUNTER]);
+
+ global_mali_profiling_counters[counter_index].counter_id = counter_id;
+ }
+
+ /* Now set up the mem counter entries */
+ for (counter_id = FIRST_MEM_COUNTER; counter_id <= LAST_MEM_COUNTER; counter_id++) {
+
+ counter_index++;
+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name,
+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s",
+ mali_name, _mali_mem_counter_descriptions[counter_id - FIRST_MEM_COUNTER]);
+
+ global_mali_profiling_counters[counter_index].counter_id = counter_id;
+ }
+
+ MALI_DEBUG_ASSERT((counter_index + 1) == num_global_mali_profiling_counters);
+
+ return MALI_TRUE;
+}
+
+void _mali_profiling_notification_mem_counter(struct mali_session_data *session, u32 counter_id, u32 key, int enable)
+{
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (NULL != session) {
+ _mali_osk_notification_t *notification;
+ _mali_osk_notification_queue_t *queue;
+
+ queue = session->ioctl_queue;
+ MALI_DEBUG_ASSERT(NULL != queue);
+
+ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER,
+ sizeof(_mali_uk_annotate_profiling_mem_counter_s));
+
+ if (NULL != notification) {
+ _mali_uk_annotate_profiling_mem_counter_s *data = notification->result_buffer;
+ data->counter_id = counter_id;
+ data->key = key;
+ data->enable = enable;
+
+ _mali_osk_notification_queue_send(queue, notification);
+ } else {
+ MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+ }
+ } else {
+ MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+ }
+}
+
+void _mali_profiling_notification_enable(struct mali_session_data *session, u32 sampling_rate, int enable)
+{
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (NULL != session) {
+ _mali_osk_notification_t *notification;
+ _mali_osk_notification_queue_t *queue;
+
+ queue = session->ioctl_queue;
+ MALI_DEBUG_ASSERT(NULL != queue);
+
+ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE,
+ sizeof(_mali_uk_annotate_profiling_enable_s));
+
+ if (NULL != notification) {
+ _mali_uk_annotate_profiling_enable_s *data = notification->result_buffer;
+ data->sampling_rate = sampling_rate;
+ data->enable = enable;
+
+ _mali_osk_notification_queue_send(queue, notification);
+ } else {
+ MALI_PRINT_ERROR(("Failed to create notification object!\n"));
+ }
+ } else {
+ MALI_PRINT_ERROR(("Failed to find the right session!\n"));
+ }
+}
+
+
+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start)
+{
+ int i;
+ mali_profiling_stream *new_mali_profiling_stream = NULL;
+ mali_profiling_stream_list *new_mali_profiling_stream_list = NULL;
+ if (MALI_TRUE == auto_start) {
+ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+ }
+
+ /* Initialize the global_mali_stream_list. */
+ MALI_DEBUG_ASSERT(NULL == global_mali_stream_list);
+ new_mali_profiling_stream_list = (mali_profiling_stream_list *)kmalloc(sizeof(mali_profiling_stream_list), GFP_KERNEL);
+
+ if (NULL == new_mali_profiling_stream_list) {
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ spin_lock_init(&new_mali_profiling_stream_list->spin_lock);
+ INIT_LIST_HEAD(&new_mali_profiling_stream_list->free_list);
+ INIT_LIST_HEAD(&new_mali_profiling_stream_list->queue_list);
+
+ spin_lock_init(&mali_activity_lock);
+ mali_activity_cores_num = 0;
+
+ for (i = 0; i < MALI_PROFILING_STREAM_BUFFER_NUM; i++) {
+ new_mali_profiling_stream = (mali_profiling_stream *)kmalloc(sizeof(mali_profiling_stream), GFP_KERNEL);
+ if (NULL == new_mali_profiling_stream) {
+ _mali_profiling_stream_list_destory(new_mali_profiling_stream_list);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ INIT_LIST_HEAD(&new_mali_profiling_stream->list);
+ new_mali_profiling_stream->used_size = 0;
+ list_add_tail(&new_mali_profiling_stream->list, &new_mali_profiling_stream_list->free_list);
+ }
+
+ _mali_osk_atomic_init(&stream_fd_if_used, 0);
+ init_waitqueue_head(&stream_fd_wait_queue);
+
+ hrtimer_init(&profiling_sampling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+ profiling_sampling_timer.function = _mali_profiling_sampling_counters;
+
+ global_mali_stream_list = new_mali_profiling_stream_list;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_profiling_term(void)
+{
+ if (0 != profiling_sample_rate) {
+ hrtimer_cancel(&profiling_sampling_timer);
+ profiling_sample_rate = 0;
+ }
+ _mali_osk_atomic_term(&stream_fd_if_used);
+
+ if (NULL != global_mali_profiling_counters) {
+ _mali_osk_free(global_mali_profiling_counters);
+ global_mali_profiling_counters = NULL;
+ num_global_mali_profiling_counters = 0;
+ }
+
+ if (NULL != global_mali_stream_list) {
+ _mali_profiling_stream_list_destory(global_mali_stream_list);
+ global_mali_stream_list = NULL;
+ }
+
+}
+
+void _mali_osk_profiling_stop_sampling(u32 pid)
+{
+ if (pid == current_profiling_pid) {
+
+ int i;
+ /* Reset all counter states when closing the connection. */
+ for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+ _mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+ global_mali_profiling_counters[i].enabled = 0;
+ global_mali_profiling_counters[i].prev_counter_value = 0;
+ global_mali_profiling_counters[i].current_counter_value = 0;
+ }
+ l2_cache_counter_if_enabled = MALI_FALSE;
+ num_counters_enabled = 0;
+ mem_counters_enabled = 0;
+ _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+ _mali_profiling_control(SW_COUNTER_ENABLE, 0);
+ /* Delete sampling timer when closing connection. */
+ if (0 != profiling_sample_rate) {
+ hrtimer_cancel(&profiling_sampling_timer);
+ profiling_sample_rate = 0;
+ }
+ current_profiling_pid = 0;
+ }
+}
+
+void _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+ /* Record the frequency & voltage in global_mali_profiling_counters here. */
+ if (0 != profiling_sample_rate) {
+ u32 channel;
+ u32 state;
+ channel = (event_id >> 16) & 0xFF;
+ state = ((event_id >> 24) & 0xF) << 24;
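+ /* Illustrative note: the channel index lives in bits 16..23 of event_id
+ * and the event type in bits 24..27; state shifts the type back into
+ * place so it can be compared against the MALI_PROFILING_EVENT_TYPE_*
+ * values in the switch below. */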
+
+ switch (state) {
+ case MALI_PROFILING_EVENT_TYPE_SINGLE:
+ if ((MALI_PROFILING_EVENT_CHANNEL_GPU >> 16) == channel) {
+ u32 reason = (event_id & 0xFFFF);
+ if (MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE == reason) {
+ _mali_osk_profiling_record_global_counters(COUNTER_FREQUENCY, data0);
+ _mali_osk_profiling_record_global_counters(COUNTER_VOLTAGE, data1);
+ }
+ }
+ break;
+ case MALI_PROFILING_EVENT_TYPE_START:
+ if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+ _mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 1, data1);
+ } else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+ (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+ u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+ _mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 1, data1);
+ }
+ break;
+ case MALI_PROFILING_EVENT_TYPE_STOP:
+ if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) {
+ _mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 0, 0);
+ } else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) &&
+ (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) {
+ u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16);
+ _mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 0, 0);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ trace_mali_timeline_event(event_id, data0, data1, data2, data3, data4);
+}
+
+void _mali_osk_profiling_report_sw_counters(u32 *counters)
+{
+ trace_mali_sw_counters(_mali_osk_get_pid(), _mali_osk_get_tid(), NULL, counters);
+}
+
+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value)
+{
+ if (NULL != global_mali_profiling_counters) {
+ int i;
+ for (i = 0; i < num_global_mali_profiling_counters; i++) {
+ if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) {
+ global_mali_profiling_counters[i].current_counter_value = value;
+ break;
+ }
+ }
+ }
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args)
+{
+ /* Always add the process and thread identifiers in the first two data elements for events from user space. */
+ _mali_osk_profiling_add_event(args->event_id, _mali_osk_get_pid(), _mali_osk_get_tid(), args->data[2], args->data[3], args->data[4]);
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args)
+{
+ u32 *counters = (u32 *)(uintptr_t)args->counters;
+
+ _mali_osk_profiling_report_sw_counters(counters);
+
+ if (NULL != global_mali_profiling_counters) {
+ int i;
+ for (i = 0; i < MALI_PROFILING_SW_COUNTERS_NUM; i++) {
+ if (global_mali_profiling_counters[first_sw_counter_index + i].enabled) {
+ global_mali_profiling_counters[first_sw_counter_index + i].current_counter_value = *(counters + i);
+ }
+ }
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args)
+{
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (1 == _mali_osk_atomic_inc_return(&stream_fd_if_used)) {
+
+ s32 fd = anon_inode_getfd("[mali_profiling_stream]", &mali_profiling_stream_fops,
+ session,
+ O_RDONLY | O_CLOEXEC);
+
+ if (0 > fd) {
+ _mali_osk_atomic_dec(&stream_fd_if_used);
+ return _MALI_OSK_ERR_FAULT;
+ }
+ args->stream_fd = fd;
+ } else {
+ _mali_osk_atomic_dec(&stream_fd_if_used);
+ args->stream_fd = -1;
+ return _MALI_OSK_ERR_BUSY;
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args)
+{
+ u32 control_packet_size;
+ u32 output_buffer_size;
+
+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx;
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (NULL == global_mali_profiling_counters && MALI_FALSE == _mali_profiling_global_counters_init()) {
+ MALI_PRINT_ERROR(("Failed to create global_mali_profiling_counters.\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ control_packet_size = args->control_packet_size;
+ output_buffer_size = args->response_packet_size;
+
+ if (0 != control_packet_size) {
+ u8 control_type;
+ u8 *control_packet_data;
+ u8 *response_packet_data;
+ u32 version_length = sizeof(utgard_setup_version) - 1;
+
+ control_packet_data = (u8 *)(uintptr_t)args->control_packet_data;
+ MALI_DEBUG_ASSERT_POINTER(control_packet_data);
+ response_packet_data = (u8 *)(uintptr_t)args->response_packet_data;
+ MALI_DEBUG_ASSERT_POINTER(response_packet_data);
+
+ /* Decide whether the Utgard setup version prefix should be stripped. */
+ if (control_packet_size >= version_length) {
+ if (0 == memcmp(control_packet_data, utgard_setup_version, version_length)) {
+ if (control_packet_size == version_length) {
+ args->response_packet_size = 0;
+ return _MALI_OSK_ERR_OK;
+ } else {
+ control_packet_data += version_length;
+ control_packet_size -= version_length;
+ }
+ }
+ }
+
+ current_profiling_pid = _mali_osk_get_pid();
+
+ control_type = control_packet_data[0];
+ switch (control_type) {
+ case PACKET_HEADER_COUNTERS_REQUEST: {
+ int i;
+
+ if (PACKET_HEADER_SIZE > control_packet_size ||
+ control_packet_size != _mali_profiling_get_packet_size(control_packet_data + 1)) {
+ MALI_PRINT_ERROR(("Wrong control packet size, type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Send supported counters */
+ *response_packet_data = PACKET_HEADER_COUNTERS_ACK;
+ args->response_packet_size = PACKET_HEADER_SIZE;
+
+ for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+ u32 name_size = strlen(global_mali_profiling_counters[i].counter_name);
+
+ if ((args->response_packet_size + name_size + 1) > output_buffer_size) {
+ MALI_PRINT_ERROR(("Response packet data is too large..\n"));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ memcpy(response_packet_data + args->response_packet_size,
+ global_mali_profiling_counters[i].counter_name, name_size + 1);
+
+ args->response_packet_size += (name_size + 1);
+
+ if (global_mali_profiling_counters[i].counter_id == COUNTER_VP_ACTIVITY) {
+ args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+ output_buffer_size, args->response_packet_size, (s32)1);
+ } else if (global_mali_profiling_counters[i].counter_id == COUNTER_FP_ACTIVITY) {
+ args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+ output_buffer_size, args->response_packet_size, (s32)mali_pp_get_glob_num_pp_cores());
+ } else {
+ args->response_packet_size += _mali_profiling_pack_int(response_packet_data,
+ output_buffer_size, args->response_packet_size, (s32)-1);
+ }
+ }
+
+ _mali_profiling_set_packet_size(response_packet_data + 1, args->response_packet_size);
+ break;
+ }
+
+ case PACKET_HEADER_COUNTERS_ENABLE: {
+ int i;
+ u32 request_pos = PACKET_HEADER_SIZE;
+ mali_bool sw_counter_if_enabled = MALI_FALSE;
+
+ if (PACKET_HEADER_SIZE > control_packet_size ||
+ control_packet_size != _mali_profiling_get_packet_size(control_packet_data + 1)) {
+ MALI_PRINT_ERROR(("Wrong control packet size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Reset all counter states before enabling the requested counters. */
+ for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+ _mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER);
+ global_mali_profiling_counters[i].enabled = 0;
+ global_mali_profiling_counters[i].prev_counter_value = 0;
+ global_mali_profiling_counters[i].current_counter_value = 0;
+
+ if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+ global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+ _mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id, 0, 0);
+ }
+ }
+
+ l2_cache_counter_if_enabled = MALI_FALSE;
+ num_counters_enabled = 0;
+ mem_counters_enabled = 0;
+ _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0);
+ _mali_profiling_control(SW_COUNTER_ENABLE, 0);
+ _mali_profiling_notification_enable(session, 0, 0);
+
+ /* Enable requested counters */
+ while (request_pos < control_packet_size) {
+ u32 begin = request_pos;
+ u32 event;
+ u32 key;
+
+ while (request_pos < control_packet_size && control_packet_data[request_pos] != '\0') {
+ ++request_pos;
+ }
+
+ ++request_pos;
+ event = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+ key = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+ for (i = 0; i < num_global_mali_profiling_counters; ++i) {
+ u32 name_size = strlen((char *)(control_packet_data + begin));
+ if (strncmp(global_mali_profiling_counters[i].counter_name, (char *)(control_packet_data + begin), name_size) == 0) {
+ if (!sw_counter_if_enabled && (FIRST_SW_COUNTER <= global_mali_profiling_counters[i].counter_id
+ && global_mali_profiling_counters[i].counter_id <= LAST_SW_COUNTER)) {
+ sw_counter_if_enabled = MALI_TRUE;
+ _mali_profiling_control(SW_COUNTER_ENABLE, 1);
+ }
+
+ if (COUNTER_FILMSTRIP == global_mali_profiling_counters[i].counter_id) {
+ _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);
+ _mali_profiling_control(FBDUMP_CONTROL_RATE, event & 0xff);
+ _mali_profiling_control(FBDUMP_CONTROL_RESIZE_FACTOR, (event >> 8) & 0xff);
+ }
+
+ if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER &&
+ global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) {
+ _mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id,
+ key, 1);
+ mem_counters_enabled++;
+ }
+
+ global_mali_profiling_counters[i].counter_event = event;
+ global_mali_profiling_counters[i].key = key;
+ global_mali_profiling_counters[i].enabled = 1;
+
+ _mali_profiling_set_event(global_mali_profiling_counters[i].counter_id,
+ global_mali_profiling_counters[i].counter_event);
+ num_counters_enabled++;
+ break;
+ }
+ }
+
+ if (i == num_global_mali_profiling_counters) {
+ MALI_PRINT_ERROR(("Counter name does not match for type %u.\n", control_type));
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+
+ if (PACKET_HEADER_SIZE <= output_buffer_size) {
+ *response_packet_data = PACKET_HEADER_ACK;
+ _mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+ args->response_packet_size = PACKET_HEADER_SIZE;
+ } else {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ break;
+ }
+
+ case PACKET_HEADER_START_CAPTURE_VALUE: {
+ u32 live_rate;
+ u32 request_pos = PACKET_HEADER_SIZE;
+
+ if (PACKET_HEADER_SIZE > control_packet_size ||
+ control_packet_size != _mali_profiling_get_packet_size(control_packet_data + 1)) {
+ MALI_PRINT_ERROR(("Wrong control packet size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size));
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ /* Read the sampling rate in nanoseconds and the live rate, then start the capture. */
+ profiling_sample_rate = _mali_profiling_read_packet_int(control_packet_data,
+ &request_pos, control_packet_size);
+
+ live_rate = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size);
+
+ if (PACKET_HEADER_SIZE <= output_buffer_size) {
+ *response_packet_data = PACKET_HEADER_ACK;
+ _mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE);
+ args->response_packet_size = PACKET_HEADER_SIZE;
+ } else {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ if (0 != num_counters_enabled && 0 != profiling_sample_rate) {
+ _mali_profiling_global_stream_list_free();
+ if (mem_counters_enabled > 0) {
+ _mali_profiling_notification_enable(session, profiling_sample_rate, 1);
+ }
+ hrtimer_start(&profiling_sampling_timer,
+ ktime_set(profiling_sample_rate / 1000000000, profiling_sample_rate % 1000000000),
+ HRTIMER_MODE_REL_PINNED);
+ }
+
+ break;
+ }
+ default:
+ MALI_PRINT_ERROR(("Unsupported profiling packet header type %u.\n", control_type));
+ args->response_packet_size = 0;
+ return _MALI_OSK_ERR_FAULT;
+ }
+ } else {
+ _mali_osk_profiling_stop_sampling(current_profiling_pid);
+ _mali_profiling_notification_enable(session, 0, 0);
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+/**
+ * Called by gator.ko to set HW counters
+ *
+ * @param counter_id The counter ID.
+ * @param event_id Event ID that the counter should count (HW counter value from TRM).
+ *
+ * @return 1 on success, 0 on failure.
+ */
+int _mali_profiling_set_event(u32 counter_id, s32 event_id)
+{
+ if (COUNTER_VP_0_C0 == counter_id) {
+ mali_gp_job_set_gp_counter_src0(event_id);
+ } else if (COUNTER_VP_0_C1 == counter_id) {
+ mali_gp_job_set_gp_counter_src1(event_id);
+ } else if (COUNTER_FP_0_C0 <= counter_id && COUNTER_FP_7_C1 >= counter_id) {
+ /*
+ * Two compatibility notes for this function:
+ *
+ * 1) Previously the DDK allowed per core counters.
+ *
+ * This did not make much sense on Mali-450 with the "virtual PP core" concept,
+ * so this option was removed, and only the same pair of HW counters was allowed on all cores,
+ * beginning with the r3p2 release.
+ *
+ * Starting with r4p0, it is now possible to set different HW counters for the different sub jobs.
+ * This should be almost the same, since sub job 0 is designed to run on core 0,
+ * sub job 1 on core 1, and so on.
+ *
+ * The scheduling of PP sub jobs is not predictable, and this often led to situations where core 0 ran 2
+ * sub jobs, while for instance core 1 ran zero. Having the counters set per sub job would thus increase
+ * the predictability of the returned data (as you would be guaranteed data for all the selected HW counters).
+ *
+ * PS: Core scaling needs to be disabled in order to use this reliably (goes for both solutions).
+ *
+ * The framework/#defines shared with Gator still indicate that the counter is for a particular core,
+ * but the value is used internally as a sub job ID instead (no translation needed).
+ *
+ * 2) Global/default vs per sub job counters
+ *
+ * Releases before r3p2 had only per PP core counters.
+ * r3p2 releases had only one set of default/global counters, which applied to all PP cores.
+ * Starting with r4p0, we have both a set of default/global counters,
+ * and individual counters per sub job (equal to per core).
+ *
+ * To keep compatibility with Gator/DS-5/streamline, the following scheme is used:
+ *
+ * r3p2 release: only the counters set for core 0 are handled;
+ * these are applied as the default/global set of counters, and will thus affect all cores.
+ *
+ * r4p0 release: counters set for core 0 are applied as both the global/default set of counters
+ * and the counters for sub job 0.
+ * Counters set for cores 1-7 are only applied to the corresponding sub job.
+ *
+ * This should allow the DS-5/Streamline GUI to have a simple mode where it only allows setting the
+ * values for core 0, and thus this will be applied to all PP sub jobs/cores.
+ * Advanced mode will also be supported, where individual pairs of HW counters can be selected.
+ *
+ * The GUI will (until it is updated) still refer to cores instead of sub jobs, but this is probably
+ * something we can live with!
+ *
+ * Mali-450 note: Each job is not divided into a deterministic number of sub jobs, as the HW DLBU
+ * automatically distributes the load between whatever number of cores is available at this particular time.
+ * A normal PP job on Mali-450 is thus considered a single (virtual) job, and it will thus only be possible
+ * to use a single pair of HW counters (even if the job ran on multiple PP cores).
+ * In other words, only the global/default pair of PP HW counters will be used for normal Mali-450 jobs.
+ */
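+ /* Worked example (illustrative): counter_id == COUNTER_FP_2_C1 yields
+ * sub_job = (COUNTER_FP_2_C1 - COUNTER_FP_0_C0) >> 1 == 2 and
+ * counter_src = (COUNTER_FP_2_C1 - COUNTER_FP_0_C0) & 1 == 1, so the
+ * event is programmed as counter source 1 of PP sub job 2; only
+ * sub job 0 additionally updates the global/default counter pair. */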
+ u32 sub_job = (counter_id - COUNTER_FP_0_C0) >> 1;
+ u32 counter_src = (counter_id - COUNTER_FP_0_C0) & 1;
+ if (0 == counter_src) {
+ mali_pp_job_set_pp_counter_sub_job_src0(sub_job, event_id);
+ if (0 == sub_job) {
+ mali_pp_job_set_pp_counter_global_src0(event_id);
+ }
+ } else {
+ mali_pp_job_set_pp_counter_sub_job_src1(sub_job, event_id);
+ if (0 == sub_job) {
+ mali_pp_job_set_pp_counter_global_src1(event_id);
+ }
+ }
+ } else if (COUNTER_L2_0_C0 <= counter_id && COUNTER_L2_2_C1 >= counter_id) {
+ u32 core_id = (counter_id - COUNTER_L2_0_C0) >> 1;
+ struct mali_l2_cache_core *l2_cache_core = mali_l2_cache_core_get_glob_l2_core(core_id);
+
+ if (NULL != l2_cache_core) {
+ u32 counter_src = (counter_id - COUNTER_L2_0_C0) & 1;
+ mali_l2_cache_core_set_counter_src(l2_cache_core,
+ counter_src, event_id);
+ l2_cache_counter_if_enabled = MALI_TRUE;
+ }
+ } else {
+ return 0; /* Failure, unknown event */
+ }
+
+ return 1; /* success */
+}
+
+/**
+ * Called by gator.ko to retrieve the L2 cache counter values for all L2 cache cores.
+ * The L2 cache counters are unique in that they are polled by gator, rather than being
+ * transmitted via the tracepoint mechanism.
+ *
+ * @param values Pointer to a _mali_profiling_l2_counter_values structure where
+ * the counter sources and values will be output
+ * @return 0 if all went well; otherwise, the mask with bits set for the powered-off cores (this implementation always returns 0).
+ */
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values)
+{
+ u32 l2_cores_num = mali_l2_cache_core_get_glob_num_l2_cores();
+ u32 i;
+
+ MALI_DEBUG_ASSERT(l2_cores_num <= 3);
+
+ for (i = 0; i < l2_cores_num; i++) {
+ struct mali_l2_cache_core *l2_cache = mali_l2_cache_core_get_glob_l2_core(i);
+
+ if (NULL == l2_cache) {
+ continue;
+ }
+
+ mali_l2_cache_core_get_counter_values(l2_cache,
+ &values->cores[i].source0,
+ &values->cores[i].value0,
+ &values->cores[i].source1,
+ &values->cores[i].value1);
+ }
+
+ return 0;
+}
+
+/**
+ * Called by gator to control the production of profiling information at runtime.
+ */
+void _mali_profiling_control(u32 action, u32 value)
+{
+ switch (action) {
+ case FBDUMP_CONTROL_ENABLE:
+ mali_set_user_setting(_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, (value == 0 ? MALI_FALSE : MALI_TRUE));
+ break;
+ case FBDUMP_CONTROL_RATE:
+ mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, value);
+ break;
+ case SW_COUNTER_ENABLE:
+ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, value);
+ break;
+ case FBDUMP_CONTROL_RESIZE_FACTOR:
+ mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, value);
+ break;
+ default:
+ break; /* Ignore unimplemented actions */
+ }
+}
+
+/**
+ * Called by gator to get the Mali API version.
+ */
+u32 _mali_profiling_get_api_version(void)
+{
+ return MALI_PROFILING_API_VERSION;
+}
+
+/**
+ * Called by gator to get data about the Mali instance in use:
+ * product id, version, number of cores.
+ */
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values)
+{
+ values->mali_product_id = (u32)mali_kernel_core_get_product_id();
+ values->mali_version_major = mali_kernel_core_get_gpu_major_version();
+ values->mali_version_minor = mali_kernel_core_get_gpu_minor_version();
+ values->num_of_l2_cores = mali_l2_cache_core_get_glob_num_l2_cores();
+ values->num_of_fp_cores = mali_executor_get_num_cores_total();
+ values->num_of_vp_cores = 1;
+}
+
+
+EXPORT_SYMBOL(_mali_profiling_set_event);
+EXPORT_SYMBOL(_mali_profiling_get_l2_counters);
+EXPORT_SYMBOL(_mali_profiling_control);
+EXPORT_SYMBOL(_mali_profiling_get_api_version);
+EXPORT_SYMBOL(_mali_profiling_get_mali_version);
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_specific.h b/drivers/gpu/arm/utgard/linux/mali_osk_specific.h
new file mode 100644
index 000000000000..db034a5b3c70
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_specific.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_specific.h
+ * Defines per-OS Kernel level specifics, such as unusual workarounds for
+ * certain OSs.
+ */
+
+#ifndef __MALI_OSK_SPECIFIC_H__
+#define __MALI_OSK_SPECIFIC_H__
+
+#include <asm/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/hardirq.h>
+
+
+#include "mali_osk_types.h"
+#include "mali_kernel_linux.h"
+
+#define MALI_STATIC_INLINE static inline
+#define MALI_NON_STATIC_INLINE inline
+
+typedef struct dma_pool *mali_dma_pool;
+
+typedef u32 mali_dma_addr;
+
+#if MALI_ENABLE_CPU_CYCLES
+/* Reads out the clock cycle performance counter of the current CPU.
+ It is useful for cost-free (2 cycle) measurement of the time spent
+ in a code path. Sample before and after, then take the difference in
+ cycles. The counter does not advance while the CPU is idle, so the
+ result is accurate when only spin-locks are used, but mutexes may lead
+ to values that are too low since the CPU might idle while waiting for
+ the mutex to become available. The clock source is configured on the
+ CPU during Mali module load, but will not give useful output after a
+ CPU has been power cycled. It is therefore important to configure the
+ system not to turn off the CPU cores when using this functionality. */
+static inline unsigned int mali_get_cpu_cyclecount(void)
+{
+ unsigned int value;
+ /* Reading the CCNT Register - CPU clock counter */
+ asm volatile("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value));
+ return value;
+}
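+
+/*
+ * Illustrative usage sketch (assumes the counters were configured via
+ * mali_init_cpu_time_counters() and the core has not been power cycled):
+ *
+ * unsigned int start = mali_get_cpu_cyclecount();
+ * ... code under measurement ...
+ * unsigned int cycles = mali_get_cpu_cyclecount() - start;
+ */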
+
+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64);
+#endif
+
+
+MALI_STATIC_INLINE u32 _mali_osk_copy_from_user(void *to, void *from, u32 n)
+{
+ return (u32)copy_from_user(to, from, (unsigned long)n);
+}
+
+MALI_STATIC_INLINE mali_bool _mali_osk_in_atomic(void)
+{
+ return in_atomic();
+}
+
+#define _mali_osk_put_user(x, ptr) put_user(x, ptr)
+
+#endif /* __MALI_OSK_SPECIFIC_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_time.c b/drivers/gpu/arm/utgard/linux/mali_osk_time.c
new file mode 100644
index 000000000000..4deaa101e48f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_time.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2010, 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_time.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include "mali_osk.h"
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/delay.h>
+
+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb)
+{
+ return time_after_eq(ticka, tickb) ?
+ MALI_TRUE : MALI_FALSE;
+}
+
+unsigned long _mali_osk_time_mstoticks(u32 ms)
+{
+ return msecs_to_jiffies(ms);
+}
+
+u32 _mali_osk_time_tickstoms(unsigned long ticks)
+{
+ return jiffies_to_msecs(ticks);
+}
+
+unsigned long _mali_osk_time_tickcount(void)
+{
+ return jiffies;
+}
+
+void _mali_osk_time_ubusydelay(u32 usecs)
+{
+ udelay(usecs);
+}
+
+u64 _mali_osk_time_get_ns(void)
+{
+ struct timespec tsval;
+ getnstimeofday(&tsval);
+ return (u64)timespec_to_ns(&tsval);
+}
+
+u64 _mali_osk_boot_time_get_ns(void)
+{
+ struct timespec tsval;
+ get_monotonic_boottime(&tsval);
+ return (u64)timespec_to_ns(&tsval);
+}
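+
+/* Note (illustrative): _mali_osk_time_get_ns() returns wall-clock time and
+ * can jump if the system clock is set, while _mali_osk_boot_time_get_ns()
+ * is monotonic and keeps counting across suspend, which is presumably why
+ * the profiling stream code in this patch timestamps with the boot-time
+ * variant. */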
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_timers.c b/drivers/gpu/arm/utgard/linux/mali_osk_timers.c
new file mode 100644
index 000000000000..6bbaee749d64
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_timers.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_timers.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_timer_t_struct {
+ struct timer_list timer;
+};
+
+typedef void (*timer_timeout_function_t)(unsigned long);
+
+_mali_osk_timer_t *_mali_osk_timer_init(void)
+{
+ _mali_osk_timer_t *t = (_mali_osk_timer_t *)kmalloc(sizeof(_mali_osk_timer_t), GFP_KERNEL);
+ if (NULL != t) init_timer(&t->timer);
+ return t;
+}
+
+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+ MALI_DEBUG_ASSERT_POINTER(tim);
+ tim->timer.expires = jiffies + ticks_to_expire;
+ add_timer(&(tim->timer));
+}
+
+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire)
+{
+ MALI_DEBUG_ASSERT_POINTER(tim);
+ mod_timer(&(tim->timer), jiffies + ticks_to_expire);
+}
+
+void _mali_osk_timer_del(_mali_osk_timer_t *tim)
+{
+ MALI_DEBUG_ASSERT_POINTER(tim);
+ del_timer_sync(&(tim->timer));
+}
+
+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim)
+{
+ MALI_DEBUG_ASSERT_POINTER(tim);
+ del_timer(&(tim->timer));
+}
+
+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim)
+{
+ MALI_DEBUG_ASSERT_POINTER(tim);
+ return 1 == timer_pending(&(tim->timer));
+}
+
+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data)
+{
+ MALI_DEBUG_ASSERT_POINTER(tim);
+ tim->timer.data = (unsigned long)data;
+ tim->timer.function = (timer_timeout_function_t)callback;
+}
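+
+/*
+ * Illustrative usage sketch of this timer wrapper API (my_timeout_cb and
+ * my_data are hypothetical):
+ *
+ * _mali_osk_timer_t *t = _mali_osk_timer_init();
+ * if (NULL != t) {
+ * _mali_osk_timer_setcallback(t, my_timeout_cb, my_data);
+ * _mali_osk_timer_add(t, _mali_osk_time_mstoticks(10));
+ * ... later: _mali_osk_timer_del(t) waits for a running callback
+ * to finish, then _mali_osk_timer_term(t) frees the timer ...
+ * }
+ */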
+
+void _mali_osk_timer_term(_mali_osk_timer_t *tim)
+{
+ MALI_DEBUG_ASSERT_POINTER(tim);
+ kfree(tim);
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_wait_queue.c b/drivers/gpu/arm/utgard/linux/mali_osk_wait_queue.c
new file mode 100644
index 000000000000..15d5ce250eb1
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_wait_queue.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wait_queue.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+
+struct _mali_osk_wait_queue_t_struct {
+ wait_queue_head_t wait_queue;
+};
+
+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void)
+{
+ _mali_osk_wait_queue_t *ret = NULL;
+
+ ret = kmalloc(sizeof(_mali_osk_wait_queue_t), GFP_KERNEL);
+
+ if (NULL == ret) {
+ return ret;
+ }
+
+ init_waitqueue_head(&ret->wait_queue);
+ MALI_DEBUG_ASSERT(!waitqueue_active(&ret->wait_queue));
+
+ return ret;
+}
+
+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data)
+{
+ MALI_DEBUG_ASSERT_POINTER(queue);
+ MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+ wait_event(queue->wait_queue, condition(data));
+}
+
+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout)
+{
+ MALI_DEBUG_ASSERT_POINTER(queue);
+ MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue));
+ wait_event_timeout(queue->wait_queue, condition(data), _mali_osk_time_mstoticks(timeout));
+}
+
+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue)
+{
+ MALI_DEBUG_ASSERT_POINTER(queue);
+
+ /* if queue is empty, don't attempt to wake up its elements */
+ if (!waitqueue_active(&queue->wait_queue)) return;
+
+ MALI_DEBUG_PRINT(6, ("Waking up elements in wait queue %p ....\n", queue));
+
+ wake_up_all(&queue->wait_queue);
+
+ MALI_DEBUG_PRINT(6, ("... elements in wait queue %p woken up\n", queue));
+}
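+
+/*
+ * Illustrative usage sketch (my_condition and my_data are hypothetical):
+ * a waiter blocks until the condition callback returns MALI_TRUE for the
+ * supplied data, and a producer wakes all waiters once the condition has
+ * been made true.
+ *
+ * Waiter: _mali_osk_wait_queue_wait_event(queue, my_condition, my_data);
+ * Producer: _mali_osk_wait_queue_wake_up(queue);
+ */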
+
+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue)
+{
+ /* Parameter validation */
+ MALI_DEBUG_ASSERT_POINTER(queue);
+
+ /* Linux requires no explicit termination of wait queues */
+ kfree(queue);
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_osk_wq.c b/drivers/gpu/arm/utgard/linux/mali_osk_wq.c
new file mode 100644
index 000000000000..2c34c91a7922
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_osk_wq.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_osk_wq.c
+ * Implementation of the OS abstraction layer for the kernel device driver
+ */
+
+#include <linux/slab.h> /* For memory allocation */
+#include <linux/workqueue.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_kernel_license.h"
+#include "mali_kernel_linux.h"
+
+typedef struct _mali_osk_wq_work_s {
+ _mali_osk_wq_work_handler_t handler;
+ void *data;
+ mali_bool high_pri;
+ struct work_struct work_handle;
+} mali_osk_wq_work_object_t;
+
+typedef struct _mali_osk_wq_delayed_work_s {
+ _mali_osk_wq_work_handler_t handler;
+ void *data;
+ struct delayed_work work;
+} mali_osk_wq_delayed_work_object_t;
+
+#if MALI_LICENSE_IS_GPL
+static struct workqueue_struct *mali_wq_normal = NULL;
+static struct workqueue_struct *mali_wq_high = NULL;
+#endif
+
+static void _mali_osk_wq_work_func(struct work_struct *work);
+
+_mali_osk_errcode_t _mali_osk_wq_init(void)
+{
+#if MALI_LICENSE_IS_GPL
+ MALI_DEBUG_ASSERT(NULL == mali_wq_normal);
+ MALI_DEBUG_ASSERT(NULL == mali_wq_high);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
+ mali_wq_normal = alloc_workqueue("mali", WQ_UNBOUND, 0);
+ mali_wq_high = alloc_workqueue("mali_high_pri", WQ_HIGHPRI | WQ_UNBOUND, 0);
+#else
+ mali_wq_normal = create_workqueue("mali");
+ mali_wq_high = create_workqueue("mali_high_pri");
+#endif
+ if (NULL == mali_wq_normal || NULL == mali_wq_high) {
+ MALI_PRINT_ERROR(("Unable to create Mali workqueues\n"));
+
+ if (mali_wq_normal) destroy_workqueue(mali_wq_normal);
+ if (mali_wq_high) destroy_workqueue(mali_wq_high);
+
+ mali_wq_normal = NULL;
+ mali_wq_high = NULL;
+
+ return _MALI_OSK_ERR_FAULT;
+ }
+#endif /* MALI_LICENSE_IS_GPL */
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void _mali_osk_wq_flush(void)
+{
+#if MALI_LICENSE_IS_GPL
+ flush_workqueue(mali_wq_high);
+ flush_workqueue(mali_wq_normal);
+#else
+ flush_scheduled_work();
+#endif
+}
+
+void _mali_osk_wq_term(void)
+{
+#if MALI_LICENSE_IS_GPL
+ MALI_DEBUG_ASSERT(NULL != mali_wq_normal);
+ MALI_DEBUG_ASSERT(NULL != mali_wq_high);
+
+ flush_workqueue(mali_wq_normal);
+ destroy_workqueue(mali_wq_normal);
+
+ flush_workqueue(mali_wq_high);
+ destroy_workqueue(mali_wq_high);
+
+ mali_wq_normal = NULL;
+ mali_wq_high = NULL;
+#else
+ flush_scheduled_work();
+#endif
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+ mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+ if (NULL == work) return NULL;
+
+ work->handler = handler;
+ work->data = data;
+ work->high_pri = MALI_FALSE;
+
+ INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+ return work;
+}
+
+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data)
+{
+ mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL);
+
+ if (NULL == work) return NULL;
+
+ work->handler = handler;
+ work->data = data;
+ work->high_pri = MALI_TRUE;
+
+ INIT_WORK(&work->work_handle, _mali_osk_wq_work_func);
+
+ return work;
+}
+
+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work)
+{
+ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+ _mali_osk_wq_flush();
+ kfree(work_object);
+}
+
+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work)
+{
+ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+ kfree(work_object);
+}
+
+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work)
+{
+ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+ queue_work(mali_wq_normal, &work_object->work_handle);
+#else
+ schedule_work(&work_object->work_handle);
+#endif
+}
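+
+/*
+ * Illustrative usage sketch (my_handler and my_data are hypothetical):
+ *
+ * _mali_osk_wq_work_t *w = _mali_osk_wq_create_work(my_handler, my_data);
+ * if (NULL != w) {
+ * _mali_osk_wq_schedule_work(w);
+ * ... my_handler(my_data) runs later on the "mali" workqueue ...
+ * _mali_osk_wq_delete_work(w); flushes pending work before freeing
+ * }
+ */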
+
+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work)
+{
+ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work;
+#if MALI_LICENSE_IS_GPL
+ queue_work(mali_wq_high, &work_object->work_handle);
+#else
+ schedule_work(&work_object->work_handle);
+#endif
+}
+
+static void _mali_osk_wq_work_func(struct work_struct *work)
+{
+ mali_osk_wq_work_object_t *work_object;
+
+ work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_work_object_t, work_handle);
+
+#if MALI_LICENSE_IS_GPL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+ /* We want the highest dynamic priority for this thread so that jobs
+ ** depending on it can be scheduled in time. Without this, the thread
+ ** might sometimes have to wait for user-mode threads to finish their
+ ** round-robin slice, causing a *bubble* in the Mali pipeline. Thanks to
+ ** the high-priority workqueue implementation in newer kernels, this only
+ ** happens on older kernels.
+ */
+ if (MALI_TRUE == work_object->high_pri) {
+ set_user_nice(current, -19);
+ }
+#endif
+#endif /* MALI_LICENSE_IS_GPL */
+
+ work_object->handler(work_object->data);
+}
+
+static void _mali_osk_wq_delayed_work_func(struct work_struct *work)
+{
+ mali_osk_wq_delayed_work_object_t *work_object;
+
+ work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_delayed_work_object_t, work.work);
+ work_object->handler(work_object->data);
+}
+
+mali_osk_wq_delayed_work_object_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data)
+{
+ mali_osk_wq_delayed_work_object_t *work = kmalloc(sizeof(mali_osk_wq_delayed_work_object_t), GFP_KERNEL);
+
+ if (NULL == work) return NULL;
+
+ work->handler = handler;
+ work->data = data;
+
+ INIT_DELAYED_WORK(&work->work, _mali_osk_wq_delayed_work_func);
+
+ return work;
+}
+
+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work)
+{
+ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+ kfree(work_object);
+}
+
+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work)
+{
+ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+ cancel_delayed_work(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work)
+{
+ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+ cancel_delayed_work_sync(&work_object->work);
+}
+
+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay)
+{
+ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work;
+
+#if MALI_LICENSE_IS_GPL
+ queue_delayed_work(mali_wq_normal, &work_object->work, delay);
+#else
+ schedule_delayed_work(&work_object->work, delay);
+#endif
+
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_pmu_power_up_down.c b/drivers/gpu/arm/utgard/linux/mali_pmu_power_up_down.c
new file mode 100644
index 000000000000..61ff5c8fdca8
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_pmu_power_up_down.c
@@ -0,0 +1,23 @@
+/**
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_pmu_power_up_down.c
+ */
+
+#include <linux/module.h>
+#include "mali_executor.h"
+
+int mali_perf_set_num_pp_cores(unsigned int num_cores)
+{
+ return mali_executor_set_perf_level(num_cores, MALI_FALSE);
+}
+
+EXPORT_SYMBOL(mali_perf_set_num_pp_cores);
diff --git a/drivers/gpu/arm/utgard/linux/mali_profiling_events.h b/drivers/gpu/arm/utgard/linux/mali_profiling_events.h
new file mode 100644
index 000000000000..0b90e8c5cf26
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_profiling_events.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_EVENTS_H__
+#define __MALI_PROFILING_EVENTS_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_events.h>
+
+#endif /* __MALI_PROFILING_EVENTS_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_profiling_gator_api.h b/drivers/gpu/arm/utgard/linux/mali_profiling_gator_api.h
new file mode 100644
index 000000000000..c98d127366ba
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_profiling_gator_api.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_GATOR_API_H__
+#define __MALI_PROFILING_GATOR_API_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_profiling_gator_api.h>
+
+#endif /* __MALI_PROFILING_GATOR_API_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_profiling_internal.c b/drivers/gpu/arm/utgard/linux/mali_profiling_internal.c
new file mode 100644
index 000000000000..12aef4194ff5
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_profiling_internal.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_osk_mali.h"
+#include "mali_ukk.h"
+#include "mali_timestamp.h"
+#include "mali_osk_profiling.h"
+#include "mali_user_settings_db.h"
+#include "mali_profiling_internal.h"
+
+typedef struct mali_profiling_entry {
+ u64 timestamp;
+ u32 event_id;
+ u32 data[5];
+} mali_profiling_entry;
+
+typedef enum mali_profiling_state {
+ MALI_PROFILING_STATE_UNINITIALIZED,
+ MALI_PROFILING_STATE_IDLE,
+ MALI_PROFILING_STATE_RUNNING,
+ MALI_PROFILING_STATE_RETURN,
+} mali_profiling_state;
+
+static _mali_osk_mutex_t *lock = NULL;
+static mali_profiling_state prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+static mali_profiling_entry *profile_entries = NULL;
+static _mali_osk_atomic_t profile_insert_index;
+static u32 profile_mask = 0;
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4);
+
+void probe_mali_timeline_event(void *data, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned
+ int d2, unsigned int d3, unsigned int d4))
+{
+ add_event(event_id, d0, d1, d2, d3, d4);
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_init(mali_bool auto_start)
+{
+ profile_entries = NULL;
+ profile_mask = 0;
+ _mali_osk_atomic_init(&profile_insert_index, 0);
+
+ lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_PROFILING);
+ if (NULL == lock) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ prof_state = MALI_PROFILING_STATE_IDLE;
+
+ if (MALI_TRUE == auto_start) {
+ u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* Use maximum buffer size */
+
+ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE);
+ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) {
+ return _MALI_OSK_ERR_FAULT;
+ }
+ }
+
+ return _MALI_OSK_ERR_OK;
+}
+
+void _mali_internal_profiling_term(void)
+{
+ u32 count;
+
+ /* Ensure profiling is stopped */
+ _mali_internal_profiling_stop(&count);
+
+ prof_state = MALI_PROFILING_STATE_UNINITIALIZED;
+
+ if (NULL != profile_entries) {
+ _mali_osk_vfree(profile_entries);
+ profile_entries = NULL;
+ }
+
+ if (NULL != lock) {
+ _mali_osk_mutex_term(lock);
+ lock = NULL;
+ }
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit)
+{
+ _mali_osk_errcode_t ret;
+ mali_profiling_entry *new_profile_entries;
+
+ _mali_osk_mutex_wait(lock);
+
+ if (MALI_PROFILING_STATE_RUNNING == prof_state) {
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_BUSY;
+ }
+
+ if (MALI_PROFILING_MAX_BUFFER_ENTRIES < *limit) {
+ *limit = MALI_PROFILING_MAX_BUFFER_ENTRIES;
+ }
+
+ new_profile_entries = _mali_osk_valloc(*limit * sizeof(mali_profiling_entry));
+
+ if (NULL == new_profile_entries) {
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_NOMEM;
+ }
+
+ profile_mask = 1;
+ while (profile_mask <= *limit) {
+ profile_mask <<= 1;
+ }
+ profile_mask >>= 1;
+
+ *limit = profile_mask;
+
+ profile_mask--; /* turns the power of two into a mask of one less */
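+ /* Example (illustrative): a requested *limit of 1000 leaves profile_mask
+ * at 512 after the loop and shift, so *limit is reported back as 512 and
+ * the decrement above yields an index mask of 511 for a 512-entry ring
+ * buffer. */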
+
+ if (MALI_PROFILING_STATE_IDLE != prof_state) {
+ _mali_osk_mutex_signal(lock);
+ _mali_osk_vfree(new_profile_entries);
+ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+ }
+
+ profile_entries = new_profile_entries;
+
+ ret = _mali_timestamp_reset();
+
+ if (_MALI_OSK_ERR_OK == ret) {
+ prof_state = MALI_PROFILING_STATE_RUNNING;
+ } else {
+ _mali_osk_vfree(profile_entries);
+ profile_entries = NULL;
+ }
+
+ register_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+ _mali_osk_mutex_signal(lock);
+ return ret;
+}
+
+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4)
+{
+ u32 cur_index = (_mali_osk_atomic_inc_return(&profile_insert_index) - 1) & profile_mask;
+
+ profile_entries[cur_index].timestamp = _mali_timestamp_get();
+ profile_entries[cur_index].event_id = event_id;
+ profile_entries[cur_index].data[0] = data0;
+ profile_entries[cur_index].data[1] = data1;
+ profile_entries[cur_index].data[2] = data2;
+ profile_entries[cur_index].data[3] = data3;
+ profile_entries[cur_index].data[4] = data4;
+
+ /* If event is "leave API function", add current memory usage to the event
+ * as data point 4. This is used in timeline profiling to indicate how
+ * much memory was used when leaving a function. */
+ if (event_id == (MALI_PROFILING_EVENT_TYPE_SINGLE | MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC)) {
+ profile_entries[cur_index].data[4] = _mali_ukk_report_memory_usage();
+ }
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count)
+{
+ _mali_osk_mutex_wait(lock);
+
+ if (MALI_PROFILING_STATE_RUNNING != prof_state) {
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+ }
+
+ /* Go into return state (user retrieves events); no more events will be added after this. */
+ prof_state = MALI_PROFILING_STATE_RETURN;
+
+ unregister_trace_mali_timeline_event(probe_mali_timeline_event, NULL);
+
+ _mali_osk_mutex_signal(lock);
+
+ tracepoint_synchronize_unregister();
+
+ *count = _mali_osk_atomic_read(&profile_insert_index);
+ if (*count > profile_mask) *count = profile_mask;
+
+ return _MALI_OSK_ERR_OK;
+}
+
+u32 _mali_internal_profiling_get_count(void)
+{
+ u32 retval = 0;
+
+ _mali_osk_mutex_wait(lock);
+ if (MALI_PROFILING_STATE_RETURN == prof_state) {
+ retval = _mali_osk_atomic_read(&profile_insert_index);
+ if (retval > profile_mask) retval = profile_mask;
+ }
+ _mali_osk_mutex_signal(lock);
+
+ return retval;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5])
+{
+ u32 raw_index = _mali_osk_atomic_read(&profile_insert_index);
+
+ _mali_osk_mutex_wait(lock);
+
+ if (index < profile_mask) {
+ if ((raw_index & ~profile_mask) != 0) {
+ index += raw_index;
+ index &= profile_mask;
+ }
+
+ if (prof_state != MALI_PROFILING_STATE_RETURN) {
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+ }
+
+ if (index >= raw_index) {
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ *timestamp = profile_entries[index].timestamp;
+ *event_id = profile_entries[index].event_id;
+ data[0] = profile_entries[index].data[0];
+ data[1] = profile_entries[index].data[1];
+ data[2] = profile_entries[index].data[2];
+ data[3] = profile_entries[index].data[3];
+ data[4] = profile_entries[index].data[4];
+ } else {
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_FAULT;
+ }
+
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_OK;
+}
+
+_mali_osk_errcode_t _mali_internal_profiling_clear(void)
+{
+ _mali_osk_mutex_wait(lock);
+
+ if (MALI_PROFILING_STATE_RETURN != prof_state) {
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */
+ }
+
+ prof_state = MALI_PROFILING_STATE_IDLE;
+ profile_mask = 0;
+ _mali_osk_atomic_init(&profile_insert_index, 0);
+
+ if (NULL != profile_entries) {
+ _mali_osk_vfree(profile_entries);
+ profile_entries = NULL;
+ }
+
+ _mali_osk_mutex_signal(lock);
+ return _MALI_OSK_ERR_OK;
+}
+
+mali_bool _mali_internal_profiling_is_recording(void)
+{
+ return prof_state == MALI_PROFILING_STATE_RUNNING ? MALI_TRUE : MALI_FALSE;
+}
+
+mali_bool _mali_internal_profiling_have_recording(void)
+{
+ return prof_state == MALI_PROFILING_STATE_RETURN ? MALI_TRUE : MALI_FALSE;
+}
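+
+/* Profiling state machine implied by the functions above (start is
+ * assumed to move IDLE to RUNNING; its definition is not shown here):
+ *
+ *   IDLE --start--> RUNNING --stop--> RETURN --clear--> IDLE
+ *
+ * Events are recorded only while RUNNING; get_count()/get_event() are
+ * valid only in RETURN. */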
diff --git a/drivers/gpu/arm/utgard/linux/mali_profiling_internal.h b/drivers/gpu/arm/utgard/linux/mali_profiling_internal.h
new file mode 100644
index 000000000000..1c6f4da691d2
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_profiling_internal.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_PROFILING_INTERNAL_H__
+#define __MALI_PROFILING_INTERNAL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mali_osk.h"
+
+int _mali_internal_profiling_init(mali_bool auto_start);
+void _mali_internal_profiling_term(void);
+
+mali_bool _mali_internal_profiling_is_recording(void);
+mali_bool _mali_internal_profiling_have_recording(void);
+_mali_osk_errcode_t _mali_internal_profiling_clear(void);
+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]);
+u32 _mali_internal_profiling_get_count(void);
+_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count);
+_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_PROFILING_INTERNAL_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_sync.c b/drivers/gpu/arm/utgard/linux/mali_sync.c
new file mode 100644
index 000000000000..dc1e3a2a4d73
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_sync.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_sync.h"
+
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_timeline.h"
+#include "mali_executor.h"
+
+#include <linux/file.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/fcntl.h>
+
+struct mali_sync_pt {
+ struct sync_pt sync_pt;
+ struct mali_sync_flag *flag;
+ struct sync_timeline *sync_tl; /**< Sync timeline this pt is connected to. */
+};
+
+/**
+ * The sync flag is used to connect sync fences to the Mali Timeline system. Sync fences can be
+ * created from a sync flag, and when the flag is signaled, the sync fences will also be signaled.
+ */
+struct mali_sync_flag {
+ struct sync_timeline *sync_tl; /**< Sync timeline this flag is connected to. */
+ u32 point; /**< Point on timeline. */
+ int status; /**< 0 if unsignaled, 1 if signaled without error or negative if signaled with error. */
+ struct kref refcount; /**< Reference count. */
+};
+
+/**
+ * A Mali sync timeline container connects a mali timeline to a
+ * sync_timeline. When a fence times out, this allows more detailed mali
+ * timeline system info to be printed.
+ */
+struct mali_sync_timeline_container {
+ struct sync_timeline sync_timeline;
+ struct mali_timeline *timeline;
+};
+
+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+ return container_of(pt, struct mali_sync_pt, sync_pt);
+}
+
+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct sync_timeline *sync_tl)
+{
+ return container_of(sync_tl, struct mali_sync_timeline_container, sync_timeline);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt, *new_mpt;
+ struct sync_pt *new_pt;
+
+ MALI_DEBUG_ASSERT_POINTER(pt);
+ mpt = to_mali_sync_pt(pt);
+
+ new_pt = sync_pt_create(mpt->sync_tl, sizeof(struct mali_sync_pt));
+ if (NULL == new_pt) return NULL;
+
+ new_mpt = to_mali_sync_pt(new_pt);
+
+ mali_sync_flag_get(mpt->flag);
+ new_mpt->flag = mpt->flag;
+ new_mpt->sync_tl = mpt->sync_tl;
+
+ return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt;
+
+ MALI_DEBUG_ASSERT_POINTER(pt);
+ mpt = to_mali_sync_pt(pt);
+
+ MALI_DEBUG_ASSERT_POINTER(mpt->flag);
+
+ return mpt->flag->status;
+}
+
+static int timeline_compare(struct sync_pt *pta, struct sync_pt *ptb)
+{
+ struct mali_sync_pt *mpta;
+ struct mali_sync_pt *mptb;
+ u32 a, b;
+
+ MALI_DEBUG_ASSERT_POINTER(pta);
+ MALI_DEBUG_ASSERT_POINTER(ptb);
+ mpta = to_mali_sync_pt(pta);
+ mptb = to_mali_sync_pt(ptb);
+
+ MALI_DEBUG_ASSERT_POINTER(mpta->flag);
+ MALI_DEBUG_ASSERT_POINTER(mptb->flag);
+
+ a = mpta->flag->point;
+ b = mptb->flag->point;
+
+ if (a == b) return 0;
+
+ return ((b - a) < (a - b) ? -1 : 1);
+}
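+
+/* Note on timeline_compare() above: the unsigned arithmetic is wrap-safe.
+ * For example, with a = 0xFFFFFFFE and b = 0x00000001, b - a = 3 while
+ * a - b = 0xFFFFFFFD, so 'a' is correctly ordered before 'b' even though
+ * the 32-bit point counter has wrapped around. */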
+
+static void timeline_free_pt(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt;
+
+ MALI_DEBUG_ASSERT_POINTER(pt);
+ mpt = to_mali_sync_pt(pt);
+
+ mali_sync_flag_put(mpt->flag);
+}
+
+static void timeline_release(struct sync_timeline *sync_timeline)
+{
+ struct mali_sync_timeline_container *mali_sync_tl = NULL;
+ struct mali_timeline *mali_tl = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(sync_timeline);
+
+ mali_sync_tl = to_mali_sync_tl_container(sync_timeline);
+ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+ mali_tl = mali_sync_tl->timeline;
+
+	/* An always-signaled timeline does not have a mali timeline container. */
+ if (mali_tl) {
+ if (NULL != mali_tl->spinlock) {
+ mali_spinlock_reentrant_term(mali_tl->spinlock);
+ }
+ _mali_osk_free(mali_tl);
+ }
+
+ module_put(THIS_MODULE);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+static void timeline_print_pt(struct seq_file *s, struct sync_pt *sync_pt)
+{
+ struct mali_sync_pt *mpt;
+
+ MALI_DEBUG_ASSERT_POINTER(s);
+ MALI_DEBUG_ASSERT_POINTER(sync_pt);
+
+ mpt = to_mali_sync_pt(sync_pt);
+
+	/* This sync point may still be under construction;
+	 * make sure the flag is valid before accessing it.
+ */
+ if (mpt->flag) {
+ seq_printf(s, "%u", mpt->flag->point);
+ } else {
+ seq_printf(s, "uninitialized");
+ }
+}
+
+static void timeline_print_obj(struct seq_file *s, struct sync_timeline *sync_tl)
+{
+ struct mali_sync_timeline_container *mali_sync_tl = NULL;
+ struct mali_timeline *mali_tl = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+ mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+ mali_tl = mali_sync_tl->timeline;
+
+ if (NULL != mali_tl) {
+ seq_printf(s, "oldest (%u) ", mali_tl->point_oldest);
+ seq_printf(s, "next (%u)", mali_tl->point_next);
+ seq_printf(s, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+ {
+ u32 tid = _mali_osk_get_tid();
+ struct mali_timeline_system *system = mali_tl->system;
+
+ mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+ if (!mali_tl->destroyed) {
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+ mali_timeline_debug_print_timeline(mali_tl, s);
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+ }
+ mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+ /* dump job queue status and group running status */
+ mali_executor_status_dump();
+ }
+#endif
+ }
+}
+#else
+static void timeline_pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+ struct mali_sync_pt *mpt;
+
+ MALI_DEBUG_ASSERT_POINTER(str);
+ MALI_DEBUG_ASSERT_POINTER(pt);
+
+ mpt = to_mali_sync_pt(pt);
+
+	/* This sync point may still be under construction;
+	 * make sure the flag is valid before accessing it.
+ */
+ if (mpt->flag) {
+ _mali_osk_snprintf(str, size, "%u", mpt->flag->point);
+ } else {
+ _mali_osk_snprintf(str, size, "uninitialized");
+ }
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str, int size)
+{
+ struct mali_sync_timeline_container *mali_sync_tl = NULL;
+ struct mali_timeline *mali_tl = NULL;
+
+ MALI_DEBUG_ASSERT_POINTER(timeline);
+
+ mali_sync_tl = to_mali_sync_tl_container(timeline);
+ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl);
+
+ mali_tl = mali_sync_tl->timeline;
+
+ if (NULL != mali_tl) {
+ _mali_osk_snprintf(str, size, "oldest (%u) ", mali_tl->point_oldest);
+ _mali_osk_snprintf(str, size, "next (%u)", mali_tl->point_next);
+ _mali_osk_snprintf(str, size, "\n");
+
+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS)
+ {
+ u32 tid = _mali_osk_get_tid();
+ struct mali_timeline_system *system = mali_tl->system;
+
+ mali_spinlock_reentrant_wait(mali_tl->spinlock, tid);
+ if (!mali_tl->destroyed) {
+ mali_spinlock_reentrant_wait(system->spinlock, tid);
+ mali_timeline_debug_direct_print_timeline(mali_tl);
+ mali_spinlock_reentrant_signal(system->spinlock, tid);
+ }
+ mali_spinlock_reentrant_signal(mali_tl->spinlock, tid);
+
+ /* dump job queue status and group running status */
+ mali_executor_status_dump();
+ }
+#endif
+ }
+}
+#endif
+
+
+static struct sync_timeline_ops mali_timeline_ops = {
+ .driver_name = "Mali",
+ .dup = timeline_dup,
+ .has_signaled = timeline_has_signaled,
+ .compare = timeline_compare,
+ .free_pt = timeline_free_pt,
+ .release_obj = timeline_release,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ .print_pt = timeline_print_pt,
+ .print_obj = timeline_print_obj,
+#else
+ .pt_value_str = timeline_pt_value_str,
+ .timeline_value_str = timeline_value_str,
+#endif
+};
+
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name)
+{
+ struct sync_timeline *sync_tl;
+ struct mali_sync_timeline_container *mali_sync_tl;
+
+ sync_tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name);
+ if (NULL == sync_tl) return NULL;
+
+ mali_sync_tl = to_mali_sync_tl_container(sync_tl);
+ mali_sync_tl->timeline = timeline;
+
+	/* Grab a reference on the module to ensure the callbacks are present
+	 * as long as some timeline exists. The reference is released when the
+	 * timeline is freed.
+	 * Since this function is called from an ioctl on an open file we know
+	 * we already have a reference, so using __module_get is safe. */
+ __module_get(THIS_MODULE);
+
+ return sync_tl;
+}
+
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence)
+{
+ s32 fd = -1;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0) {
+ sync_fence_put(sync_fence);
+ return -1;
+ }
+ sync_fence_install(sync_fence, fd);
+
+ return fd;
+}
+
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2)
+{
+ struct sync_fence *sync_fence;
+
+ MALI_DEBUG_ASSERT_POINTER(sync_fence1);
+	MALI_DEBUG_ASSERT_POINTER(sync_fence2);
+
+ sync_fence = sync_fence_merge("mali_merge_fence", sync_fence1, sync_fence2);
+ sync_fence_put(sync_fence1);
+ sync_fence_put(sync_fence2);
+
+ return sync_fence;
+}
+
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl)
+{
+ struct mali_sync_flag *flag;
+ struct sync_fence *sync_fence;
+
+ MALI_DEBUG_ASSERT_POINTER(sync_tl);
+
+ flag = mali_sync_flag_create(sync_tl, 0);
+ if (NULL == flag) return NULL;
+
+ sync_fence = mali_sync_flag_create_fence(flag);
+
+ mali_sync_flag_signal(flag, 0);
+ mali_sync_flag_put(flag);
+
+ return sync_fence;
+}
+
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, mali_timeline_point point)
+{
+ struct mali_sync_flag *flag;
+
+ if (NULL == sync_tl) return NULL;
+
+ flag = _mali_osk_calloc(1, sizeof(*flag));
+ if (NULL == flag) return NULL;
+
+ flag->sync_tl = sync_tl;
+ flag->point = point;
+
+ flag->status = 0;
+ kref_init(&flag->refcount);
+
+ return flag;
+}
+
+void mali_sync_flag_get(struct mali_sync_flag *flag)
+{
+ MALI_DEBUG_ASSERT_POINTER(flag);
+ kref_get(&flag->refcount);
+}
+
+/**
+ * Free sync flag.
+ *
+ * @param ref kref object embedded in sync flag that should be freed.
+ */
+static void mali_sync_flag_free(struct kref *ref)
+{
+ struct mali_sync_flag *flag;
+
+ MALI_DEBUG_ASSERT_POINTER(ref);
+ flag = container_of(ref, struct mali_sync_flag, refcount);
+
+ _mali_osk_free(flag);
+}
+
+void mali_sync_flag_put(struct mali_sync_flag *flag)
+{
+ MALI_DEBUG_ASSERT_POINTER(flag);
+ kref_put(&flag->refcount, mali_sync_flag_free);
+}
+
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error)
+{
+ MALI_DEBUG_ASSERT_POINTER(flag);
+
+ MALI_DEBUG_ASSERT(0 == flag->status);
+ flag->status = (0 > error) ? error : 1;
+
+ _mali_osk_write_mem_barrier();
+
+ sync_timeline_signal(flag->sync_tl);
+}
+
+/**
+ * Create a sync point attached to given sync flag.
+ *
+ * @note Sync points must be triggered in *exactly* the same order as they are created.
+ *
+ * @param flag Sync flag.
+ * @return New sync point if successful, NULL if not.
+ */
+static struct sync_pt *mali_sync_flag_create_pt(struct mali_sync_flag *flag)
+{
+ struct sync_pt *pt;
+ struct mali_sync_pt *mpt;
+
+ MALI_DEBUG_ASSERT_POINTER(flag);
+ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+ pt = sync_pt_create(flag->sync_tl, sizeof(struct mali_sync_pt));
+ if (NULL == pt) return NULL;
+
+ mali_sync_flag_get(flag);
+
+ mpt = to_mali_sync_pt(pt);
+ mpt->flag = flag;
+ mpt->sync_tl = flag->sync_tl;
+
+ return pt;
+}
+
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag)
+{
+ struct sync_pt *sync_pt;
+ struct sync_fence *sync_fence;
+
+ MALI_DEBUG_ASSERT_POINTER(flag);
+ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl);
+
+ sync_pt = mali_sync_flag_create_pt(flag);
+ if (NULL == sync_pt) return NULL;
+
+ sync_fence = sync_fence_create("mali_flag_fence", sync_pt);
+ if (NULL == sync_fence) {
+ sync_pt_free(sync_pt);
+ return NULL;
+ }
+
+ return sync_fence;
+}
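+
+/* Hypothetical usage of the flag/fence helpers above, for illustration
+ * only (sync_tl and point stand for a caller's timeline and point):
+ *
+ *   struct mali_sync_flag *flag = mali_sync_flag_create(sync_tl, point);
+ *   struct sync_fence *fence = mali_sync_flag_create_fence(flag);
+ *   s32 fd = mali_sync_fence_fd_alloc(fence); // fd handed to user space
+ *   ...
+ *   mali_sync_flag_signal(flag, 0);           // signals every fence made
+ *                                             // from this flag
+ *   mali_sync_flag_put(flag);                 // drop creator's reference
+ */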
diff --git a/drivers/gpu/arm/utgard/linux/mali_sync.h b/drivers/gpu/arm/utgard/linux/mali_sync.h
new file mode 100644
index 000000000000..0c541ff9a2e0
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_sync.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_sync.h
+ *
+ * Mali interface for Linux sync objects.
+ */
+
+#ifndef _MALI_SYNC_H_
+#define _MALI_SYNC_H_
+
+#if defined(CONFIG_SYNC)
+
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#include <linux/sync.h>
+#else
+#include <sync.h>
+#endif
+
+
+#include "mali_osk.h"
+
+struct mali_sync_flag;
+struct mali_timeline;
+
+/**
+ * Create a sync timeline.
+ *
+ * @param name Name of the sync timeline.
+ * @return The new sync timeline if successful, NULL if not.
+ */
+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name);
+
+/**
+ * Creates a file descriptor representing the sync fence. Will release sync fence if allocation of
+ * file descriptor fails.
+ *
+ * @param sync_fence Sync fence.
+ * @return File descriptor representing sync fence if successful, or -1 if not.
+ */
+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence);
+
+/**
+ * Merges two sync fences. Both input sync fences will be released.
+ *
+ * @param sync_fence1 First sync fence.
+ * @param sync_fence2 Second sync fence.
+ * @return New sync fence that is the result of the merger if successful, or NULL if not.
+ */
+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2);
+
+/**
+ * Create a sync fence that is already signaled.
+ *
+ * @param sync_tl Sync timeline.
+ * @return New signaled sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl);
+
+/**
+ * Create a sync flag.
+ *
+ * @param sync_tl Sync timeline.
+ * @param point Point on Mali timeline.
+ * @return New sync flag if successful, NULL if not.
+ */
+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, u32 point);
+
+/**
+ * Grab sync flag reference.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_get(struct mali_sync_flag *flag);
+
+/**
+ * Release sync flag reference. If this was the last reference, the sync flag will be freed.
+ *
+ * @param flag Sync flag.
+ */
+void mali_sync_flag_put(struct mali_sync_flag *flag);
+
+/**
+ * Signal sync flag. All sync fences created from this flag will be signaled.
+ *
+ * @param flag Sync flag to signal.
+ * @param error Negative error code, or 0 if no error.
+ */
+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error);
+
+/**
+ * Create a sync fence attached to given sync flag.
+ *
+ * @param flag Sync flag.
+ * @return New sync fence if successful, NULL if not.
+ */
+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag);
+
+#endif /* defined(CONFIG_SYNC) */
+
+#endif /* _MALI_SYNC_H_ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_uk_types.h b/drivers/gpu/arm/utgard/linux/mali_uk_types.h
new file mode 100644
index 000000000000..1884cdbba424
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_uk_types.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2012, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_UK_TYPES_H__
+#define __MALI_UK_TYPES_H__
+
+/* Simple wrapper in order to find the OS specific location of this file */
+#include <linux/mali/mali_utgard_uk_types.h>
+
+#endif /* __MALI_UK_TYPES_H__ */
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_core.c b/drivers/gpu/arm/utgard/linux/mali_ukk_core.c
new file mode 100644
index 000000000000..41c1f48fc6c7
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_core.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/fs.h> /* file system operations */
+#include <linux/slab.h>	/* memory allocation functions */
+#include <asm/uaccess.h> /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs)
+{
+ _mali_uk_get_api_version_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_get_api_version(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+ if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+ return 0;
+}
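+
+/* Summary of the common structure of the wrappers in this file (editor's
+ * note, not extra driver logic; individual wrappers skip steps they do
+ * not need):
+ *
+ *   1. validate uargs (and session_data where required);
+ *   2. copy arguments in from user space via get_user()/copy_from_user();
+ *   3. point kargs.ctx at the session;
+ *   4. call the matching _mali_ukk_*() implementation;
+ *   5. map _mali_osk_errcode_t failures to -errno with map_errcode();
+ *   6. copy results back out with put_user()/copy_to_user().
+ */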
+
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs)
+{
+ _mali_uk_get_api_version_v2_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT;
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_get_api_version_v2(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+ if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT;
+
+ return 0;
+}
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs)
+{
+ _mali_uk_wait_for_notification_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_wait_for_notification(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ if (_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS != kargs.type) {
+		kargs.ctx = (uintptr_t)NULL; /* prevent a kernel address from being returned to user space */
+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_wait_for_notification_s))) return -EFAULT;
+ } else {
+ if (0 != put_user(kargs.type, &uargs->type)) return -EFAULT;
+ }
+
+ return 0;
+}
+
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs)
+{
+ _mali_uk_post_notification_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+
+ if (0 != get_user(kargs.type, &uargs->type)) {
+ return -EFAULT;
+ }
+
+ err = _mali_ukk_post_notification(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ return 0;
+}
+
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs)
+{
+ _mali_uk_get_user_settings_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_get_user_settings(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+	kargs.ctx = 0; /* prevent a kernel address from being returned to user space */
+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_user_settings_s))) return -EFAULT;
+
+ return 0;
+}
+
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs)
+{
+ _mali_uk_request_high_priority_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_request_high_priority(&kargs);
+
+ kargs.ctx = 0;
+
+ return map_errcode(err);
+}
+
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs)
+{
+ _mali_uk_pending_submit_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_pending_submit(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_gp.c b/drivers/gpu/arm/utgard/linux/mali_ukk_gp.c
new file mode 100644
index 000000000000..d4144c0f5e48
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_gp.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/fs.h> /* file system operations */
+#include <asm/uaccess.h> /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs)
+{
+ _mali_osk_errcode_t err;
+
+ /* If the job was started successfully, 0 is returned. If there was an error, but the job
+ * was started, we return -ENOENT. For anything else returned, the job was not started. */
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ err = _mali_ukk_gp_start_job(session_data, uargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ return 0;
+}
+
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs)
+{
+ _mali_uk_get_gp_core_version_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_get_gp_core_version(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ /* no known transactions to roll-back */
+
+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+ return 0;
+}
+
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs)
+{
+ _mali_uk_gp_suspend_response_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_gp_suspend_response_s))) return -EFAULT;
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_gp_suspend_response(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ if (0 != put_user(kargs.cookie, &uargs->cookie)) return -EFAULT;
+
+ /* no known transactions to roll-back */
+ return 0;
+}
+
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs)
+{
+ _mali_uk_get_gp_number_of_cores_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_get_gp_number_of_cores(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ /* no known transactions to roll-back */
+
+ if (0 != put_user(kargs.number_of_cores, &uargs->number_of_cores)) return -EFAULT;
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_mem.c b/drivers/gpu/arm/utgard/linux/mali_ukk_mem.c
new file mode 100644
index 000000000000..ca1cba0585b3
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_mem.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/fs.h> /* file system operations */
+#include <asm/uaccess.h> /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs)
+{
+ _mali_uk_alloc_mem_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_alloc_mem_s))) {
+ return -EFAULT;
+ }
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_mem_allocate(&kargs);
+
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs)
+{
+ _mali_uk_free_mem_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_free_mem_s))) {
+ return -EFAULT;
+ }
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_mem_free(&kargs);
+
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ if (0 != put_user(kargs.free_pages_nr, &uargs->free_pages_nr)) {
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs)
+{
+ _mali_uk_bind_mem_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_bind_mem_s))) {
+ return -EFAULT;
+ }
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_mem_bind(&kargs);
+
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ return 0;
+}
+
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs)
+{
+ _mali_uk_unbind_mem_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_unbind_mem_s))) {
+ return -EFAULT;
+ }
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_mem_unbind(&kargs);
+
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ return 0;
+}
+
+
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs)
+{
+ _mali_uk_cow_mem_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_mem_s))) {
+ return -EFAULT;
+ }
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_mem_cow(&kargs);
+
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) {
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs)
+{
+ _mali_uk_cow_modify_range_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_modify_range_s))) {
+ return -EFAULT;
+ }
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_mem_cow_modify_range(&kargs);
+
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ if (0 != put_user(kargs.change_pages_nr, &uargs->change_pages_nr)) {
+ return -EFAULT;
+ }
+ return 0;
+}
+
+
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs)
+{
+ _mali_uk_mem_resize_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_resize_s))) {
+ return -EFAULT;
+ }
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_mem_resize(&kargs);
+
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ return 0;
+}
+
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs)
+{
+ _mali_uk_mem_write_safe_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_write_safe_s))) {
+ return -EFAULT;
+ }
+
+ kargs.ctx = (uintptr_t)session_data;
+
+ /* Check if we can access the buffers */
+ if (!access_ok(VERIFY_WRITE, kargs.dest, kargs.size)
+ || !access_ok(VERIFY_READ, kargs.src, kargs.size)) {
+ return -EINVAL;
+ }
+
+ /* Check if size wraps */
+ if ((kargs.size + kargs.dest) <= kargs.dest
+ || (kargs.size + kargs.src) <= kargs.src) {
+ return -EINVAL;
+ }
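+
+	/* Example of the wrap check above (assuming 64-bit dest/src/size
+	 * fields): dest = 0xFFFFFFFFFFFFF000 with size = 0x2000 gives
+	 * dest + size = 0x1000 <= dest, so the request is rejected. */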
+
+ err = _mali_ukk_mem_write_safe(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ if (0 != put_user(kargs.size, &uargs->size)) {
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+
+
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs)
+{
+ _mali_uk_query_mmu_page_table_dump_size_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_query_mmu_page_table_dump_size(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ if (0 != put_user(kargs.size, &uargs->size)) return -EFAULT;
+
+ return 0;
+}
+
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs)
+{
+ _mali_uk_dump_mmu_page_table_s kargs;
+ _mali_osk_errcode_t err;
+ void __user *user_buffer;
+ void *buffer = NULL;
+ int rc = -EFAULT;
+
+ /* validate input */
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ /* the session_data pointer was validated by caller */
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_dump_mmu_page_table_s)))
+ goto err_exit;
+
+ user_buffer = (void __user *)(uintptr_t)kargs.buffer;
+ if (!access_ok(VERIFY_WRITE, user_buffer, kargs.size))
+ goto err_exit;
+
+ /* allocate temporary buffer (kernel side) to store mmu page table info */
+	if (0 == kargs.size)
+ return -EINVAL;
+ /* Allow at most 8MiB buffers, this is more than enough to dump a fully
+ * populated page table. */
+ if (kargs.size > SZ_8M)
+ return -EINVAL;
+
+ buffer = (void *)(uintptr_t)_mali_osk_valloc(kargs.size);
+ if (NULL == buffer) {
+ rc = -ENOMEM;
+ goto err_exit;
+ }
+
+ kargs.ctx = (uintptr_t)session_data;
+ kargs.buffer = (uintptr_t)buffer;
+ err = _mali_ukk_dump_mmu_page_table(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ rc = map_errcode(err);
+ goto err_exit;
+ }
+
+ /* copy mmu page table info back to user space and update pointers */
+ if (0 != copy_to_user(user_buffer, buffer, kargs.size))
+ goto err_exit;
+
+ kargs.register_writes = kargs.register_writes -
+ (uintptr_t)buffer + (uintptr_t)user_buffer;
+ kargs.page_table_dump = kargs.page_table_dump -
+ (uintptr_t)buffer + (uintptr_t)user_buffer;
+
+ if (0 != copy_to_user(uargs, &kargs, sizeof(kargs)))
+ goto err_exit;
+
+ rc = 0;
+
+err_exit:
+ if (buffer) _mali_osk_vfree(buffer);
+ return rc;
+}
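+
+/* The pointer rebasing before the final copy_to_user() above can be read
+ * as the following sketch (assuming register_writes and page_table_dump
+ * point into the kernel dump buffer on return from
+ * _mali_ukk_dump_mmu_page_table()):
+ *
+ *   offset = kargs.register_writes - (uintptr_t)buffer;
+ *   kargs.register_writes = (uintptr_t)user_buffer + offset;
+ *
+ * i.e. kernel-buffer offsets are re-expressed relative to the user-space
+ * buffer the data was copied into. */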
+
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs)
+{
+ _mali_osk_errcode_t err;
+ _mali_uk_profiling_memory_usage_get_s kargs;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+ return -EFAULT;
+ }
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_mem_usage_get(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent a kernel address from being returned to user space */
+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) {
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_pp.c b/drivers/gpu/arm/utgard/linux/mali_ukk_pp.c
new file mode 100644
index 000000000000..4c1c381cb90f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_pp.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/fs.h> /* file system operations */
+#include <asm/uaccess.h> /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs)
+{
+ _mali_osk_errcode_t err;
+
+ /* If the job was started successfully, 0 is returned. If there was an error, but the job
+ * was started, we return -ENOENT. For anything else returned, the job was not started. */
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ err = _mali_ukk_pp_start_job(session_data, uargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ return 0;
+}
+
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs)
+{
+ _mali_osk_errcode_t err;
+
+ /* If the jobs were started successfully, 0 is returned. If there was an error, but the
+ * jobs were started, we return -ENOENT. For anything else returned, the jobs were not
+ * started. */
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ err = _mali_ukk_pp_and_gp_start_job(session_data, uargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ return 0;
+}
+
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs)
+{
+ _mali_uk_get_pp_number_of_cores_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+
+ err = _mali_ukk_get_pp_number_of_cores(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+	kargs.ctx = (uintptr_t)NULL; /* prevent a kernel address from being returned to user space */
+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_pp_number_of_cores_s))) {
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs)
+{
+ _mali_uk_get_pp_core_version_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_get_pp_core_version(&kargs);
+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err);
+
+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT;
+
+ return 0;
+}
+
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs)
+{
+ _mali_uk_pp_disable_wb_s kargs;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session_data, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_pp_disable_wb_s))) return -EFAULT;
+
+ kargs.ctx = (uintptr_t)session_data;
+ _mali_ukk_pp_job_disable_wb(&kargs);
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_profiling.c b/drivers/gpu/arm/utgard/linux/mali_ukk_profiling.c
new file mode 100644
index 000000000000..e84544dee07d
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_profiling.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/fs.h> /* file system operations */
+#include <asm/uaccess.h> /* user space access */
+#include <linux/slab.h>
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs)
+{
+ _mali_uk_profiling_add_event_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_add_event_s))) {
+ return -EFAULT;
+ }
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_profiling_add_event(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ return 0;
+}
+
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs)
+{
+ _mali_uk_sw_counters_report_s kargs;
+ _mali_osk_errcode_t err;
+ u32 *counter_buffer;
+ u32 __user *counters;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_sw_counters_report_s))) {
+ return -EFAULT;
+ }
+
+ /* make sure that kargs.num_counters is [at least somewhat] sane */
+ if (kargs.num_counters > 10000) {
+ MALI_DEBUG_PRINT(1, ("User space attempted to allocate too many counters.\n"));
+ return -EINVAL;
+ }
+
+ counter_buffer = (u32 *)kmalloc(sizeof(u32) * kargs.num_counters, GFP_KERNEL);
+ if (NULL == counter_buffer) {
+ return -ENOMEM;
+ }
+
+	counters = (u32 __user *)(uintptr_t)kargs.counters;
+
+ if (0 != copy_from_user(counter_buffer, counters, sizeof(u32) * kargs.num_counters)) {
+ kfree(counter_buffer);
+ return -EFAULT;
+ }
+
+ kargs.ctx = (uintptr_t)session_data;
+ kargs.counters = (uintptr_t)counter_buffer;
+
+ err = _mali_ukk_sw_counters_report(&kargs);
+
+ kfree(counter_buffer);
+
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ return 0;
+}
+
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs)
+{
+ _mali_uk_profiling_stream_fd_get_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) {
+ return -EFAULT;
+ }
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_profiling_stream_fd_get(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) {
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs)
+{
+	_mali_uk_profiling_control_set_s kargs;
+	_mali_osk_errcode_t err;
+	u8 *kernel_control_data = NULL;
+	u8 *kernel_response_data = NULL;
+	u64 user_control_data;
+	u64 user_response_data;
+
+	MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+	if (0 != get_user(kargs.control_packet_size, &uargs->control_packet_size)) return -EFAULT;
+	if (0 != get_user(kargs.response_packet_size, &uargs->response_packet_size)) return -EFAULT;
+	/* Fetch the user-space packet pointers with get_user() rather than
+	 * dereferencing uargs directly; uargs is a __user pointer. The u64
+	 * field types are assumed to match _mali_uk_profiling_control_set_s. */
+	if (0 != get_user(user_control_data, &uargs->control_packet_data)) return -EFAULT;
+	if (0 != get_user(user_response_data, &uargs->response_packet_data)) return -EFAULT;
+
+ kargs.ctx = (uintptr_t)session_data;
+
+ if (0 != kargs.control_packet_size) {
+
+ kernel_control_data = _mali_osk_calloc(1, kargs.control_packet_size);
+ if (NULL == kernel_control_data) {
+ return -ENOMEM;
+ }
+
+ MALI_DEBUG_ASSERT(0 != kargs.response_packet_size);
+
+ kernel_response_data = _mali_osk_calloc(1, kargs.response_packet_size);
+ if (NULL == kernel_response_data) {
+ _mali_osk_free(kernel_control_data);
+ return -ENOMEM;
+ }
+
+ kargs.control_packet_data = (uintptr_t)kernel_control_data;
+ kargs.response_packet_data = (uintptr_t)kernel_response_data;
+
+		if (0 != copy_from_user(kernel_control_data, (void __user *)(uintptr_t)user_control_data, kargs.control_packet_size)) {
+ _mali_osk_free(kernel_control_data);
+ _mali_osk_free(kernel_response_data);
+ return -EFAULT;
+ }
+
+ err = _mali_ukk_profiling_control_set(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ _mali_osk_free(kernel_control_data);
+ _mali_osk_free(kernel_response_data);
+ return map_errcode(err);
+ }
+
+		if (0 != kargs.response_packet_size && 0 != copy_to_user((void __user *)(uintptr_t)user_response_data, (void *)(uintptr_t)kargs.response_packet_data, kargs.response_packet_size)) {
+ _mali_osk_free(kernel_control_data);
+ _mali_osk_free(kernel_response_data);
+ return -EFAULT;
+ }
+
+ if (0 != put_user(kargs.response_packet_size, &uargs->response_packet_size)) {
+ _mali_osk_free(kernel_control_data);
+ _mali_osk_free(kernel_response_data);
+ return -EFAULT;
+ }
+
+ _mali_osk_free(kernel_control_data);
+ _mali_osk_free(kernel_response_data);
+ } else {
+
+ err = _mali_ukk_profiling_control_set(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ }
+ return 0;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_soft_job.c b/drivers/gpu/arm/utgard/linux/mali_ukk_soft_job.c
new file mode 100644
index 000000000000..11c70060e489
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_soft_job.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/fs.h> /* file system operations */
+#include <asm/uaccess.h> /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_soft_job.h"
+#include "mali_timeline.h"
+
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs)
+{
+ _mali_uk_soft_job_start_s kargs;
+ u32 type, point;
+ u64 user_job;
+ struct mali_timeline_fence fence;
+ struct mali_soft_job *job = NULL;
+ u32 __user *job_id_ptr = NULL;
+
+ /* If the job was started successfully, 0 is returned. If there was an error, but the job
+ * was started, we return -ENOENT. For anything else returned, the job was not started. */
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+ MALI_CHECK_NON_NULL(session, -EINVAL);
+
+ MALI_DEBUG_ASSERT_POINTER(session->soft_job_system);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(kargs))) {
+ return -EFAULT;
+ }
+
+ type = kargs.type;
+ user_job = kargs.user_job;
+ job_id_ptr = (u32 __user *)(uintptr_t)kargs.job_id_ptr;
+
+ mali_timeline_fence_copy_uk_fence(&fence, &kargs.fence);
+
+ if ((MALI_SOFT_JOB_TYPE_USER_SIGNALED != type) && (MALI_SOFT_JOB_TYPE_SELF_SIGNALED != type)) {
+ MALI_DEBUG_PRINT_ERROR(("Invalid soft job type specified\n"));
+ return -EINVAL;
+ }
+
+ /* Create soft job. */
+ job = mali_soft_job_create(session->soft_job_system, (enum mali_soft_job_type)type, user_job);
+ if (unlikely(NULL == job)) {
+ return map_errcode(_MALI_OSK_ERR_NOMEM);
+ }
+
+ /* Write job id back to user space. */
+ if (0 != put_user(job->id, job_id_ptr)) {
+ MALI_PRINT_ERROR(("Mali Soft Job: failed to put job id"));
+ mali_soft_job_destroy(job);
+ return map_errcode(_MALI_OSK_ERR_NOMEM);
+ }
+
+ /* Start soft job. */
+ point = mali_soft_job_start(job, &fence);
+
+ if (0 != put_user(point, &uargs->point)) {
+ /* Let user space know that something failed after the job was started. */
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs)
+{
+ u32 job_id;
+ _mali_osk_errcode_t err;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (0 != get_user(job_id, &uargs->job_id)) return -EFAULT;
+
+ err = mali_soft_job_system_signal_job(session->soft_job_system, job_id);
+
+ return map_errcode(err);
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_timeline.c b/drivers/gpu/arm/utgard/linux/mali_ukk_timeline.c
new file mode 100644
index 000000000000..484d4041c869
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_timeline.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/fs.h> /* file system operations */
+#include <asm/uaccess.h> /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+#include "mali_timeline.h"
+#include "mali_timeline_fence_wait.h"
+#include "mali_timeline_sync_fence.h"
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs)
+{
+ u32 val;
+ mali_timeline_id timeline;
+ mali_timeline_point point;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (0 != get_user(val, &uargs->timeline)) return -EFAULT;
+
+ if (MALI_UK_TIMELINE_MAX <= val) {
+ return -EINVAL;
+ }
+
+ timeline = (mali_timeline_id)val;
+
+ point = mali_timeline_system_get_latest_point(session->timeline_system, timeline);
+
+ if (0 != put_user(point, &uargs->point)) return -EFAULT;
+
+ return 0;
+}
+
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs)
+{
+ u32 timeout, status;
+ mali_bool ret;
+ _mali_uk_fence_t uk_fence;
+ struct mali_timeline_fence fence;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+ if (0 != get_user(timeout, &uargs->timeout)) return -EFAULT;
+
+ mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+ ret = mali_timeline_fence_wait(session->timeline_system, &fence, timeout);
+ status = (MALI_TRUE == ret ? 1 : 0);
+
+ if (0 != put_user(status, &uargs->status)) return -EFAULT;
+
+ return 0;
+}
+
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs)
+{
+ s32 sync_fd = -1;
+ _mali_uk_fence_t uk_fence;
+ struct mali_timeline_fence fence;
+
+ MALI_DEBUG_ASSERT_POINTER(session);
+
+ if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT;
+ mali_timeline_fence_copy_uk_fence(&fence, &uk_fence);
+
+#if defined(CONFIG_SYNC)
+ sync_fd = mali_timeline_sync_fence_create(session->timeline_system, &fence);
+#else
+ sync_fd = -1;
+#endif /* defined(CONFIG_SYNC) */
+
+ if (0 != put_user(sync_fd, &uargs->sync_fd)) return -EFAULT;
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_vsync.c b/drivers/gpu/arm/utgard/linux/mali_ukk_vsync.c
new file mode 100644
index 000000000000..487c2478df4d
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_vsync.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/fs.h> /* file system operations */
+#include <asm/uaccess.h> /* user space access */
+
+#include "mali_ukk.h"
+#include "mali_osk.h"
+#include "mali_kernel_common.h"
+#include "mali_session.h"
+#include "mali_ukk_wrappers.h"
+
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs)
+{
+ _mali_uk_vsync_event_report_s kargs;
+ _mali_osk_errcode_t err;
+
+ MALI_CHECK_NON_NULL(uargs, -EINVAL);
+
+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_vsync_event_report_s))) {
+ return -EFAULT;
+ }
+
+ kargs.ctx = (uintptr_t)session_data;
+ err = _mali_ukk_vsync_event_report(&kargs);
+ if (_MALI_OSK_ERR_OK != err) {
+ return map_errcode(err);
+ }
+
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/utgard/linux/mali_ukk_wrappers.h b/drivers/gpu/arm/utgard/linux/mali_ukk_wrappers.h
new file mode 100644
index 000000000000..ac84e446bd12
--- /dev/null
+++ b/drivers/gpu/arm/utgard/linux/mali_ukk_wrappers.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file mali_ukk_wrappers.h
+ * Defines the wrapper functions for each user-kernel function
+ */
+
+#ifndef __MALI_UKK_WRAPPERS_H__
+#define __MALI_UKK_WRAPPERS_H__
+
+#include "mali_uk_types.h"
+#include "mali_osk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs);
+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs);
+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs);
+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs);
+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs);
+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs);
+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs);
+
+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs);
+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs);
+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs);
+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs);
+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs);
+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs);
+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs);
+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs);
+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs);
+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs);
+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs);
+
+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs);
+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs);
+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs);
+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs);
+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs);
+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs);
+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs);
+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs);
+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs);
+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs);
+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs);
+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs);
+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs);
+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs);
+
+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs);
+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs);
+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs);
+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs);
+
+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs);
+
+
+int map_errcode(_mali_osk_errcode_t err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UKK_WRAPPERS_H__ */
diff --git a/drivers/gpu/arm/utgard/platform/arm/arm.c b/drivers/gpu/arm/utgard/platform/arm/arm.c
new file mode 100644
index 000000000000..41ad63c0793b
--- /dev/null
+++ b/drivers/gpu/arm/utgard/platform/arm/arm.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file arm.c
+ * Platform specific Mali driver functions for:
+ * - RealView Versatile platforms with ARM11 MPCore and Virtex 5.
+ * - Versatile Express platforms with ARM Cortex-A9 and Virtex 6.
+ */
+#include <linux/platform_device.h>
+#include <linux/version.h>
+#include <linux/pm.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <asm/io.h>
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+#include <linux/dma-mapping.h>
+#include <linux/moduleparam.h>
+
+#include "arm_core_scaling.h"
+#include "mali_executor.h"
+
+
+static int mali_core_scaling_enable = 0;
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data);
+static u32 mali_read_phys(u32 phys_addr);
+#if defined(CONFIG_ARCH_REALVIEW)
+static void mali_write_phys(u32 phys_addr, u32 value);
+#endif
+
+#ifndef CONFIG_MALI_DT
+static void mali_platform_device_release(struct device *device);
+
+#if defined(CONFIG_ARCH_VEXPRESS)
+
+#if defined(CONFIG_ARM64)
+/* Juno + Mali-450 MP6 in V7 FPGA */
+static struct resource mali_gpu_resources_m450_mp6[] = {
+ MALI_GPU_RESOURCES_MALI450_MP6_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200)
+};
+
+static struct resource mali_gpu_resources_m470_mp4[] = {
+ MALI_GPU_RESOURCES_MALI470_MP4_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200)
+};
+
+static struct resource mali_gpu_resources_m470_mp3[] = {
+ MALI_GPU_RESOURCES_MALI470_MP3_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200)
+};
+
+static struct resource mali_gpu_resources_m470_mp2[] = {
+ MALI_GPU_RESOURCES_MALI470_MP2_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200)
+};
+
+static struct resource mali_gpu_resources_m470_mp1[] = {
+ MALI_GPU_RESOURCES_MALI470_MP1_PMU(0x6F040000, 200, 200, 200, 200, 200)
+};
+
+#else
+static struct resource mali_gpu_resources_m450_mp8[] = {
+ MALI_GPU_RESOURCES_MALI450_MP8_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68)
+};
+
+static struct resource mali_gpu_resources_m450_mp6[] = {
+ MALI_GPU_RESOURCES_MALI450_MP6_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68)
+};
+
+static struct resource mali_gpu_resources_m450_mp4[] = {
+ MALI_GPU_RESOURCES_MALI450_MP4_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68)
+};
+
+static struct resource mali_gpu_resources_m470_mp4[] = {
+ MALI_GPU_RESOURCES_MALI470_MP4_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68)
+};
+#endif /* CONFIG_ARM64 */
+
+#elif defined(CONFIG_ARCH_REALVIEW)
+
+static struct resource mali_gpu_resources_m300[] = {
+ MALI_GPU_RESOURCES_MALI300_PMU(0xC0000000, -1, -1, -1, -1)
+};
+
+static struct resource mali_gpu_resources_m400_mp1[] = {
+ MALI_GPU_RESOURCES_MALI400_MP1_PMU(0xC0000000, -1, -1, -1, -1)
+};
+
+static struct resource mali_gpu_resources_m400_mp2[] = {
+ MALI_GPU_RESOURCES_MALI400_MP2_PMU(0xC0000000, -1, -1, -1, -1, -1, -1)
+};
+
+#endif
+#endif
+
+static struct mali_gpu_device_data mali_gpu_data = {
+#ifndef CONFIG_MALI_DT
+ .pmu_switch_delay = 0xFF, /* does not need to be this high on FPGA, but a delay is useful for testing */
+ .max_job_runtime = 60000, /* 60 seconds */
+#if defined(CONFIG_ARCH_VEXPRESS)
+ .shared_mem_size = 256 * 1024 * 1024, /* 256MB */
+#endif
+#endif
+
+#if defined(CONFIG_ARCH_REALVIEW)
+ .dedicated_mem_start = 0x80000000, /* Physical start address (use 0xD0000000 for old indirect setup) */
+ .dedicated_mem_size = 0x10000000, /* 256MB */
+#endif
+#if defined(CONFIG_ARM64)
+ /* Some framebuffer drivers get the framebuffer dynamically, such as through GEM,
+ * in which case the memory resource can't be predicted in advance.
+ */
+ .fb_start = 0x0,
+ .fb_size = 0xFFFFF000,
+#else
+ .fb_start = 0xe0000000,
+ .fb_size = 0x01000000,
+#endif
+ .control_interval = 1000, /* 1000ms */
+ .utilization_callback = mali_gpu_utilization_callback,
+ .get_clock_info = NULL,
+ .get_freq = NULL,
+ .set_freq = NULL,
+};
+
+#ifndef CONFIG_MALI_DT
+static struct platform_device mali_gpu_device = {
+ .name = MALI_GPU_NAME_UTGARD,
+ .id = 0,
+ .dev.release = mali_platform_device_release,
+ .dev.dma_mask = &mali_gpu_device.dev.coherent_dma_mask,
+ .dev.coherent_dma_mask = DMA_BIT_MASK(32),
+
+ .dev.platform_data = &mali_gpu_data,
+};
+
+int mali_platform_device_register(void)
+{
+ int err = -1;
+ int num_pp_cores = 0;
+#if defined(CONFIG_ARCH_REALVIEW)
+ u32 m400_gp_version;
+#endif
+
+ MALI_DEBUG_PRINT(4, ("mali_platform_device_register() called\n"));
+
+ /* Detect present Mali GPU and connect the correct resources to the device */
+#if defined(CONFIG_ARCH_VEXPRESS)
+
+#if defined(CONFIG_ARM64)
+ mali_gpu_device.dev.archdata.dma_ops = dma_ops;
+ if ((mali_read_phys(0x6F000000) & 0x00600450) == 0x00600450) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n"));
+ num_pp_cores = 6;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp6);
+ mali_gpu_device.resource = mali_gpu_resources_m450_mp6;
+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00400430) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n"));
+ num_pp_cores = 4;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp4);
+ mali_gpu_device.resource = mali_gpu_resources_m470_mp4;
+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00300430) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP3 device\n"));
+ num_pp_cores = 3;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp3);
+ mali_gpu_device.resource = mali_gpu_resources_m470_mp3;
+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00200430) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP2 device\n"));
+ num_pp_cores = 2;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp2);
+ mali_gpu_device.resource = mali_gpu_resources_m470_mp2;
+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00100430) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP1 device\n"));
+ num_pp_cores = 1;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp1);
+ mali_gpu_device.resource = mali_gpu_resources_m470_mp1;
+ }
+#else
+ if (mali_read_phys(0xFC000000) == 0x00000450) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP8 device\n"));
+ num_pp_cores = 8;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp8);
+ mali_gpu_device.resource = mali_gpu_resources_m450_mp8;
+ } else if (mali_read_phys(0xFC000000) == 0x40600450) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n"));
+ num_pp_cores = 6;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp6);
+ mali_gpu_device.resource = mali_gpu_resources_m450_mp6;
+ } else if (mali_read_phys(0xFC000000) == 0x40400450) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP4 device\n"));
+ num_pp_cores = 4;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp4);
+ mali_gpu_device.resource = mali_gpu_resources_m450_mp4;
+ } else if (mali_read_phys(0xFC000000) == 0xFFFFFFFF) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n"));
+ num_pp_cores = 4;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp4);
+ mali_gpu_device.resource = mali_gpu_resources_m470_mp4;
+ }
+#endif /* CONFIG_ARM64 */
+
+#elif defined(CONFIG_ARCH_REALVIEW)
+
+ m400_gp_version = mali_read_phys(0xC000006C);
+ if ((m400_gp_version & 0xFFFF0000) == 0x0C070000) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-300 device\n"));
+ num_pp_cores = 1;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m300);
+ mali_gpu_device.resource = mali_gpu_resources_m300;
+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */
+ } else if ((m400_gp_version & 0xFFFF0000) == 0x0B070000) {
+ u32 fpga_fw_version = mali_read_phys(0xC0010000);
+ if (fpga_fw_version == 0x130C008F || fpga_fw_version == 0x110C008F) {
+ /* Mali-400 MP1 r1p0 or r1p1 */
+ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP1 device\n"));
+ num_pp_cores = 1;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m400_mp1);
+ mali_gpu_device.resource = mali_gpu_resources_m400_mp1;
+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */
+ } else if (fpga_fw_version == 0x130C000F) {
+ /* Mali-400 MP2 r1p1 */
+ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP2 device\n"));
+ num_pp_cores = 2;
+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m400_mp2);
+ mali_gpu_device.resource = mali_gpu_resources_m400_mp2;
+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */
+ }
+ }
+
+#endif
+ /* Register the platform device */
+ err = platform_device_register(&mali_gpu_device);
+ if (0 == err) {
+#ifdef CONFIG_PM_RUNTIME
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+ pm_runtime_set_autosuspend_delay(&(mali_gpu_device.dev), 1000);
+ pm_runtime_use_autosuspend(&(mali_gpu_device.dev));
+#endif
+ pm_runtime_enable(&(mali_gpu_device.dev));
+#endif
+ MALI_DEBUG_ASSERT(0 < num_pp_cores);
+ mali_core_scaling_init(num_pp_cores);
+
+ return 0;
+ }
+
+ return err;
+}
+
+void mali_platform_device_unregister(void)
+{
+ MALI_DEBUG_PRINT(4, ("mali_platform_device_unregister() called\n"));
+
+ mali_core_scaling_term();
+ /* platform_device_unregister() drops the last reference itself,
+ * so no additional platform_device_put() is needed here. */
+ platform_device_unregister(&mali_gpu_device);
+
+#if defined(CONFIG_ARCH_REALVIEW)
+ mali_write_phys(0xC0010020, 0x9); /* Restore default (legacy) memory mapping */
+#endif
+}
+
+static void mali_platform_device_release(struct device *device)
+{
+ MALI_DEBUG_PRINT(4, ("mali_platform_device_release() called\n"));
+}
+
+#else /* CONFIG_MALI_DT */
+int mali_platform_device_init(struct platform_device *device)
+{
+ int num_pp_cores = 0;
+ int err = -1;
+#if defined(CONFIG_ARCH_REALVIEW)
+ u32 m400_gp_version;
+#endif
+
+ /* Detect present Mali GPU and connect the correct resources to the device */
+#if defined(CONFIG_ARCH_VEXPRESS)
+
+#if defined(CONFIG_ARM64)
+ if ((mali_read_phys(0x6F000000) & 0x00600450) == 0x00600450) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n"));
+ num_pp_cores = 6;
+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00400430) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n"));
+ num_pp_cores = 4;
+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00300430) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP3 device\n"));
+ num_pp_cores = 3;
+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00200430) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP2 device\n"));
+ num_pp_cores = 2;
+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00100430) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP1 device\n"));
+ num_pp_cores = 1;
+ }
+#else
+ if (mali_read_phys(0xFC000000) == 0x00000450) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP8 device\n"));
+ num_pp_cores = 8;
+ } else if (mali_read_phys(0xFC000000) == 0x40400450) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP4 device\n"));
+ num_pp_cores = 4;
+ } else if (mali_read_phys(0xFC000000) == 0xFFFFFFFF) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n"));
+ num_pp_cores = 4;
+ }
+#endif
+
+#elif defined(CONFIG_ARCH_REALVIEW)
+
+ m400_gp_version = mali_read_phys(0xC000006C);
+ if ((m400_gp_version & 0xFFFF0000) == 0x0C070000) {
+ MALI_DEBUG_PRINT(4, ("Registering Mali-300 device\n"));
+ num_pp_cores = 1;
+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */
+ } else if ((m400_gp_version & 0xFFFF0000) == 0x0B070000) {
+ u32 fpga_fw_version = mali_read_phys(0xC0010000);
+ if (fpga_fw_version == 0x130C008F || fpga_fw_version == 0x110C008F) {
+ /* Mali-400 MP1 r1p0 or r1p1 */
+ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP1 device\n"));
+ num_pp_cores = 1;
+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */
+ } else if (fpga_fw_version == 0x130C000F) {
+ /* Mali-400 MP2 r1p1 */
+ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP2 device\n"));
+ num_pp_cores = 2;
+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */
+ }
+ }
+#endif
+
+ /* From kernel 3.15 onwards the device tree core sets the default
+ * dev parameters in of_platform_device_create_pdata(), but this
+ * varies between kernel versions: 3.10, for example, did not set
+ * device->dev.dma_mask, which causes dma_mapping errors if it is
+ * also left unset here, while 3.15 does set it. It is therefore
+ * safest for the DDK to set the required parameters itself.
+ */
+ if (!device->dev.dma_mask)
+ device->dev.dma_mask = &device->dev.coherent_dma_mask;
+ device->dev.archdata.dma_ops = dma_ops;
+
+ err = platform_device_add_data(device, &mali_gpu_data, sizeof(mali_gpu_data));
+
+ if (0 == err) {
+#ifdef CONFIG_PM_RUNTIME
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37))
+ pm_runtime_set_autosuspend_delay(&(device->dev), 1000);
+ pm_runtime_use_autosuspend(&(device->dev));
+#endif
+ pm_runtime_enable(&(device->dev));
+#endif
+ MALI_DEBUG_ASSERT(0 < num_pp_cores);
+ mali_core_scaling_init(num_pp_cores);
+ }
+
+ return err;
+}
+
+int mali_platform_device_deinit(struct platform_device *device)
+{
+ MALI_IGNORE(device);
+
+ MALI_DEBUG_PRINT(4, ("mali_platform_device_deinit() called\n"));
+
+ mali_core_scaling_term();
+
+#if defined(CONFIG_ARCH_REALVIEW)
+ mali_write_phys(0xC0010020, 0x9); /* Restore default (legacy) memory mapping */
+#endif
+
+ return 0;
+}
+
+#endif /* CONFIG_MALI_DT */
+
+static u32 mali_read_phys(u32 phys_addr)
+{
+ u32 phys_addr_page = phys_addr & 0xFFFFE000;
+ u32 phys_offset = phys_addr & 0x00001FFF;
+ u32 map_size = phys_offset + sizeof(u32);
+ u32 ret = 0xDEADBEEF;
+ void *mem_mapped = ioremap_nocache(phys_addr_page, map_size);
+ if (NULL != mem_mapped) {
+ ret = (u32)ioread32(((u8 *)mem_mapped) + phys_offset);
+ iounmap(mem_mapped);
+ }
+
+ return ret;
+}
+
+#if defined(CONFIG_ARCH_REALVIEW)
+static void mali_write_phys(u32 phys_addr, u32 value)
+{
+ u32 phys_addr_page = phys_addr & 0xFFFFE000;
+ u32 phys_offset = phys_addr & 0x00001FFF;
+ u32 map_size = phys_offset + sizeof(u32);
+ void *mem_mapped = ioremap_nocache(phys_addr_page, map_size);
+ if (NULL != mem_mapped) {
+ iowrite32(value, ((u8 *)mem_mapped) + phys_offset);
+ iounmap(mem_mapped);
+ }
+}
+#endif
+
+static int param_set_core_scaling(const char *val, const struct kernel_param *kp)
+{
+ int ret = param_set_int(val, kp);
+
+ if (1 == mali_core_scaling_enable) {
+ mali_core_scaling_sync(mali_executor_get_num_cores_enabled());
+ }
+ return ret;
+}
+
+static struct kernel_param_ops param_ops_core_scaling = {
+ .set = param_set_core_scaling,
+ .get = param_get_int,
+};
+
+module_param_cb(mali_core_scaling_enable, &param_ops_core_scaling, &mali_core_scaling_enable, 0644);
+MODULE_PARM_DESC(mali_core_scaling_enable, "1 to enable the core scaling policy, 0 to disable it");
+
+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data)
+{
+ if (1 == mali_core_scaling_enable) {
+ mali_core_scaling_update(data);
+ }
+}
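
Note on mali_read_phys()/mali_write_phys() above: each access ioremaps an
8 KB-aligned page and then indexes into the mapping with the low address
bits. A minimal stand-alone sketch of the same address split (hypothetical,
user-space only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t phys_addr = 0xC000006C;              /* example register address */
            uint32_t page      = phys_addr & 0xFFFFE000u; /* 8 KB-aligned base for ioremap */
            uint32_t offset    = phys_addr & 0x00001FFFu; /* byte offset into the mapping */
            uint32_t map_size  = offset + (uint32_t)sizeof(uint32_t);

            printf("map base 0x%08X (%u bytes), register at offset 0x%X\n",
                   (unsigned)page, (unsigned)map_size, (unsigned)offset);
            return 0;
    }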
diff --git a/drivers/gpu/arm/utgard/platform/arm/arm_core_scaling.c b/drivers/gpu/arm/utgard/platform/arm/arm_core_scaling.c
new file mode 100644
index 000000000000..2c24742eb4de
--- /dev/null
+++ b/drivers/gpu/arm/utgard/platform/arm/arm_core_scaling.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.c
+ * Example core scaling policy.
+ */
+
+#include "arm_core_scaling.h"
+
+#include <linux/mali/mali_utgard.h>
+#include "mali_kernel_common.h"
+
+#include <linux/workqueue.h>
+
+static int num_cores_total;
+static int num_cores_enabled;
+
+static struct work_struct wq_work;
+
+static void set_num_cores(struct work_struct *work)
+{
+ int err = mali_perf_set_num_pp_cores(num_cores_enabled);
+ MALI_DEBUG_ASSERT(0 == err);
+ MALI_IGNORE(err);
+}
+
+static void enable_one_core(void)
+{
+ if (num_cores_enabled < num_cores_total) {
+ ++num_cores_enabled;
+ schedule_work(&wq_work);
+ MALI_DEBUG_PRINT(3, ("Core scaling: Enabling one more core\n"));
+ }
+
+ MALI_DEBUG_ASSERT(1 <= num_cores_enabled);
+ MALI_DEBUG_ASSERT(num_cores_total >= num_cores_enabled);
+}
+
+static void disable_one_core(void)
+{
+ if (1 < num_cores_enabled) {
+ --num_cores_enabled;
+ schedule_work(&wq_work);
+ MALI_DEBUG_PRINT(3, ("Core scaling: Disabling one core\n"));
+ }
+
+ MALI_DEBUG_ASSERT(1 <= num_cores_enabled);
+ MALI_DEBUG_ASSERT(num_cores_total >= num_cores_enabled);
+}
+
+static void enable_max_num_cores(void)
+{
+ if (num_cores_enabled < num_cores_total) {
+ num_cores_enabled = num_cores_total;
+ schedule_work(&wq_work);
+ MALI_DEBUG_PRINT(3, ("Core scaling: Enabling maximum number of cores\n"));
+ }
+
+ MALI_DEBUG_ASSERT(num_cores_total == num_cores_enabled);
+}
+
+void mali_core_scaling_init(int num_pp_cores)
+{
+ INIT_WORK(&wq_work, set_num_cores);
+
+ num_cores_total = num_pp_cores;
+ num_cores_enabled = num_pp_cores;
+
+ /* NOTE: Mali is not fully initialized at this point. */
+}
+
+void mali_core_scaling_sync(int num_cores)
+{
+ num_cores_enabled = num_cores;
+}
+
+void mali_core_scaling_term(void)
+{
+ flush_scheduled_work();
+}
+
+#define PERCENT_OF(percent, max) ((int) ((percent)*(max)/100.0 + 0.5))
+
+void mali_core_scaling_update(struct mali_gpu_utilization_data *data)
+{
+ /*
+ * This function implements a very trivial PP core scaling algorithm.
+ *
+ * It is _NOT_ of production quality.
+ * The only intention behind this algorithm is to exercise and test the
+ * core scaling functionality of the driver.
+ * It is _NOT_ tuned for either power saving or performance!
+ *
+ * Metrics other than PP utilization need to be considered as well
+ * in order to make a good core scaling algorithm.
+ */
+
+ MALI_DEBUG_PRINT(3, ("Utilization: (%3d, %3d, %3d), cores enabled: %d/%d\n", data->utilization_gpu, data->utilization_gp, data->utilization_pp, num_cores_enabled, num_cores_total));
+
+ /* NOTE: this function is normally called directly from the utilization callback which is in
+ * timer context. */
+
+ if (PERCENT_OF(90, 256) < data->utilization_pp) {
+ enable_max_num_cores();
+ } else if (PERCENT_OF(50, 256) < data->utilization_pp) {
+ enable_one_core();
+ } else if (PERCENT_OF(40, 256) < data->utilization_pp) {
+ /* do nothing */
+ } else if (PERCENT_OF(0, 256) < data->utilization_pp) {
+ disable_one_core();
+ } else {
+ /* do nothing */
+ }
+}
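
For reference, utilization is reported on a 0..256 scale, so the PERCENT_OF()
thresholds used above expand to fixed raw values. A stand-alone sketch
(hypothetical, not part of the patch) that reuses the macro and prints them:

    #include <stdio.h>

    #define PERCENT_OF(percent, max) ((int) ((percent)*(max)/100.0 + 0.5))

    int main(void)
    {
            printf("90%% of 256 -> %d (scale up to all cores)\n", PERCENT_OF(90, 256)); /* 230 */
            printf("50%% of 256 -> %d (enable one more core)\n",  PERCENT_OF(50, 256)); /* 128 */
            printf("40%% of 256 -> %d (hold steady)\n",           PERCENT_OF(40, 256)); /* 102 */
            printf(" 0%% of 256 -> %d (disable one core)\n",      PERCENT_OF(0, 256));  /*   0 */
            return 0;
    }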
diff --git a/drivers/gpu/arm/utgard/platform/arm/arm_core_scaling.h b/drivers/gpu/arm/utgard/platform/arm/arm_core_scaling.h
new file mode 100644
index 000000000000..325b5b1c6894
--- /dev/null
+++ b/drivers/gpu/arm/utgard/platform/arm/arm_core_scaling.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * @file arm_core_scaling.h
+ * Example core scaling policy.
+ */
+
+#ifndef __ARM_CORE_SCALING_H__
+#define __ARM_CORE_SCALING_H__
+
+struct mali_gpu_utilization_data;
+
+/**
+ * Initialize core scaling policy.
+ *
+ * @note The core scaling policy will assume that all PP cores are on initially.
+ *
+ * @param num_pp_cores Total number of PP cores.
+ */
+void mali_core_scaling_init(int num_pp_cores);
+
+/**
+ * Terminate core scaling policy.
+ */
+void mali_core_scaling_term(void);
+
+/**
+ * Update core scaling policy with new utilization data.
+ *
+ * @param data Utilization data.
+ */
+void mali_core_scaling_update(struct mali_gpu_utilization_data *data);
+
+void mali_core_scaling_sync(int num_cores);
+
+#endif /* __ARM_CORE_SCALING_H__ */
diff --git a/drivers/gpu/arm/utgard/platform/hikey/mali_hikey.c b/drivers/gpu/arm/utgard/platform/hikey/mali_hikey.c
new file mode 100644
index 000000000000..cc556b2656fe
--- /dev/null
+++ b/drivers/gpu/arm/utgard/platform/hikey/mali_hikey.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (C) 2014 Hisilicon Co. Ltd.
+ * Copyright (C) 2015 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+/**
+ * @file mali_hikey.c
+ * HiKey platform specific Mali driver functions.
+ */
+
+/* Set to 1 to enable ION (not tested yet). */
+#define HISI6220_USE_ION 0
+
+#define pr_fmt(fmt) "Mali: HiKey: " fmt
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/driver.h>
+#include <linux/pm.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if HISI6220_USE_ION
+#include <linux/hisi/hisi_ion.h>
+#endif
+#include <linux/byteorder/generic.h>
+
+#include <linux/mali/mali_utgard.h>
+
+#include "mali_kernel_common.h"
+#include "mali_osk.h"
+#include "mali_hikey_hi6220_registers_gpu.h"
+
+#define MALI_GPU_MHZ 1000000
+#define MALI_IRQ_ID 142
+#define MALI_FRAME_BUFFER_ADDR 0x3F100000
+#define MALI_FRAME_BUFFER_SIZE 0x00708000
+
+#define MALI_CALC_REG_MASK(bit_start, bit_end) \
+ (((0x1 << (bit_end - bit_start + 1)) - 1) << bit_start)
+
+enum mali_core_type {
+ MALI_CORE_400_MP1 = 0,
+ MALI_CORE_400_MP2 = 1,
+ MALI_CORE_450_MP4 = 2,
+ MALI_CORE_TYPE_MAX
+};
+
+enum mali_power_mode {
+ MALI_POWER_MODE_ON, /**< Power on */
+ MALI_POWER_MODE_LIGHT_SLEEP, /**< Idle for a short time, or runtime PM suspend */
+ MALI_POWER_MODE_DEEP_SLEEP, /**< Idle for a long time, or OS suspend */
+};
+
+struct mali_soc_remap_addr_table {
+ u8 *soc_media_sctrl_base_addr;
+ u8 *soc_ao_sctrl_base_addr;
+ u8 *soc_peri_sctrl_base_addr;
+ u8 *soc_pmctl_base_addr;
+};
+
+static struct clk *mali_clk_g3d;
+static struct clk *mali_pclk_g3d;
+static struct regulator *mali_regulator;
+static struct device_node *mali_np;
+static bool mali_gpu_power_status;
+
+static struct resource mali_gpu_resources_m450_mp4[] = {
+ MALI_GPU_RESOURCES_MALI450_MP4(
+ SOC_G3D_S_BASE_ADDR, MALI_IRQ_ID, MALI_IRQ_ID, MALI_IRQ_ID,
+ MALI_IRQ_ID, MALI_IRQ_ID, MALI_IRQ_ID, MALI_IRQ_ID,
+ MALI_IRQ_ID, MALI_IRQ_ID, MALI_IRQ_ID, MALI_IRQ_ID)
+};
+
+static struct mali_soc_remap_addr_table *mali_soc_addr_table;
+
+static void mali_reg_writel(u8 *base_addr, unsigned int reg_offset,
+ unsigned char start_bit, unsigned char end_bit,
+ unsigned int val)
+{
+ int read_val;
+ unsigned long flags;
+ static DEFINE_SPINLOCK(reg_lock);
+ void __iomem *addr;
+
+ WARN_ON(!base_addr);
+
+ addr = base_addr + reg_offset;
+ spin_lock_irqsave(&reg_lock, flags);
+ read_val = readl(addr) & ~(MALI_CALC_REG_MASK(start_bit, end_bit));
+ read_val |= (MALI_CALC_REG_MASK(start_bit, end_bit)
+ & (val << start_bit));
+ writel(read_val, addr);
+ spin_unlock_irqrestore(&reg_lock, flags);
+}
+
+static unsigned int mali_reg_readl(u8 *base_addr, unsigned int reg_offset,
+ unsigned char start_bit,
+ unsigned char end_bit)
+{
+ unsigned int val;
+
+ WARN_ON(!base_addr);
+
+ val = readl((void __iomem *)(base_addr + reg_offset));
+ val &= MALI_CALC_REG_MASK(start_bit, end_bit);
+
+ return val >> start_bit;
+}
+
+static int mali_clock_on(void)
+{
+ u32 core_freq = 0;
+ u32 pclk_freq = 0;
+ int stat;
+
+ stat = clk_prepare_enable(mali_pclk_g3d);
+ if (stat)
+ return stat;
+
+ stat = of_property_read_u32(mali_np, "pclk_freq", &pclk_freq);
+ if (stat)
+ return stat;
+
+ stat = clk_set_rate(mali_pclk_g3d, pclk_freq * MALI_GPU_MHZ);
+ if (stat)
+ return stat;
+
+ stat = of_property_read_u32(mali_np, "mali_def_freq", &core_freq);
+ if (stat)
+ return stat;
+
+ stat = clk_set_rate(mali_clk_g3d, core_freq * MALI_GPU_MHZ);
+ if (stat)
+ return stat;
+
+ stat = clk_prepare_enable(mali_clk_g3d);
+ if (stat)
+ return stat;
+
+ mali_reg_writel(mali_soc_addr_table->soc_media_sctrl_base_addr,
+ SOC_MEDIA_SCTRL_SC_MEDIA_CLKDIS_ADDR(0), 17, 17, 1);
+
+ return 0;
+}
+
+static void mali_clock_off(void)
+{
+ clk_disable_unprepare(mali_clk_g3d);
+ clk_disable_unprepare(mali_pclk_g3d);
+}
+
+static int mali_domain_powerup_finish(void)
+{
+ unsigned int ret;
+
+ mali_reg_writel(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_RSTDIS0_ADDR(0), 1, 1, 1);
+ ret = mali_reg_readl(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_RST_STAT0_ADDR(0), 1, 1);
+ if (ret != 0) {
+ pr_err("SET SC_PW_RSTDIS0 failed!\n");
+ return -EFAULT;
+ }
+
+ mali_reg_writel(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_ISODIS0_ADDR(0), 1, 1, 1);
+ ret = mali_reg_readl(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_ISO_STAT0_ADDR(0), 1, 1);
+ if (ret != 0) {
+ pr_err("SET SC_PW_ISODIS0 failed!\n");
+ return -EFAULT;
+ }
+
+ mali_reg_writel(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_CLKEN0_ADDR(0), 1, 1, 1);
+ ret = mali_reg_readl(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_CLK_STAT0_ADDR(0), 1, 1);
+ if (ret != 1) {
+ pr_err("SET SC_PW_CLKEN0 failed!\n");
+ return -EFAULT;
+ }
+
+ mali_reg_writel(mali_soc_addr_table->soc_media_sctrl_base_addr,
+ SOC_MEDIA_SCTRL_SC_MEDIA_RSTDIS_ADDR(0), 0, 0, 1);
+ ret = mali_reg_readl(mali_soc_addr_table->soc_media_sctrl_base_addr,
+ SOC_MEDIA_SCTRL_SC_MEDIA_RST_STAT_ADDR(0), 0, 0);
+ if (ret != 0) {
+ pr_err("SET SC_MEDIA_RSTDIS failed!\n");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int mali_regulator_enable(void)
+{
+ int i, stat;
+
+ stat = regulator_enable(mali_regulator);
+ if (stat)
+ return stat;
+
+ for (i = 0; i < 50; i++) {
+ stat = regulator_is_enabled(mali_regulator);
+ if (stat > 0)
+ break;
+ udelay(1);
+ }
+
+ if (50 == i) {
+ pr_err("regulator enable timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int mali_platform_powerup(void)
+{
+ int stat;
+
+ if (mali_gpu_power_status)
+ return 0;
+
+ stat = mali_regulator_enable();
+ if (stat)
+ return stat;
+
+ stat = mali_clock_on();
+ if (stat)
+ return stat;
+
+ stat = mali_domain_powerup_finish();
+ if (stat)
+ return stat;
+
+ mali_gpu_power_status = true;
+
+ return 0;
+}
+
+static int mali_regulator_disable(void)
+{
+ mali_reg_writel(mali_soc_addr_table->soc_media_sctrl_base_addr,
+ SOC_MEDIA_SCTRL_SC_MEDIA_RSTEN_ADDR(0), 0, 0, 1);
+ mali_reg_writel(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_CLKDIS0_ADDR(0), 1, 1, 1);
+ mali_reg_writel(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_ISOEN0_ADDR(0), 1, 1, 1);
+ mali_reg_writel(mali_soc_addr_table->soc_ao_sctrl_base_addr,
+ SOC_AO_SCTRL_SC_PW_RSTEN0_ADDR(0), 1, 1, 1);
+
+ return regulator_disable(mali_regulator);
+}
+
+static int mali_platform_powerdown(void)
+{
+ int stat;
+
+ if (!mali_gpu_power_status)
+ return 0;
+
+ stat = mali_regulator_disable();
+ if (stat)
+ return stat;
+
+ mali_clock_off();
+ mali_gpu_power_status = false;
+
+ return 0;
+}
+
+static int mali_platform_power_mode_change(enum mali_power_mode power_mode)
+{
+ int stat;
+
+ switch (power_mode) {
+ case MALI_POWER_MODE_ON:
+ stat = mali_platform_powerup();
+ break;
+ case MALI_POWER_MODE_LIGHT_SLEEP:
+ case MALI_POWER_MODE_DEEP_SLEEP:
+ stat = mali_platform_powerdown();
+ break;
+ default:
+ pr_err("Invalid power mode\n");
+ stat = -EINVAL;
+ break;
+ }
+
+ return stat;
+}
+
+static int mali_os_suspend(struct device *device)
+{
+ int stat;
+
+ if (device->driver &&
+ device->driver->pm &&
+ device->driver->pm->suspend) {
+ stat = device->driver->pm->suspend(device);
+ } else {
+ stat = 0;
+ }
+
+ if (stat)
+ return stat;
+
+ return mali_platform_power_mode_change(MALI_POWER_MODE_DEEP_SLEEP);
+}
+
+static int mali_os_resume(struct device *device)
+{
+ int stat;
+
+ stat = mali_platform_power_mode_change(MALI_POWER_MODE_ON);
+ if (stat)
+ return stat;
+
+ if (device->driver &&
+ device->driver->pm &&
+ device->driver->pm->resume) {
+ stat = device->driver->pm->resume(device);
+ }
+
+ return stat;
+}
+
+static int mali_os_freeze(struct device *device)
+{
+ int stat;
+
+ if (device->driver &&
+ device->driver->pm &&
+ device->driver->pm->freeze) {
+ stat = device->driver->pm->freeze(device);
+ } else {
+ stat = 0;
+ }
+
+ return stat;
+}
+
+static int mali_os_thaw(struct device *device)
+{
+ int stat;
+
+ if (device->driver &&
+ device->driver->pm &&
+ device->driver->pm->thaw) {
+ stat = device->driver->pm->thaw(device);
+ } else {
+ stat = 0;
+ }
+
+ return stat;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static int mali_runtime_suspend(struct device *device)
+{
+ int stat;
+
+ if (device->driver &&
+ device->driver->pm &&
+ device->driver->pm->runtime_suspend) {
+ stat = device->driver->pm->runtime_suspend(device);
+ } else {
+ stat = 0;
+ }
+
+ if (stat)
+ return stat;
+
+ return mali_platform_power_mode_change(MALI_POWER_MODE_LIGHT_SLEEP);
+}
+
+static int mali_runtime_resume(struct device *device)
+{
+ int stat;
+
+ stat = mali_platform_power_mode_change(MALI_POWER_MODE_ON);
+ if (stat)
+ return stat;
+
+ if (device->driver &&
+ device->driver->pm &&
+ device->driver->pm->runtime_resume) {
+ stat = device->driver->pm->runtime_resume(device);
+ }
+
+ return stat;
+}
+
+static int mali_runtime_idle(struct device *device)
+{
+ int stat;
+
+ if (device->driver &&
+ device->driver->pm &&
+ device->driver->pm->runtime_idle) {
+ stat = device->driver->pm->runtime_idle(device);
+ } else {
+ stat = 0;
+ }
+
+ if (stat)
+ return stat;
+
+ return pm_runtime_suspend(device);
+}
+#endif
+
+static int init_mali_clock_regulator(struct platform_device *pdev)
+{
+ int stat, ret;
+
+ BUG_ON(mali_regulator || mali_clk_g3d || mali_pclk_g3d);
+
+ /* regulator init */
+
+ mali_regulator = regulator_get(&pdev->dev, "G3D_PD_VDD");
+ if (IS_ERR(mali_regulator)) {
+ pr_err("failed to get G3D_PD_VDD\n");
+ return -ENODEV;
+ }
+
+ stat = mali_regulator_enable();
+ if (stat)
+ return stat;
+
+ mali_gpu_power_status = true;
+
+ /* clk init */
+
+ mali_clk_g3d = clk_get(&pdev->dev, "clk_g3d");
+ if (IS_ERR(mali_clk_g3d)) {
+ pr_err("failed to get source CLK_G3D\n");
+ return -ENODEV;
+ }
+
+ mali_pclk_g3d = clk_get(&pdev->dev, "pclk_g3d");
+ if (IS_ERR(mali_pclk_g3d)) {
+ pr_err("failed to get source PCLK_G3D\n");
+ return -ENODEV;
+ }
+
+ ret = mali_reg_readl(mali_soc_addr_table->soc_peri_sctrl_base_addr,
+ SOC_PERI_SCTRL_SC_PERIPH_CLKSTAT12_ADDR(0),
+ 10, 10);
+ if (ret != 1) {
+ mali_reg_writel(mali_soc_addr_table->soc_peri_sctrl_base_addr,
+ SOC_PERI_SCTRL_SC_PERIPH_CLKEN12_ADDR(0),
+ 10, 10, 1);
+ ret = mali_reg_readl(
+ mali_soc_addr_table->soc_peri_sctrl_base_addr,
+ SOC_PERI_SCTRL_SC_PERIPH_CLKSTAT12_ADDR(0), 10, 10);
+ if (ret != 1) {
+ pr_err("SET SC_PERIPH_CLKEN12 failed!\n");
+ return -EFAULT;
+ }
+ }
+
+ stat = mali_clock_on();
+ if (stat)
+ return stat;
+
+ mali_reg_writel(mali_soc_addr_table->soc_media_sctrl_base_addr,
+ SOC_MEDIA_SCTRL_SC_MEDIA_CLKCFG2_ADDR(0), 15, 15, 1);
+ ret = mali_reg_readl(mali_soc_addr_table->soc_media_sctrl_base_addr,
+ SOC_MEDIA_SCTRL_SC_MEDIA_CLKCFG2_ADDR(0), 15, 15);
+ if (ret != 1) {
+ pr_err("SET SC_MEDIA_CLKCFG2 failed!\n");
+ return -EFAULT;
+ }
+
+ return mali_domain_powerup_finish();
+}
+
+static int deinit_mali_clock_regulator(void)
+{
+ int stat;
+
+ BUG_ON(!mali_regulator || !mali_clk_g3d || !mali_pclk_g3d);
+
+ stat = mali_platform_powerdown();
+ if (stat)
+ return stat;
+
+ clk_put(mali_clk_g3d);
+ mali_clk_g3d = NULL;
+ clk_put(mali_pclk_g3d);
+ mali_pclk_g3d = NULL;
+ regulator_put(mali_regulator);
+ mali_regulator = NULL;
+
+ return 0;
+}
+
+static struct mali_gpu_device_data mali_gpu_data = {
+ .shared_mem_size = 1024 * 1024 * 1024, /* 1024MB */
+ .fb_start = MALI_FRAME_BUFFER_ADDR,
+ .fb_size = MALI_FRAME_BUFFER_SIZE,
+ .max_job_runtime = 2000, /* 2 second timeout */
+ .control_interval = 50, /* 50ms */
+#ifdef CONFIG_MALI_DVFS
+ .utilization_callback = mali_gpu_utilization_proc,
+#endif
+};
+
+static const struct dev_pm_ops mali_gpu_device_type_pm_ops = {
+ .suspend = mali_os_suspend,
+ .resume = mali_os_resume,
+ .freeze = mali_os_freeze,
+ .thaw = mali_os_thaw,
+#ifdef CONFIG_PM_RUNTIME
+ .runtime_suspend = mali_runtime_suspend,
+ .runtime_resume = mali_runtime_resume,
+ .runtime_idle = mali_runtime_idle,
+#endif
+};
+
+static struct device_type mali_gpu_device_device_type = {
+ .pm = &mali_gpu_device_type_pm_ops,
+};
+
+static enum mali_core_type mali_get_gpu_type(void)
+{
+ u32 gpu_type = MALI_CORE_TYPE_MAX;
+ int err = of_property_read_u32(mali_np, "mali_type", &gpu_type);
+
+ if (err) {
+ pr_err("failed to read mali_type from device tree\n");
+ return -EFAULT;
+ }
+
+ return gpu_type;
+}
+
+#if HISI6220_USE_ION
+static int mali_ion_mem_init(void)
+{
+ struct ion_heap_info_data mem_data;
+
+ if (hisi_ion_get_heap_info(ION_FB_HEAP_ID, &mem_data)) {
+ pr_err("Failed to get ION_FB_HEAP_ID\n");
+ return -EFAULT;
+ }
+
+ if (mem_data.heap_size == 0) {
+ pr_err("fb size is 0\n");
+ return -EINVAL;
+ }
+
+ mali_gpu_data.fb_size = mem_data.heap_size;
+ mali_gpu_data.fb_start = (unsigned long)(mem_data.heap_phy);
+ pr_debug("fb_size=0x%x, fb_start=0x%x\n",
+ mali_gpu_data.fb_size, mali_gpu_data.fb_start);
+
+ return 0;
+}
+#endif
+
+static int mali_remap_soc_addr(void)
+{
+ BUG_ON(mali_soc_addr_table);
+
+ mali_soc_addr_table = kmalloc(sizeof(struct mali_soc_remap_addr_table),
+ GFP_KERNEL);
+ if (!mali_soc_addr_table)
+ return -ENOMEM;
+
+ mali_soc_addr_table->soc_media_sctrl_base_addr =
+ ioremap(SOC_MEDIA_SCTRL_BASE_ADDR, REG_MEDIA_SC_IOSIZE);
+ mali_soc_addr_table->soc_ao_sctrl_base_addr =
+ ioremap(SOC_AO_SCTRL_BASE_ADDR, REG_SC_ON_IOSIZE);
+ mali_soc_addr_table->soc_peri_sctrl_base_addr =
+ ioremap(SOC_PERI_SCTRL_BASE_ADDR, REG_SC_OFF_IOSIZE);
+ mali_soc_addr_table->soc_pmctl_base_addr =
+ ioremap(SOC_PMCTRL_BASE_ADDR, REG_PMCTRL_IOSIZE);
+
+ if (!mali_soc_addr_table->soc_media_sctrl_base_addr
+ || !mali_soc_addr_table->soc_ao_sctrl_base_addr
+ || !mali_soc_addr_table->soc_peri_sctrl_base_addr
+ || !mali_soc_addr_table->soc_pmctl_base_addr) {
+ pr_err("Failed to remap SoC addresses\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mali_unmap_soc_addr(void)
+{
+ iounmap((void __iomem *)mali_soc_addr_table->soc_media_sctrl_base_addr);
+ iounmap((void __iomem *)mali_soc_addr_table->soc_ao_sctrl_base_addr);
+ iounmap((void __iomem *)mali_soc_addr_table->soc_peri_sctrl_base_addr);
+ iounmap((void __iomem *)mali_soc_addr_table->soc_pmctl_base_addr);
+ kfree(mali_soc_addr_table);
+ mali_soc_addr_table = NULL;
+}
+
+int mali_platform_device_init(struct platform_device *pdev)
+{
+ int stat;
+ int irq, i;
+
+#if HISI6220_USE_ION
+ stat = mali_ion_mem_init();
+ if (stat)
+ return stat;
+#endif
+
+ stat = mali_remap_soc_addr();
+ if (stat)
+ return stat;
+
+ pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+ pdev->dev.type = &mali_gpu_device_device_type;
+ pdev->dev.platform_data = &mali_gpu_data;
+ pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
+ mali_np = pdev->dev.of_node;
+
+ if (mali_get_gpu_type() != MALI_CORE_450_MP4) {
+ pr_err("Unexpected GPU type\n");
+ return -EINVAL;
+ }
+
+ /*
+ * We need DT to resolve the IRQ domain, so rewrite the static
+ * resource table with the IRQ returned by platform_get_irq().
+ */
+ irq = platform_get_irq(pdev, 0);
+ for (i = 0; i < ARRAY_SIZE(mali_gpu_resources_m450_mp4); i++) {
+ if (IORESOURCE_IRQ & mali_gpu_resources_m450_mp4[i].flags) {
+ mali_gpu_resources_m450_mp4[i].start = irq;
+ mali_gpu_resources_m450_mp4[i].end = irq;
+ }
+ }
+ pdev->num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp4);
+ pdev->resource = mali_gpu_resources_m450_mp4;
+
+ stat = init_mali_clock_regulator(pdev);
+ if (stat)
+ return stat;
+
+#ifdef CONFIG_PM_RUNTIME
+ pm_runtime_set_autosuspend_delay(&(pdev->dev), 1);
+ pm_runtime_use_autosuspend(&(pdev->dev));
+ pm_runtime_enable(&pdev->dev);
+#endif
+
+ return 0;
+}
+
+int mali_platform_device_deinit(void)
+{
+ int stat;
+
+ stat = deinit_mali_clock_regulator();
+ if (stat)
+ return stat;
+
+ mali_unmap_soc_addr();
+
+ return 0;
+}
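
The mali_reg_writel()/mali_reg_readl() helpers above perform a masked
read-modify-write of the bitfield [start..end] built by MALI_CALC_REG_MASK().
A stand-alone sketch of the same arithmetic (hypothetical, user-space only,
not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define MALI_CALC_REG_MASK(bit_start, bit_end) \
            (((0x1 << (bit_end - bit_start + 1)) - 1) << bit_start)

    int main(void)
    {
            /* Set bit 17 to 1, as mali_clock_on() does for bit 17 of
             * the SC_MEDIA_CLKDIS register. */
            uint32_t reg  = 0x00012345;
            uint32_t mask = MALI_CALC_REG_MASK(17, 17);   /* 0x00020000 */
            uint32_t val  = 1;

            reg = (reg & ~mask) | (mask & (val << 17));
            printf("mask=0x%08X, new=0x%08X\n", (unsigned)mask, (unsigned)reg);
            return 0;
    }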
diff --git a/drivers/gpu/arm/utgard/platform/hikey/mali_hikey_hi6220_registers_gpu.h b/drivers/gpu/arm/utgard/platform/hikey/mali_hikey_hi6220_registers_gpu.h
new file mode 100644
index 000000000000..0bdf4a0482fd
--- /dev/null
+++ b/drivers/gpu/arm/utgard/platform/hikey/mali_hikey_hi6220_registers_gpu.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2014 Hisilicon Co. Ltd.
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * Author: Xuzixin <Xuzixin@hisilicon.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef MALI_HIKEY_HI6220_REGISTERS_GPU_H
+#define MALI_HIKEY_HI6220_REGISTERS_GPU_H 1
+
+#include <linux/mm.h>
+
+#define SOC_G3D_S_BASE_ADDR 0xF4080000 /* G3D ctrl base addr */
+#define SOC_MEDIA_SCTRL_BASE_ADDR 0xF4410000 /* media ctrl base addr */
+#define REG_MEDIA_SC_IOSIZE PAGE_ALIGN(SZ_4K)
+#define SOC_PMCTRL_BASE_ADDR 0xF7032000 /* pm ctrl base addr */
+#define REG_PMCTRL_IOSIZE PAGE_ALIGN(SZ_4K)
+#define SOC_AO_SCTRL_BASE_ADDR 0xF7800000 /* ao ctrl base addr */
+#define SOC_PERI_SCTRL_BASE_ADDR 0xF7030000 /* peri ctrl base addr */
+#define REG_SC_ON_IOSIZE PAGE_ALIGN(SZ_8K)
+#define REG_SC_OFF_IOSIZE PAGE_ALIGN(SZ_4K)
+
+/* ----------------------------------------------------------------------------
+ * MEDIA SCTRL
+ */
+
+#define SOC_MEDIA_SCTRL_SC_MEDIA_SUBSYS_CTRL5_ADDR(base) ((base) + (0x51C))
+#define SOC_MEDIA_SCTRL_SC_MEDIA_CLKCFG0_ADDR(base) ((base) + (0xCBC))
+#define SOC_MEDIA_SCTRL_SC_MEDIA_CLKCFG2_ADDR(base) ((base) + (0xCC4))
+#define SOC_MEDIA_SCTRL_SC_MEDIA_CLKEN_ADDR(base) ((base) + (0x520))
+#define SOC_MEDIA_SCTRL_SC_MEDIA_CLKDIS_ADDR(base) ((base) + (0x524))
+#define SOC_MEDIA_SCTRL_SC_MEDIA_RSTEN_ADDR(base) ((base) + (0x52C))
+#define SOC_MEDIA_SCTRL_SC_MEDIA_RSTDIS_ADDR(base) ((base) + (0x530))
+#define SOC_MEDIA_SCTRL_SC_MEDIA_RST_STAT_ADDR(base) ((base) + (0x534))
+
+/* ----------------------------------------------------------------------------
+ * AO SCTRL. Only bit 1 is needed for the GPU.
+ */
+
+#define SOC_AO_SCTRL_SC_PW_CLKEN0_ADDR(base) ((base) + (0x800))
+#define SOC_AO_SCTRL_SC_PW_CLKDIS0_ADDR(base) ((base) + (0x804))
+#define SOC_AO_SCTRL_SC_PW_CLK_STAT0_ADDR(base) ((base) + (0x808))
+#define SOC_AO_SCTRL_SC_PW_RSTEN0_ADDR(base) ((base) + (0x810))
+#define SOC_AO_SCTRL_SC_PW_RSTDIS0_ADDR(base) ((base) + (0x814))
+#define SOC_AO_SCTRL_SC_PW_RST_STAT0_ADDR(base) ((base) + (0x818))
+#define SOC_AO_SCTRL_SC_PW_ISOEN0_ADDR(base) ((base) + (0x820))
+#define SOC_AO_SCTRL_SC_PW_ISODIS0_ADDR(base) ((base) + (0x824))
+#define SOC_AO_SCTRL_SC_PW_ISO_STAT0_ADDR(base) ((base) + (0x828))
+#define SOC_AO_SCTRL_SC_PW_MTCMOS_EN0_ADDR(base) ((base) + (0x830))
+#define SOC_AO_SCTRL_SC_PW_MTCMOS_DIS0_ADDR(base) ((base) + (0x834))
+#define SOC_AO_SCTRL_SC_PW_MTCMOS_STAT0_ADDR(base) ((base) + (0x838))
+
+/* ----------------------------------------------------------------------------
+ * PERI SCTRL. Only bit 10 is needed for the GPU.
+ */
+
+#define SOC_PERI_SCTRL_SC_PERIPH_CLKEN12_ADDR(base) ((base) + (0x270))
+#define SOC_PERI_SCTRL_SC_PERIPH_CLKSTAT12_ADDR(base) ((base) + (0x278))
+
+#endif /* MALI_HIKEY_HI6220_REGISTERS_GPU_H */
diff --git a/drivers/gpu/arm/utgard/readme.txt b/drivers/gpu/arm/utgard/readme.txt
new file mode 100644
index 000000000000..6785ac933b38
--- /dev/null
+++ b/drivers/gpu/arm/utgard/readme.txt
@@ -0,0 +1,28 @@
+Building the Mali Device Driver for Linux
+-----------------------------------------
+
+Build the Mali Device Driver for Linux by running the following make command:
+
+KDIR=<kdir_path> USING_UMP=<ump_option> BUILD=<build_option> make
+
+where
+ kdir_path: Path to your Linux Kernel directory
+ ump_option: 1 = enable UMP support (*)
+ 0 = disable UMP support
+ build_option: debug = debug build of driver
+ release = release build of driver
+
+(*) For newer Linux Kernels, the Module.symvers file for the UMP device driver
+ must be available. The UMP_SYMVERS_FILE variable in the Makefile should
+ point to this file. This file is generated when the UMP driver is built.
+
+The result will be a mali.ko file, which can be loaded into the Linux kernel
+by using the insmod command.
+
+Use of UMP is not recommended. The dma-buf API in the Linux kernel has
+replaced UMP. The Mali Device Driver will be built with dma-buf support if
+dma-buf is enabled in the kernel config.
+
+The kernel needs to be provided with a platform_device struct for the Mali GPU
+device. See the mali_utgard.h header file for how to set up the Mali GPU
+resources.
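
As a concrete example (the kernel path here is illustrative, not part of the
readme), a release build without UMP followed by loading the module could
look like:

    KDIR=/usr/src/linux USING_UMP=0 BUILD=release make
    insmod mali.ko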
diff --git a/drivers/gpu/arm/utgard/regs/mali_200_regs.h b/drivers/gpu/arm/utgard/regs/mali_200_regs.h
new file mode 100644
index 000000000000..e76f9926f5c0
--- /dev/null
+++ b/drivers/gpu/arm/utgard/regs/mali_200_regs.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _MALI200_REGS_H_
+#define _MALI200_REGS_H_
+
+/**
+ * Enum for management register addresses.
+ */
+enum mali200_mgmt_reg {
+ MALI200_REG_ADDR_MGMT_VERSION = 0x1000,
+ MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x1004,
+ MALI200_REG_ADDR_MGMT_STATUS = 0x1008,
+ MALI200_REG_ADDR_MGMT_CTRL_MGMT = 0x100c,
+
+ MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x1020,
+ MALI200_REG_ADDR_MGMT_INT_CLEAR = 0x1024,
+ MALI200_REG_ADDR_MGMT_INT_MASK = 0x1028,
+ MALI200_REG_ADDR_MGMT_INT_STATUS = 0x102c,
+
+ MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x1050,
+
+ MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x1080,
+ MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x1084,
+ MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT = 0x1088,
+ MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x108c,
+
+ MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x10a0,
+ MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x10a4,
+ MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x10ac,
+
+ MALI200_REG_ADDR_MGMT_PERFMON_CONTR = 0x10b0,
+ MALI200_REG_ADDR_MGMT_PERFMON_BASE = 0x10b4,
+
+ MALI200_REG_SIZEOF_REGISTER_BANK = 0x10f0
+
+};
+
+#define MALI200_REG_VAL_PERF_CNT_ENABLE 1
+
+enum mali200_mgmt_ctrl_mgmt {
+ MALI200_REG_VAL_CTRL_MGMT_STOP_BUS = (1 << 0),
+ MALI200_REG_VAL_CTRL_MGMT_FLUSH_CACHES = (1 << 3),
+ MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET = (1 << 5),
+ MALI200_REG_VAL_CTRL_MGMT_START_RENDERING = (1 << 6),
+ MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET = (1 << 7), /* Only valid for Mali-300 and later */
+};
+
+enum mali200_mgmt_irq {
+ MALI200_REG_VAL_IRQ_END_OF_FRAME = (1 << 0),
+ MALI200_REG_VAL_IRQ_END_OF_TILE = (1 << 1),
+ MALI200_REG_VAL_IRQ_HANG = (1 << 2),
+ MALI200_REG_VAL_IRQ_FORCE_HANG = (1 << 3),
+ MALI200_REG_VAL_IRQ_BUS_ERROR = (1 << 4),
+ MALI200_REG_VAL_IRQ_BUS_STOP = (1 << 5),
+ MALI200_REG_VAL_IRQ_CNT_0_LIMIT = (1 << 6),
+ MALI200_REG_VAL_IRQ_CNT_1_LIMIT = (1 << 7),
+ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR = (1 << 8),
+ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND = (1 << 9),
+ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW = (1 << 10),
+ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW = (1 << 11),
+ MALI400PP_REG_VAL_IRQ_RESET_COMPLETED = (1 << 12),
+};
+
+#define MALI200_REG_VAL_IRQ_MASK_ALL ((enum mali200_mgmt_irq) (\
+ MALI200_REG_VAL_IRQ_END_OF_FRAME |\
+ MALI200_REG_VAL_IRQ_END_OF_TILE |\
+ MALI200_REG_VAL_IRQ_HANG |\
+ MALI200_REG_VAL_IRQ_FORCE_HANG |\
+ MALI200_REG_VAL_IRQ_BUS_ERROR |\
+ MALI200_REG_VAL_IRQ_BUS_STOP |\
+ MALI200_REG_VAL_IRQ_CNT_0_LIMIT |\
+ MALI200_REG_VAL_IRQ_CNT_1_LIMIT |\
+ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR |\
+ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND |\
+ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW |\
+ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW |\
+ MALI400PP_REG_VAL_IRQ_RESET_COMPLETED))
+
+#define MALI200_REG_VAL_IRQ_MASK_USED ((enum mali200_mgmt_irq) (\
+ MALI200_REG_VAL_IRQ_END_OF_FRAME |\
+ MALI200_REG_VAL_IRQ_FORCE_HANG |\
+ MALI200_REG_VAL_IRQ_BUS_ERROR |\
+ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR |\
+ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND |\
+ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW |\
+ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW))
+
+#define MALI200_REG_VAL_IRQ_MASK_NONE ((enum mali200_mgmt_irq)(0))
+
+enum mali200_mgmt_status {
+ MALI200_REG_VAL_STATUS_RENDERING_ACTIVE = (1 << 0),
+ MALI200_REG_VAL_STATUS_BUS_STOPPED = (1 << 4),
+};
+
+enum mali200_render_unit {
+ MALI200_REG_ADDR_FRAME = 0x0000,
+ MALI200_REG_ADDR_RSW = 0x0004,
+ MALI200_REG_ADDR_STACK = 0x0030,
+ MALI200_REG_ADDR_STACK_SIZE = 0x0034,
+ MALI200_REG_ADDR_ORIGIN_OFFSET_X = 0x0040
+};
+
+enum mali200_wb_unit {
+ MALI200_REG_ADDR_WB0 = 0x0100,
+ MALI200_REG_ADDR_WB1 = 0x0200,
+ MALI200_REG_ADDR_WB2 = 0x0300
+};
+
+enum mali200_wb_unit_regs {
+ MALI200_REG_ADDR_WB_SOURCE_SELECT = 0x0000,
+ MALI200_REG_ADDR_WB_SOURCE_ADDR = 0x0004,
+};
+
+/* This should be in the top 16 bits of the Mali PP version register */
+#define MALI200_PP_PRODUCT_ID 0xC807
+#define MALI300_PP_PRODUCT_ID 0xCE07
+#define MALI400_PP_PRODUCT_ID 0xCD07
+#define MALI450_PP_PRODUCT_ID 0xCF07
+#define MALI470_PP_PRODUCT_ID 0xCF08
+
+
+
+#endif /* _MALI200_REGS_H_ */
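
As a usage sketch of the product IDs above: a probe routine can read
MALI200_REG_ADDR_MGMT_VERSION and compare the top 16 bits. The helper below
is hypothetical (it assumes a valid ioremap()'d PP register base and the
usual <linux/io.h> accessors), not part of the patch:

    static int mali_pp_identify(void __iomem *pp_base)
    {
            u32 version = readl(pp_base + MALI200_REG_ADDR_MGMT_VERSION);
            u16 product_id = version >> 16;

            switch (product_id) {
            case MALI450_PP_PRODUCT_ID:
                    return 450;
            case MALI470_PP_PRODUCT_ID:
                    return 470;
            default:
                    return -1;      /* unknown or unsupported PP core */
            }
    }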
diff --git a/drivers/gpu/arm/utgard/regs/mali_gp_regs.h b/drivers/gpu/arm/utgard/regs/mali_gp_regs.h
new file mode 100644
index 000000000000..9c101f9ddd22
--- /dev/null
+++ b/drivers/gpu/arm/utgard/regs/mali_gp_regs.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _MALIGP2_CONROL_REGS_H_
+#define _MALIGP2_CONROL_REGS_H_
+
+/**
+ * These are the different geometry processor control registers.
+ * They are used to control and monitor the operation of the
+ * Vertex Shader and the Polygon List Builder in the geometry processor.
+ * Addresses are byte offsets of the 32-bit registers relative to the bank base.
+ * @see [P0081] "Geometry Processor Data Structures" for details
+ */
+
+typedef enum {
+ MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR = 0x00,
+ MALIGP2_REG_ADDR_MGMT_VSCL_END_ADDR = 0x04,
+ MALIGP2_REG_ADDR_MGMT_PLBUCL_START_ADDR = 0x08,
+ MALIGP2_REG_ADDR_MGMT_PLBUCL_END_ADDR = 0x0c,
+ MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR = 0x10,
+ MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR = 0x14,
+ MALIGP2_REG_ADDR_MGMT_CMD = 0x20,
+ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT = 0x24,
+ MALIGP2_REG_ADDR_MGMT_INT_CLEAR = 0x28,
+ MALIGP2_REG_ADDR_MGMT_INT_MASK = 0x2C,
+ MALIGP2_REG_ADDR_MGMT_INT_STAT = 0x30,
+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x3C,
+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x40,
+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x44,
+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x48,
+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x4C,
+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x50,
+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT = 0x54,
+ MALIGP2_REG_ADDR_MGMT_STATUS = 0x68,
+ MALIGP2_REG_ADDR_MGMT_VERSION = 0x6C,
+ MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR_READ = 0x80,
+ MALIGP2_REG_ADDR_MGMT_PLBCL_START_ADDR_READ = 0x84,
+ MALIGP2_CONTR_AXI_BUS_ERROR_STAT = 0x94,
+ MALIGP2_REGISTER_ADDRESS_SPACE_SIZE = 0x98,
+} maligp_reg_addr_mgmt_addr;
+
+#define MALIGP2_REG_VAL_PERF_CNT_ENABLE 1
+
+/**
+ * Commands to geometry processor.
+ * @see MALIGP2_CTRL_REG_CMD
+ */
+typedef enum {
+ MALIGP2_REG_VAL_CMD_START_VS = (1 << 0),
+ MALIGP2_REG_VAL_CMD_START_PLBU = (1 << 1),
+ MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC = (1 << 4),
+ MALIGP2_REG_VAL_CMD_RESET = (1 << 5),
+ MALIGP2_REG_VAL_CMD_FORCE_HANG = (1 << 6),
+ MALIGP2_REG_VAL_CMD_STOP_BUS = (1 << 9),
+ MALI400GP_REG_VAL_CMD_SOFT_RESET = (1 << 10), /* only valid for Mali-300 and later */
+} mgp_contr_reg_val_cmd;
+
+
+/** @defgroup MALIGP2_IRQ
+ * Interrupt status of geometry processor.
+ * @see MALIGP2_CTRL_REG_INT_RAWSTAT, MALIGP2_REG_ADDR_MGMT_INT_CLEAR,
+ * MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_ADDR_MGMT_INT_STAT
+ * @{
+ */
+#define MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST (1 << 0)
+#define MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST (1 << 1)
+#define MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM (1 << 2)
+#define MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ (1 << 3)
+#define MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ (1 << 4)
+#define MALIGP2_REG_VAL_IRQ_HANG (1 << 5)
+#define MALIGP2_REG_VAL_IRQ_FORCE_HANG (1 << 6)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT (1 << 7)
+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT (1 << 8)
+#define MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR (1 << 9)
+#define MALIGP2_REG_VAL_IRQ_SYNC_ERROR (1 << 10)
+#define MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR (1 << 11)
+#define MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED (1 << 12)
+#define MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD (1 << 13)
+#define MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD (1 << 14)
+#define MALI400GP_REG_VAL_IRQ_RESET_COMPLETED (1 << 19)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW (1 << 20)
+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW (1 << 21)
+#define MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS (1 << 22)
+
+/* Mask defining all IRQs in Mali GP */
+#define MALIGP2_REG_VAL_IRQ_MASK_ALL \
+ (\
+ MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST | \
+ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST | \
+ MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | \
+ MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ | \
+ MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ | \
+ MALIGP2_REG_VAL_IRQ_HANG | \
+ MALIGP2_REG_VAL_IRQ_FORCE_HANG | \
+ MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT | \
+ MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT | \
+ MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR | \
+ MALIGP2_REG_VAL_IRQ_SYNC_ERROR | \
+ MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR | \
+ MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED | \
+ MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD | \
+ MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD | \
+ MALI400GP_REG_VAL_IRQ_RESET_COMPLETED | \
+ MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+ MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW | \
+ MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining the IRQs in Mali GP which we use */
+#define MALIGP2_REG_VAL_IRQ_MASK_USED \
+ (\
+ MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST | \
+ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST | \
+ MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | \
+ MALIGP2_REG_VAL_IRQ_FORCE_HANG | \
+ MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR | \
+ MALIGP2_REG_VAL_IRQ_SYNC_ERROR | \
+ MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR | \
+ MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD | \
+ MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD | \
+ MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \
+ MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW | \
+ MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* Mask defining no IRQs on MaliGP2 */
+#define MALIGP2_REG_VAL_IRQ_MASK_NONE 0
+
+/** @} end of defgroup MALIGP2_IRQ */
+
+/** @defgroup MALIGP2_STATUS
+ * The different status values of the geometry processor.
+ * @see MALIGP2_CTRL_REG_STATUS
+ * @{
+ */
+#define MALIGP2_REG_VAL_STATUS_VS_ACTIVE 0x0002
+#define MALIGP2_REG_VAL_STATUS_BUS_STOPPED 0x0004
+#define MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE 0x0008
+#define MALIGP2_REG_VAL_STATUS_BUS_ERROR 0x0040
+#define MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR 0x0100
+/** @} end of defgroup MALIGP2_STATUS */
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ACTIVE (\
+ MALIGP2_REG_VAL_STATUS_VS_ACTIVE|\
+ MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE)
+
+
+#define MALIGP2_REG_VAL_STATUS_MASK_ERROR (\
+ MALIGP2_REG_VAL_STATUS_BUS_ERROR |\
+ MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR)
+
+/* This should be in the top 16 bits of the GP version register. */
+#define MALI200_GP_PRODUCT_ID 0xA07
+#define MALI300_GP_PRODUCT_ID 0xC07
+#define MALI400_GP_PRODUCT_ID 0xB07
+#define MALI450_GP_PRODUCT_ID 0xD07
+
+/**
+ * The different performance counter sources on the geometry processor.
+ * @see MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC
+ */
+
+enum MALIGP2_cont_reg_perf_cnt_src {
+ MALIGP2_REG_VAL_PERF_CNT1_SRC_NUMBER_OF_VERTICES_PROCESSED = 0x0a,
+};
+
+#endif
diff --git a/drivers/gpu/arm/utgard/timestamp-arm11-cc/mali_timestamp.c b/drivers/gpu/arm/utgard/timestamp-arm11-cc/mali_timestamp.c
new file mode 100644
index 000000000000..a486e2f7684f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/timestamp-arm11-cc/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_timestamp.h */
diff --git a/drivers/gpu/arm/utgard/timestamp-arm11-cc/mali_timestamp.h b/drivers/gpu/arm/utgard/timestamp-arm11-cc/mali_timestamp.h
new file mode 100644
index 000000000000..65f3ab274c09
--- /dev/null
+++ b/drivers/gpu/arm/utgard/timestamp-arm11-cc/mali_timestamp.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+ /*
+ * reset counters and overflow flags
+ */
+
+ u32 mask = (1 << 0) | /* enable all three counters */
+ (0 << 1) | /* reset both Count Registers to 0x0 */
+ (1 << 2) | /* reset the Cycle Counter Register to 0x0 */
+ (0 << 3) | /* 1 = Cycle Counter Register counts every 64th processor clock cycle */
+ (0 << 4) | /* Count Register 0 interrupt enable */
+ (0 << 5) | /* Count Register 1 interrupt enable */
+ (0 << 6) | /* Cycle Counter interrupt enable */
+ (0 << 8) | /* Count Register 0 overflow flag (clear or write, flag on read) */
+ (0 << 9) | /* Count Register 1 overflow flag (clear or write, flag on read) */
+ (1 << 10); /* Cycle Counter Register overflow flag (clear or write, flag on read) */
+
+ __asm__ __volatile__("MCR p15, 0, %0, c15, c12, 0" : : "r"(mask));
+
+ return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+ u32 result;
+
+ /* this is for the clock cycles */
+ __asm__ __volatile__("MRC p15, 0, %0, c15, c12, 1" : "=r"(result));
+
+ return (u64)result;
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/drivers/gpu/arm/utgard/timestamp-default/mali_timestamp.c b/drivers/gpu/arm/utgard/timestamp-default/mali_timestamp.c
new file mode 100644
index 000000000000..a486e2f7684f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/timestamp-default/mali_timestamp.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2010-2011, 2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "mali_timestamp.h"
+
+/* This file is intentionally left empty, as all functions are inlined in mali_timestamp.h */
diff --git a/drivers/gpu/arm/utgard/timestamp-default/mali_timestamp.h b/drivers/gpu/arm/utgard/timestamp-default/mali_timestamp.h
new file mode 100644
index 000000000000..8ba47060828f
--- /dev/null
+++ b/drivers/gpu/arm/utgard/timestamp-default/mali_timestamp.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2011, 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained from Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MALI_TIMESTAMP_H__
+#define __MALI_TIMESTAMP_H__
+
+#include "mali_osk.h"
+
+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void)
+{
+ return _MALI_OSK_ERR_OK;
+}
+
+MALI_STATIC_INLINE u64 _mali_timestamp_get(void)
+{
+ return _mali_osk_boot_time_get_ns();
+}
+
+#endif /* __MALI_TIMESTAMP_H__ */
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index c4bf9a1cf4a6..038aae8c7c66 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -97,6 +97,14 @@ config DRM_KMS_CMA_HELPER
help
Choose this if you need the KMS CMA helper functions
+config DRM_CMA_FBDEV_BUFFER_NUM
+ int "Cma Fbdev Buffer Number"
+ depends on DRM_KMS_CMA_HELPER
+ default 1
+ help
+ Defines the number of buffers allocated for the CMA fbdev. The
+ default is one buffer; set to 2 for double buffering or 3 for
+ triple buffering.
+
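As the drm_fb_cma_helper.c hunk further down shows, the option works by making the fbdev surface N screens tall, so userspace can flip between buffers by panning the y offset; a sketch of the resulting allocation size (names and values are illustrative):

    #include <stdint.h>

    /* Illustrative: total fbdev allocation with N stacked buffers. */
    static uint64_t fbdev_size_bytes(uint32_t width, uint32_t height,
                                     uint32_t bytes_per_pixel, uint32_t bufs)
    {
            return (uint64_t)width * bytes_per_pixel * height * bufs;
    }

    /* 1920x1080 at 32 bpp with 2 buffers -> 16,588,800 bytes (~15.8 MiB) */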
source "drivers/gpu/drm/i2c/Kconfig"
config DRM_TDFX
@@ -266,3 +274,5 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig"
source "drivers/gpu/drm/imx/Kconfig"
source "drivers/gpu/drm/vc4/Kconfig"
+
+source "drivers/gpu/drm/hisilicon/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 1e9ff4c3e3db..95ba48234f3e 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -75,3 +75,4 @@ obj-y += i2c/
obj-y += panel/
obj-y += bridge/
obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/
+obj-y += hisilicon/
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index c19a62561183..26051b849e93 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -25,6 +25,12 @@
#include <drm/drm_fb_cma_helper.h>
#include <linux/module.h>
+#ifdef CONFIG_DRM_CMA_FBDEV_BUFFER_NUM
+#define FBDEV_BUFFER_NUM CONFIG_DRM_CMA_FBDEV_BUFFER_NUM
+#else
+#define FBDEV_BUFFER_NUM 1
+#endif
+
struct drm_fb_cma {
struct drm_framebuffer fb;
struct drm_gem_cma_object *obj[4];
@@ -253,7 +259,7 @@ static int drm_fbdev_cma_create(struct drm_fb_helper *helper,
bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8);
mode_cmd.width = sizes->surface_width;
- mode_cmd.height = sizes->surface_height;
+ mode_cmd.height = sizes->surface_height * FBDEV_BUFFER_NUM;
mode_cmd.pitches[0] = sizes->surface_width * bytes_per_pixel;
mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
sizes->surface_depth);
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 2d5ca8eec13a..e487f56ffc4c 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -45,9 +45,26 @@
* subset of the MIPI DCS command set.
*/
+static const struct device_type mipi_dsi_device_type;
+
static int mipi_dsi_device_match(struct device *dev, struct device_driver *drv)
{
- return of_driver_match_device(dev, drv);
+ struct mipi_dsi_device *dsi;
+
+ dsi = dev->type == &mipi_dsi_device_type ?
+ to_mipi_dsi_device(dev) : NULL;
+
+ if (!dsi)
+ return 0;
+
+ if (of_driver_match_device(dev, drv))
+ return 1;
+
+ if (!strcmp(drv->name, "mipi_dsi_dummy") &&
+ !strcmp(dsi->name, "dummy"))
+ return 1;
+
+ return 0;
}
static const struct dev_pm_ops mipi_dsi_device_pm_ops = {
@@ -102,9 +119,41 @@ static const struct device_type mipi_dsi_device_type = {
.release = mipi_dsi_dev_release,
};
-static struct mipi_dsi_device *mipi_dsi_device_alloc(struct mipi_dsi_host *host)
+struct mipi_dsi_device_info {
+ char name[DSI_DEV_NAME_SIZE];
+ u32 reg;
+ struct device_node *node;
+};
+
+static int __dsi_check_chan_busy(struct device *dev, void *data)
+{
+ struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
+ u32 reg = *(u32 *) data;
+
+ if (dsi && dsi->channel == reg)
+ return -EBUSY;
+
+ return 0;
+}
+
+static int mipi_dsi_check_chan_busy(struct mipi_dsi_host *host, u32 reg)
+{
+ return device_for_each_child(host->dev, &reg, __dsi_check_chan_busy);
+}
+
+static struct mipi_dsi_device *
+mipi_dsi_device_new(struct mipi_dsi_host *host,
+ struct mipi_dsi_device_info *info)
{
+ struct device *dev = host->dev;
struct mipi_dsi_device *dsi;
+ int r;
+
+ if (info->reg > 3) {
+ dev_err(dev, "dsi device %s has invalid channel value: %u\n",
+ info->name, info->reg);
+ return ERR_PTR(-EINVAL);
+ }
dsi = kzalloc(sizeof(*dsi), GFP_KERNEL);
if (!dsi)
@@ -114,62 +163,90 @@ static struct mipi_dsi_device *mipi_dsi_device_alloc(struct mipi_dsi_host *host)
dsi->dev.bus = &mipi_dsi_bus_type;
dsi->dev.parent = host->dev;
dsi->dev.type = &mipi_dsi_device_type;
+ dsi->dev.of_node = info->node;
+ dsi->channel = info->reg;
+ strlcpy(dsi->name, info->name, sizeof(dsi->name));
- device_initialize(&dsi->dev);
+ dev_set_name(&dsi->dev, "%s.%d", dev_name(host->dev), info->reg);
- return dsi;
-}
-
-static int mipi_dsi_device_add(struct mipi_dsi_device *dsi)
-{
- struct mipi_dsi_host *host = dsi->host;
+ r = mipi_dsi_check_chan_busy(host, info->reg);
+ if (r)
+ goto err;
- dev_set_name(&dsi->dev, "%s.%d", dev_name(host->dev), dsi->channel);
+ r = device_register(&dsi->dev);
+ if (r)
+ goto err;
- return device_add(&dsi->dev);
+ return dsi;
+err:
+ kfree(dsi);
+ return ERR_PTR(r);
}
static struct mipi_dsi_device *
of_mipi_dsi_device_add(struct mipi_dsi_host *host, struct device_node *node)
{
- struct mipi_dsi_device *dsi;
struct device *dev = host->dev;
+ struct mipi_dsi_device_info info = { };
int ret;
- u32 reg;
- ret = of_property_read_u32(node, "reg", &reg);
+ if (of_modalias_node(node, info.name, sizeof(info.name)) < 0) {
+ dev_err(dev, "modalias failure on %s\n", node->full_name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ ret = of_property_read_u32(node, "reg", &info.reg);
if (ret) {
dev_err(dev, "device node %s has no valid reg property: %d\n",
node->full_name, ret);
return ERR_PTR(-EINVAL);
}
- if (reg > 3) {
- dev_err(dev, "device node %s has invalid reg property: %u\n",
- node->full_name, reg);
- return ERR_PTR(-EINVAL);
- }
+ info.node = of_node_get(node);
- dsi = mipi_dsi_device_alloc(host);
- if (IS_ERR(dsi)) {
- dev_err(dev, "failed to allocate DSI device %s: %ld\n",
- node->full_name, PTR_ERR(dsi));
- return dsi;
- }
+ return mipi_dsi_device_new(host, &info);
+}
+
+static struct mipi_dsi_driver dummy_dsi_driver = {
+ .driver.name = "mipi_dsi_dummy",
+};
- dsi->dev.of_node = of_node_get(node);
- dsi->channel = reg;
+struct mipi_dsi_device *mipi_dsi_new_dummy(struct mipi_dsi_host *host, u32 reg)
+{
+ struct mipi_dsi_device_info info = { "dummy", reg, NULL, };
- ret = mipi_dsi_device_add(dsi);
- if (ret) {
- dev_err(dev, "failed to add DSI device %s: %d\n",
- node->full_name, ret);
- kfree(dsi);
- return ERR_PTR(ret);
+ return mipi_dsi_device_new(host, &info);
+}
+EXPORT_SYMBOL(mipi_dsi_new_dummy);
+
+void mipi_dsi_unregister_device(struct mipi_dsi_device *dsi)
+{
+ if (dsi)
+ device_unregister(&dsi->dev);
+}
+EXPORT_SYMBOL(mipi_dsi_unregister_device);
+
+static DEFINE_MUTEX(host_lock);
+static LIST_HEAD(host_list);
+
+struct mipi_dsi_host *of_find_mipi_dsi_host_by_node(struct device_node *node)
+{
+ struct mipi_dsi_host *host;
+
+ mutex_lock(&host_lock);
+
+ list_for_each_entry(host, &host_list, list) {
+ if (host->dev->of_node == node) {
+ mutex_unlock(&host_lock);
+ return host;
+ }
}
- return dsi;
+ mutex_unlock(&host_lock);
+
+ return NULL;
}
+EXPORT_SYMBOL(of_find_mipi_dsi_host_by_node);
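A peripheral or encoder driver probed from its own DT node can use this lookup to resolve its host, deferring until the host driver has bound; a minimal sketch (np is assumed to reference the host's node):

    #include <drm/drm_mipi_dsi.h>
    #include <linux/err.h>
    #include <linux/of.h>

    /* Sketch: find the registered DSI host backing a device-tree node. */
    static struct mipi_dsi_host *example_find_host(struct device_node *np)
    {
            struct mipi_dsi_host *host = of_find_mipi_dsi_host_by_node(np);

            if (!host)
                    return ERR_PTR(-EPROBE_DEFER); /* host not registered yet */
            return host;
    }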
int mipi_dsi_host_register(struct mipi_dsi_host *host)
{
@@ -182,6 +259,10 @@ int mipi_dsi_host_register(struct mipi_dsi_host *host)
of_mipi_dsi_device_add(host, node);
}
+ mutex_lock(&host_lock);
+ list_add_tail(&host->list, &host_list);
+ mutex_unlock(&host_lock);
+
return 0;
}
EXPORT_SYMBOL(mipi_dsi_host_register);
@@ -198,6 +279,10 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv)
void mipi_dsi_host_unregister(struct mipi_dsi_host *host)
{
device_for_each_child(host->dev, NULL, mipi_dsi_remove_device_fn);
+
+ mutex_lock(&host_lock);
+ list_del_init(&host->list);
+ mutex_unlock(&host_lock);
}
EXPORT_SYMBOL(mipi_dsi_host_unregister);
@@ -924,7 +1009,13 @@ EXPORT_SYMBOL(mipi_dsi_driver_unregister);
static int __init mipi_dsi_bus_init(void)
{
- return bus_register(&mipi_dsi_bus_type);
+ int ret;
+
+ ret = bus_register(&mipi_dsi_bus_type);
+ if (ret < 0)
+ return ret;
+
+ return mipi_dsi_driver_register(&dummy_dsi_driver);
}
postcore_initcall(mipi_dsi_bus_init);
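Taken together, these hunks let a host hand a channel to a sink that has no dedicated DSI peripheral driver: mipi_dsi_new_dummy() registers a device named "dummy", and the match hook above binds it to the mipi_dsi_dummy driver registered here. A hedged usage sketch (error handling trimmed, channel number illustrative):

    #include <drm/drm_mipi_dsi.h>
    #include <linux/err.h>

    /* Sketch: create a dummy DSI peripheral on channel 1, e.g. for an
     * external bridge that consumes the link without its own driver. */
    static int example_attach_dummy(struct mipi_dsi_host *host)
    {
            struct mipi_dsi_device *dsi = mipi_dsi_new_dummy(host, 1);

            if (IS_ERR(dsi))
                    return PTR_ERR(dsi);
            /* configure dsi->lanes/format/mode_flags as needed here */
            return 0;
    }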
diff --git a/drivers/gpu/drm/hisilicon/Kconfig b/drivers/gpu/drm/hisilicon/Kconfig
new file mode 100644
index 000000000000..558c61b1b8e8
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/Kconfig
@@ -0,0 +1,5 @@
+#
+# HiSilicon DRM device configuration.
+# Please keep this list sorted alphabetically
+
+source "drivers/gpu/drm/hisilicon/kirin/Kconfig"
diff --git a/drivers/gpu/drm/hisilicon/Makefile b/drivers/gpu/drm/hisilicon/Makefile
new file mode 100644
index 000000000000..e3f6d493c996
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for HiSilicon DRM drivers.
+# Please keep this list sorted alphabetically
+
+obj-$(CONFIG_DRM_HISI_KIRIN) += kirin/
diff --git a/drivers/gpu/drm/hisilicon/kirin/Kconfig b/drivers/gpu/drm/hisilicon/kirin/Kconfig
new file mode 100644
index 000000000000..57f6017f70c2
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/kirin/Kconfig
@@ -0,0 +1,20 @@
+config DRM_HISI_KIRIN
+ tristate "DRM Support for Hisilicon Kirin series SoCs Platform"
+ depends on DRM && OF && ARM64
+ select DRM_KMS_HELPER
+ select DRM_GEM_CMA_HELPER
+ select DRM_KMS_CMA_HELPER
+ select HISI_KIRIN_DW_DSI
+ help
+ Choose this option if you have a HiSilicon Kirin chipset (hi6220).
+ If M is selected the module will be called kirin-drm.
+
+config HISI_KIRIN_DW_DSI
+ tristate "HiSilicon Kirin specific extensions for Synopsys DW MIPI DSI"
+ depends on DRM_HISI_KIRIN
+ select DRM_MIPI_DSI
+ select DRM_PANEL
+ help
+ This selects support for HiSilicon Kirin SoC specific extensions for
+ the Synopsys DesignWare DSI driver. If you want to enable MIPI DSI on an
+ hi6220-based SoC, you should select this option.
diff --git a/drivers/gpu/drm/hisilicon/kirin/Makefile b/drivers/gpu/drm/hisilicon/kirin/Makefile
new file mode 100644
index 000000000000..cdf61589485c
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/kirin/Makefile
@@ -0,0 +1,6 @@
+kirin-drm-y := kirin_drm_drv.o \
+ kirin_drm_ade.o
+
+obj-$(CONFIG_DRM_HISI_KIRIN) += kirin-drm.o
+
+obj-$(CONFIG_HISI_KIRIN_DW_DSI) += dw_drm_dsi.o
diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
new file mode 100644
index 000000000000..aebb8fc27c56
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
@@ -0,0 +1,1267 @@
+/*
+ * DesignWare MIPI DSI Host Controller v1.02 driver
+ *
+ * Copyright (c) 2016 Linaro Limited.
+ * Copyright (c) 2014-2016 Hisilicon Limited.
+ *
+ * Author:
+ * Xinliang Liu <z.liuxinliang@hisilicon.com>
+ * Xinliang Liu <xinliang.liu@linaro.org>
+ * Xinwei Kong <kong.kongxinwei@hisilicon.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/of_graph.h>
+#include <linux/iopoll.h>
+#include <video/mipi_display.h>
+#include <linux/gpio/consumer.h>
+
+#include <drm/drm_of.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_encoder_slave.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_panel.h>
+
+#include "dw_dsi_reg.h"
+
+#define MAX_TX_ESC_CLK 10
+#define ROUND(x, y) ((x) / (y) + \
+ ((x) % (y) * 10 / (y) >= 5 ? 1 : 0))
+#define PHY_REF_CLK_RATE 19200000
+#define PHY_REF_CLK_PERIOD_PS (1000000000 / (PHY_REF_CLK_RATE / 1000))
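ROUND() is round-to-nearest in integer arithmetic: it keeps the integer quotient and adds one when the first fractional digit of the remainder is 5 or more. A standalone illustration:

    #include <assert.h>

    #define ROUND(x, y) ((x) / (y) + ((x) % (y) * 10 / (y) >= 5 ? 1 : 0))

    int main(void)
    {
            assert(ROUND(10, 4) == 3); /* 2.5  rounds up   */
            assert(ROUND(10, 3) == 3); /* 3.33 rounds down */
            assert(ROUND(11, 4) == 3); /* 2.75 rounds up   */
            return 0;
    }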
+
+#define encoder_to_dsi(encoder) \
+ container_of(encoder, struct dw_dsi, encoder)
+#define host_to_dsi(host) \
+ container_of(host, struct dw_dsi, host)
+#define connector_to_dsi(connector) \
+ container_of(connector, struct dw_dsi, connector)
+
+enum dsi_output_client {
+ OUT_HDMI = 0,
+ OUT_PANEL,
+ OUT_MAX
+};
+
+struct mipi_phy_params {
+ u32 clk_t_lpx;
+ u32 clk_t_hs_prepare;
+ u32 clk_t_hs_zero;
+ u32 clk_t_hs_trial;
+ u32 clk_t_wakeup;
+ u32 data_t_lpx;
+ u32 data_t_hs_prepare;
+ u32 data_t_hs_zero;
+ u32 data_t_hs_trial;
+ u32 data_t_ta_go;
+ u32 data_t_ta_get;
+ u32 data_t_wakeup;
+ u32 hstx_ckg_sel;
+ u32 pll_fbd_div5f;
+ u32 pll_fbd_div1f;
+ u32 pll_fbd_2p;
+ u32 pll_enbwt;
+ u32 pll_fbd_p;
+ u32 pll_fbd_s;
+ u32 pll_pre_div1p;
+ u32 pll_pre_p;
+ u32 pll_vco_750M;
+ u32 pll_lpf_rs;
+ u32 pll_lpf_cs;
+ u32 clklp2hs_time;
+ u32 clkhs2lp_time;
+ u32 lp2hs_time;
+ u32 hs2lp_time;
+ u32 clk_to_data_delay;
+ u32 data_to_clk_delay;
+ u32 lane_byte_clk_kHz;
+ u32 clk_division;
+};
+
+struct dsi_hw_ctx {
+ void __iomem *base;
+ struct clk *pclk;
+};
+
+struct dw_dsi_client {
+ u32 lanes;
+ u32 phy_clock; /* in kHz */
+ enum mipi_dsi_pixel_format format;
+ unsigned long mode_flags;
+};
+
+struct dw_dsi {
+ struct drm_encoder encoder;
+ struct drm_bridge *bridge;
+ struct drm_panel *panel;
+ struct mipi_dsi_host host;
+ struct drm_connector connector; /* connector for panel */
+ struct drm_display_mode cur_mode;
+ struct dsi_hw_ctx *ctx;
+ struct mipi_phy_params phy;
+
+ u32 lanes;
+ enum mipi_dsi_pixel_format format;
+ unsigned long mode_flags;
+ struct gpio_desc *gpio_mux;
+ struct dw_dsi_client client[OUT_MAX];
+ enum dsi_output_client cur_client;
+ bool enable;
+};
+
+struct dsi_data {
+ struct dw_dsi dsi;
+ struct dsi_hw_ctx ctx;
+};
+
+struct dsi_phy_range {
+ u32 min_range_kHz;
+ u32 max_range_kHz;
+ u32 pll_vco_750M;
+ u32 hstx_ckg_sel;
+};
+
+static const struct dsi_phy_range dphy_range_info[] = {
+ { 46875, 62500, 1, 7 },
+ { 62500, 93750, 0, 7 },
+ { 93750, 125000, 1, 6 },
+ { 125000, 187500, 0, 6 },
+ { 187500, 250000, 1, 5 },
+ { 250000, 375000, 0, 5 },
+ { 375000, 500000, 1, 4 },
+ { 500000, 750000, 0, 4 },
+ { 750000, 1000000, 1, 0 },
+ { 1000000, 1500000, 0, 0 }
+};
+
+void dsi_set_output_client(struct drm_device *dev)
+{
+ enum dsi_output_client client;
+ struct drm_connector *connector;
+ struct drm_encoder *encoder;
+ struct dw_dsi *dsi;
+
+ mutex_lock(&dev->mode_config.mutex);
+
+ /* find dsi encoder */
+ drm_for_each_encoder(encoder, dev)
+ if (encoder->encoder_type == DRM_MODE_ENCODER_DSI)
+ break;
+ dsi = encoder_to_dsi(encoder);
+
+ /* find HDMI connector */
+ drm_for_each_connector(connector, dev)
+ if (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA)
+ break;
+
+ /*
+ * set the proper dsi output client
+ */
+ client = connector->status == connector_status_connected ?
+ OUT_HDMI : OUT_PANEL;
+ if (client != dsi->cur_client) {
+ /* associate bridge and dsi encoder */
+ if (client == OUT_HDMI)
+ encoder->bridge = dsi->bridge;
+ else
+ encoder->bridge = NULL;
+
+ gpiod_set_value_cansleep(dsi->gpio_mux, client);
+ dsi->cur_client = client;
+ /* let the userspace know panel connector status has changed */
+ drm_sysfs_hotplug_event(dev);
+ DRM_INFO("client change to %s\n", client == OUT_HDMI ?
+ "HDMI" : "panel");
+ }
+
+ mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(dsi_set_output_client);
+
+static u32 dsi_calc_phy_rate(u32 req_kHz, struct mipi_phy_params *phy)
+{
+ u32 ref_clk_ps = PHY_REF_CLK_PERIOD_PS;
+ u32 tmp_kHz = req_kHz;
+ u32 i = 0;
+ u32 q_pll = 1;
+ u32 m_pll = 0;
+ u32 n_pll = 0;
+ u32 r_pll = 1;
+ u32 m_n = 0;
+ u32 m_n_int = 0;
+ u32 f_kHz = 0;
+ u64 temp;
+
+ /*
+ * Find a rate >= req_kHz.
+ */
+ do {
+ f_kHz = tmp_kHz;
+
+ for (i = 0; i < ARRAY_SIZE(dphy_range_info); i++)
+ if (f_kHz >= dphy_range_info[i].min_range_kHz &&
+ f_kHz <= dphy_range_info[i].max_range_kHz)
+ break;
+
+ if (i == ARRAY_SIZE(dphy_range_info)) {
+ DRM_ERROR("%dkHz out of range\n", f_kHz);
+ return 0;
+ }
+
+ phy->pll_vco_750M = dphy_range_info[i].pll_vco_750M;
+ phy->hstx_ckg_sel = dphy_range_info[i].hstx_ckg_sel;
+
+ if (phy->hstx_ckg_sel <= 7 &&
+ phy->hstx_ckg_sel >= 4)
+ q_pll = 0x10 >> (7 - phy->hstx_ckg_sel);
+
+ temp = f_kHz * (u64)q_pll * (u64)ref_clk_ps;
+ m_n_int = temp / (u64)1000000000;
+ m_n = (temp % (u64)1000000000) / (u64)100000000;
+
+ if (m_n_int % 2 == 0) {
+ if (m_n * 6 >= 50) {
+ n_pll = 2;
+ m_pll = (m_n_int + 1) * n_pll;
+ } else if (m_n * 6 >= 30) {
+ n_pll = 3;
+ m_pll = m_n_int * n_pll + 2;
+ } else {
+ n_pll = 1;
+ m_pll = m_n_int * n_pll;
+ }
+ } else {
+ if (m_n * 6 >= 50) {
+ n_pll = 1;
+ m_pll = (m_n_int + 1) * n_pll;
+ } else if (m_n * 6 >= 30) {
+ n_pll = 1;
+ m_pll = (m_n_int + 1) * n_pll;
+ } else if (m_n * 6 >= 10) {
+ n_pll = 3;
+ m_pll = m_n_int * n_pll + 1;
+ } else {
+ n_pll = 2;
+ m_pll = m_n_int * n_pll;
+ }
+ }
+
+ if (n_pll == 1) {
+ phy->pll_fbd_p = 0;
+ phy->pll_pre_div1p = 1;
+ } else {
+ phy->pll_fbd_p = n_pll;
+ phy->pll_pre_div1p = 0;
+ }
+
+ if (phy->pll_fbd_2p <= 7 && phy->pll_fbd_2p >= 4)
+ r_pll = 0x10 >> (7 - phy->pll_fbd_2p);
+
+ if (m_pll == 2) {
+ phy->pll_pre_p = 0;
+ phy->pll_fbd_s = 0;
+ phy->pll_fbd_div1f = 0;
+ phy->pll_fbd_div5f = 1;
+ } else if (m_pll >= 2 * 2 * r_pll && m_pll <= 2 * 4 * r_pll) {
+ phy->pll_pre_p = m_pll / (2 * r_pll);
+ phy->pll_fbd_s = 0;
+ phy->pll_fbd_div1f = 1;
+ phy->pll_fbd_div5f = 0;
+ } else if (m_pll >= 2 * 5 * r_pll && m_pll <= 2 * 150 * r_pll) {
+ if (((m_pll / (2 * r_pll)) % 2) == 0) {
+ phy->pll_pre_p =
+ (m_pll / (2 * r_pll)) / 2 - 1;
+ phy->pll_fbd_s =
+ (m_pll / (2 * r_pll)) % 2 + 2;
+ } else {
+ phy->pll_pre_p =
+ (m_pll / (2 * r_pll)) / 2;
+ phy->pll_fbd_s =
+ (m_pll / (2 * r_pll)) % 2;
+ }
+ phy->pll_fbd_div1f = 0;
+ phy->pll_fbd_div5f = 0;
+ } else {
+ phy->pll_pre_p = 0;
+ phy->pll_fbd_s = 0;
+ phy->pll_fbd_div1f = 0;
+ phy->pll_fbd_div5f = 1;
+ }
+
+ f_kHz = (u64)1000000000 * (u64)m_pll /
+ ((u64)ref_clk_ps * (u64)n_pll * (u64)q_pll);
+
+ if (f_kHz >= req_kHz)
+ break;
+
+ tmp_kHz += 10;
+
+ } while (true);
+
+ return f_kHz;
+}
+
+static void dsi_get_phy_params(u32 phy_req_kHz,
+ struct mipi_phy_params *phy)
+{
+ u32 ref_clk_ps = PHY_REF_CLK_PERIOD_PS;
+ u32 phy_rate_kHz;
+ u32 ui;
+
+ memset(phy, 0, sizeof(*phy));
+
+ phy_rate_kHz = dsi_calc_phy_rate(phy_req_kHz, phy);
+ if (!phy_rate_kHz)
+ return;
+
+ ui = 1000000 / phy_rate_kHz;
+
+ phy->clk_t_lpx = ROUND(50, 8 * ui);
+ phy->clk_t_hs_prepare = ROUND(133, 16 * ui) - 1;
+
+ phy->clk_t_hs_zero = ROUND(262, 8 * ui);
+ phy->clk_t_hs_trial = 2 * (ROUND(60, 8 * ui) - 1);
+ phy->clk_t_wakeup = ROUND(1000000, (ref_clk_ps / 1000) - 1);
+ if (phy->clk_t_wakeup > 0xff)
+ phy->clk_t_wakeup = 0xff;
+ phy->data_t_wakeup = phy->clk_t_wakeup;
+ phy->data_t_lpx = phy->clk_t_lpx;
+ phy->data_t_hs_prepare = ROUND(125 + 10 * ui, 16 * ui) - 1;
+ phy->data_t_hs_zero = ROUND(105 + 6 * ui, 8 * ui);
+ phy->data_t_hs_trial = 2 * (ROUND(60 + 4 * ui, 8 * ui) - 1);
+ phy->data_t_ta_go = 3;
+ phy->data_t_ta_get = 4;
+
+ phy->pll_enbwt = 1;
+ phy->clklp2hs_time = ROUND(407, 8 * ui) + 12;
+ phy->clkhs2lp_time = ROUND(105 + 12 * ui, 8 * ui);
+ phy->lp2hs_time = ROUND(240 + 12 * ui, 8 * ui) + 1;
+ phy->hs2lp_time = phy->clkhs2lp_time;
+ phy->clk_to_data_delay = 1 + phy->clklp2hs_time;
+ phy->data_to_clk_delay = ROUND(60 + 52 * ui, 8 * ui) +
+ phy->clkhs2lp_time;
+
+ phy->lane_byte_clk_kHz = phy_rate_kHz / 8;
+ phy->clk_division =
+ DIV_ROUND_UP(phy->lane_byte_clk_kHz, MAX_TX_ESC_CLK);
+}
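A hedged worked example of the timing derivation: at an illustrative 500 MHz line rate (phy_rate_kHz = 500000), ui = 1000000 / 500000 = 2, so clk_t_lpx = ROUND(50, 16) = 3, clk_t_hs_prepare = ROUND(133, 32) - 1 = 3, and the lane byte clock is 500000 / 8 = 62500 kHz:

    #include <assert.h>

    #define ROUND(x, y) ((x) / (y) + ((x) % (y) * 10 / (y) >= 5 ? 1 : 0))

    int main(void)
    {
            unsigned int phy_rate_khz = 500000;      /* illustrative rate */
            unsigned int ui = 1000000 / phy_rate_khz;         /* 2 */

            assert(ROUND(50, 8 * ui) == 3);          /* clk_t_lpx        */
            assert(ROUND(133, 16 * ui) - 1 == 3);    /* clk_t_hs_prepare */
            assert(phy_rate_khz / 8 == 62500);       /* lane byte clock  */
            return 0;
    }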
+
+static u32 dsi_get_dpi_color_coding(enum mipi_dsi_pixel_format format)
+{
+ u32 val;
+
+ /*
+ * TODO: only RGB888 is supported now; add more formats later.
+ */
+ switch (format) {
+ case MIPI_DSI_FMT_RGB888:
+ val = DSI_24BITS_1;
+ break;
+ default:
+ val = DSI_24BITS_1;
+ break;
+ }
+
+ return val;
+}
+
+/*
+ * dsi phy reg write function
+ */
+static void dsi_phy_tst_set(void __iomem *base, u32 reg, u32 val)
+{
+ u32 reg_write = 0x10000 + reg;
+
+ /*
+ * latch reg first
+ */
+ writel(reg_write, base + PHY_TST_CTRL1);
+ writel(0x02, base + PHY_TST_CTRL0);
+ writel(0x00, base + PHY_TST_CTRL0);
+
+ /*
+ * then latch value
+ */
+ writel(val, base + PHY_TST_CTRL1);
+ writel(0x02, base + PHY_TST_CTRL0);
+ writel(0x00, base + PHY_TST_CTRL0);
+}
+
+static void dsi_set_phy_timer(void __iomem *base,
+ struct mipi_phy_params *phy,
+ u32 lanes)
+{
+ u32 val;
+
+ /*
+ * Set lane value and phy stop wait time.
+ */
+ val = (lanes - 1) | (PHY_STOP_WAIT_TIME << 8);
+ writel(val, base + PHY_IF_CFG);
+
+ /*
+ * Set phy clk division.
+ */
+ val = readl(base + CLKMGR_CFG) | phy->clk_division;
+ writel(val, base + CLKMGR_CFG);
+
+ /*
+ * Set lp and hs switching params.
+ */
+ dw_update_bits(base + PHY_TMR_CFG, 24, MASK(8), phy->hs2lp_time);
+ dw_update_bits(base + PHY_TMR_CFG, 16, MASK(8), phy->lp2hs_time);
+ dw_update_bits(base + PHY_TMR_LPCLK_CFG, 16, MASK(10),
+ phy->clkhs2lp_time);
+ dw_update_bits(base + PHY_TMR_LPCLK_CFG, 0, MASK(10),
+ phy->clklp2hs_time);
+ dw_update_bits(base + CLK_DATA_TMR_CFG, 8, MASK(8),
+ phy->data_to_clk_delay);
+ dw_update_bits(base + CLK_DATA_TMR_CFG, 0, MASK(8),
+ phy->clk_to_data_delay);
+}
+
+static void dsi_set_mipi_phy(void __iomem *base,
+ struct mipi_phy_params *phy,
+ u32 lanes)
+{
+ u32 delay_count;
+ u32 val;
+ u32 i;
+
+ /* phy timer setting */
+ dsi_set_phy_timer(base, phy, lanes);
+
+ /*
+ * Reset to clean up phy tst params.
+ */
+ writel(0, base + PHY_RSTZ);
+ writel(0, base + PHY_TST_CTRL0);
+ writel(1, base + PHY_TST_CTRL0);
+ writel(0, base + PHY_TST_CTRL0);
+
+ /*
+ * Clock lane timing control setting: TLPX, THS-PREPARE,
+ * THS-ZERO, THS-TRAIL, TWAKEUP.
+ */
+ dsi_phy_tst_set(base, CLK_TLPX, phy->clk_t_lpx);
+ dsi_phy_tst_set(base, CLK_THS_PREPARE, phy->clk_t_hs_prepare);
+ dsi_phy_tst_set(base, CLK_THS_ZERO, phy->clk_t_hs_zero);
+ dsi_phy_tst_set(base, CLK_THS_TRAIL, phy->clk_t_hs_trial);
+ dsi_phy_tst_set(base, CLK_TWAKEUP, phy->clk_t_wakeup);
+
+ /*
+ * Data lane timing control setting: TLPX, THS-PREPARE,
+ * THS-ZERO, THS-TRAIL, TTA-GO, TTA-GET, TWAKEUP.
+ */
+ for (i = 0; i < lanes; i++) {
+ dsi_phy_tst_set(base, DATA_TLPX(i), phy->data_t_lpx);
+ dsi_phy_tst_set(base, DATA_THS_PREPARE(i),
+ phy->data_t_hs_prepare);
+ dsi_phy_tst_set(base, DATA_THS_ZERO(i), phy->data_t_hs_zero);
+ dsi_phy_tst_set(base, DATA_THS_TRAIL(i), phy->data_t_hs_trial);
+ dsi_phy_tst_set(base, DATA_TTA_GO(i), phy->data_t_ta_go);
+ dsi_phy_tst_set(base, DATA_TTA_GET(i), phy->data_t_ta_get);
+ dsi_phy_tst_set(base, DATA_TWAKEUP(i), phy->data_t_wakeup);
+ }
+
+ /*
+ * physical configuration: I, pll I, pll II, pll III,
+ * pll IV, pll V.
+ */
+ dsi_phy_tst_set(base, PHY_CFG_I, phy->hstx_ckg_sel);
+ val = (phy->pll_fbd_div5f << 5) + (phy->pll_fbd_div1f << 4) +
+ (phy->pll_fbd_2p << 1) + phy->pll_enbwt;
+ dsi_phy_tst_set(base, PHY_CFG_PLL_I, val);
+ dsi_phy_tst_set(base, PHY_CFG_PLL_II, phy->pll_fbd_p);
+ dsi_phy_tst_set(base, PHY_CFG_PLL_III, phy->pll_fbd_s);
+ val = (phy->pll_pre_div1p << 7) + phy->pll_pre_p;
+ dsi_phy_tst_set(base, PHY_CFG_PLL_IV, val);
+ val = (5 << 5) + (phy->pll_vco_750M << 4) + (phy->pll_lpf_rs << 2) +
+ phy->pll_lpf_cs;
+ dsi_phy_tst_set(base, PHY_CFG_PLL_V, val);
+
+ writel(PHY_ENABLECLK, base + PHY_RSTZ);
+ udelay(1);
+ writel(PHY_ENABLECLK | PHY_UNSHUTDOWNZ, base + PHY_RSTZ);
+ udelay(1);
+ writel(PHY_ENABLECLK | PHY_UNRSTZ | PHY_UNSHUTDOWNZ, base + PHY_RSTZ);
+ usleep_range(1000, 1500);
+
+ /*
+ * wait for phy's clock ready
+ */
+ delay_count = 100;
+ while (delay_count--) {
+ val = readl(base + PHY_STATUS);
+ if ((BIT(0) | BIT(2)) & val)
+ break;
+
+ udelay(1);
+ }
+
+ if (!delay_count)
+ DRM_INFO("phylock and phystopstateclklane is not ready.\n");
+}
+
+static void dsi_set_mode_timing(void __iomem *base,
+ u32 lane_byte_clk_kHz,
+ struct drm_display_mode *mode,
+ enum mipi_dsi_pixel_format format)
+{
+ u32 hfp, hbp, hsw, vfp, vbp, vsw;
+ u32 hline_time;
+ u32 hsa_time;
+ u32 hbp_time;
+ u32 pixel_clk_kHz;
+ int htot, vtot;
+ u32 val;
+ u64 tmp;
+
+ val = dsi_get_dpi_color_coding(format);
+ writel(val, base + DPI_COLOR_CODING);
+
+ val = (mode->flags & DRM_MODE_FLAG_NHSYNC ? 1 : 0) << 2;
+ val |= (mode->flags & DRM_MODE_FLAG_NVSYNC ? 1 : 0) << 1;
+ writel(val, base + DPI_CFG_POL);
+
+ /*
+ * The DSI IP accepts vertical timing using lines as normal,
+ * but horizontal timing is a mixture of pixel-clocks for the
+ * active region and byte-lane clocks for the blanking-related
+ * timings. hfp is specified as the total hline_time in byte-
+ * lane clocks minus hsa, hbp and active.
+ */
+ pixel_clk_kHz = mode->clock;
+ htot = mode->htotal;
+ vtot = mode->vtotal;
+ hfp = mode->hsync_start - mode->hdisplay;
+ hbp = mode->htotal - mode->hsync_end;
+ hsw = mode->hsync_end - mode->hsync_start;
+ vfp = mode->vsync_start - mode->vdisplay;
+ vbp = mode->vtotal - mode->vsync_end;
+ vsw = mode->vsync_end - mode->vsync_start;
+ if (vsw > 15) {
+ DRM_DEBUG_DRIVER("vsw exceeded 15\n");
+ vsw = 15;
+ }
+
+ hsa_time = (hsw * lane_byte_clk_kHz) / pixel_clk_kHz;
+ hbp_time = (hbp * lane_byte_clk_kHz) / pixel_clk_kHz;
+ tmp = (u64)htot * (u64)lane_byte_clk_kHz;
+ hline_time = DIV_ROUND_UP(tmp, pixel_clk_kHz);
+
+ /* all specified in byte-lane clocks */
+ writel(hsa_time, base + VID_HSA_TIME);
+ writel(hbp_time, base + VID_HBP_TIME);
+ writel(hline_time, base + VID_HLINE_TIME);
+
+ writel(vsw, base + VID_VSA_LINES);
+ writel(vbp, base + VID_VBP_LINES);
+ writel(vfp, base + VID_VFP_LINES);
+ writel(mode->vdisplay, base + VID_VACTIVE_LINES);
+ writel(mode->hdisplay, base + VID_PKT_SIZE);
+
+ DRM_DEBUG_DRIVER("htot=%d, hfp=%d, hbp=%d, hsw=%d\n",
+ htot, hfp, hbp, hsw);
+ DRM_DEBUG_DRIVER("vtol=%d, vfp=%d, vbp=%d, vsw=%d\n",
+ vtot, vfp, vbp, vsw);
+ DRM_DEBUG_DRIVER("hsa_time=%d, hbp_time=%d, hline_time=%d\n",
+ hsa_time, hbp_time, hline_time);
+}
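A hedged worked example of the pixel-clock to byte-lane-clock conversion, using illustrative 1080p-like numbers: a 148500 kHz pixel clock at 24 bpp over 4 lanes gives a lane byte clock of 148500 * 24 / 4 / 8 = 111375 kHz, and the horizontal timings scale by that ratio:

    #include <assert.h>
    #include <stdint.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            uint32_t pixel_clk_khz = 148500, lane_byte_clk_khz = 111375;
            uint32_t hsw = 44, htot = 2200;       /* 1920x1080@60 CEA mode */
            uint64_t tmp = (uint64_t)htot * lane_byte_clk_khz;

            assert(hsw * lane_byte_clk_khz / pixel_clk_khz == 33); /* hsa_time   */
            assert(DIV_ROUND_UP(tmp, pixel_clk_khz) == 1650);      /* hline_time */
            return 0;
    }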
+
+static void dsi_set_video_mode(void __iomem *base, unsigned long flags)
+{
+ u32 val;
+ u32 mode_mask = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+ MIPI_DSI_MODE_VIDEO_SYNC_PULSE;
+ u32 non_burst_sync_pulse = MIPI_DSI_MODE_VIDEO |
+ MIPI_DSI_MODE_VIDEO_SYNC_PULSE;
+ u32 non_burst_sync_event = MIPI_DSI_MODE_VIDEO;
+
+ /*
+ * choose video mode type
+ */
+ if ((flags & mode_mask) == non_burst_sync_pulse)
+ val = DSI_NON_BURST_SYNC_PULSES;
+ else if ((flags & mode_mask) == non_burst_sync_event)
+ val = DSI_NON_BURST_SYNC_EVENTS;
+ else
+ val = DSI_BURST_SYNC_PULSES_1 | (0x3f << 8);
+
+ writel(val, base + VID_MODE_CFG);
+ writel(PHY_TXREQUESTCLKHS, base + LPCLK_CTRL);
+}
+
+static void dsi_set_command_mode(void __iomem *base)
+{
+ writel(CMD_MODE_ALL_LP, base + CMD_MODE_CFG);
+ writel(DSI_COMMAND_MODE, base + MODE_CFG);
+}
+
+static void dw_dsi_set_mode(struct dw_dsi *dsi, enum dsi_work_mode mode)
+{
+ struct dsi_hw_ctx *ctx = dsi->ctx;
+ void __iomem *base = ctx->base;
+
+ writel(RESET, base + PWR_UP);
+ writel(mode, base + MODE_CFG);
+ writel(POWERUP, base + PWR_UP);
+}
+
+static void dsi_mipi_init(struct dw_dsi *dsi)
+{
+ struct dsi_hw_ctx *ctx = dsi->ctx;
+ struct mipi_phy_params *phy = &dsi->phy;
+ struct drm_display_mode *mode = &dsi->cur_mode;
+ void __iomem *base = ctx->base;
+ u32 id = dsi->cur_client;
+ u32 dphy_req_kHz;
+ int bpp;
+
+ /*
+ * compute phy params
+ */
+ bpp = mipi_dsi_pixel_format_to_bpp(dsi->client[id].format);
+ if (bpp < 0)
+ return;
+ if (dsi->client[id].phy_clock)
+ dphy_req_kHz = dsi->client[id].phy_clock;
+ else
+ dphy_req_kHz = mode->clock * bpp / dsi->client[id].lanes;
+ dsi_get_phy_params(dphy_req_kHz, phy);
+
+ /* reset Core */
+ writel(RESET, base + PWR_UP);
+
+ /* set dsi phy params */
+ dsi_set_mipi_phy(base, phy, dsi->client[id].lanes);
+
+ /* set dsi mode timing */
+ dsi_set_mode_timing(base, phy->lane_byte_clk_kHz, mode,
+ dsi->client[id].format);
+
+ /* set dsi video mode */
+ dsi_set_video_mode(base, dsi->client[id].mode_flags);
+
+ /* set command mode */
+ dsi_set_command_mode(base);
+
+ /* dsi wake up */
+ writel(POWERUP, base + PWR_UP);
+
+ DRM_DEBUG_DRIVER("lanes=%d, pixel_clk=%d kHz, bytes_freq=%d kHz\n",
+ dsi->client[id].lanes, mode->clock,
+ phy->lane_byte_clk_kHz);
+}
+
+static void dsi_encoder_disable(struct drm_encoder *encoder)
+{
+ struct dw_dsi *dsi = encoder_to_dsi(encoder);
+ struct dsi_hw_ctx *ctx = dsi->ctx;
+ void __iomem *base = ctx->base;
+
+ if (!dsi->enable)
+ return;
+
+ dw_dsi_set_mode(dsi, DSI_COMMAND_MODE);
+ /* turn off panel's backlight */
+ if (dsi->panel && drm_panel_disable(dsi->panel))
+ DRM_ERROR("failed to disaable panel\n");
+
+ /* turn off panel */
+ if (dsi->panel && drm_panel_unprepare(dsi->panel))
+ DRM_ERROR("failed to unprepare panel\n");
+
+ writel(0, base + PWR_UP);
+ writel(0, base + LPCLK_CTRL);
+ writel(0, base + PHY_RSTZ);
+ clk_disable_unprepare(ctx->pclk);
+
+ dsi->enable = false;
+}
+
+static void dsi_encoder_enable(struct drm_encoder *encoder)
+{
+ struct dw_dsi *dsi = encoder_to_dsi(encoder);
+ struct dsi_hw_ctx *ctx = dsi->ctx;
+ int ret;
+
+ if (dsi->enable)
+ return;
+
+ ret = clk_prepare_enable(ctx->pclk);
+ if (ret) {
+ DRM_ERROR("fail to enable pclk: %d\n", ret);
+ return;
+ }
+
+ dsi_mipi_init(dsi);
+
+ /* turn on panel */
+ if (dsi->panel && drm_panel_prepare(dsi->panel))
+ DRM_ERROR("failed to prepare panel\n");
+
+ dw_dsi_set_mode(dsi, DSI_VIDEO_MODE);
+
+ /* turn on panel's back light */
+ if (dsi->panel && drm_panel_enable(dsi->panel))
+ DRM_ERROR("failed to enable panel\n");
+
+ dsi->enable = true;
+}
+
+static void dsi_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adj_mode)
+{
+ struct dw_dsi *dsi = encoder_to_dsi(encoder);
+
+ drm_mode_copy(&dsi->cur_mode, adj_mode);
+}
+
+static int dsi_encoder_atomic_check(struct drm_encoder *encoder,
+ struct drm_crtc_state *crtc_state,
+ struct drm_connector_state *conn_state)
+{
+ /* do nothing */
+ return 0;
+}
+
+static const struct drm_encoder_helper_funcs dw_encoder_helper_funcs = {
+ .atomic_check = dsi_encoder_atomic_check,
+ .mode_set = dsi_encoder_mode_set,
+ .enable = dsi_encoder_enable,
+ .disable = dsi_encoder_disable
+};
+
+static const struct drm_encoder_funcs dw_encoder_funcs = {
+ .destroy = drm_encoder_cleanup,
+};
+
+static int dw_drm_encoder_init(struct device *dev,
+ struct drm_device *drm_dev,
+ struct drm_encoder *encoder)
+{
+ int ret;
+ u32 crtc_mask = drm_of_find_possible_crtcs(drm_dev, dev->of_node);
+
+ if (!crtc_mask) {
+ DRM_ERROR("failed to find crtc mask\n");
+ return -EINVAL;
+ }
+
+ encoder->possible_crtcs = crtc_mask;
+ ret = drm_encoder_init(drm_dev, encoder, &dw_encoder_funcs,
+ DRM_MODE_ENCODER_DSI);
+ if (ret) {
+ DRM_ERROR("failed to init dsi encoder\n");
+ return ret;
+ }
+
+ drm_encoder_helper_add(encoder, &dw_encoder_helper_funcs);
+
+ return 0;
+}
+
+static int dsi_host_attach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *mdsi)
+{
+ struct dw_dsi *dsi = host_to_dsi(host);
+ u32 id = mdsi->channel >= 1 ? OUT_PANEL : OUT_HDMI;
+
+ if (mdsi->lanes < 1 || mdsi->lanes > 4) {
+ DRM_ERROR("dsi device params invalid\n");
+ return -EINVAL;
+ }
+
+ dsi->client[id].lanes = mdsi->lanes;
+ dsi->client[id].format = mdsi->format;
+ dsi->client[id].mode_flags = mdsi->mode_flags;
+ dsi->client[id].phy_clock = mdsi->phy_clock;
+
+ DRM_INFO("host attach, client name=[%s], id=%d\n", mdsi->name, id);
+
+ return 0;
+}
+
+static int dsi_host_detach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *mdsi)
+{
+ /* do nothing */
+ return 0;
+}
+
+static int dsi_gen_pkt_hdr_write(void __iomem *base, u32 val)
+{
+ u32 status;
+ int ret;
+
+ ret = readx_poll_timeout(readl, base + CMD_PKT_STATUS, status,
+ !(status & GEN_CMD_FULL), 1000,
+ CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret < 0) {
+ DRM_ERROR("failed to get available command FIFO\n");
+ return ret;
+ }
+
+ writel(val, base + GEN_HDR);
+
+ ret = readx_poll_timeout(readl, base + CMD_PKT_STATUS, status,
+ status & (GEN_CMD_EMPTY | GEN_PLD_W_EMPTY),
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret < 0) {
+ DRM_ERROR("failed to write command FIFO\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dsi_dcs_short_write(void __iomem *base,
+ const struct mipi_dsi_msg *msg)
+{
+ const u16 *tx_buf = msg->tx_buf;
+ u32 val = GEN_HDATA(*tx_buf) | GEN_HTYPE(msg->type);
+
+ if (msg->tx_len > 2) {
+ DRM_ERROR("too long tx buf length %zu for short write\n",
+ msg->tx_len);
+ return -EINVAL;
+ }
+
+ return dsi_gen_pkt_hdr_write(base, val);
+}
+
+static int dsi_dcs_long_write(void __iomem *base,
+ const struct mipi_dsi_msg *msg)
+{
+ const u32 *tx_buf = msg->tx_buf;
+ int len = msg->tx_len, pld_data_bytes = sizeof(*tx_buf), ret;
+ u32 val = GEN_HDATA(msg->tx_len) | GEN_HTYPE(msg->type);
+ u32 remainder = 0;
+ u32 status;
+
+ if (msg->tx_len < 3) {
+ DRM_ERROR("wrong tx buf length %zu for long write\n",
+ msg->tx_len);
+ return -EINVAL;
+ }
+
+ while (DIV_ROUND_UP(len, pld_data_bytes)) {
+ if (len < pld_data_bytes) {
+ memcpy(&remainder, tx_buf, len);
+ writel(remainder, base + GEN_PLD_DATA);
+ len = 0;
+ } else {
+ writel(*tx_buf, base + GEN_PLD_DATA);
+ tx_buf++;
+ len -= pld_data_bytes;
+ }
+
+ ret = readx_poll_timeout(readl, base + CMD_PKT_STATUS,
+ status, !(status & GEN_PLD_W_FULL), 1000,
+ CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret < 0) {
+ DRM_ERROR("failed to get available write payload FIFO\n");
+ return ret;
+ }
+ }
+
+ return dsi_gen_pkt_hdr_write(base, val);
+}
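The long-write path streams the payload through the 32-bit GEN_PLD_DATA FIFO, so the tail of a buffer whose length is not a multiple of four is packed into the low bytes of one final word; a standalone sketch of that packing (callback-based, names illustrative; the low-byte placement assumes little-endian, as on these SoCs):

    #include <stdint.h>
    #include <string.h>

    /* Sketch: split a byte payload into 32-bit FIFO words, mirroring how
     * dsi_dcs_long_write() feeds GEN_PLD_DATA. */
    static void pack_payload(const uint8_t *buf, size_t len,
                             void (*push_word)(uint32_t))
    {
            while (len) {
                    uint32_t word = 0;
                    size_t n = len < 4 ? len : 4;

                    memcpy(&word, buf, n); /* tail lands in the low bytes */
                    push_word(word);
                    buf += n;
                    len -= n;
            }
    }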
+
+static ssize_t dsi_host_transfer(struct mipi_dsi_host *host,
+ const struct mipi_dsi_msg *msg)
+{
+ struct dw_dsi *dsi = host_to_dsi(host);
+ struct dsi_hw_ctx *ctx = dsi->ctx;
+ void __iomem *base = ctx->base;
+ int ret;
+
+ switch (msg->type) {
+ case MIPI_DSI_DCS_SHORT_WRITE:
+ case MIPI_DSI_DCS_SHORT_WRITE_PARAM:
+ case MIPI_DSI_SET_MAXIMUM_RETURN_PACKET_SIZE:
+ ret = dsi_dcs_short_write(base, msg);
+ break;
+ case MIPI_DSI_DCS_LONG_WRITE:
+ ret = dsi_dcs_long_write(base, msg);
+ break;
+ default:
+ DRM_ERROR("unsupported message type\n");
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static const struct mipi_dsi_host_ops dsi_host_ops = {
+ .attach = dsi_host_attach,
+ .detach = dsi_host_detach,
+ .transfer = dsi_host_transfer,
+};
+
+static int dsi_host_init(struct device *dev, struct dw_dsi *dsi)
+{
+ struct mipi_dsi_host *host = &dsi->host;
+ int ret;
+
+ host->dev = dev;
+ host->ops = &dsi_host_ops;
+ ret = mipi_dsi_host_register(host);
+ if (ret) {
+ DRM_ERROR("failed to register dsi host\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dsi_bridge_init(struct drm_device *dev, struct dw_dsi *dsi)
+{
+ struct drm_encoder *encoder = &dsi->encoder;
+ struct drm_bridge *bridge = dsi->bridge;
+ int ret;
+
+ /* associate the bridge to dsi encoder */
+ bridge->encoder = encoder;
+
+ ret = drm_bridge_attach(dev, bridge);
+ if (ret) {
+ DRM_ERROR("failed to attach external bridge\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int dsi_connector_get_modes(struct drm_connector *connector)
+{
+ struct dw_dsi *dsi = connector_to_dsi(connector);
+
+ return drm_panel_get_modes(dsi->panel);
+}
+
+static enum drm_mode_status
+dsi_connector_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ enum drm_mode_status mode_status = MODE_OK;
+
+ return mode_status;
+}
+
+static struct drm_encoder *
+dsi_connector_best_encoder(struct drm_connector *connector)
+{
+ struct dw_dsi *dsi = connector_to_dsi(connector);
+
+ return &dsi->encoder;
+}
+
+static struct drm_connector_helper_funcs dsi_connector_helper_funcs = {
+ .get_modes = dsi_connector_get_modes,
+ .mode_valid = dsi_connector_mode_valid,
+ .best_encoder = dsi_connector_best_encoder,
+};
+
+static enum drm_connector_status
+dsi_connector_detect(struct drm_connector *connector, bool force)
+{
+ struct dw_dsi *dsi = connector_to_dsi(connector);
+ enum drm_connector_status status;
+
+ status = dsi->cur_client == OUT_PANEL ? connector_status_connected :
+ connector_status_disconnected;
+
+ return status;
+}
+
+static void dsi_connector_destroy(struct drm_connector *connector)
+{
+ drm_connector_unregister(connector);
+ drm_connector_cleanup(connector);
+}
+
+static struct drm_connector_funcs dsi_atomic_connector_funcs = {
+ .dpms = drm_atomic_helper_connector_dpms,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .detect = dsi_connector_detect,
+ .destroy = dsi_connector_destroy,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int dsi_connector_init(struct drm_device *dev, struct dw_dsi *dsi)
+{
+ struct drm_encoder *encoder = &dsi->encoder;
+ struct drm_connector *connector = &dsi->connector;
+ int ret;
+
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
+ drm_connector_helper_add(connector,
+ &dsi_connector_helper_funcs);
+
+ ret = drm_connector_init(dev, &dsi->connector,
+ &dsi_atomic_connector_funcs,
+ DRM_MODE_CONNECTOR_DSI);
+ if (ret)
+ return ret;
+
+ ret = drm_mode_connector_attach_encoder(connector, encoder);
+ if (ret)
+ return ret;
+
+ ret = drm_panel_attach(dsi->panel, connector);
+ if (ret)
+ return ret;
+
+ DRM_INFO("connector init\n");
+ return 0;
+}
+
+static int dsi_bind(struct device *dev, struct device *master, void *data)
+{
+ struct dsi_data *ddata = dev_get_drvdata(dev);
+ struct dw_dsi *dsi = &ddata->dsi;
+ struct drm_device *drm_dev = data;
+ int ret;
+
+ ret = dw_drm_encoder_init(dev, drm_dev, &dsi->encoder);
+ if (ret)
+ return ret;
+
+ if (dsi->bridge) {
+ ret = dsi_bridge_init(drm_dev, dsi);
+ if (ret)
+ return ret;
+ }
+
+ if (dsi->panel) {
+ ret = dsi_connector_init(drm_dev, dsi);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void dsi_unbind(struct device *dev, struct device *master, void *data)
+{
+ /* do nothing */
+}
+
+static const struct component_ops dsi_ops = {
+ .bind = dsi_bind,
+ .unbind = dsi_unbind,
+};
+
+static int dsi_parse_bridge_endpoint(struct dw_dsi *dsi,
+ struct device_node *endpoint)
+{
+ struct device_node *bridge_node;
+ struct drm_bridge *bridge;
+
+ bridge_node = of_graph_get_remote_port_parent(endpoint);
+ if (!bridge_node) {
+ DRM_ERROR("no valid bridge node\n");
+ return -ENODEV;
+ }
+ of_node_put(bridge_node);
+
+ bridge = of_drm_find_bridge(bridge_node);
+ if (!bridge) {
+ DRM_INFO("wait for external HDMI bridge driver.\n");
+ return -EPROBE_DEFER;
+ }
+ dsi->bridge = bridge;
+
+ return 0;
+}
+
+static int dsi_parse_panel_endpoint(struct dw_dsi *dsi,
+ struct device_node *endpoint)
+{
+ struct device_node *panel_node;
+ struct drm_panel *panel;
+
+ panel_node = of_graph_get_remote_port_parent(endpoint);
+ if (!panel_node) {
+ DRM_ERROR("no valid panel node\n");
+ return -ENODEV;
+ }
+ of_node_put(panel_node);
+
+ panel = of_drm_find_panel(panel_node);
+ if (!panel) {
+ DRM_DEBUG_DRIVER("skip this panel endpoint.\n");
+ return 0;
+ }
+ dsi->panel = panel;
+
+ return 0;
+}
+
+static int dsi_parse_endpoint(struct dw_dsi *dsi,
+ struct device_node *np,
+ enum dsi_output_client client)
+{
+ struct device_node *ep_node;
+ struct of_endpoint ep;
+ int ret = 0;
+
+ if (client == OUT_MAX)
+ return -EINVAL;
+
+ for_each_endpoint_of_node(np, ep_node) {
+ ret = of_graph_parse_endpoint(ep_node, &ep);
+ if (ret) {
+ of_node_put(ep_node);
+ return ret;
+ }
+
+ /* skip dsi input port, port == 0 is input port */
+ if (ep.port == 0)
+ continue;
+
+ /* parse bridge endpoint */
+ if (client == OUT_HDMI) {
+ if (ep.id == 0) {
+ ret = dsi_parse_bridge_endpoint(dsi, ep_node);
+ if (dsi->bridge)
+ break;
+ }
+ } else { /* parse panel endpoint */
+ if (ep.id > 0) {
+ ret = dsi_parse_panel_endpoint(dsi, ep_node);
+ if (dsi->panel)
+ break;
+ }
+ }
+
+ if (ret) {
+ of_node_put(ep_node);
+ return ret;
+ }
+ }
+
+ if (!dsi->bridge && !dsi->panel) {
+ DRM_ERROR("at least one bridge or panel node is required\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int dsi_parse_dt(struct platform_device *pdev, struct dw_dsi *dsi)
+{
+ struct dsi_hw_ctx *ctx = dsi->ctx;
+ struct resource *res;
+
+ ctx->pclk = devm_clk_get(&pdev->dev, "pclk");
+ if (IS_ERR(ctx->pclk)) {
+ DRM_ERROR("failed to get pclk clock\n");
+ return PTR_ERR(ctx->pclk);
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ ctx->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(ctx->base)) {
+ DRM_ERROR("failed to remap dsi io region\n");
+ return PTR_ERR(ctx->base);
+ }
+
+ dsi->gpio_mux = devm_gpiod_get(&pdev->dev, "mux", GPIOD_OUT_HIGH);
+ if (IS_ERR(dsi->gpio_mux))
+ return PTR_ERR(dsi->gpio_mux);
+ /* set dsi default output to panel */
+ dsi->cur_client = OUT_PANEL;
+
+ return 0;
+}
+
+static int dsi_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct device *dev = &pdev->dev;
+ struct dsi_data *data;
+ struct dw_dsi *dsi;
+ struct dsi_hw_ctx *ctx;
+ int ret;
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ DRM_ERROR("failed to allocate dsi data.\n");
+ return -ENOMEM;
+ }
+ dsi = &data->dsi;
+ ctx = &data->ctx;
+ dsi->ctx = ctx;
+
+ /* parse HDMI bridge endpoint */
+ ret = dsi_parse_endpoint(dsi, np, OUT_HDMI);
+ if (ret)
+ return ret;
+
+ ret = dsi_host_init(dev, dsi);
+ if (ret)
+ return ret;
+
+ /* parse panel endpoint */
+ ret = dsi_parse_endpoint(dsi, np, OUT_PANEL);
+ if (ret)
+ goto err_host_unregister;
+
+ ret = dsi_parse_dt(pdev, dsi);
+ if (ret)
+ goto err_host_unregister;
+
+ platform_set_drvdata(pdev, data);
+
+ ret = component_add(dev, &dsi_ops);
+ if (ret)
+ goto err_host_unregister;
+
+ return 0;
+
+err_host_unregister:
+ mipi_dsi_host_unregister(&dsi->host);
+ return ret;
+}
+
+static int dsi_remove(struct platform_device *pdev)
+{
+ component_del(&pdev->dev, &dsi_ops);
+
+ return 0;
+}
+
+static const struct of_device_id dsi_of_match[] = {
+ {.compatible = "hisilicon,hi6220-dsi"},
+ { }
+};
+MODULE_DEVICE_TABLE(of, dsi_of_match);
+
+static struct platform_driver dsi_driver = {
+ .probe = dsi_probe,
+ .remove = dsi_remove,
+ .driver = {
+ .name = "dw-dsi",
+ .of_match_table = dsi_of_match,
+ },
+};
+
+module_platform_driver(dsi_driver);
+
+MODULE_AUTHOR("Xinliang Liu <xinliang.liu@linaro.org>");
+MODULE_AUTHOR("Xinliang Liu <z.liuxinliang@hisilicon.com>");
+MODULE_AUTHOR("Xinwei Kong <kong.kongxinwei@hisilicon.com>");
+MODULE_DESCRIPTION("DesignWare MIPI DSI Host Controller v1.02 driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h b/drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h
new file mode 100644
index 000000000000..17f9c050d4bb
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2016 Linaro Limited.
+ * Copyright (c) 2014-2016 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __DW_DSI_REG_H__
+#define __DW_DSI_REG_H__
+
+#define MASK(x) (BIT(x) - 1)
+
+/*
+ * regs
+ */
+#define PWR_UP 0x04 /* Core power-up */
+#define RESET 0
+#define POWERUP BIT(0)
+#define PHY_IF_CFG 0xA4 /* D-PHY interface configuration */
+#define CLKMGR_CFG 0x08 /* the internal clock dividers */
+#define PHY_RSTZ 0xA0 /* D-PHY reset control */
+#define PHY_ENABLECLK BIT(2)
+#define PHY_UNRSTZ BIT(1)
+#define PHY_UNSHUTDOWNZ BIT(0)
+#define PHY_TST_CTRL0 0xB4 /* D-PHY test interface control 0 */
+#define PHY_TST_CTRL1 0xB8 /* D-PHY test interface control 1 */
+#define CLK_TLPX 0x10
+#define CLK_THS_PREPARE 0x11
+#define CLK_THS_ZERO 0x12
+#define CLK_THS_TRAIL 0x13
+#define CLK_TWAKEUP 0x14
+#define DATA_TLPX(x) (0x20 + ((x) << 4))
+#define DATA_THS_PREPARE(x) (0x21 + ((x) << 4))
+#define DATA_THS_ZERO(x) (0x22 + ((x) << 4))
+#define DATA_THS_TRAIL(x) (0x23 + ((x) << 4))
+#define DATA_TTA_GO(x) (0x24 + ((x) << 4))
+#define DATA_TTA_GET(x) (0x25 + ((x) << 4))
+#define DATA_TWAKEUP(x) (0x26 + ((x) << 4))
+#define PHY_CFG_I 0x60
+#define PHY_CFG_PLL_I 0x63
+#define PHY_CFG_PLL_II 0x64
+#define PHY_CFG_PLL_III 0x65
+#define PHY_CFG_PLL_IV 0x66
+#define PHY_CFG_PLL_V 0x67
+#define DPI_COLOR_CODING 0x10 /* DPI color coding */
+#define DPI_CFG_POL 0x14 /* DPI polarity configuration */
+#define VID_HSA_TIME 0x48 /* Horizontal Sync Active time */
+#define VID_HBP_TIME 0x4C /* Horizontal Back Porch time */
+#define VID_HLINE_TIME 0x50 /* Line time */
+#define VID_VSA_LINES 0x54 /* Vertical Sync Active period */
+#define VID_VBP_LINES 0x58 /* Vertical Back Porch period */
+#define VID_VFP_LINES 0x5C /* Vertical Front Porch period */
+#define VID_VACTIVE_LINES 0x60 /* Vertical resolution */
+#define VID_PKT_SIZE 0x3C /* Video packet size */
+#define VID_MODE_CFG 0x38 /* Video mode configuration */
+#define GEN_HDR 0x6c
+#define GEN_HDATA(data) (((data) & 0xffff) << 8)
+#define GEN_HDATA_MASK (0xffff << 8)
+#define GEN_HTYPE(type) (((type) & 0xff) << 0)
+#define GEN_HTYPE_MASK 0xff
+#define GEN_PLD_DATA 0x70
+#define CMD_PKT_STATUS 0x74
+#define GEN_CMD_EMPTY BIT(0)
+#define GEN_CMD_FULL BIT(1)
+#define GEN_PLD_W_EMPTY BIT(2)
+#define GEN_PLD_W_FULL BIT(3)
+#define GEN_PLD_R_EMPTY BIT(4)
+#define GEN_PLD_R_FULL BIT(5)
+#define GEN_RD_CMD_BUSY BIT(6)
+#define CMD_MODE_CFG 0x68
+#define MAX_RD_PKT_SIZE_LP BIT(24)
+#define DCS_LW_TX_LP BIT(19)
+#define DCS_SR_0P_TX_LP BIT(18)
+#define DCS_SW_1P_TX_LP BIT(17)
+#define DCS_SW_0P_TX_LP BIT(16)
+#define GEN_LW_TX_LP BIT(14)
+#define GEN_SR_2P_TX_LP BIT(13)
+#define GEN_SR_1P_TX_LP BIT(12)
+#define GEN_SR_0P_TX_LP BIT(11)
+#define GEN_SW_2P_TX_LP BIT(10)
+#define GEN_SW_1P_TX_LP BIT(9)
+#define GEN_SW_0P_TX_LP BIT(8)
+#define EN_ACK_RQST BIT(1)
+#define EN_TEAR_FX BIT(0)
+#define CMD_MODE_ALL_LP (MAX_RD_PKT_SIZE_LP | \
+ DCS_LW_TX_LP | \
+ DCS_SR_0P_TX_LP | \
+ DCS_SW_1P_TX_LP | \
+ DCS_SW_0P_TX_LP | \
+ GEN_LW_TX_LP | \
+ GEN_SR_2P_TX_LP | \
+ GEN_SR_1P_TX_LP | \
+ GEN_SR_0P_TX_LP | \
+ GEN_SW_2P_TX_LP | \
+ GEN_SW_1P_TX_LP | \
+ GEN_SW_0P_TX_LP)
+#define PHY_TMR_CFG 0x9C /* Data lanes timing configuration */
+#define BTA_TO_CNT 0x8C /* Response timeout definition */
+#define PHY_TMR_LPCLK_CFG 0x98 /* clock lane timing configuration */
+#define CLK_DATA_TMR_CFG 0xCC
+#define LPCLK_CTRL 0x94 /* Low-power in clock lane */
+#define PHY_TXREQUESTCLKHS BIT(0)
+#define MODE_CFG 0x34 /* Video or Command mode selection */
+#define PHY_STATUS 0xB0 /* D-PHY PPI status interface */
+
+#define PHY_STOP_WAIT_TIME 0x30
+#define CMD_PKT_STATUS_TIMEOUT_US 20000
+
+/*
+ * regs relevant enum
+ */
+enum dpi_color_coding {
+ DSI_24BITS_1 = 5,
+};
+
+enum dsi_video_mode_type {
+ DSI_NON_BURST_SYNC_PULSES = 0,
+ DSI_NON_BURST_SYNC_EVENTS,
+ DSI_BURST_SYNC_PULSES_1,
+ DSI_BURST_SYNC_PULSES_2
+};
+
+enum dsi_work_mode {
+ DSI_VIDEO_MODE = 0,
+ DSI_COMMAND_MODE
+};
+
+/*
+ * Register Write/Read Helper functions
+ */
+static inline void dw_update_bits(void __iomem *addr, u32 bit_start,
+ u32 mask, u32 val)
+{
+ u32 tmp, orig;
+
+ orig = readl(addr);
+ tmp = orig & ~(mask << bit_start);
+ tmp |= (val & mask) << bit_start;
+ writel(tmp, addr);
+}
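A userspace model of this helper makes the read-modify-write explicit; for instance, writing an 8-bit value into bits 23:16 leaves every other bit untouched:

    #include <assert.h>
    #include <stdint.h>

    #define MASK(x) ((1u << (x)) - 1)

    /* Model of dw_update_bits() without the MMIO access. */
    static uint32_t update_bits(uint32_t orig, uint32_t bit_start,
                                uint32_t mask, uint32_t val)
    {
            return (orig & ~(mask << bit_start)) | ((val & mask) << bit_start);
    }

    int main(void)
    {
            assert(update_bits(0xFFFFFFFF, 16, MASK(8), 0x2A) == 0xFF2AFFFF);
            return 0;
    }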
+
+#endif /* __DW_DSI_REG_H__ */
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h
new file mode 100644
index 000000000000..4cf281b7ed63
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2016 Linaro Limited.
+ * Copyright (c) 2014-2016 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __KIRIN_ADE_REG_H__
+#define __KIRIN_ADE_REG_H__
+
+/*
+ * ADE Registers
+ */
+#define MASK(x) (BIT(x) - 1)
+
+#define ADE_CTRL 0x0004
+#define FRM_END_START_OFST 0
+#define FRM_END_START_MASK MASK(2)
+#define AUTO_CLK_GATE_EN_OFST 0
+#define AUTO_CLK_GATE_EN BIT(0)
+#define ADE_DISP_SRC_CFG 0x0018
+#define ADE_CTRL1 0x008C
+#define ADE_EN 0x0100
+#define ADE_DISABLE 0
+#define ADE_ENABLE 1
+/* reset and reload regs */
+#define ADE_SOFT_RST_SEL(x) (0x0078 + (x) * 0x4)
+#define ADE_RELOAD_DIS(x) (0x00AC + (x) * 0x4)
+#define RDMA_OFST 0
+#define CLIP_OFST 15
+#define SCL_OFST 21
+#define CTRAN_OFST 24
+#define OVLY_OFST 37 /* 32+5 */
+/* channel regs */
+#define RD_CH_CTRL(x) (0x1004 + (x) * 0x80)
+#define RD_CH_ADDR(x) (0x1008 + (x) * 0x80)
+#define RD_CH_SIZE(x) (0x100C + (x) * 0x80)
+#define RD_CH_STRIDE(x) (0x1010 + (x) * 0x80)
+#define RD_CH_SPACE(x) (0x1014 + (x) * 0x80)
+#define RD_CH_EN(x) (0x1020 + (x) * 0x80)
+/* overlay regs */
+#define ADE_OVLY1_TRANS_CFG 0x002C
+#define ADE_OVLY_CTL 0x0098
+#define ADE_OVLY_CH_XY0(x) (0x2004 + (x) * 4)
+#define ADE_OVLY_CH_XY1(x) (0x2024 + (x) * 4)
+#define ADE_OVLY_CH_CTL(x) (0x204C + (x) * 4)
+#define ADE_OVLY_OUTPUT_SIZE(x) (0x2070 + (x) * 8)
+#define OUTPUT_XSIZE_OFST 16
+#define ADE_OVLYX_CTL(x) (0x209C + (x) * 4)
+#define CH_OVLY_SEL_OFST(x) ((x) * 4)
+#define CH_OVLY_SEL_MASK MASK(2)
+#define CH_OVLY_SEL_VAL(x) ((x) + 1)
+#define CH_ALP_MODE_OFST 0
+#define CH_ALP_SEL_OFST 2
+#define CH_UNDER_ALP_SEL_OFST 4
+#define CH_EN_OFST 6
+#define CH_ALP_GBL_OFST 15
+#define CH_SEL_OFST 28
+/* ctran regs */
+#define ADE_CTRAN_DIS(x) (0x5004 + (x) * 0x100)
+#define CTRAN_BYPASS_ON 1
+#define CTRAN_BYPASS_OFF 0
+#define ADE_CTRAN_IMAGE_SIZE(x) (0x503C + (x) * 0x100)
+/* clip regs */
+#define ADE_CLIP_DISABLE(x) (0x6800 + (x) * 0x100)
+#define ADE_CLIP_SIZE0(x) (0x6804 + (x) * 0x100)
+#define ADE_CLIP_SIZE1(x) (0x6808 + (x) * 0x100)
+
+/*
+ * LDI Registers
+ */
+#define LDI_HRZ_CTRL0 0x7400
+#define HBP_OFST 20
+#define LDI_HRZ_CTRL1 0x7404
+#define LDI_VRT_CTRL0 0x7408
+#define VBP_OFST 20
+#define LDI_VRT_CTRL1 0x740C
+#define LDI_PLR_CTRL 0x7410
+#define FLAG_NVSYNC BIT(0)
+#define FLAG_NHSYNC BIT(1)
+#define FLAG_NPIXCLK BIT(2)
+#define FLAG_NDE BIT(3)
+#define LDI_DSP_SIZE 0x7414
+#define VSIZE_OFST 20
+#define LDI_INT_EN 0x741C
+#define FRAME_END_INT_EN_OFST 1
+#define LDI_CTRL 0x7420
+#define BPP_OFST 3
+#define DATA_GATE_EN BIT(2)
+#define LDI_EN BIT(0)
+#define LDI_MSK_INT 0x7428
+#define LDI_INT_CLR 0x742C
+#define LDI_WORK_MODE 0x7430
+#define LDI_HDMI_DSI_GT 0x7434
+
+/*
+ * ADE media bus service regs
+ */
+#define ADE0_QOSGENERATOR_MODE 0x010C
+#define QOSGENERATOR_MODE_MASK MASK(2)
+#define ADE0_QOSGENERATOR_EXTCONTROL 0x0118
+#define SOCKET_QOS_EN BIT(0)
+#define ADE1_QOSGENERATOR_MODE 0x020C
+#define ADE1_QOSGENERATOR_EXTCONTROL 0x0218
+
+/*
+ * ADE regs relevant enums
+ */
+enum frame_end_start {
+ /* regs take effect in every vsync */
+ REG_EFFECTIVE_IN_VSYNC = 0,
+ /* regs take effect at first ADE enable and every frame end */
+ REG_EFFECTIVE_IN_ADEEN_FRMEND,
+ /* regs take effect immediately when ADE is enabled */
+ REG_EFFECTIVE_IN_ADEEN,
+ /* regs take effect in first vsync and every frame end */
+ REG_EFFECTIVE_IN_VSYNC_FRMEND
+};
+
+enum ade_fb_format {
+ ADE_RGB_565 = 0,
+ ADE_BGR_565,
+ ADE_XRGB_8888,
+ ADE_XBGR_8888,
+ ADE_ARGB_8888,
+ ADE_ABGR_8888,
+ ADE_RGBA_8888,
+ ADE_BGRA_8888,
+ ADE_RGB_888,
+ ADE_BGR_888 = 9,
+ ADE_FORMAT_UNSUPPORT = 800
+};
+
+enum ade_channel {
+ ADE_CH1 = 0, /* channel 1 for primary plane */
+ ADE_CH_NUM
+};
+
+enum ade_scale {
+ ADE_SCL1 = 0,
+ ADE_SCL2,
+ ADE_SCL3,
+ ADE_SCL_NUM
+};
+
+enum ade_ctran {
+ ADE_CTRAN1 = 0,
+ ADE_CTRAN2,
+ ADE_CTRAN3,
+ ADE_CTRAN4,
+ ADE_CTRAN5,
+ ADE_CTRAN6,
+ ADE_CTRAN_NUM
+};
+
+enum ade_overlay {
+ ADE_OVLY1 = 0,
+ ADE_OVLY2,
+ ADE_OVLY3,
+ ADE_OVLY_NUM
+};
+
+enum ade_alpha_mode {
+ ADE_ALP_GLOBAL = 0,
+ ADE_ALP_PIXEL,
+ ADE_ALP_PIXEL_AND_GLB
+};
+
+enum ade_alpha_blending_mode {
+ ADE_ALP_MUL_COEFF_0 = 0, /* alpha */
+ ADE_ALP_MUL_COEFF_1, /* 1-alpha */
+ ADE_ALP_MUL_COEFF_2, /* 0 */
+ ADE_ALP_MUL_COEFF_3 /* 1 */
+};
+
+/*
+ * LDI regs relevant enums
+ */
+enum dsi_pclk_en {
+ DSI_PCLK_ON = 0,
+ DSI_PCLK_OFF
+};
+
+enum ldi_output_format {
+ LDI_OUT_RGB_565 = 0,
+ LDI_OUT_RGB_666,
+ LDI_OUT_RGB_888
+};
+
+enum ldi_work_mode {
+ TEST_MODE = 0,
+ NORMAL_MODE
+};
+
+enum ldi_input_source {
+ DISP_SRC_NONE = 0,
+ DISP_SRC_OVLY2,
+ DISP_SRC_DISP,
+ DISP_SRC_ROT,
+ DISP_SRC_SCL2
+};
+
+/*
+ * ADE media bus service relevant enums
+ */
+enum qos_generator_mode {
+ FIXED_MODE = 0,
+ LIMITER_MODE,
+ BYPASS_MODE,
+ REGULATOR_MODE
+};
+
+/*
+ * Register Write/Read Helper functions
+ */
+static inline void ade_update_bits(void __iomem *addr, u32 bit_start,
+ u32 mask, u32 val)
+{
+ u32 tmp, orig;
+
+ orig = readl(addr);
+ tmp = orig & ~(mask << bit_start);
+ tmp |= (val & mask) << bit_start;
+ writel(tmp, addr);
+}
+
+#endif /* __KIRIN_ADE_REG_H__ */
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
new file mode 100644
index 000000000000..971e48bd9f04
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
@@ -0,0 +1,1057 @@
+/*
+ * HiSilicon Hi6220 SoC ADE (Advanced Display Engine) crtc & plane driver
+ *
+ * Copyright (c) 2016 Linaro Limited.
+ * Copyright (c) 2014-2016 Hisilicon Limited.
+ *
+ * Author:
+ * Xinliang Liu <z.liuxinliang@hisilicon.com>
+ * Xinliang Liu <xinliang.liu@linaro.org>
+ * Xinwei Kong <kong.kongxinwei@hisilicon.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <video/display_timing.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+
+#include "kirin_drm_drv.h"
+#include "kirin_ade_reg.h"
+
+#define PRIMARY_CH ADE_CH1 /* primary plane */
+#define OUT_OVLY ADE_OVLY2 /* output overlay compositor */
+#define ADE_DEBUG 1
+
+#define to_ade_crtc(crtc) \
+ container_of(crtc, struct ade_crtc, base)
+
+#define to_ade_plane(plane) \
+ container_of(plane, struct ade_plane, base)
+
+struct ade_hw_ctx {
+ void __iomem *base;
+ struct regmap *noc_regmap;
+ struct clk *ade_core_clk;
+ struct clk *media_noc_clk;
+ struct clk *ade_pix_clk;
+ struct reset_control *reset;
+ bool power_on;
+ int irq;
+};
+
+struct ade_crtc {
+ struct drm_crtc base;
+ struct ade_hw_ctx *ctx;
+ bool enable;
+ u32 out_format;
+};
+
+struct ade_plane {
+ struct drm_plane base;
+ void *ctx;
+ u8 ch; /* channel */
+};
+
+struct ade_data {
+ struct ade_crtc acrtc;
+ struct ade_plane aplane[ADE_CH_NUM];
+ struct ade_hw_ctx ctx;
+};
+
+/* ade-format info: */
+struct ade_format {
+ u32 pixel_format;
+ enum ade_fb_format ade_format;
+};
+
+static const struct ade_format ade_formats[] = {
+ /* 16bpp RGB: */
+ { DRM_FORMAT_RGB565, ADE_RGB_565 },
+ { DRM_FORMAT_BGR565, ADE_BGR_565 },
+ /* 24bpp RGB: */
+ { DRM_FORMAT_RGB888, ADE_RGB_888 },
+ { DRM_FORMAT_BGR888, ADE_BGR_888 },
+ /* 32bpp [A]RGB: */
+ { DRM_FORMAT_XRGB8888, ADE_XRGB_8888 },
+ { DRM_FORMAT_XBGR8888, ADE_XBGR_8888 },
+ { DRM_FORMAT_RGBA8888, ADE_RGBA_8888 },
+ { DRM_FORMAT_BGRA8888, ADE_BGRA_8888 },
+ { DRM_FORMAT_ARGB8888, ADE_ARGB_8888 },
+ { DRM_FORMAT_ABGR8888, ADE_ABGR_8888 },
+};
+
+static const u32 channel_formats1[] = {
+ /* channel 1,2,3,4 */
+ DRM_FORMAT_RGB565, DRM_FORMAT_BGR565, DRM_FORMAT_RGB888,
+ DRM_FORMAT_BGR888, DRM_FORMAT_XRGB8888, DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_RGBA8888, DRM_FORMAT_BGRA8888, DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_ABGR8888
+};
+
+u32 ade_get_channel_formats(u8 ch, const u32 **formats)
+{
+ switch (ch) {
+ case ADE_CH1:
+ *formats = channel_formats1;
+ return ARRAY_SIZE(channel_formats1);
+ default:
+ DRM_ERROR("no this channel %d\n", ch);
+ *formats = NULL;
+ return 0;
+ }
+}
+
+/* convert from fourcc format to ade format */
+static u32 ade_get_format(u32 pixel_format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ade_formats); i++)
+ if (ade_formats[i].pixel_format == pixel_format)
+ return ade_formats[i].ade_format;
+
+ /* not found */
+ DRM_ERROR("Not found pixel format!!fourcc_format= %d\n",
+ pixel_format);
+ return ADE_FORMAT_UNSUPPORT;
+}
+
+static void ade_update_reload_bit(void __iomem *base, u32 bit_num, u32 val)
+{
+ u32 bit_ofst, reg_num;
+
+ bit_ofst = bit_num % 32;
+ reg_num = bit_num / 32;
+
+ ade_update_bits(base + ADE_RELOAD_DIS(reg_num), bit_ofst,
+ MASK(1), !!val);
+}
+
+static u32 ade_read_reload_bit(void __iomem *base, u32 bit_num)
+{
+ u32 tmp, bit_ofst, reg_num;
+
+ bit_ofst = bit_num % 32;
+ reg_num = bit_num / 32;
+
+ tmp = readl(base + ADE_RELOAD_DIS(reg_num));
+ return !!(BIT(bit_ofst) & tmp);
+}
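+
+/*
+ * The reload-disable state is a single flat bitmap spread across
+ * consecutive 32-bit ADE_RELOAD_DIS registers: bit_num / 32 selects
+ * the register, bit_num % 32 the bit within it. Illustration only:
+ * bit_num 35 lands in ADE_RELOAD_DIS(1), bit 3.
+ */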
+
+static void ade_init(struct ade_hw_ctx *ctx)
+{
+ void __iomem *base = ctx->base;
+
+ /* enable clk gate */
+ ade_update_bits(base + ADE_CTRL1, AUTO_CLK_GATE_EN_OFST,
+ AUTO_CLK_GATE_EN, ADE_ENABLE);
+ /* clear overlay */
+ writel(0, base + ADE_OVLY1_TRANS_CFG);
+ writel(0, base + ADE_OVLY_CTL);
+ writel(0, base + ADE_OVLYX_CTL(OUT_OVLY));
+ /* clear reset and reload regs */
+ writel(MASK(32), base + ADE_SOFT_RST_SEL(0));
+ writel(MASK(32), base + ADE_SOFT_RST_SEL(1));
+ writel(MASK(32), base + ADE_RELOAD_DIS(0));
+ writel(MASK(32), base + ADE_RELOAD_DIS(1));
+ /*
+ * for video mode, all the ade registers should
+ * become effective at frame end.
+ */
+ ade_update_bits(base + ADE_CTRL, FRM_END_START_OFST,
+ FRM_END_START_MASK, REG_EFFECTIVE_IN_ADEEN_FRMEND);
+}
+
+static void ade_set_pix_clk(struct ade_hw_ctx *ctx,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adj_mode)
+{
+ u32 clk_Hz = mode->clock * 1000;
+ int ret;
+
+ /*
+ * Success should be guaranteed in the mode_valid callback,
+ * so failure shouldn't happen here
+ */
+ ret = clk_set_rate(ctx->ade_pix_clk, clk_Hz);
+ if (ret)
+ DRM_ERROR("failed to set pixel clk %dHz (%d)\n", clk_Hz, ret);
+ adj_mode->clock = clk_get_rate(ctx->ade_pix_clk) / 1000;
+}
+
+static void ade_ldi_set_mode(struct ade_crtc *acrtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adj_mode)
+{
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ void __iomem *base = ctx->base;
+ u32 width = mode->hdisplay;
+ u32 height = mode->vdisplay;
+ u32 hfp, hbp, hsw, vfp, vbp, vsw;
+ u32 plr_flags;
+
+ plr_flags = (mode->flags & DRM_MODE_FLAG_NVSYNC) ? FLAG_NVSYNC : 0;
+ plr_flags |= (mode->flags & DRM_MODE_FLAG_NHSYNC) ? FLAG_NHSYNC : 0;
+ hfp = mode->hsync_start - mode->hdisplay;
+ hbp = mode->htotal - mode->hsync_end;
+ hsw = mode->hsync_end - mode->hsync_start;
+ vfp = mode->vsync_start - mode->vdisplay;
+ vbp = mode->vtotal - mode->vsync_end;
+ vsw = mode->vsync_end - mode->vsync_start;
+ if (vsw > 15) {
+ DRM_DEBUG_DRIVER("vsw exceeded 15\n");
+ vsw = 15;
+ }
+
+ writel((hbp << HBP_OFST) | hfp, base + LDI_HRZ_CTRL0);
+ /* the configured value is actual value - 1 */
+ writel(hsw - 1, base + LDI_HRZ_CTRL1);
+ writel((vbp << VBP_OFST) | vfp, base + LDI_VRT_CTRL0);
+ /* the configured value is actual value - 1 */
+ writel(vsw - 1, base + LDI_VRT_CTRL1);
+ /* the configured value is actual value - 1 */
+ writel(((height - 1) << VSIZE_OFST) | (width - 1),
+ base + LDI_DSP_SIZE);
+ writel(plr_flags, base + LDI_PLR_CTRL);
+
+ /* set overlay compositor output size */
+ writel(((width - 1) << OUTPUT_XSIZE_OFST) | (height - 1),
+ base + ADE_OVLY_OUTPUT_SIZE(OUT_OVLY));
+
+ /* ctran6 setting */
+ writel(CTRAN_BYPASS_ON, base + ADE_CTRAN_DIS(ADE_CTRAN6));
+ /* the configured value is actual value - 1 */
+ writel(width * height - 1, base + ADE_CTRAN_IMAGE_SIZE(ADE_CTRAN6));
+ ade_update_reload_bit(base, CTRAN_OFST + ADE_CTRAN6, 0);
+
+ ade_set_pix_clk(ctx, mode, adj_mode);
+
+ DRM_DEBUG_DRIVER("set mode: %dx%d\n", width, height);
+}
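+
+/*
+ * The porch math above follows the usual DRM mode conventions. As a
+ * worked example (illustration only), the CEA 1920x1080@60 mode
+ * (htotal 2200, hsync 2008-2052, vtotal 1125, vsync 1084-1089) yields
+ * hfp = 88, hsw = 44, hbp = 148, vfp = 4, vsw = 5 and vbp = 36.
+ */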
+
+static int ade_power_up(struct ade_hw_ctx *ctx)
+{
+ int ret;
+
+ ret = clk_prepare_enable(ctx->media_noc_clk);
+ if (ret) {
+ DRM_ERROR("failed to enable media_noc_clk (%d)\n", ret);
+ return ret;
+ }
+
+ ret = reset_control_deassert(ctx->reset);
+ if (ret) {
+ DRM_ERROR("failed to deassert reset\n");
+ return ret;
+ }
+
+ ret = clk_prepare_enable(ctx->ade_core_clk);
+ if (ret) {
+ DRM_ERROR("failed to enable ade_core_clk (%d)\n", ret);
+ return ret;
+ }
+
+ ade_init(ctx);
+ ctx->power_on = true;
+ return 0;
+}
+
+static void ade_power_down(struct ade_hw_ctx *ctx)
+{
+ void __iomem *base = ctx->base;
+
+ writel(ADE_DISABLE, base + LDI_CTRL);
+ /* dsi pixel off */
+ writel(DSI_PCLK_OFF, base + LDI_HDMI_DSI_GT);
+
+ clk_disable_unprepare(ctx->ade_core_clk);
+ reset_control_assert(ctx->reset);
+ clk_disable_unprepare(ctx->media_noc_clk);
+ ctx->power_on = false;
+}
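+
+/*
+ * ade_power_down() undoes ade_power_up() in reverse: core clock off,
+ * reset asserted, then the NoC clock off. The LDI and the DSI pixel
+ * gate are stopped first so no pixels are pushed while the clocks go
+ * away.
+ */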
+
+static void ade_set_medianoc_qos(struct ade_crtc *acrtc)
+{
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ struct regmap *map = ctx->noc_regmap;
+
+ regmap_update_bits(map, ADE0_QOSGENERATOR_MODE,
+ QOSGENERATOR_MODE_MASK, BYPASS_MODE);
+ regmap_update_bits(map, ADE0_QOSGENERATOR_EXTCONTROL,
+ SOCKET_QOS_EN, SOCKET_QOS_EN);
+
+ regmap_update_bits(map, ADE1_QOSGENERATOR_MODE,
+ QOSGENERATOR_MODE_MASK, BYPASS_MODE);
+ regmap_update_bits(map, ADE1_QOSGENERATOR_EXTCONTROL,
+ SOCKET_QOS_EN, SOCKET_QOS_EN);
+}
+
+static int ade_enable_vblank(struct drm_device *dev, unsigned int pipe)
+{
+ struct kirin_drm_private *priv = dev->dev_private;
+ struct ade_crtc *acrtc = to_ade_crtc(priv->crtc[pipe]);
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ void __iomem *base = ctx->base;
+
+ if (!ctx->power_on)
+ (void)ade_power_up(ctx);
+
+ ade_update_bits(base + LDI_INT_EN, FRAME_END_INT_EN_OFST,
+ MASK(1), 1);
+
+ return 0;
+}
+
+static void ade_disable_vblank(struct drm_device *dev, unsigned int pipe)
+{
+ struct kirin_drm_private *priv = dev->dev_private;
+ struct ade_crtc *acrtc = to_ade_crtc(priv->crtc[pipe]);
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ void __iomem *base = ctx->base;
+
+ if (!ctx->power_on) {
+ DRM_ERROR("power is down! vblank disable fail\n");
+ return;
+ }
+
+ ade_update_bits(base + LDI_INT_EN, FRAME_END_INT_EN_OFST,
+ MASK(1), 0);
+}
+
+static irqreturn_t ade_irq_handler(int irq, void *data)
+{
+ struct ade_crtc *acrtc = data;
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ struct drm_crtc *crtc = &acrtc->base;
+ void __iomem *base = ctx->base;
+ u32 status;
+
+ status = readl(base + LDI_MSK_INT);
+ DRM_DEBUG_VBL("LDI IRQ: status=0x%X\n", status);
+
+ /* vblank irq */
+ if (status & BIT(FRAME_END_INT_EN_OFST)) {
+ ade_update_bits(base + LDI_INT_CLR, FRAME_END_INT_EN_OFST,
+ MASK(1), 1);
+ drm_crtc_handle_vblank(crtc);
+ }
+
+ return IRQ_HANDLED;
+}
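+
+/*
+ * The frame-end status bit is acked via LDI_INT_CLR before the vblank
+ * event is forwarded to the DRM core, so the same frame end is not
+ * reported again on the next read of LDI_MSK_INT.
+ */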
+
+static void ade_display_enable(struct ade_crtc *acrtc)
+{
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ void __iomem *base = ctx->base;
+ u32 out_fmt = acrtc->out_format;
+
+ /* enable output overlay compositor */
+ writel(ADE_ENABLE, base + ADE_OVLYX_CTL(OUT_OVLY));
+ ade_update_reload_bit(base, OVLY_OFST + OUT_OVLY, 0);
+
+ /* display source setting */
+ writel(DISP_SRC_OVLY2, base + ADE_DISP_SRC_CFG);
+
+ /* enable ade */
+ writel(ADE_ENABLE, base + ADE_EN);
+ /* enable ldi */
+ writel(NORMAL_MODE, base + LDI_WORK_MODE);
+ writel((out_fmt << BPP_OFST) | DATA_GATE_EN | LDI_EN,
+ base + LDI_CTRL);
+ /* dsi pixel on */
+ writel(DSI_PCLK_ON, base + LDI_HDMI_DSI_GT);
+}
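+
+/*
+ * The enable sequence follows the data path: route the output overlay
+ * and display source first, then enable ADE itself, and bring up the
+ * LDI and the DSI pixel gate last, once the rest of the pipe is
+ * configured.
+ */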
+
+#if ADE_DEBUG
+static void ade_rdma_dump_regs(void __iomem *base, u32 ch)
+{
+ u32 reg_ctrl, reg_addr, reg_size, reg_stride, reg_space, reg_en;
+ u32 val;
+
+ reg_ctrl = RD_CH_CTRL(ch);
+ reg_addr = RD_CH_ADDR(ch);
+ reg_size = RD_CH_SIZE(ch);
+ reg_stride = RD_CH_STRIDE(ch);
+ reg_space = RD_CH_SPACE(ch);
+ reg_en = RD_CH_EN(ch);
+
+ val = ade_read_reload_bit(base, RDMA_OFST + ch);
+ DRM_DEBUG_DRIVER("[rdma%d]: reload(%d)\n", ch + 1, val);
+ val = readl(base + reg_ctrl);
+ DRM_DEBUG_DRIVER("[rdma%d]: reg_ctrl(0x%08x)\n", ch + 1, val);
+ val = readl(base + reg_addr);
+ DRM_DEBUG_DRIVER("[rdma%d]: reg_addr(0x%08x)\n", ch + 1, val);
+ val = readl(base + reg_size);
+ DRM_DEBUG_DRIVER("[rdma%d]: reg_size(0x%08x)\n", ch + 1, val);
+ val = readl(base + reg_stride);
+ DRM_DEBUG_DRIVER("[rdma%d]: reg_stride(0x%08x)\n", ch + 1, val);
+ val = readl(base + reg_space);
+ DRM_DEBUG_DRIVER("[rdma%d]: reg_space(0x%08x)\n", ch + 1, val);
+ val = readl(base + reg_en);
+ DRM_DEBUG_DRIVER("[rdma%d]: reg_en(0x%08x)\n", ch + 1, val);
+}
+
+static void ade_clip_dump_regs(void __iomem *base, u32 ch)
+{
+ u32 val;
+
+ val = ade_read_reload_bit(base, CLIP_OFST + ch);
+ DRM_DEBUG_DRIVER("[clip%d]: reload(%d)\n", ch + 1, val);
+ val = readl(base + ADE_CLIP_DISABLE(ch));
+ DRM_DEBUG_DRIVER("[clip%d]: reg_clip_disable(0x%08x)\n", ch + 1, val);
+ val = readl(base + ADE_CLIP_SIZE0(ch));
+ DRM_DEBUG_DRIVER("[clip%d]: reg_clip_size0(0x%08x)\n", ch + 1, val);
+ val = readl(base + ADE_CLIP_SIZE1(ch));
+ DRM_DEBUG_DRIVER("[clip%d]: reg_clip_size1(0x%08x)\n", ch + 1, val);
+}
+
+static void ade_compositor_routing_dump_regs(void __iomem *base, u32 ch)
+{
+ u8 ovly_ch = 0; /* TODO: Only primary plane now */
+ u32 val;
+
+ val = readl(base + ADE_OVLY_CH_XY0(ovly_ch));
+ DRM_DEBUG_DRIVER("[overlay ch%d]: reg_ch_xy0(0x%08x)\n", ovly_ch, val);
+ val = readl(base + ADE_OVLY_CH_XY1(ovly_ch));
+ DRM_DEBUG_DRIVER("[overlay ch%d]: reg_ch_xy1(0x%08x)\n", ovly_ch, val);
+ val = readl(base + ADE_OVLY_CH_CTL(ovly_ch));
+ DRM_DEBUG_DRIVER("[overlay ch%d]: reg_ch_ctl(0x%08x)\n", ovly_ch, val);
+}
+
+static void ade_dump_overlay_compositor_regs(void __iomem *base, u32 comp)
+{
+ u32 val;
+
+ val = ade_read_reload_bit(base, OVLY_OFST + comp);
+ DRM_DEBUG_DRIVER("[overlay%d]: reload(%d)\n", comp + 1, val);
+ val = readl(base + ADE_OVLYX_CTL(comp));
+ DRM_DEBUG_DRIVER("[overlay%d]: reg_ctl(0x%08x)\n", comp + 1, val);
+ val = readl(base + ADE_OVLY_CTL);
+ DRM_DEBUG_DRIVER("ovly_ctl(0x%08x)\n", val);
+}
+
+static void ade_dump_regs(void __iomem *base)
+{
+ u32 i;
+
+ /* dump channel regs */
+ for (i = 0; i < ADE_CH_NUM; i++) {
+ /* dump rdma regs */
+ ade_rdma_dump_regs(base, i);
+
+ /* dump clip regs */
+ ade_clip_dump_regs(base, i);
+
+ /* dump compositor routing regs */
+ ade_compositor_routing_dump_regs(base, i);
+ }
+
+ /* dump overlay compositor regs */
+ ade_dump_overlay_compositor_regs(base, OUT_OVLY);
+}
+#else
+static void ade_dump_regs(void __iomem *base) { }
+#endif
+
+static void ade_crtc_enable(struct drm_crtc *crtc)
+{
+ struct ade_crtc *acrtc = to_ade_crtc(crtc);
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ int ret;
+
+ if (acrtc->enable)
+ return;
+
+ if (!ctx->power_on) {
+ ret = ade_power_up(ctx);
+ if (ret)
+ return;
+ }
+
+ ade_set_medianoc_qos(acrtc);
+ ade_display_enable(acrtc);
+ ade_dump_regs(ctx->base);
+ acrtc->enable = true;
+}
+
+static void ade_crtc_disable(struct drm_crtc *crtc)
+{
+ struct ade_crtc *acrtc = to_ade_crtc(crtc);
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+
+ if (!acrtc->enable)
+ return;
+
+ ade_power_down(ctx);
+ acrtc->enable = false;
+}
+
+static int ade_crtc_atomic_check(struct drm_crtc *crtc,
+ struct drm_crtc_state *state)
+{
+ /* do nothing */
+ return 0;
+}
+
+static void ade_crtc_mode_set_nofb(struct drm_crtc *crtc)
+{
+ struct ade_crtc *acrtc = to_ade_crtc(crtc);
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ struct drm_display_mode *mode = &crtc->state->mode;
+ struct drm_display_mode *adj_mode = &crtc->state->adjusted_mode;
+
+ if (!ctx->power_on)
+ (void)ade_power_up(ctx);
+ ade_ldi_set_mode(acrtc, mode, adj_mode);
+}
+
+static void ade_crtc_atomic_begin(struct drm_crtc *crtc,
+ struct drm_crtc_state *old_state)
+{
+ struct ade_crtc *acrtc = to_ade_crtc(crtc);
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+
+ if (!ctx->power_on)
+ (void)ade_power_up(ctx);
+}
+
+static void ade_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_crtc_state *old_state)
+{
+ struct ade_crtc *acrtc = to_ade_crtc(crtc);
+ struct ade_hw_ctx *ctx = acrtc->ctx;
+ void __iomem *base = ctx->base;
+
+ /* regs take effect only when the crtc is enabled */
+ if (acrtc->enable) {
+ ade_dump_regs(base);
+ /* flush ade registers */
+ writel(ADE_ENABLE, base + ADE_EN);
+ }
+}
+
+static const struct drm_crtc_helper_funcs ade_crtc_helper_funcs = {
+ .enable = ade_crtc_enable,
+ .disable = ade_crtc_disable,
+ .atomic_check = ade_crtc_atomic_check,
+ .mode_set_nofb = ade_crtc_mode_set_nofb,
+ .atomic_begin = ade_crtc_atomic_begin,
+ .atomic_flush = ade_crtc_atomic_flush,
+};
+
+static const struct drm_crtc_funcs ade_crtc_funcs = {
+ .destroy = drm_crtc_cleanup,
+ .set_config = drm_atomic_helper_set_config,
+ .page_flip = drm_atomic_helper_page_flip,
+ .reset = drm_atomic_helper_crtc_reset,
+ .set_property = drm_atomic_helper_crtc_set_property,
+ .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+};
+
+static int ade_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
+ struct drm_plane *plane)
+{
+ struct kirin_drm_private *priv = dev->dev_private;
+ struct device_node *port;
+ int ret;
+
+ /*
+ * set crtc port so that the
+ * drm_of_find_possible_crtcs() call works
+ */
+ port = of_get_child_by_name(dev->dev->of_node, "port");
+ if (!port) {
+ DRM_ERROR("no port node found in %s\n",
+ dev->dev->of_node->full_name);
+ return -EINVAL;
+ }
+ of_node_put(port);
+ crtc->port = port;
+
+ ret = drm_crtc_init_with_planes(dev, crtc, plane, NULL,
+ &ade_crtc_funcs);
+ if (ret) {
+ DRM_ERROR("failed to init crtc.\n");
+ return ret;
+ }
+
+ drm_crtc_helper_add(crtc, &ade_crtc_helper_funcs);
+ priv->crtc[drm_crtc_index(crtc)] = crtc;
+
+ return 0;
+}
+
+static void ade_rdma_set(void __iomem *base, struct drm_framebuffer *fb,
+ u32 ch, u32 y, u32 in_h, u32 fmt)
+{
+ struct drm_gem_cma_object *obj = drm_fb_cma_get_gem_obj(fb, 0);
+ u32 reg_ctrl, reg_addr, reg_size, reg_stride, reg_space, reg_en;
+ u32 stride = fb->pitches[0];
+ u32 addr = (u32)obj->paddr + y * stride;
+
+ DRM_DEBUG_DRIVER("rdma%d: (y=%d, height=%d), stride=%d, paddr=0x%x\n",
+ ch + 1, y, in_h, stride, (u32)obj->paddr);
+ DRM_DEBUG_DRIVER("addr=0x%x, fb:%dx%d, pixel_format=%d(%s)\n",
+ addr, fb->width, fb->height, fmt,
+ drm_get_format_name(fb->pixel_format));
+
+ /* get reg offset */
+ reg_ctrl = RD_CH_CTRL(ch);
+ reg_addr = RD_CH_ADDR(ch);
+ reg_size = RD_CH_SIZE(ch);
+ reg_stride = RD_CH_STRIDE(ch);
+ reg_space = RD_CH_SPACE(ch);
+ reg_en = RD_CH_EN(ch);
+
+ /*
+ * TODO: set rotation
+ */
+ writel((fmt << 16) & 0x1f0000, base + reg_ctrl);
+ writel(addr, base + reg_addr);
+ writel((in_h << 16) | stride, base + reg_size);
+ writel(stride, base + reg_stride);
+ writel(in_h * stride, base + reg_space);
+ writel(ADE_ENABLE, base + reg_en);
+ ade_update_reload_bit(base, RDMA_OFST + ch, 0);
+}
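+
+/*
+ * RDMA fetch sketch (illustration only): for a plane starting at
+ * scanline y with a 7680-byte stride and in_h = 1080, the engine reads
+ * from paddr + y * 7680 onwards, in_h * stride bytes in total, and the
+ * size register packs (in_h << 16) | stride.
+ */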
+
+static void ade_rdma_disable(void __iomem *base, u32 ch)
+{
+ u32 reg_en;
+
+ /* get reg offset */
+ reg_en = RD_CH_EN(ch);
+ writel(0, base + reg_en);
+ ade_update_reload_bit(base, RDMA_OFST + ch, 1);
+}
+
+static void ade_clip_set(void __iomem *base, u32 ch, u32 fb_w, u32 x,
+ u32 in_w, u32 in_h)
+{
+ u32 disable_val;
+ u32 clip_left;
+ u32 clip_right;
+
+ /*
+ * clip width, no need to clip height
+ */
+ if (fb_w == in_w) { /* bypass */
+ disable_val = 1;
+ clip_left = 0;
+ clip_right = 0;
+ } else {
+ disable_val = 0;
+ clip_left = x;
+ clip_right = fb_w - (x + in_w) - 1;
+ }
+
+ DRM_DEBUG_DRIVER("clip%d: clip_left=%d, clip_right=%d\n",
+ ch + 1, clip_left, clip_right);
+
+ writel(disable_val, base + ADE_CLIP_DISABLE(ch));
+ writel((fb_w - 1) << 16 | (in_h - 1), base + ADE_CLIP_SIZE0(ch));
+ writel(clip_left << 16 | clip_right, base + ADE_CLIP_SIZE1(ch));
+ ade_update_reload_bit(base, CLIP_OFST + ch, 0);
+}
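+
+/*
+ * Clip example (illustration only): fb_w = 1920, x = 100, in_w = 1280
+ * gives clip_left = 100 and clip_right = 1920 - (100 + 1280) - 1 = 539;
+ * when the source spans the full framebuffer width the clipper is
+ * bypassed instead.
+ */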
+
+static void ade_clip_disable(void __iomem *base, u32 ch)
+{
+ writel(1, base + ADE_CLIP_DISABLE(ch));
+ ade_update_reload_bit(base, CLIP_OFST + ch, 1);
+}
+
+static bool has_Alpha_channel(int format)
+{
+ switch (format) {
+ case ADE_ARGB_8888:
+ case ADE_ABGR_8888:
+ case ADE_RGBA_8888:
+ case ADE_BGRA_8888:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void ade_get_blending_params(u32 fmt, u8 glb_alpha, u8 *alp_mode,
+ u8 *alp_sel, u8 *under_alp_sel)
+{
+ bool has_alpha = has_Alpha_channel(fmt);
+
+ /*
+ * get alp_mode
+ */
+ if (has_alpha && glb_alpha < 255)
+ *alp_mode = ADE_ALP_PIXEL_AND_GLB;
+ else if (has_alpha)
+ *alp_mode = ADE_ALP_PIXEL;
+ else
+ *alp_mode = ADE_ALP_GLOBAL;
+
+ /*
+ * get alp sel
+ */
+ *alp_sel = ADE_ALP_MUL_COEFF_3; /* 1 */
+ *under_alp_sel = ADE_ALP_MUL_COEFF_2; /* 0 */
+}
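+
+/*
+ * Per the coefficient enum at the top of kirin_ade_reg.h, the selection
+ * above fixes the channel coefficient at 1 (COEFF_3) and the under-layer
+ * coefficient at 0 (COEFF_2), so the top channel simply replaces
+ * whatever is beneath it; only alp_mode varies with the pixel format
+ * and global alpha.
+ */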
+
+static void ade_compositor_routing_set(void __iomem *base, u8 ch,
+ u32 x0, u32 y0,
+ u32 in_w, u32 in_h, u32 fmt)
+{
+ u8 ovly_ch = 0; /* TODO: This is the zpos, only one plane now */
+ u8 glb_alpha = 255;
+ u32 x1 = x0 + in_w - 1;
+ u32 y1 = y0 + in_h - 1;
+ u32 val;
+ u8 alp_sel;
+ u8 under_alp_sel;
+ u8 alp_mode;
+
+ ade_get_blending_params(fmt, glb_alpha, &alp_mode, &alp_sel,
+ &under_alp_sel);
+
+ /* overlay routing setting */
+ writel(x0 << 16 | y0, base + ADE_OVLY_CH_XY0(ovly_ch));
+ writel(x1 << 16 | y1, base + ADE_OVLY_CH_XY1(ovly_ch));
+ val = (ch + 1) << CH_SEL_OFST | BIT(CH_EN_OFST) |
+ alp_sel << CH_ALP_SEL_OFST |
+ under_alp_sel << CH_UNDER_ALP_SEL_OFST |
+ glb_alpha << CH_ALP_GBL_OFST |
+ alp_mode << CH_ALP_MODE_OFST;
+ writel(val, base + ADE_OVLY_CH_CTL(ovly_ch));
+ /* connect this plane/channel to overlay2 compositor */
+ ade_update_bits(base + ADE_OVLY_CTL, CH_OVLY_SEL_OFST(ovly_ch),
+ CH_OVLY_SEL_MASK, CH_OVLY_SEL_VAL(OUT_OVLY));
+}
+
+static void ade_compositor_routing_disable(void __iomem *base, u32 ch)
+{
+ u8 ovly_ch = 0; /* TODO: Only primary plane now */
+
+ /* disable this plane/channel */
+ ade_update_bits(base + ADE_OVLY_CH_CTL(ovly_ch), CH_EN_OFST,
+ MASK(1), 0);
+ /* dis-connect this plane/channel of overlay2 compositor */
+ ade_update_bits(base + ADE_OVLY_CTL, CH_OVLY_SEL_OFST(ovly_ch),
+ CH_OVLY_SEL_MASK, 0);
+}
+
+/*
+ * Typically, a channel looks like: DMA-->clip-->scale-->ctran-->compositor
+ */
+static void ade_update_channel(struct ade_plane *aplane,
+ struct drm_framebuffer *fb, int crtc_x,
+ int crtc_y, unsigned int crtc_w,
+ unsigned int crtc_h, u32 src_x,
+ u32 src_y, u32 src_w, u32 src_h)
+{
+ struct ade_hw_ctx *ctx = aplane->ctx;
+ void __iomem *base = ctx->base;
+ u32 fmt = ade_get_format(fb->pixel_format);
+ u32 ch = aplane->ch;
+ u32 in_w;
+ u32 in_h;
+
+ DRM_DEBUG_DRIVER("channel%d: src:(%d, %d)-%dx%d, crtc:(%d, %d)-%dx%d",
+ ch + 1, src_x, src_y, src_w, src_h,
+ crtc_x, crtc_y, crtc_w, crtc_h);
+
+ /* 1) DMA setting */
+ in_w = src_w;
+ in_h = src_h;
+ ade_rdma_set(base, fb, ch, src_y, in_h, fmt);
+
+ /* 2) clip setting */
+ ade_clip_set(base, ch, fb->width, src_x, in_w, in_h);
+
+ /* 3) TODO: scale setting for overlay planes */
+
+ /* 4) TODO: ctran/csc setting for overlay planes */
+
+ /* 5) compositor routing setting */
+ ade_compositor_routing_set(base, ch, crtc_x, crtc_y, in_w, in_h, fmt);
+}
+
+static void ade_disable_channel(struct ade_plane *aplane)
+{
+ struct ade_hw_ctx *ctx = aplane->ctx;
+ void __iomem *base = ctx->base;
+ u32 ch = aplane->ch;
+
+ DRM_DEBUG_DRIVER("disable channel%d\n", ch + 1);
+
+ /* disable read DMA */
+ ade_rdma_disable(base, ch);
+
+ /* disable clip */
+ ade_clip_disable(base, ch);
+
+ /* disable compositor routing */
+ ade_compositor_routing_disable(base, ch);
+}
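+
+/*
+ * Disable mirrors the update path stage by stage; each helper also
+ * flips its reload-disable bit, so under the frame-end reload rule set
+ * up in ade_init() the change is latched at the next frame boundary
+ * rather than mid-scanout.
+ */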
+
+static int ade_plane_prepare_fb(struct drm_plane *plane,
+ const struct drm_plane_state *new_state)
+{
+ /* do nothing */
+ return 0;
+}
+
+static void ade_plane_cleanup_fb(struct drm_plane *plane,
+ const struct drm_plane_state *old_state)
+{
+ /* do nothing */
+}
+
+static int ade_plane_atomic_check(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct drm_framebuffer *fb = state->fb;
+ struct drm_crtc *crtc = state->crtc;
+ struct drm_crtc_state *crtc_state;
+ u32 src_x = state->src_x >> 16;
+ u32 src_y = state->src_y >> 16;
+ u32 src_w = state->src_w >> 16;
+ u32 src_h = state->src_h >> 16;
+ int crtc_x = state->crtc_x;
+ int crtc_y = state->crtc_y;
+ u32 crtc_w = state->crtc_w;
+ u32 crtc_h = state->crtc_h;
+ u32 fmt;
+
+ if (!crtc || !fb)
+ return 0;
+
+ fmt = ade_get_format(fb->pixel_format);
+ if (fmt == ADE_FORMAT_UNSUPPORT)
+ return -EINVAL;
+
+ crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ if (src_w != crtc_w || src_h != crtc_h) {
+ DRM_ERROR("Scale not support!!!\n");
+ return -EINVAL;
+ }
+
+ if (src_x + src_w > fb->width ||
+ src_y + src_h > fb->height)
+ return -EINVAL;
+
+ if (crtc_x < 0 || crtc_y < 0)
+ return -EINVAL;
+
+ if (crtc_x + crtc_w > crtc_state->adjusted_mode.hdisplay ||
+ crtc_y + crtc_h > crtc_state->adjusted_mode.vdisplay)
+ return -EINVAL;
+
+ return 0;
+}
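+
+/*
+ * The checks above encode what this driver supports: no scaling (source
+ * and crtc rectangles must match), the source rectangle must lie fully
+ * inside the framebuffer, and the destination fully inside the adjusted
+ * mode.
+ */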
+
+static void ade_plane_atomic_update(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ struct drm_plane_state *state = plane->state;
+ struct ade_plane *aplane = to_ade_plane(plane);
+
+ ade_update_channel(aplane, state->fb, state->crtc_x, state->crtc_y,
+ state->crtc_w, state->crtc_h,
+ state->src_x >> 16, state->src_y >> 16,
+ state->src_w >> 16, state->src_h >> 16);
+}
+
+static void ade_plane_atomic_disable(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ struct ade_plane *aplane = to_ade_plane(plane);
+
+ ade_disable_channel(aplane);
+}
+
+static const struct drm_plane_helper_funcs ade_plane_helper_funcs = {
+ .prepare_fb = ade_plane_prepare_fb,
+ .cleanup_fb = ade_plane_cleanup_fb,
+ .atomic_check = ade_plane_atomic_check,
+ .atomic_update = ade_plane_atomic_update,
+ .atomic_disable = ade_plane_atomic_disable,
+};
+
+static struct drm_plane_funcs ade_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ .set_property = drm_atomic_helper_plane_set_property,
+ .destroy = drm_plane_cleanup,
+ .reset = drm_atomic_helper_plane_reset,
+ .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+static int ade_plane_init(struct drm_device *dev, struct ade_plane *aplane,
+ enum drm_plane_type type)
+{
+ const u32 *fmts;
+ u32 fmts_cnt;
+ int ret;
+
+ /* get the formats supported by this channel */
+ fmts_cnt = ade_get_channel_formats(aplane->ch, &fmts);
+ if (!fmts_cnt)
+ return -EINVAL;
+
+ ret = drm_universal_plane_init(dev, &aplane->base, 1, &ade_plane_funcs,
+ fmts, fmts_cnt, type);
+ if (ret) {
+ DRM_ERROR("fail to init plane, ch=%d\n", aplane->ch);
+ return ret;
+ }
+
+ drm_plane_helper_add(&aplane->base, &ade_plane_helper_funcs);
+
+ return 0;
+}
+
+static int ade_dts_parse(struct platform_device *pdev, struct ade_hw_ctx *ctx)
+{
+ struct resource *res;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = pdev->dev.of_node;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ ctx->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(ctx->base)) {
+ DRM_ERROR("failed to remap ade io base\n");
+ return PTR_ERR(ctx->base);
+ }
+
+ ctx->reset = devm_reset_control_get(dev, NULL);
+ if (IS_ERR(ctx->reset))
+ return PTR_ERR(ctx->reset);
+
+ ctx->noc_regmap =
+ syscon_regmap_lookup_by_phandle(np, "hisilicon,noc-syscon");
+ if (IS_ERR(ctx->noc_regmap)) {
+ DRM_ERROR("failed to get noc regmap\n");
+ return PTR_ERR(ctx->noc_regmap);
+ }
+
+ ctx->irq = platform_get_irq(pdev, 0);
+ if (ctx->irq < 0) {
+ DRM_ERROR("failed to get irq\n");
+ return -ENODEV;
+ }
+
+ /* devm_clk_get() returns an ERR_PTR on failure, never NULL */
+ ctx->ade_core_clk = devm_clk_get(dev, "clk_ade_core");
+ if (IS_ERR(ctx->ade_core_clk)) {
+ DRM_ERROR("failed to parse clk ADE_CORE\n");
+ return PTR_ERR(ctx->ade_core_clk);
+ }
+
+ ctx->media_noc_clk = devm_clk_get(dev, "clk_codec_jpeg");
+ if (IS_ERR(ctx->media_noc_clk)) {
+ DRM_ERROR("failed to parse clk CODEC_JPEG\n");
+ return PTR_ERR(ctx->media_noc_clk);
+ }
+
+ ctx->ade_pix_clk = devm_clk_get(dev, "clk_ade_pix");
+ if (IS_ERR(ctx->ade_pix_clk)) {
+ DRM_ERROR("failed to parse clk ADE_PIX\n");
+ return PTR_ERR(ctx->ade_pix_clk);
+ }
+
+ return 0;
+}
+
+static int ade_drm_init(struct drm_device *dev)
+{
+ struct platform_device *pdev = dev->platformdev;
+ struct ade_data *ade;
+ struct ade_hw_ctx *ctx;
+ struct ade_crtc *acrtc;
+ struct ade_plane *aplane;
+ enum drm_plane_type type;
+ int ret;
+ int i;
+
+ ade = devm_kzalloc(dev->dev, sizeof(*ade), GFP_KERNEL);
+ if (!ade) {
+ DRM_ERROR("failed to alloc ade_data\n");
+ return -ENOMEM;
+ }
+ platform_set_drvdata(pdev, ade);
+
+ ctx = &ade->ctx;
+ acrtc = &ade->acrtc;
+ acrtc->ctx = ctx;
+ acrtc->out_format = LDI_OUT_RGB_888;
+
+ ret = ade_dts_parse(pdev, ctx);
+ if (ret)
+ return ret;
+
+ /*
+ * plane init
+ * TODO: Now only support primary plane, overlay planes
+ * need to do.
+ */
+ for (i = 0; i < ADE_CH_NUM; i++) {
+ aplane = &ade->aplane[i];
+ aplane->ch = i;
+ aplane->ctx = ctx;
+ type = i == PRIMARY_CH ? DRM_PLANE_TYPE_PRIMARY :
+ DRM_PLANE_TYPE_OVERLAY;
+
+ ret = ade_plane_init(dev, aplane, type);
+ if (ret)
+ return ret;
+ }
+
+ /* crtc init */
+ ret = ade_crtc_init(dev, &acrtc->base, &ade->aplane[PRIMARY_CH].base);
+ if (ret)
+ return ret;
+
+ /* vblank irq init */
+ ret = devm_request_irq(dev->dev, ctx->irq, ade_irq_handler,
+ IRQF_SHARED, dev->driver->name, acrtc);
+ if (ret)
+ return ret;
+ dev->driver->get_vblank_counter = drm_vblank_no_hw_counter;
+ dev->driver->enable_vblank = ade_enable_vblank;
+ dev->driver->disable_vblank = ade_disable_vblank;
+
+ return 0;
+}
+
+static void ade_drm_cleanup(struct drm_device *dev)
+{
+ struct platform_device *pdev = dev->platformdev;
+ struct ade_data *ade = platform_get_drvdata(pdev);
+ struct drm_crtc *crtc = &ade->acrtc.base;
+
+ drm_crtc_cleanup(crtc);
+}
+
+const struct kirin_dc_ops ade_dc_ops = {
+ .init = ade_drm_init,
+ .cleanup = ade_drm_cleanup
+};
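+
+/*
+ * ade_dc_ops is picked up by kirin_drm_drv.c through the
+ * "hisilicon,hi6220-ade" OF match data; .init/.cleanup are called
+ * around the DRM device lifecycle.
+ */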
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
new file mode 100644
index 000000000000..d8cc12a3c5d5
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
@@ -0,0 +1,369 @@
+/*
+ * Hisilicon Kirin SoCs drm master driver
+ *
+ * Copyright (c) 2016 Linaro Limited.
+ * Copyright (c) 2014-2016 Hisilicon Limited.
+ *
+ * Author:
+ * Xinliang Liu <z.liuxinliang@hisilicon.com>
+ * Xinliang Liu <xinliang.liu@linaro.org>
+ * Xinwei Kong <kong.kongxinwei@hisilicon.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/of_platform.h>
+#include <linux/component.h>
+#include <linux/of_graph.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "kirin_drm_drv.h"
+
+static struct kirin_dc_ops *dc_ops;
+
+static int kirin_drm_kms_cleanup(struct drm_device *dev)
+{
+ struct kirin_drm_private *priv = dev->dev_private;
+
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+ if (priv->fbdev) {
+ drm_fbdev_cma_fini(priv->fbdev);
+ priv->fbdev = NULL;
+ }
+#endif
+ drm_kms_helper_poll_fini(dev);
+ drm_vblank_cleanup(dev);
+ dc_ops->cleanup(dev);
+ drm_mode_config_cleanup(dev);
+ devm_kfree(dev->dev, priv);
+ dev->dev_private = NULL;
+
+ return 0;
+}
+
+static void kirin_fbdev_output_poll_changed(struct drm_device *dev)
+{
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+ struct kirin_drm_private *priv = dev->dev_private;
+#endif
+
+ dsi_set_output_client(dev);
+
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+ if (priv->fbdev) {
+ drm_fbdev_cma_hotplug_event(priv->fbdev);
+ } else {
+ priv->fbdev = drm_fbdev_cma_init(dev, 32,
+ dev->mode_config.num_crtc,
+ dev->mode_config.num_connector);
+ if (IS_ERR(priv->fbdev))
+ priv->fbdev = NULL;
+ }
+#endif
+}
+
+static const struct drm_mode_config_funcs kirin_drm_mode_config_funcs = {
+ .fb_create = drm_fb_cma_create,
+ .output_poll_changed = kirin_fbdev_output_poll_changed,
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = drm_atomic_helper_commit,
+};
+
+static void kirin_drm_mode_config_init(struct drm_device *dev)
+{
+ dev->mode_config.min_width = 0;
+ dev->mode_config.min_height = 0;
+
+ dev->mode_config.max_width = 2048;
+ dev->mode_config.max_height = 2048;
+
+ dev->mode_config.funcs = &kirin_drm_mode_config_funcs;
+}
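+
+/*
+ * The 2048x2048 limit bounds framebuffer and mode sizes in the DRM
+ * core; it presumably reflects the ADE's maximum scanout size.
+ */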
+
+static int kirin_drm_kms_init(struct drm_device *dev)
+{
+ struct kirin_drm_private *priv;
+ int ret;
+
+ priv = devm_kzalloc(dev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ dev->dev_private = priv;
+ dev_set_drvdata(dev->dev, dev);
+
+ /* dev->mode_config initialization */
+ drm_mode_config_init(dev);
+ kirin_drm_mode_config_init(dev);
+
+ /* display controller init */
+ ret = dc_ops->init(dev);
+ if (ret)
+ goto err_mode_config_cleanup;
+
+ /* bind and init sub drivers */
+ ret = component_bind_all(dev->dev, dev);
+ if (ret) {
+ DRM_ERROR("failed to bind all component.\n");
+ goto err_dc_cleanup;
+ }
+
+ /* vblank init */
+ ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
+ if (ret) {
+ DRM_ERROR("failed to initialize vblank.\n");
+ goto err_unbind_all;
+ }
+ /* with irq_enabled = true, we can use the vblank feature. */
+ dev->irq_enabled = true;
+
+ /* reset all the states of crtc/plane/encoder/connector */
+ drm_mode_config_reset(dev);
+
+ /* init kms poll for handling hpd */
+ drm_kms_helper_poll_init(dev);
+
+ /* force detection after connectors init */
+ (void)drm_helper_hpd_irq_event(dev);
+
+ return 0;
+
+err_unbind_all:
+ component_unbind_all(dev->dev, dev);
+err_dc_cleanup:
+ dc_ops->cleanup(dev);
+err_mode_config_cleanup:
+ drm_mode_config_cleanup(dev);
+ devm_kfree(dev->dev, priv);
+ dev->dev_private = NULL;
+
+ return ret;
+}
+
+static const struct file_operations kirin_drm_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = drm_compat_ioctl,
+#endif
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = no_llseek,
+ .mmap = drm_gem_cma_mmap,
+};
+
+static int kirin_gem_cma_dumb_create(struct drm_file *file,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ return drm_gem_cma_dumb_create_internal(file, dev, args);
+}
+
+static struct drm_driver kirin_drm_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME |
+ DRIVER_ATOMIC | DRIVER_HAVE_IRQ,
+ .fops = &kirin_drm_fops,
+ .set_busid = drm_platform_set_busid,
+
+ .gem_free_object = drm_gem_cma_free_object,
+ .gem_vm_ops = &drm_gem_cma_vm_ops,
+ .dumb_create = kirin_gem_cma_dumb_create,
+ .dumb_map_offset = drm_gem_cma_dumb_map_offset,
+ .dumb_destroy = drm_gem_dumb_destroy,
+
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_export = drm_gem_prime_export,
+ .gem_prime_import = drm_gem_prime_import,
+ .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
+ .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+ .gem_prime_vmap = drm_gem_cma_prime_vmap,
+ .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
+ .gem_prime_mmap = drm_gem_cma_prime_mmap,
+
+ .name = "kirin",
+ .desc = "Hisilicon Kirin SoCs' DRM Driver",
+ .date = "20150718",
+ .major = 1,
+ .minor = 0,
+};
+
+static int compare_of(struct device *dev, void *data)
+{
+ return dev->of_node == data;
+}
+
+static int kirin_drm_connectors_register(struct drm_device *dev)
+{
+ struct drm_connector *connector;
+ struct drm_connector *failed_connector;
+ int ret;
+
+ mutex_lock(&dev->mode_config.mutex);
+ drm_for_each_connector(connector, dev) {
+ ret = drm_connector_register(connector);
+ if (ret) {
+ failed_connector = connector;
+ goto err;
+ }
+ }
+ mutex_unlock(&dev->mode_config.mutex);
+
+ return 0;
+
+err:
+ drm_for_each_connector(connector, dev) {
+ if (failed_connector == connector)
+ break;
+ drm_connector_unregister(connector);
+ }
+ mutex_unlock(&dev->mode_config.mutex);
+
+ return ret;
+}
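+
+/*
+ * On failure the connectors registered so far are unregistered in the
+ * same iteration order, stopping at the one that failed;
+ * mode_config.mutex is held across the whole walk so the connector
+ * list cannot change underneath us.
+ */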
+
+static int kirin_drm_bind(struct device *dev)
+{
+ struct drm_driver *driver = &kirin_drm_driver;
+ struct drm_device *drm_dev;
+ int ret;
+
+ drm_dev = drm_dev_alloc(driver, dev);
+ if (!drm_dev)
+ return -ENOMEM;
+
+ drm_dev->platformdev = to_platform_device(dev);
+
+ ret = kirin_drm_kms_init(drm_dev);
+ if (ret)
+ goto err_drm_dev_unref;
+
+ ret = drm_dev_register(drm_dev, 0);
+ if (ret)
+ goto err_kms_cleanup;
+
+ /* connectors should be registered after drm device register */
+ ret = kirin_drm_connectors_register(drm_dev);
+ if (ret)
+ goto err_drm_dev_unregister;
+
+ DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n",
+ driver->name, driver->major, driver->minor, driver->patchlevel,
+ driver->date, drm_dev->primary->index);
+
+ return 0;
+
+err_drm_dev_unregister:
+ drm_dev_unregister(drm_dev);
+err_kms_cleanup:
+ kirin_drm_kms_cleanup(drm_dev);
+err_drm_dev_unref:
+ drm_dev_unref(drm_dev);
+
+ return ret;
+}
+
+static void kirin_drm_unbind(struct device *dev)
+{
+ drm_put_dev(dev_get_drvdata(dev));
+}
+
+static const struct component_master_ops kirin_drm_ops = {
+ .bind = kirin_drm_bind,
+ .unbind = kirin_drm_unbind,
+};
+
+static struct device_node *kirin_get_remote_node(struct device_node *np)
+{
+ struct device_node *endpoint, *remote;
+
+ /*
+ * Get the first endpoint; in our case only one remote node
+ * is connected to the display controller.
+ */
+ endpoint = of_graph_get_next_endpoint(np, NULL);
+ if (!endpoint) {
+ DRM_ERROR("no valid endpoint node\n");
+ return ERR_PTR(-ENODEV);
+ }
+
+ /* keep the endpoint reference until we are done with it */
+ remote = of_graph_get_remote_port_parent(endpoint);
+ of_node_put(endpoint);
+ if (!remote) {
+ DRM_ERROR("no valid remote node\n");
+ return ERR_PTR(-ENODEV);
+ }
+
+ if (!of_device_is_available(remote)) {
+ DRM_ERROR("remote node is not available\n");
+ of_node_put(remote);
+ return ERR_PTR(-ENODEV);
+ }
+
+ return remote;
+}
+
+static int kirin_drm_platform_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct component_match *match = NULL;
+ struct device_node *remote;
+
+ dc_ops = (struct kirin_dc_ops *)of_device_get_match_data(dev);
+ if (!dc_ops) {
+ DRM_ERROR("failed to get dt id data\n");
+ return -EINVAL;
+ }
+
+ remote = kirin_get_remote_node(np);
+ if (IS_ERR(remote))
+ return PTR_ERR(remote);
+
+ component_match_add(dev, &match, compare_of, remote);
+
+ return component_master_add_with_match(dev, &kirin_drm_ops, match);
+}
+
+static int kirin_drm_platform_remove(struct platform_device *pdev)
+{
+ component_master_del(&pdev->dev, &kirin_drm_ops);
+ dc_ops = NULL;
+ return 0;
+}
+
+static const struct of_device_id kirin_drm_dt_ids[] = {
+ { .compatible = "hisilicon,hi6220-ade",
+ .data = &ade_dc_ops,
+ },
+ { /* end node */ },
+};
+MODULE_DEVICE_TABLE(of, kirin_drm_dt_ids);
+
+static struct platform_driver kirin_drm_platform_driver = {
+ .probe = kirin_drm_platform_probe,
+ .remove = kirin_drm_platform_remove,
+ .driver = {
+ .name = "kirin-drm",
+ .of_match_table = kirin_drm_dt_ids,
+ },
+};
+
+module_platform_driver(kirin_drm_platform_driver);
+
+MODULE_AUTHOR("Xinliang Liu <xinliang.liu@linaro.org>");
+MODULE_AUTHOR("Xinliang Liu <z.liuxinliang@hisilicon.com>");
+MODULE_AUTHOR("Xinwei Kong <kong.kongxinwei@hisilicon.com>");
+MODULE_DESCRIPTION("hisilicon Kirin SoCs' DRM master driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h
new file mode 100644
index 000000000000..d9cfd3fe081e
--- /dev/null
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2016 Linaro Limited.
+ * Copyright (c) 2014-2016 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __KIRIN_DRM_DRV_H__
+#define __KIRIN_DRM_DRV_H__
+
+#define MAX_CRTC 2
+
+/* display controller init/cleanup ops */
+struct kirin_dc_ops {
+ int (*init)(struct drm_device *dev);
+ void (*cleanup)(struct drm_device *dev);
+};
+
+struct kirin_drm_private {
+ struct drm_crtc *crtc[MAX_CRTC];
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+ struct drm_fbdev_cma *fbdev;
+#endif
+};
+
+extern const struct kirin_dc_ops ade_dc_ops;
+extern void dsi_set_output_client(struct drm_device *dev);
+
+#endif /* __KIRIN_DRM_DRV_H__ */
diff --git a/drivers/gpu/drm/i2c/Kconfig b/drivers/gpu/drm/i2c/Kconfig
index 22c7ed63a001..375c02834869 100644
--- a/drivers/gpu/drm/i2c/Kconfig
+++ b/drivers/gpu/drm/i2c/Kconfig
@@ -31,4 +31,12 @@ config DRM_I2C_NXP_TDA998X
help
Support for NXP Semiconductors TDA998X HDMI encoders.
+config DRM_ICN_6201
+ tristate "ICN6201 --- MIPI DSI BRIDGE TO FLATLINKTM LVDS"
+ depends on OF
+ depends on DRM_MIPI_DSI
+ # depends on BACKLIGHT_CLASS_DEVICE
+ help
+ Say Y here if you want to enable support for the ICN6201 bridge.
+
endmenu
diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile
index 2c72eb584ab7..2d0abf4e27d9 100644
--- a/drivers/gpu/drm/i2c/Makefile
+++ b/drivers/gpu/drm/i2c/Makefile
@@ -1,6 +1,6 @@
ccflags-y := -Iinclude/drm
-obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511.o
+obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511.o adv7511_audio.o
ch7006-y := ch7006_drv.o ch7006_mode.o
obj-$(CONFIG_DRM_I2C_CH7006) += ch7006.o
@@ -10,3 +10,5 @@ obj-$(CONFIG_DRM_I2C_SIL164) += sil164.o
tda998x-y := tda998x_drv.o
obj-$(CONFIG_DRM_I2C_NXP_TDA998X) += tda998x.o
+
+obj-$(CONFIG_DRM_ICN_6201) += icn_6201.o
diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/i2c/adv7511.c
index 00416f23b5cb..2addd97fae8a 100644
--- a/drivers/gpu/drm/i2c/adv7511.c
+++ b/drivers/gpu/drm/i2c/adv7511.c
@@ -12,41 +12,19 @@
#include <linux/module.h>
#include <linux/regmap.h>
#include <linux/slab.h>
+#include <linux/of_graph.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_encoder_slave.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_mipi_dsi.h>
#include "adv7511.h"
-struct adv7511 {
- struct i2c_client *i2c_main;
- struct i2c_client *i2c_edid;
-
- struct regmap *regmap;
- struct regmap *packet_memory_regmap;
- enum drm_connector_status status;
- bool powered;
-
- unsigned int f_tmds;
-
- unsigned int current_edid_segment;
- uint8_t edid_buf[256];
- bool edid_read;
-
- wait_queue_head_t wq;
- struct drm_encoder *encoder;
-
- bool embedded_sync;
- enum adv7511_sync_polarity vsync_polarity;
- enum adv7511_sync_polarity hsync_polarity;
- bool rgb;
-
- struct edid *edid;
-
- struct gpio_desc *gpio_pd;
-};
+#define HPD_ENABLE 1
static struct adv7511 *encoder_to_adv7511(struct drm_encoder *encoder)
{
@@ -66,6 +44,24 @@ static const struct reg_sequence adv7511_fixed_registers[] = {
{ 0x55, 0x02 },
};
+/* ADI recommended values for proper operation. */
+static const struct reg_sequence adv7533_fixed_registers[] = {
+ { 0x16, 0x20 },
+ { 0x9a, 0xe0 },
+ { 0xba, 0x70 },
+ { 0xde, 0x82 },
+ { 0xe4, 0x40 },
+ { 0xe5, 0x80 },
+};
+
+static const struct reg_sequence adv7533_cec_fixed_registers[] = {
+ { 0x15, 0xd0 },
+ { 0x17, 0xd0 },
+ { 0x24, 0x20 },
+ { 0x57, 0x11 },
+ { 0x05, 0xc8 },
+};
+
/* -----------------------------------------------------------------------------
* Register access
*/
@@ -158,6 +154,23 @@ static const struct regmap_config adv7511_regmap_config = {
.volatile_reg = adv7511_register_volatile,
};
+static const struct regmap_config adv7533_cec_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+
+ .max_register = 0xff,
+ .cache_type = REGCACHE_RBTREE,
+};
+
+static const struct regmap_config adv7533_packet_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+
+ .max_register = 0xff,
+ .cache_type = REGCACHE_RBTREE,
+};
+
/* -----------------------------------------------------------------------------
* Hardware configuration
*/
@@ -193,7 +206,7 @@ static void adv7511_set_colormap(struct adv7511 *adv7511, bool enable,
ADV7511_CSC_UPDATE_MODE, 0);
}
-static int adv7511_packet_enable(struct adv7511 *adv7511, unsigned int packet)
+int adv7511_packet_enable(struct adv7511 *adv7511, unsigned int packet)
{
if (packet & 0xff)
regmap_update_bits(adv7511->regmap, ADV7511_REG_PACKET_ENABLE0,
@@ -208,7 +221,7 @@ static int adv7511_packet_enable(struct adv7511 *adv7511, unsigned int packet)
return 0;
}
-static int adv7511_packet_disable(struct adv7511 *adv7511, unsigned int packet)
+int adv7511_packet_disable(struct adv7511 *adv7511, unsigned int packet)
{
if (packet & 0xff)
regmap_update_bits(adv7511->regmap, ADV7511_REG_PACKET_ENABLE0,
@@ -358,6 +371,144 @@ static void adv7511_set_link_config(struct adv7511 *adv7511,
adv7511->rgb = config->input_colorspace == HDMI_COLORSPACE_RGB;
}
+static void adv7511_dsi_config_tgen(struct adv7511 *adv7511)
+{
+ struct mipi_dsi_device *dsi = adv7511->dsi;
+ struct drm_display_mode *mode = &adv7511->curr_mode;
+ static const u8 clock_div_by_lanes[] = { 6, 4, 3 }; /* 2, 3, 4 lanes */
+ unsigned int hsw, hfp, hbp, vsw, vfp, vbp;
+
+ hsw = mode->hsync_end - mode->hsync_start;
+ hfp = mode->hsync_start - mode->hdisplay;
+ hbp = mode->htotal - mode->hsync_end;
+ vsw = mode->vsync_end - mode->vsync_start;
+ vfp = mode->vsync_start - mode->vdisplay;
+ vbp = mode->vtotal - mode->vsync_end;
+
+ /* set pixel clock divider mode */
+ regmap_write(adv7511->regmap_cec, 0x16,
+ clock_div_by_lanes[dsi->lanes - 2] << 3);
+
+ /* horizontal porch params */
+ regmap_write(adv7511->regmap_cec, 0x28, mode->htotal >> 4);
+ regmap_write(adv7511->regmap_cec, 0x29, (mode->htotal << 4) & 0xff);
+ regmap_write(adv7511->regmap_cec, 0x2a, hsw >> 4);
+ regmap_write(adv7511->regmap_cec, 0x2b, (hsw << 4) & 0xff);
+ regmap_write(adv7511->regmap_cec, 0x2c, hfp >> 4);
+ regmap_write(adv7511->regmap_cec, 0x2d, (hfp << 4) & 0xff);
+ regmap_write(adv7511->regmap_cec, 0x2e, hbp >> 4);
+ regmap_write(adv7511->regmap_cec, 0x2f, (hbp << 4) & 0xff);
+
+ /* vertical porch params */
+ regmap_write(adv7511->regmap_cec, 0x30, mode->vtotal >> 4);
+ regmap_write(adv7511->regmap_cec, 0x31, (mode->vtotal << 4) & 0xff);
+ regmap_write(adv7511->regmap_cec, 0x32, vsw >> 4);
+ regmap_write(adv7511->regmap_cec, 0x33, (vsw << 4) & 0xff);
+ regmap_write(adv7511->regmap_cec, 0x34, vfp >> 4);
+ regmap_write(adv7511->regmap_cec, 0x35, (vfp << 4) & 0xff);
+ regmap_write(adv7511->regmap_cec, 0x36, vbp >> 4);
+ regmap_write(adv7511->regmap_cec, 0x37, (vbp << 4) & 0xff);
+}
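+
+/*
+ * Each timing value above is 12 bits split across two CEC-map
+ * registers: high register = value >> 4, low register =
+ * (value << 4) & 0xff. Illustration only: htotal = 2200 (0x898) is
+ * written as 0x89 to reg 0x28 and 0x80 to reg 0x29.
+ */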
+
+static void adv7511_dsi_receiver_dpms(struct adv7511 *adv7511)
+{
+ if (adv7511->type != ADV7533)
+ return;
+
+ if (adv7511->powered) {
+ struct mipi_dsi_device *dsi = adv7511->dsi;
+
+ adv7511_dsi_config_tgen(adv7511);
+
+ /* set number of dsi lanes */
+ regmap_write(adv7511->regmap_cec, 0x1c, dsi->lanes << 4);
+
+#if 0
+ /* reset internal timing generator */
+ regmap_write(adv7511->regmap_cec, 0x27, 0xcb);
+ regmap_write(adv7511->regmap_cec, 0x27, 0x8b);
+ regmap_write(adv7511->regmap_cec, 0x27, 0xcb);
+#else
+ /* disable internal timing generator */
+ regmap_write(adv7511->regmap_cec, 0x27, 0x0b);
+#endif
+
+ /* enable hdmi */
+ regmap_write(adv7511->regmap_cec, 0x03, 0x89);
+ /* disable test mode */
+ regmap_write(adv7511->regmap_cec, 0x55, 0x00);
+ /* SPD */
+ {
+ static const unsigned char spd_if[] = {
+ 0x83, 0x01, 25, 0x00,
+ 'L', 'i', 'n', 'a', 'r', 'o', 0, 0,
+ '9', '6', 'b', 'o', 'a', 'r', 'd', 's',
+ ':', 'H', 'i', 'k', 'e', 'y', 0, 0,
+ };
+ int n;
+
+ for (n = 0; n < sizeof(spd_if); n++)
+ regmap_write(adv7511->regmap_packet, n, spd_if[n]);
+
+ /* enable send SPD */
+ regmap_update_bits(adv7511->regmap, 0x40, BIT(6), BIT(6));
+ }
+
+ /* force audio */
+ /* hide Audio infoframe updates */
+ regmap_update_bits(adv7511->regmap, 0x4a, BIT(5), BIT(5));
+
+ /* i2s, internal mclk, mclk-256 */
+ regmap_update_bits(adv7511->regmap, 0x0a, 0x1f, 1);
+ regmap_update_bits(adv7511->regmap, 0x0b, 0xe0, 0);
+ /* enable i2s, use i2s format, sample rate from i2s */
+ regmap_update_bits(adv7511->regmap, 0x0c, 0xc7, BIT(2));
+ /* 16 bit audio */
+ regmap_update_bits(adv7511->regmap, 0x0d, 0xff, 16);
+ /* 16-bit audio */
+ regmap_update_bits(adv7511->regmap, 0x14, 0x0f, 2 << 4);
+ /* 48kHz */
+ regmap_update_bits(adv7511->regmap, 0x15, 0xf0, 2 << 4);
+ /* enable N/CTS, enable Audio sample packets */
+ regmap_update_bits(adv7511->regmap, 0x44, BIT(5), BIT(5));
+ /* N = 6144 */
+ regmap_write(adv7511->regmap, 1, (6144 >> 16) & 0xf);
+ regmap_write(adv7511->regmap, 2, (6144 >> 8) & 0xff);
+ regmap_write(adv7511->regmap, 3, (6144) & 0xff);
+ /* automatic cts */
+ regmap_update_bits(adv7511->regmap, 0x0a, BIT(7), 0);
+ /* enable N/CTS */
+ regmap_update_bits(adv7511->regmap, 0x44, BIT(6), BIT(6));
+ /* not copyrighted */
+ regmap_update_bits(adv7511->regmap, 0x12, BIT(5), BIT(5));
+
+ /* left source */
+ regmap_update_bits(adv7511->regmap, 0x0e, 7 << 3, 0);
+ /* right source */
+ regmap_update_bits(adv7511->regmap, 0x0e, 7 << 0, 1);
+ /* number of channels: sect 4.5.4: set to 0 */
+ regmap_update_bits(adv7511->regmap, 0x73, 7, 1);
+ /* number of channels: sect 4.5.4: set to 0 */
+ regmap_update_bits(adv7511->regmap, 0x73, 0xf0, 1 << 4);
+ /* sample rate: 48kHz */
+ regmap_update_bits(adv7511->regmap, 0x74, 7 << 2, 3 << 2);
+ /* channel allocation reg: sect 4.5.4: set to 0 */
+ regmap_update_bits(adv7511->regmap, 0x76, 0xff, 0);
+ /* enable audio infoframes */
+ regmap_update_bits(adv7511->regmap, 0x44, BIT(3), BIT(3));
+
+ /* AV mute disable */
+ regmap_update_bits(adv7511->regmap, 0x4b, BIT(7) | BIT(6), BIT(7));
+
+ /* use Audio infoframe updated info */
+ regmap_update_bits(adv7511->regmap, 0x4a, BIT(5), 0);
+ } else {
+ regmap_write(adv7511->regmap_cec, 0x03, 0x0b);
+ regmap_write(adv7511->regmap_cec, 0x27, 0x0b);
+ }
+}
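+
+/*
+ * The power-on branch above hard-codes a 48 kHz, 16-bit, 2-channel I2S
+ * audio path (N = 6144 is the HDMI-recommended value for 48 kHz) plus
+ * an SPD infoframe; the power-off branch only turns the HDMI output
+ * and the internal timing generator back off.
+ */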
+
static void adv7511_power_on(struct adv7511 *adv7511)
{
adv7511->current_edid_segment = -1;
@@ -386,7 +537,13 @@ static void adv7511_power_on(struct adv7511 *adv7511)
*/
regcache_sync(adv7511->regmap);
+ if (adv7511->type == ADV7533)
+ regmap_register_patch(adv7511->regmap_cec,
+ adv7533_cec_fixed_registers,
+ ARRAY_SIZE(adv7533_cec_fixed_registers));
adv7511->powered = true;
+
+ adv7511_dsi_receiver_dpms(adv7511);
}
static void adv7511_power_off(struct adv7511 *adv7511)
@@ -398,12 +555,15 @@ static void adv7511_power_off(struct adv7511 *adv7511)
regcache_mark_dirty(adv7511->regmap);
adv7511->powered = false;
+
+ adv7511_dsi_receiver_dpms(adv7511);
}
/* -----------------------------------------------------------------------------
* Interrupt and hotplug detection
*/
+#if HPD_ENABLE
static bool adv7511_hpd(struct adv7511 *adv7511)
{
unsigned int irq0;
@@ -421,8 +581,9 @@ static bool adv7511_hpd(struct adv7511 *adv7511)
return false;
}
+#endif
-static int adv7511_irq_process(struct adv7511 *adv7511)
+static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd)
{
unsigned int irq0, irq1;
int ret;
@@ -438,7 +599,7 @@ static int adv7511_irq_process(struct adv7511 *adv7511)
regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
- if (irq0 & ADV7511_INT0_HDP && adv7511->encoder)
+ if (process_hpd && irq0 & ADV7511_INT0_HDP && adv7511->encoder)
drm_helper_hpd_irq_event(adv7511->encoder->dev);
if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
@@ -456,7 +617,7 @@ static irqreturn_t adv7511_irq_handler(int irq, void *devid)
struct adv7511 *adv7511 = devid;
int ret;
- ret = adv7511_irq_process(adv7511);
+ ret = adv7511_irq_process(adv7511, true);
return ret < 0 ? IRQ_NONE : IRQ_HANDLED;
}
@@ -473,7 +634,7 @@ static int adv7511_wait_for_edid(struct adv7511 *adv7511, int timeout)
adv7511->edid_read, msecs_to_jiffies(timeout));
} else {
for (; timeout > 0; timeout -= 25) {
- ret = adv7511_irq_process(adv7511);
+ ret = adv7511_irq_process(adv7511, false);
if (ret < 0)
break;
@@ -555,18 +716,19 @@ static int adv7511_get_edid_block(void *data, u8 *buf, unsigned int block,
}
/* -----------------------------------------------------------------------------
- * Encoder operations
+ * ADV75xx helpers
*/
-
-static int adv7511_get_modes(struct drm_encoder *encoder,
- struct drm_connector *connector)
+static int adv7511_get_modes(struct adv7511 *adv7511,
+ struct drm_connector *connector)
{
- struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
struct edid *edid;
unsigned int count;
/* Reading the EDID only works if the device is powered */
if (!adv7511->powered) {
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
+ ADV7511_REG_POWER2_HDP_SRC_MASK,
+ ADV7511_REG_POWER2_HDP_SRC_NONE);
regmap_write(adv7511->regmap, ADV7511_REG_INT(0),
ADV7511_INT0_EDID_READY);
regmap_write(adv7511->regmap, ADV7511_REG_INT(1),
@@ -574,6 +736,8 @@ static int adv7511_get_modes(struct drm_encoder *encoder,
regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
ADV7511_POWER_POWER_DOWN, 0);
adv7511->current_edid_segment = -1;
+ /* give the EDID some time to become ready */
+ msleep(200);
}
edid = drm_do_get_edid(connector, adv7511_get_edid_block, adv7511);
@@ -596,24 +760,15 @@ static int adv7511_get_modes(struct drm_encoder *encoder,
return count;
}
-static void adv7511_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
- struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
-
- if (mode == DRM_MODE_DPMS_ON)
- adv7511_power_on(adv7511);
- else
- adv7511_power_off(adv7511);
-}
-
static enum drm_connector_status
-adv7511_encoder_detect(struct drm_encoder *encoder,
+adv7511_detect(struct adv7511 *adv7511,
struct drm_connector *connector)
{
- struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
enum drm_connector_status status;
unsigned int val;
+#if HPD_ENABLE
bool hpd;
+#endif
int ret;
ret = regmap_read(adv7511->regmap, ADV7511_REG_STATUS, &val);
@@ -625,6 +780,7 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
else
status = connector_status_disconnected;
+#if HPD_ENABLE
hpd = adv7511_hpd(adv7511);
/* The chip resets itself when the cable is disconnected, so in case
@@ -634,7 +790,7 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
if (status == connector_status_connected && hpd && adv7511->powered) {
regcache_mark_dirty(adv7511->regmap);
adv7511_power_on(adv7511);
- adv7511_get_modes(encoder, connector);
+ adv7511_get_modes(adv7511, connector);
if (adv7511->status == connector_status_connected)
status = connector_status_disconnected;
} else {
@@ -643,25 +799,41 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
ADV7511_REG_POWER2_HDP_SRC_MASK,
ADV7511_REG_POWER2_HDP_SRC_BOTH);
}
+#endif
adv7511->status = status;
return status;
}
-static int adv7511_encoder_mode_valid(struct drm_encoder *encoder,
- struct drm_display_mode *mode)
+static int adv7511_mode_valid(struct adv7511 *adv7511,
+ struct drm_display_mode *mode)
{
if (mode->clock > 165000)
return MODE_CLOCK_HIGH;
-
- return MODE_OK;
+ /*
+ * Modes known to work well on this board; mark them preferred so
+ * they are put at the front of the mode list.
+ */
+ DRM_DEBUG("Checking mode %ix%i@%i clock: %i...",
+ mode->hdisplay, mode->vdisplay, drm_mode_vrefresh(mode), mode->clock);
+ if ((mode->hdisplay == 1920 && mode->vdisplay == 1080 && mode->clock == 148500) ||
+ (mode->hdisplay == 1280 && mode->vdisplay == 800 && mode->clock == 83496) ||
+ (mode->hdisplay == 1280 && mode->vdisplay == 720 && mode->clock == 74440) ||
+ (mode->hdisplay == 1280 && mode->vdisplay == 720 && mode->clock == 74250) ||
+ (mode->hdisplay == 1024 && mode->vdisplay == 768 && mode->clock == 75000) ||
+ (mode->hdisplay == 1024 && mode->vdisplay == 768 && mode->clock == 81833) ||
+ (mode->hdisplay == 800 && mode->vdisplay == 600 && mode->clock == 40000)) {
+ mode->type |= DRM_MODE_TYPE_PREFERRED;
+ DRM_DEBUG("OK\n");
+ return MODE_OK;
+ }
+ DRM_DEBUG("BAD\n");
+ return MODE_BAD;
}
-static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
+static void adv7511_mode_set(struct adv7511 *adv7511,
struct drm_display_mode *mode,
struct drm_display_mode *adj_mode)
{
- struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
unsigned int low_refresh_rate;
unsigned int hsync_polarity = 0;
unsigned int vsync_polarity = 0;
@@ -744,6 +916,28 @@ static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
regmap_update_bits(adv7511->regmap, 0x17,
0x60, (vsync_polarity << 6) | (hsync_polarity << 5));
+ if (adv7511->type == ADV7533 && adv7511->num_dsi_lanes == 4) {
+ struct mipi_dsi_device *dsi = adv7511->dsi;
+ int lanes, ret;
+
+ if (adj_mode->clock > 80000)
+ lanes = 4;
+ else
+ lanes = 3;
+
+ if (lanes != dsi->lanes) {
+ mipi_dsi_detach(dsi);
+ dsi->lanes = lanes;
+ ret = mipi_dsi_attach(dsi);
+ if (ret) {
+ DRM_ERROR("Failed to change host lanes\n");
+ return;
+ }
+ }
+ }
+
+ drm_mode_copy(&adv7511->curr_mode, adj_mode);
+
/*
* TODO Test first order 4:2:2 to 4:4:4 up conversion method, which is
* supposed to give better results.
@@ -752,12 +946,246 @@ static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
adv7511->f_tmds = mode->clock;
}
+/* -----------------------------------------------------------------------------
+ * Encoder operations
+ */
+
+static int adv7511_encoder_get_modes(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
+
+ return adv7511_get_modes(adv7511, connector);
+}
+
+static void adv7511_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+ struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
+
+ if (mode == DRM_MODE_DPMS_ON)
+ adv7511_power_on(adv7511);
+ else
+ adv7511_power_off(adv7511);
+}
+
+static enum drm_connector_status
+adv7511_encoder_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
+
+ return adv7511_detect(adv7511, connector);
+}
+
+static int adv7511_encoder_mode_valid(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
+
+ return adv7511_mode_valid(adv7511, mode);
+}
+
+static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adj_mode)
+{
+ struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
+
+ adv7511_mode_set(adv7511, mode, adj_mode);
+}
+
static struct drm_encoder_slave_funcs adv7511_encoder_funcs = {
.dpms = adv7511_encoder_dpms,
.mode_valid = adv7511_encoder_mode_valid,
.mode_set = adv7511_encoder_mode_set,
.detect = adv7511_encoder_detect,
- .get_modes = adv7511_get_modes,
+ .get_modes = adv7511_encoder_get_modes,
+};
+
+/* -----------------------------------------------------------------------------
+ * Bridge and connector functions
+ */
+
+static struct adv7511 *connector_to_adv7511(struct drm_connector *connector)
+{
+ return container_of(connector, struct adv7511, connector);
+}
+
+/* Connector helper functions */
+static int adv7533_connector_get_modes(struct drm_connector *connector)
+{
+ struct adv7511 *adv = connector_to_adv7511(connector);
+
+ return adv7511_get_modes(adv, connector);
+}
+
+static struct drm_encoder *
+adv7533_connector_best_encoder(struct drm_connector *connector)
+{
+ struct adv7511 *adv = connector_to_adv7511(connector);
+
+ return adv->bridge.encoder;
+}
+
+static enum drm_mode_status
+adv7533_connector_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ struct adv7511 *adv = connector_to_adv7511(connector);
+
+ return adv7511_mode_valid(adv, mode);
+}
+
+static struct drm_connector_helper_funcs adv7533_connector_helper_funcs = {
+ .get_modes = adv7533_connector_get_modes,
+ .best_encoder = adv7533_connector_best_encoder,
+ .mode_valid = adv7533_connector_mode_valid,
+};
+
+static enum drm_connector_status
+adv7533_connector_detect(struct drm_connector *connector, bool force)
+{
+ struct adv7511 *adv = connector_to_adv7511(connector);
+
+ return adv7511_detect(adv, connector);
+}
+
+static struct drm_connector_funcs adv7533_connector_funcs = {
+ .dpms = drm_atomic_helper_connector_dpms,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .detect = adv7533_connector_detect,
+ .destroy = drm_connector_cleanup,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+/* Bridge funcs */
+static struct adv7511 *bridge_to_adv7511(struct drm_bridge *bridge)
+{
+ return container_of(bridge, struct adv7511, bridge);
+}
+
+static void adv7533_bridge_pre_enable(struct drm_bridge *bridge)
+{
+ struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+ adv7511_power_on(adv);
+}
+
+static void adv7533_bridge_post_disable(struct drm_bridge *bridge)
+{
+ struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+#if HPD_ENABLE
+ if (!adv->powered)
+ return;
+#endif
+
+ adv7511_power_off(adv);
+}
+
+static void adv7533_bridge_enable(struct drm_bridge *bridge)
+{
+}
+
+static void adv7533_bridge_disable(struct drm_bridge *bridge)
+{
+}
+
+static void adv7533_bridge_mode_set(struct drm_bridge *bridge,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adj_mode)
+{
+ struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+ adv7511_mode_set(adv, mode, adj_mode);
+}
+
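+/*
+ * The ADV7533 takes MIPI DSI input rather than parallel RGB, so it
+ * registers itself as a dummy DSI peripheral on the host found via the
+ * DT graph (see adv7533_parse_dt()), configuring the lane count and
+ * video-mode flags before attaching.
+ */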
+static int adv7533_attach_dsi(struct adv7511 *adv7511)
+{
+ struct device *dev = &adv7511->i2c_main->dev;
+ struct mipi_dsi_device *dsi;
+ struct mipi_dsi_host *host;
+ int ret;
+
+ host = of_find_mipi_dsi_host_by_node(adv7511->host_node);
+ if (!host) {
+ dev_err(dev, "failed to find dsi host\n");
+ return -EPROBE_DEFER;
+ }
+
+ /* can adv7533 virtual channel be non-zero? */
+ dsi = mipi_dsi_new_dummy(host, 0);
+ if (IS_ERR(dsi)) {
+ dev_err(dev, "failed to create dummy dsi device\n");
+ ret = PTR_ERR(dsi);
+ goto err_dsi_device;
+ }
+
+ adv7511->dsi = dsi;
+
+ dsi->lanes = adv7511->num_dsi_lanes;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE
+ | MIPI_DSI_MODE_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE;
+
+ ret = mipi_dsi_attach(dsi);
+ if (ret < 0) {
+ dev_err(dev, "failed to attach dsi to host\n");
+ goto err_dsi_attach;
+ }
+
+ return 0;
+
+err_dsi_attach:
+ mipi_dsi_unregister_device(dsi);
+err_dsi_device:
+ return ret;
+}
+
+static int adv7533_bridge_attach(struct drm_bridge *bridge)
+{
+ struct adv7511 *adv = bridge_to_adv7511(bridge);
+ int ret;
+
+ adv->encoder = bridge->encoder;
+
+ if (!bridge->encoder) {
+ DRM_ERROR("Parent encoder object not found");
+ return -ENODEV;
+ }
+
+#if HPD_ENABLE
+ adv->connector.polled = DRM_CONNECTOR_POLL_HPD;
+#endif
+
+ ret = drm_connector_init(bridge->dev, &adv->connector,
+ &adv7533_connector_funcs, DRM_MODE_CONNECTOR_HDMIA);
+ if (ret) {
+ DRM_ERROR("Failed to initialize connector with drm\n");
+ return ret;
+ }
+ drm_connector_helper_add(&adv->connector,
+ &adv7533_connector_helper_funcs);
+ drm_mode_connector_attach_encoder(&adv->connector, adv->encoder);
+
+#if HPD_ENABLE
+ drm_helper_hpd_irq_event(adv->connector.dev);
+#endif
+
+ ret = adv7533_attach_dsi(adv);
+
+ return ret;
+}
+
+static struct drm_bridge_funcs adv7533_bridge_funcs = {
+ .pre_enable = adv7533_bridge_pre_enable,
+ .enable = adv7533_bridge_enable,
+ .disable = adv7533_bridge_disable,
+ .post_disable = adv7533_bridge_post_disable,
+ .mode_set = adv7533_bridge_mode_set,
+ .attach = adv7533_bridge_attach,
};
/* -----------------------------------------------------------------------------
@@ -770,8 +1198,6 @@ static int adv7511_parse_dt(struct device_node *np,
const char *str;
int ret;
- memset(config, 0, sizeof(*config));
-
of_property_read_u32(np, "adi,input-depth", &config->input_color_depth);
if (config->input_color_depth != 8 && config->input_color_depth != 10 &&
config->input_color_depth != 12)
@@ -849,10 +1275,54 @@ static int adv7511_parse_dt(struct device_node *np,
return 0;
}
+static int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv7511)
+{
+ u32 num_lanes = 0;
+ struct device_node *endpoint;
+
+ of_property_read_u32(np, "adi,dsi-lanes", &num_lanes);
+
+ if (num_lanes < 1 || num_lanes > 4)
+ return -EINVAL;
+
+ adv7511->num_dsi_lanes = num_lanes;
+
+ endpoint = of_graph_get_next_endpoint(np, NULL);
+ if (!endpoint) {
+ DRM_ERROR("adv dsi input endpoint not found\n");
+ return -ENODEV;
+ }
+
+ adv7511->host_node = of_graph_get_remote_port_parent(endpoint);
+ if (!adv7511->host_node) {
+ DRM_ERROR("dsi host node not found\n");
+ of_node_put(endpoint);
+ return -ENODEV;
+ }
+
+ of_node_put(endpoint);
+ of_node_put(adv7511->host_node);
+
+ /* TODO: Check if these need to be parsed by DT or not */
+ adv7511->rgb = true;
+ adv7511->embedded_sync = false;
+
+ return 0;
+}
+
static const int edid_i2c_addr = 0x7e;
static const int packet_i2c_addr = 0x70;
static const int cec_i2c_addr = 0x78;
+static const struct of_device_id adv7511_of_ids[] = {
+ { .compatible = "adi,adv7511", .data = (void *) ADV7511 },
+ { .compatible = "adi,adv7511w", .data = (void *) ADV7511 },
+ { .compatible = "adi,adv7513", .data = (void *) ADV7511 },
+ { .compatible = "adi,adv7533", .data = (void *) ADV7533 },
+ { }
+};
+MODULE_DEVICE_TABLE(of, adv7511_of_ids);
+
static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
{
struct adv7511_link_config link_config;
@@ -871,7 +1341,21 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
adv7511->powered = false;
adv7511->status = connector_status_disconnected;
- ret = adv7511_parse_dt(dev->of_node, &link_config);
+ if (dev->of_node) {
+ const struct of_device_id *of_id;
+
+ of_id = of_match_node(adv7511_of_ids, dev->of_node);
+ adv7511->type = (enum adv7511_type) of_id->data;
+ } else {
+ adv7511->type = id->driver_data;
+ }
+
+ memset(&link_config, 0, sizeof(link_config));
+
+ if (adv7511->type == ADV7511)
+ ret = adv7511_parse_dt(dev->of_node, &link_config);
+ else
+ ret = adv7533_parse_dt(dev->of_node, adv7511);
if (ret)
return ret;
@@ -897,10 +1381,19 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
return ret;
dev_dbg(dev, "Rev. %d\n", val);
- ret = regmap_register_patch(adv7511->regmap, adv7511_fixed_registers,
- ARRAY_SIZE(adv7511_fixed_registers));
- if (ret)
- return ret;
+ if (adv7511->type == ADV7511) {
+ ret = regmap_register_patch(adv7511->regmap,
+ adv7511_fixed_registers,
+ ARRAY_SIZE(adv7511_fixed_registers));
+ if (ret)
+ return ret;
+ } else {
+ ret = regmap_register_patch(adv7511->regmap,
+ adv7533_fixed_registers,
+ ARRAY_SIZE(adv7533_fixed_registers));
+ if (ret)
+ return ret;
+ }
regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR, edid_i2c_addr);
regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR,
@@ -909,9 +1402,42 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
adv7511_packet_disable(adv7511, 0xffff);
adv7511->i2c_main = i2c;
+
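+ /*
+ * The EDID, packet and CEC register maps sit behind separate I2C
+ * slave addresses; the *_i2c_addr constants are 8-bit write
+ * addresses, so shift right once to get the 7-bit address that
+ * i2c_new_dummy() expects.
+ */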
+ adv7511->i2c_packet = i2c_new_dummy(i2c->adapter, packet_i2c_addr >> 1);
+ if (!adv7511->i2c_packet)
+ return -ENOMEM;
+
adv7511->i2c_edid = i2c_new_dummy(i2c->adapter, edid_i2c_addr >> 1);
if (!adv7511->i2c_edid)
- return -ENOMEM;
+ goto err_i2c_unregister_packet;
+
+ adv7511->i2c_cec = i2c_new_dummy(i2c->adapter, cec_i2c_addr >> 1);
+ if (!adv7511->i2c_cec) {
+ ret = -ENOMEM;
+ goto err_i2c_unregister_edid;
+ }
+
+ adv7511->regmap_cec = devm_regmap_init_i2c(adv7511->i2c_cec,
+ &adv7533_cec_regmap_config);
+ if (IS_ERR(adv7511->regmap_cec)) {
+ ret = PTR_ERR(adv7511->regmap_cec);
+ goto err_i2c_unregister_cec;
+ }
+
+ adv7511->regmap_packet = devm_regmap_init_i2c(adv7511->i2c_packet,
+ &adv7533_packet_regmap_config);
+ if (IS_ERR(adv7511->regmap_packet)) {
+ ret = PTR_ERR(adv7511->regmap_packet);
+ goto err_i2c_unregister_cec;
+ }
+
+ if (adv7511->type == ADV7533) {
+ ret = regmap_register_patch(adv7511->regmap_cec,
+ adv7533_cec_fixed_registers,
+ ARRAY_SIZE(adv7533_cec_fixed_registers));
+ if (ret)
+ return ret;
+ }
if (i2c->irq) {
init_waitqueue_head(&adv7511->wq);
@@ -921,7 +1447,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
IRQF_ONESHOT, dev_name(dev),
adv7511);
if (ret)
- goto err_i2c_unregister_device;
+ goto err_i2c_unregister_cec;
}
/* CEC is unused for now */
@@ -932,12 +1458,30 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
i2c_set_clientdata(i2c, adv7511);
- adv7511_set_link_config(adv7511, &link_config);
+ if (adv7511->type == ADV7511)
+ adv7511_set_link_config(adv7511, &link_config);
+
+ if (adv7511->type == ADV7533) {
+ adv7511->bridge.funcs = &adv7533_bridge_funcs;
+ adv7511->bridge.of_node = dev->of_node;
+
+ ret = drm_bridge_add(&adv7511->bridge);
+ if (ret) {
+ dev_err(dev, "failed to add adv7533 bridge\n");
+ goto err_i2c_unregister_cec;
+ }
+ }
+
+ adv7511_audio_init(dev);
return 0;
-err_i2c_unregister_device:
+err_i2c_unregister_cec:
+ i2c_unregister_device(adv7511->i2c_cec);
+err_i2c_unregister_edid:
i2c_unregister_device(adv7511->i2c_edid);
+err_i2c_unregister_packet:
+ i2c_unregister_device(adv7511->i2c_packet);
return ret;
}
@@ -946,10 +1490,18 @@ static int adv7511_remove(struct i2c_client *i2c)
{
struct adv7511 *adv7511 = i2c_get_clientdata(i2c);
+ adv7511_audio_exit(&i2c->dev);
+ i2c_unregister_device(adv7511->i2c_cec);
i2c_unregister_device(adv7511->i2c_edid);
kfree(adv7511->edid);
+ if (adv7511->type == ADV7533) {
+ mipi_dsi_detach(adv7511->dsi);
+ mipi_dsi_unregister_device(adv7511->dsi);
+ drm_bridge_remove(&adv7511->bridge);
+ }
+
return 0;
}
@@ -959,6 +1511,9 @@ static int adv7511_encoder_init(struct i2c_client *i2c, struct drm_device *dev,
struct adv7511 *adv7511 = i2c_get_clientdata(i2c);
+ if (adv7511->type == ADV7533)
+ return -ENODEV;
+
encoder->slave_priv = adv7511;
encoder->slave_funcs = &adv7511_encoder_funcs;
@@ -968,21 +1523,14 @@ static int adv7511_encoder_init(struct i2c_client *i2c, struct drm_device *dev,
}
static const struct i2c_device_id adv7511_i2c_ids[] = {
- { "adv7511", 0 },
- { "adv7511w", 0 },
- { "adv7513", 0 },
+ { "adv7511", ADV7511 },
+ { "adv7511w", ADV7511 },
+ { "adv7513", ADV7511 },
+ { "adv7533", ADV7533 },
{ }
};
MODULE_DEVICE_TABLE(i2c, adv7511_i2c_ids);
-static const struct of_device_id adv7511_of_ids[] = {
- { .compatible = "adi,adv7511", },
- { .compatible = "adi,adv7511w", },
- { .compatible = "adi,adv7513", },
- { }
-};
-MODULE_DEVICE_TABLE(of, adv7511_of_ids);
-
static struct drm_i2c_encoder_driver adv7511_driver = {
.i2c_driver = {
.driver = {
diff --git a/drivers/gpu/drm/i2c/adv7511.h b/drivers/gpu/drm/i2c/adv7511.h
index 6599ed538426..d8cf1e42174e 100644
--- a/drivers/gpu/drm/i2c/adv7511.h
+++ b/drivers/gpu/drm/i2c/adv7511.h
@@ -10,6 +10,16 @@
#define __DRM_I2C_ADV7511_H__
#include <linux/hdmi.h>
+#include <drm/drm_crtc_helper.h>
+
+struct regmap;
+struct adv7511;
+
+int adv7511_packet_enable(struct adv7511 *adv7511, unsigned int packet);
+int adv7511_packet_disable(struct adv7511 *adv7511, unsigned int packet);
+
+int adv7511_audio_init(struct device *dev);
+void adv7511_audio_exit(struct device *dev);
#define ADV7511_REG_CHIP_REVISION 0x00
#define ADV7511_REG_N0 0x01
@@ -229,6 +239,56 @@ enum adv7511_sync_polarity {
ADV7511_SYNC_POLARITY_HIGH,
};
+enum adv7511_type {
+ ADV7511,
+ ADV7533,
+};
+
+struct adv7511 {
+ struct i2c_client *i2c_main;
+ struct i2c_client *i2c_edid;
+ struct i2c_client *i2c_cec;
+ struct i2c_client *i2c_packet;
+
+ struct regmap *regmap;
+ struct regmap *regmap_cec;
+ struct regmap *regmap_packet;
+ enum drm_connector_status status;
+ bool powered;
+
+ struct drm_display_mode curr_mode;
+
+ unsigned int f_tmds;
+ unsigned int f_audio;
+ unsigned int audio_source;
+
+ unsigned int current_edid_segment;
+ uint8_t edid_buf[256];
+ bool edid_read;
+
+ wait_queue_head_t wq;
+ struct drm_encoder *encoder;
+
+ struct drm_connector connector;
+ struct drm_bridge bridge;
+
+ bool embedded_sync;
+ enum adv7511_sync_polarity vsync_polarity;
+ enum adv7511_sync_polarity hsync_polarity;
+ bool rgb;
+
+ struct edid *edid;
+
+ struct gpio_desc *gpio_pd;
+
+ /* ADV7533 DSI RX related params */
+ struct device_node *host_node;
+ struct mipi_dsi_device *dsi;
+ u8 num_dsi_lanes;
+
+ enum adv7511_type type;
+};
+
/**
* struct adv7511_link_config - Describes adv7511 hardware configuration
* @input_color_depth: Number of bits per color component (8, 10 or 12)
diff --git a/drivers/gpu/drm/i2c/adv7511_audio.c b/drivers/gpu/drm/i2c/adv7511_audio.c
new file mode 100644
index 000000000000..52019e95d007
--- /dev/null
+++ b/drivers/gpu/drm/i2c/adv7511_audio.c
@@ -0,0 +1,312 @@
+/*
+ * Analog Devices ADV7511 HDMI transmitter driver
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/spi/spi.h>
+#include <linux/slab.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include "adv7511.h"
+
+static const struct snd_soc_dapm_widget adv7511_dapm_widgets[] = {
+ SND_SOC_DAPM_OUTPUT("TMDS"),
+ SND_SOC_DAPM_AIF_IN("AIFIN", "Playback", 0, SND_SOC_NOPM, 0, 0),
+};
+
+static const struct snd_soc_dapm_route adv7511_routes[] = {
+ { "TMDS", NULL, "AIFIN" },
+};
+
+static void adv7511_calc_cts_n(unsigned int f_tmds, unsigned int fs,
+ unsigned int *cts, unsigned int *n)
+{
+ switch (fs) {
+ case 32000:
+ *n = 4096;
+ break;
+ case 44100:
+ *n = 6272;
+ break;
+ case 48000:
+ *n = 6144;
+ break;
+ }
+
+ *cts = ((f_tmds * *n) / (128 * fs)) * 1000;
+}
+
+static int adv7511_update_cts_n(struct adv7511 *adv7511)
+{
+ unsigned int cts = 0;
+ unsigned int n = 0;
+
+ adv7511_calc_cts_n(adv7511->f_tmds, adv7511->f_audio, &cts, &n);
+
+ regmap_write(adv7511->regmap, ADV7511_REG_N0, (n >> 16) & 0xf);
+ regmap_write(adv7511->regmap, ADV7511_REG_N1, (n >> 8) & 0xff);
+ regmap_write(adv7511->regmap, ADV7511_REG_N2, n & 0xff);
+
+ regmap_write(adv7511->regmap, ADV7511_REG_CTS_MANUAL0,
+ (cts >> 16) & 0xf);
+ regmap_write(adv7511->regmap, ADV7511_REG_CTS_MANUAL1,
+ (cts >> 8) & 0xff);
+ regmap_write(adv7511->regmap, ADV7511_REG_CTS_MANUAL2,
+ cts & 0xff);
+
+ return 0;
+}
+
+static int adv7511_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct snd_soc_pcm_runtime *rtd = substream->private_data;
+ struct snd_soc_codec *codec = rtd->codec;
+ struct adv7511 *adv7511 = snd_soc_codec_get_drvdata(codec);
+ unsigned int rate;
+ unsigned int len;
+
+ switch (params_rate(params)) {
+ case 32000:
+ rate = ADV7511_SAMPLE_FREQ_32000;
+ break;
+ case 44100:
+ rate = ADV7511_SAMPLE_FREQ_44100;
+ break;
+ case 48000:
+ rate = ADV7511_SAMPLE_FREQ_48000;
+ break;
+ case 88200:
+ rate = ADV7511_SAMPLE_FREQ_88200;
+ break;
+ case 96000:
+ rate = ADV7511_SAMPLE_FREQ_96000;
+ break;
+ case 176400:
+ rate = ADV7511_SAMPLE_FREQ_176400;
+ break;
+ case 192000:
+ rate = ADV7511_SAMPLE_FREQ_192000;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (params_format(params)) {
+ case SNDRV_PCM_FORMAT_S16_LE:
+ len = ADV7511_I2S_SAMPLE_LEN_16;
+ break;
+ case SNDRV_PCM_FORMAT_S18_3LE:
+ len = ADV7511_I2S_SAMPLE_LEN_18;
+ break;
+ case SNDRV_PCM_FORMAT_S20_3LE:
+ len = ADV7511_I2S_SAMPLE_LEN_20;
+ break;
+ case SNDRV_PCM_FORMAT_S24_LE:
+ len = ADV7511_I2S_SAMPLE_LEN_24;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ adv7511->f_audio = params_rate(params);
+
+ adv7511_update_cts_n(adv7511);
+
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CFG3,
+ ADV7511_AUDIO_CFG3_LEN_MASK, len);
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_I2C_FREQ_ID_CFG,
+ ADV7511_I2C_FREQ_ID_CFG_RATE_MASK, rate << 4);
+ regmap_write(adv7511->regmap, 0x73, 0x1);
+
+ return 0;
+}
+
+static int adv7511_set_dai_fmt(struct snd_soc_dai *codec_dai,
+ unsigned int fmt)
+{
+ struct snd_soc_codec *codec = codec_dai->codec;
+ struct adv7511 *adv7511 = snd_soc_codec_get_drvdata(codec);
+ unsigned int audio_source, i2s_format = 0;
+ unsigned int invert_clock;
+
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+ audio_source = ADV7511_AUDIO_SOURCE_I2S;
+ i2s_format = ADV7511_I2S_FORMAT_I2S;
+ break;
+ case SND_SOC_DAIFMT_RIGHT_J:
+ audio_source = ADV7511_AUDIO_SOURCE_I2S;
+ i2s_format = ADV7511_I2S_FORMAT_RIGHT_J;
+ break;
+ case SND_SOC_DAIFMT_LEFT_J:
+ audio_source = ADV7511_AUDIO_SOURCE_I2S;
+ i2s_format = ADV7511_I2S_FORMAT_LEFT_J;
+ break;
+// case SND_SOC_DAIFMT_SPDIF:
+// audio_source = ADV7511_AUDIO_SOURCE_SPDIF;
+// break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+ case SND_SOC_DAIFMT_CBS_CFS:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ case SND_SOC_DAIFMT_NB_NF:
+ invert_clock = 0;
+ break;
+ case SND_SOC_DAIFMT_IB_NF:
+ invert_clock = 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_SOURCE, 0x70,
+ audio_source << 4);
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG, BIT(6),
+ invert_clock << 6);
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_I2S_CONFIG, 0x03,
+ i2s_format);
+
+ adv7511->audio_source = audio_source;
+
+ return 0;
+}
+
+static int adv7511_set_bias_level(struct snd_soc_codec *codec,
+ enum snd_soc_bias_level level)
+{
+ struct adv7511 *adv7511 = snd_soc_codec_get_drvdata(codec);
+ struct snd_soc_dapm_context *dapm = snd_soc_codec_get_dapm(codec);
+
+ switch (level) {
+ case SND_SOC_BIAS_ON:
+ switch (adv7511->audio_source) {
+ case ADV7511_AUDIO_SOURCE_I2S:
+ break;
+ case ADV7511_AUDIO_SOURCE_SPDIF:
+ regmap_update_bits(adv7511->regmap,
+ ADV7511_REG_AUDIO_CONFIG, BIT(7),
+ BIT(7));
+ break;
+ }
+ break;
+ case SND_SOC_BIAS_PREPARE:
+ if (dapm->bias_level == SND_SOC_BIAS_STANDBY) {
+ adv7511_packet_enable(adv7511,
+ ADV7511_PACKET_ENABLE_AUDIO_SAMPLE);
+ adv7511_packet_enable(adv7511,
+ ADV7511_PACKET_ENABLE_AUDIO_INFOFRAME);
+ adv7511_packet_enable(adv7511,
+ ADV7511_PACKET_ENABLE_N_CTS);
+ } else {
+ adv7511_packet_disable(adv7511,
+ ADV7511_PACKET_ENABLE_AUDIO_SAMPLE);
+ adv7511_packet_disable(adv7511,
+ ADV7511_PACKET_ENABLE_AUDIO_INFOFRAME);
+ adv7511_packet_disable(adv7511,
+ ADV7511_PACKET_ENABLE_N_CTS);
+ }
+ break;
+ case SND_SOC_BIAS_STANDBY:
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG,
+ BIT(7), 0);
+ break;
+ case SND_SOC_BIAS_OFF:
+ break;
+ }
+ dapm->bias_level = level;
+ return 0;
+}
+
+#define ADV7511_RATES (SNDRV_PCM_RATE_32000 |\
+ SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 |\
+ SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000 |\
+ SNDRV_PCM_RATE_176400 | SNDRV_PCM_RATE_192000)
+
+#define ADV7511_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S18_3LE |\
+ SNDRV_PCM_FMTBIT_S20_3LE | SNDRV_PCM_FMTBIT_S24_LE)
+
+static const struct snd_soc_dai_ops adv7511_dai_ops = {
+ .hw_params = adv7511_hw_params,
+ /*.set_sysclk = adv7511_set_dai_sysclk,*/
+ .set_fmt = adv7511_set_dai_fmt,
+};
+
+static struct snd_soc_dai_driver adv7511_dai = {
+ .name = "adv7511",
+ .playback = {
+ .stream_name = "Playback",
+ .channels_min = 2,
+ .channels_max = 2,
+ .rates = ADV7511_RATES,
+ .formats = ADV7511_FORMATS,
+ },
+ .ops = &adv7511_dai_ops,
+};
+
+static int adv7511_suspend(struct snd_soc_codec *codec)
+{
+ return adv7511_set_bias_level(codec, SND_SOC_BIAS_OFF);
+}
+
+static int adv7511_resume(struct snd_soc_codec *codec)
+{
+ return adv7511_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+}
+
+static int adv7511_probe(struct snd_soc_codec *codec)
+{
+ return adv7511_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+}
+
+static int adv7511_remove(struct snd_soc_codec *codec)
+{
+ adv7511_set_bias_level(codec, SND_SOC_BIAS_OFF);
+ return 0;
+}
+
+static struct snd_soc_codec_driver adv7511_codec_driver = {
+ .probe = adv7511_probe,
+ .remove = adv7511_remove,
+ .suspend = adv7511_suspend,
+ .resume = adv7511_resume,
+ .set_bias_level = adv7511_set_bias_level,
+
+ .dapm_widgets = adv7511_dapm_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(adv7511_dapm_widgets),
+ .dapm_routes = adv7511_routes,
+ .num_dapm_routes = ARRAY_SIZE(adv7511_routes),
+};
+
+int adv7511_audio_init(struct device *dev)
+{
+ return snd_soc_register_codec(dev, &adv7511_codec_driver,
+ &adv7511_dai, 1);
+}
+
+void adv7511_audio_exit(struct device *dev)
+{
+ snd_soc_unregister_codec(dev);
+}
diff --git a/drivers/gpu/drm/i2c/icn_6201.c b/drivers/gpu/drm/i2c/icn_6201.c
new file mode 100644
index 000000000000..ea9e628e12b0
--- /dev/null
+++ b/drivers/gpu/drm/i2c/icn_6201.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2016 http://www.lemaker.org/
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+
+#include <linux/kernel.h>
+#include <linux/semaphore.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+
+
+#define ICN6201_DEBUG 1
+#define ICN6201_NAME "icn6201"
+
+
+struct icn6201_data_t {
+ unsigned int irq;
+ struct i2c_client *client;
+};
+
+
+
+struct icn6201_cmd {
+ unsigned cmd;
+ unsigned char count;
+ unsigned char para_list[64];
+};
+
+
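+/*
+ * ICN6201 DSI-to-LVDS bridge init sequence.  para_list[0] repeats the
+ * register address because ic6201_i2c_Write() sends the buffer
+ * verbatim, so each entry goes out on the wire as <reg, value>.
+ */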
+static struct icn6201_cmd icm6201_init_cmds[] = {
+
+ {0x20, 2, {0x20, 0x00} },
+
+ {0x21, 2, {0x21, 0x58} },
+ {0x22, 2, {0x22, 0x24} },
+
+ {0x23, 2, {0x23, 0xE6} },
+ {0x24, 2, {0x24, 0x3C} },
+ {0x25, 2, {0x25, 0x02} },
+ {0x26, 2, {0x26, 0x00} },
+ {0x27, 2, {0x27, 0x14} },
+
+ {0x28, 2, {0x28, 0x0A} },
+ {0x29, 2, {0x29, 0x14} },
+ {0x34, 2, {0x34, 0x80} }, /*buffer*/
+ {0x36, 2, {0x36, 0xE6} }, /*buffer*/
+
+ {0xB5, 2, {0xB5, 0xA0} },
+ {0x5C, 2, {0x5C, 0xFF} }, /*delay*/
+
+ {0x13, 2, {0x13, 0x10} }, /*8bit, 6bit del 10(8) 00(6)*/
+
+ {0x56, 2, {0x56, 0x90} }, /*0x90 extern clk , 0x92 inter clk*/
+
+ {0x6B, 2, {0x6B, 0x21} }, /* lvds clk*/
+
+ {0x69, 2, {0x69, 0x1D} }, /*lvds clk*/
+
+ {0xB6, 2, {0xB6, 0x20} },
+
+ {0x51, 2, {0x51, 0x20} }, /*pll*/
+ {0x09, 2, {0x09, 0x10} } /*display on */
+
+};
+
+
+/*
+ * icn6201_i2c_Read - write then read data over i2c
+ * @client: handle of i2c
+ * @writebuf: data that will be written to the slave
+ * @writelen: how many bytes to write
+ * @readbuf: where to store data read from the slave
+ * @readlen: how many bytes to read
+ *
+ * Returns negative errno, else the number of messages executed.
+ */
+int icn6201_i2c_Read(struct i2c_client *client, char *writebuf,
+ int writelen, char *readbuf, int readlen)
+{
+ int ret;
+
+ if (writelen > 0) {
+ struct i2c_msg msgs[] = {
+ {
+ .addr = client->addr,
+ .flags = 0,
+ .len = writelen,
+ .buf = writebuf,
+ },
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = readlen,
+ .buf = readbuf,
+ },
+ };
+ ret = i2c_transfer(client->adapter, msgs, 2);
+ if (ret < 0)
+ dev_err(&client->dev, "f%s: i2c read error.\n",
+ __func__);
+ } else {
+ struct i2c_msg msgs[] = {
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = readlen,
+ .buf = readbuf,
+ },
+ };
+ ret = i2c_transfer(client->adapter, msgs, 1);
+ if (ret < 0)
+ dev_err(&client->dev, "%s:i2c read error\n", __func__);
+ }
+ return ret;
+}
+
+/*write data by i2c*/
+int ic6201_i2c_Write(struct i2c_client *client, char *writebuf, int writelen)
+{
+ int ret;
+
+ struct i2c_msg msg[] = {
+ {
+ .addr = client->addr,
+ .flags = 0,
+ .len = writelen,
+ .buf = writebuf,
+ },
+ };
+
+ ret = i2c_transfer(client->adapter, msg, 1);
+ if (ret < 0)
+ dev_err(&client->dev, "%s i2c write error.\n", __func__);
+
+ return ret;
+}
+
+
+static void icn6201_write_cmds(struct i2c_client *client,
+ struct icn6201_cmd *table, unsigned int count)
+{
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ ic6201_i2c_Write(client, table[i].para_list, table[i].count);
+
+}
+
+
+static int icn6201_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int err = 0;
+ unsigned char uc_reg_value;
+ unsigned char uc_reg_addr;
+ struct device *dev = &client->dev;
+ struct icn6201_data_t *icn6201_data;
+ unsigned int irq_no = 0;
+
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+ err = -ENODEV;
+ goto exit_check_functionality_failed;
+ }
+
+ icn6201_data = devm_kzalloc(dev, sizeof(*icn6201_data), GFP_KERNEL);
+
+ if (!icn6201_data) {
+ err = -ENOMEM;
+ goto exit_alloc_data_failed;
+ }
+
+ i2c_set_clientdata(client, icn6201_data);
+
+
+
+ icn6201_data->irq = irq_no;
+ icn6201_data->client = client;
+
+
+ mdelay(500);
+ icn6201_write_cmds(client, icm6201_init_cmds,
+ sizeof(icm6201_init_cmds) / sizeof(struct icn6201_cmd));
+
+
+ /*make sure ICN6201 already finish startup process */
+ msleep(150);
+
+
+#ifdef ICN6201_DEBUG
+ /*get some register information */
+ uc_reg_addr = 0x20;
+ icn6201_i2c_Read(client, &uc_reg_addr, 1, &uc_reg_value, 1);
+ dev_dbg(&client->dev, "[FTS] Firmware version = 0x%x\n", uc_reg_value);
+
+ printk(KERN_DEBUG "icn6201_probe succeeded!\n");
+#endif
+
+ return 0;
+
+
+exit_alloc_data_failed:
+exit_check_functionality_failed:
+ return err;
+}
+
+static int icn6201_remove(struct i2c_client *client)
+{
+ struct icn6201_data_t *icn6201_data;
+
+ icn6201_data = i2c_get_clientdata(client);
+
+ /*free_irq(client->irq, icn6201_data);*/
+ /* kfree(icn6201_data);*/
+ i2c_set_clientdata(client, NULL);
+ return 0;
+}
+
+static const struct i2c_device_id icn6201_id[] = {
+ {ICN6201_NAME, 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, icn6201_id);
+
+static const struct of_device_id inc6201_of_match[] = {
+ { .compatible = "ChipOne,icn6201", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, inc6201_of_match);
+
+static struct i2c_driver icn6201_driver = {
+ .probe = icn6201_probe,
+ .remove = icn6201_remove,
+ .id_table = icn6201_id,
+ .driver = {
+ .name = ICN6201_NAME,
+ .owner = THIS_MODULE,
+ .of_match_table = inc6201_of_match,
+ },
+};
+
+static int __init icn6201_init(void)
+{
+ int ret;
+
+ ret = i2c_add_driver(&icn6201_driver);
+ if (ret) {
+ pr_err("Adding icn6201 driver failed errno = %d\n",
+ ret);
+ } else {
+ pr_info("Successfully added driver %s\n",
+ icn6201_driver.driver.name);
+ }
+
+ return ret;
+}
+
+static void __exit icn6201_exit(void)
+{
+ i2c_del_driver(&icn6201_driver);
+}
+
+
+module_init(icn6201_init);
+module_exit(icn6201_exit);
+
+MODULE_AUTHOR("support@lemaker.org");
+MODULE_DESCRIPTION("ICN6201 driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 7d4704b1292b..3f129fd4d3e5 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -51,4 +51,12 @@ config DRM_PANEL_SHARP_LQ101R1SX01
To compile this driver as a module, choose M here: the module
will be called panel-sharp-lq101r1sx01.
+config DRM_PANEL_HIKEY
+ tristate "HiKey LCD panel driver"
+ depends on OF
+ depends on DRM_MIPI_DSI
+ help
+ Say Y here if you want to enable the LCD panel driver for the
+ HiKey board. Currently supported panel: INNOLUX N070ICN-PB1
+ (800x1280).
+
endmenu
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index d0f016dd7ddb..5660166f2235 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o
obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o
obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0) += panel-samsung-s6e8aa0.o
obj-$(CONFIG_DRM_PANEL_SHARP_LQ101R1SX01) += panel-sharp-lq101r1sx01.o
+obj-$(CONFIG_DRM_PANEL_HIKEY) += panel-hikey.o
diff --git a/drivers/gpu/drm/panel/panel-hikey.c b/drivers/gpu/drm/panel/panel-hikey.c
new file mode 100644
index 000000000000..e116a47dc377
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-hikey.c
@@ -0,0 +1,529 @@
+/*
+ * HiKey LCD panel driver
+ * TODO: Add backlight adjustment support.
+ *
+ * Copyright (c) 2016 Linaro Limited.
+ * Copyright (c) 2016 Hisilicon Limited.
+ * Copyright (C) 2016 LeMaker
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/gpio/consumer.h>
+#include <video/mipi_display.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_panel.h>
+
+#define REGFLAG_DELAY 0XFFE
+
+struct hikey_panel {
+ struct drm_panel base;
+ struct mipi_dsi_device *dsi;
+
+ bool prepared;
+ bool enabled;
+
+ struct gpio_desc *gpio_pwr_en;
+ struct gpio_desc *gpio_bl_en;
+ struct gpio_desc *gpio_pwm;
+};
+
+struct dsi_panel_cmd {
+ u32 cmd; /* cmd: DCS command */
+ u32 len; /* command payload length */
+ u8 data[64]; /* buffer containing the command payload */
+};
+
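+/*
+ * Power-on init sequence for the INNOLUX N070ICN-PB1.  Each entry is
+ * sent as a DCS write of 'len' payload bytes; REGFLAG_DELAY entries
+ * are not sent but interpreted by hikey_panel_write_cmds() as an
+ * msleep() of 'len' milliseconds.
+ */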
+static struct dsi_panel_cmd n070icn_init_cmds[] = {
+ {0xFF,4,{0xAA,0x55,0xA5,0x80}},//========== Internal setting ==========
+
+ {0x6F,2,{0x11,0x00}},// MIPI related Timing Setting
+ {0xF7,2,{0x20,0x00}},
+
+ {0x6F,1,{0x06}},// Improve ESD option
+ {0xF7,1,{0xA0}},
+ {0x6F,1,{0x19}},
+ {0xF7,1,{0x12}},
+ {0xF4,1,{0x03}},
+
+ {0x6F,1,{0x08}},// Vcom floating
+ {0xFA,1,{0x40}},
+ {0x6F,1,{0x11}},
+ {0xF3,1,{0x01}},
+
+ {0xF0,5,{0x55,0xAA,0x52,0x08,0x00}},//========== page0 relative ==========
+ {0xC8,1,{0x80}},
+
+ {0xB1,2,{0x6C,0x01}},// Set WXGA resolution
+
+ {0xB6,1,{0x08}},// Set source output hold time
+
+ {0x6F,1,{0x02}},//EQ control function
+ {0xB8,1,{0x08}},
+
+ {0xBB,2,{0x54,0x54}},// Set bias current for GOP and SOP
+
+ {0xBC,2,{0x05,0x05}},// Inversion setting
+
+ {0xC7,1,{0x01}},// zigzag setting
+
+ {0xBD,5,{0x02,0xB0,0x0C,0x0A,0x00}},// DSP Timing Settings update for BIST
+
+ {0xF0,5,{0x55,0xAA,0x52,0x08,0x01}},//========== page1 relative ==========
+
+ {0xB0,2,{0x05,0x05}},// Setting AVDD, AVEE clamp
+ {0xB1,2,{0x05,0x05}},
+
+ {0xBC,2,{0x3A,0x01}},// VGMP, VGMN, VGSP, VGSN setting
+ {0xBD,2,{0x3E,0x01}},
+
+ {0xCA,1,{0x00}},// gate signal control
+
+ {0xC0,1,{0x04}},// power IC control
+
+ {0xB2,2,{0x00,0x00}},// VCL SET -2.5V
+
+ {0xBE,1,{0x80}},// VCOM = -1.888V
+
+ {0xB3,2,{0x19,0x19}},// Setting VGH=15V, VGL=-11V
+ {0xB4,2,{0x12,0x12}},
+
+ {0xB9,2,{0x24,0x24}},// power control for VGH, VGL
+ {0xBA,2,{0x14,0x14}},
+
+ {0xF0,5,{0x55,0xAA,0x52,0x08,0x02}},//========== page2 relative ==========
+
+ {0xEE,1,{0x01}},//gamma setting
+ {0xEF,4,{0x09,0x06,0x15,0x18}},//Gradient Control for Gamma Voltage
+
+ {0xB0,6,{0x00,0x00,0x00,0x08,0x00,0x17}},
+ {0x6F,1,{0x06}},
+ {0xB0,6,{0x00,0x25,0x00,0x30,0x00,0x45}},
+ {0x6F,1,{0x0C}},
+ {0xB0,4,{0x00,0x56,0x00,0x7A}},
+ {0xB1,6,{0x00,0xA3,0x00,0xE7,0x01,0x20}},
+ {0x6F,1,{0x06}},
+ {0xB1,6,{0x01,0x7A,0x01,0xC2,0x01,0xC5}},
+ {0x6F,1,{0x0C}},
+ {0xB1,4,{0x02,0x06,0x02,0x5F}},
+ {0xB2,6,{0x02,0x92,0x02,0xD0,0x02,0xFC}},
+ {0x6F,1,{0x06}},
+ {0xB2,6,{0x03,0x35,0x03,0x5D,0x03,0x8B}},
+ {0x6F,1,{0x0C}},
+ {0xB2,4,{0x03,0xA2,0x03,0xBF}},
+ {0xB3,4,{0x03,0xD2,0x03,0xFF}},
+
+ //========== GOA relative ==========
+ {0xF0,5,{0x55,0xAA,0x52,0x08,0x06}},// PAGE6 : GOUT Mapping, VGLO select
+ {0xB0,2,{0x00,0x17}},
+ {0xB1,2,{0x16,0x15}},
+ {0xB2,2,{0x14,0x13}},
+ {0xB3,2,{0x12,0x11}},
+ {0xB4,2,{0x10,0x2D}},
+ {0xB5,2,{0x01,0x08}},
+ {0xB6,2,{0x09,0x31}},
+ {0xB7,2,{0x31,0x31}},
+ {0xB8,2,{0x31,0x31}},
+ {0xB9,2,{0x31,0x31}},
+ {0xBA,2,{0x31,0x31}},
+ {0xBB,2,{0x31,0x31}},
+ {0xBC,2,{0x31,0x31}},
+ {0xBD,2,{0x31,0x09}},
+ {0xBE,2,{0x08,0x01}},
+ {0xBF,2,{0x2D,0x10}},
+ {0xC0,2,{0x11,0x12}},
+ {0xC1,2,{0x13,0x14}},
+ {0xC2,2,{0x15,0x16}},
+ {0xC3,2,{0x17,0x00}},
+ {0xE5,2,{0x31,0x31}},
+ {0xC4,2,{0x00,0x17}},
+ {0xC5,2,{0x16,0x15}},
+ {0xC6,2,{0x14,0x13}},
+ {0xC7,2,{0x12,0x11}},
+ {0xC8,2,{0x10,0x2D}},
+ {0xC9,2,{0x01,0x08}},
+ {0xCA,2,{0x09,0x31}},
+ {0xCB,2,{0x31,0x31}},
+ {0xCC,2,{0x31,0x31}},
+ {0xCD,2,{0x31,0x31}},
+ {0xCE,2,{0x31,0x31}},
+ {0xCF,2,{0x31,0x31}},
+ {0xD0,2,{0x31,0x31}},
+ {0xD1,2,{0x31,0x09}},
+ {0xD2,2,{0x08,0x01}},
+ {0xD3,2,{0x2D,0x10}},
+ {0xD4,2,{0x11,0x12}},
+ {0xD5,2,{0x13,0x14}},
+ {0xD6,2,{0x15,0x16}},
+ {0xD7,2,{0x17,0x00}},
+ {0xE6,2,{0x31,0x31}},
+ {0xD8,5,{0x00,0x00,0x00,0x00,0x00}},//VGL level select
+ {0xD9,5,{0x00,0x00,0x00,0x00,0x00}},
+ {0xE7,1,{0x00}},
+
+ // PAGE3 :
+ {0xF0,5,{0x55,0xAA,0x52,0x08,0x03}},//gate timing control
+ {0xB0,2,{0x20,0x00}},
+ {0xB1,2,{0x20,0x00}},
+ {0xB2,5,{0x05,0x00,0x42,0x00,0x00}},
+ {0xB6,5,{0x05,0x00,0x42,0x00,0x00}},
+ {0xBA,5,{0x53,0x00,0x42,0x00,0x00}},
+ {0xBB,5,{0x53,0x00,0x42,0x00,0x00}},
+ {0xC4,1,{0x40}},
+
+ // gate CLK EQ
+ // gate STV EQ
+
+ // PAGE5 :
+ {0xF0,5,{0x55,0xAA,0x52,0x08,0x05}},
+ {0xB0,2,{0x17,0x06}},
+ {0xB8,1,{0x00}},
+ {0xBD,5,{0x03,0x01,0x01,0x00,0x01}},
+ {0xB1,2,{0x17,0x06}},
+ {0xB9,2,{0x00,0x01}},
+ {0xB2,2,{0x17,0x06}},
+ {0xBA,2,{0x00,0x01}},
+ {0xB3,2,{0x17,0x06}},
+ {0xBB,2,{0x0A,0x00}},
+ {0xB4,2,{0x17,0x06}},
+ {0xB5,2,{0x17,0x06}},
+ {0xB6,2,{0x14,0x03}},
+ {0xB7,2,{0x00,0x00}},
+ {0xBC,2,{0x02,0x01}},
+ {0xC0,1,{0x05}},
+ {0xC4,1,{0xA5}},
+ {0xC8,2,{0x03,0x30}},
+ {0xC9,2,{0x03,0x51}},
+ {0xD1,5,{0x00,0x05,0x03,0x00,0x00}},
+ {0xD2,5,{0x00,0x05,0x09,0x00,0x00}},
+ {0xE5,1,{0x02}},
+ {0xE6,1,{0x02}},
+ {0xE7,1,{0x02}},
+ {0xE9,1,{0x02}},
+ {0xED,1,{0x33}},
+
+ /* bist test mode
+ {0xF0,5,{0x55,0xAA,0x52,0x08,0x00}},
+ {0xEF,2,{0x07,0xFF}},
+ {0xEE,4,{0x87,0x78,0x02,0x40}},
+ */
+
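+ /* standard DCS wake-up: 0x11 exit_sleep_mode, then 0x29 set_display_on */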
+ {0x11,0,{0x00}},
+ {REGFLAG_DELAY, 120, {}},
+ {0x29,0,{0x00}},
+ {REGFLAG_DELAY, 20, {}},
+};
+
+static int hikey_panel_write_cmds(struct mipi_dsi_device *dsi,
+ struct dsi_panel_cmd *cmds,
+ u32 count)
+{
+ struct dsi_panel_cmd *cmd;
+ int ret = 0;
+ u32 i;
+
+ for(i = 0; i < count; i++) {
+ cmd = &cmds[i];
+ switch (cmd->cmd) {
+ case REGFLAG_DELAY:
+ msleep(cmd->len);
+ break;
+ default:
+ ret = mipi_dsi_dcs_write(dsi, cmd->cmd, cmd->data,
+ cmd->len);
+ }
+ }
+
+ return ret;
+}
+
+static inline struct hikey_panel *to_hikey_panel(struct drm_panel *panel)
+{
+ return container_of(panel, struct hikey_panel, base);
+}
+
+static int hikey_panel_disable(struct drm_panel *p)
+{
+ struct hikey_panel *panel = to_hikey_panel(p);
+
+ if (!panel->enabled)
+ return 0;
+
+ /* TODO: send panel off seq cmds */
+ panel->enabled = false;
+
+ gpiod_set_value(panel->gpio_bl_en, 0);
+ return 0;
+}
+
+static int hikey_panel_unprepare(struct drm_panel *p)
+{
+ struct hikey_panel *panel = to_hikey_panel(p);
+
+ if (!panel->prepared)
+ return 0;
+
+ panel->prepared = false;
+
+ return 0;
+}
+
+static int hikey_panel_prepare(struct drm_panel *p)
+{
+ struct hikey_panel *panel = to_hikey_panel(p);
+ int ret;
+
+ if (panel->prepared)
+ return 0;
+
+ /*
+ * A minimum delay of 250ms is required after power-up until commands
+ * can be sent
+ */
+ msleep(250);
+
+#ifndef CONFIG_DRM_ICN_6201
+ /* init the panel */
+ ret = hikey_panel_write_cmds(panel->dsi, n070icn_init_cmds,
+ ARRAY_SIZE(n070icn_init_cmds));
+ if (ret < 0)
+ return ret;
+#endif
+
+ panel->prepared = true;
+
+ return 0;
+}
+
+static int hikey_panel_enable(struct drm_panel *p)
+{
+ struct hikey_panel *panel = to_hikey_panel(p);
+
+ if (panel->enabled)
+ return 0;
+
+ msleep(200);
+ gpiod_set_value(panel->gpio_bl_en, 1);
+ gpiod_set_value(panel->gpio_pwm, 0);
+
+ panel->enabled = true;
+
+ return 0;
+}
+
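+/*
+ * Fixed panel timings.  Per DRM convention, sync_start = display +
+ * front porch, sync_end = sync_start + sync width, total = sync_end +
+ * back porch; .clock is the pixel clock in kHz.
+ */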
+static const struct drm_display_mode default_mode = {
+#ifndef CONFIG_DRM_ICN_6201
+ .clock = 66800,
+
+ .hdisplay = 800,
+ .hsync_start = 800 + 40,
+ .hsync_end = 800 + 40 + 4,
+ .htotal = 800 + 40 + 4 + 40,
+
+ .vdisplay = 1280,
+ .vsync_start = 1280 + 10,
+ .vsync_end = 1280 + 10 + 4,
+ .vtotal = 1280 + 10 + 4 + 12,
+#else
+ .clock = 55000, /* alternates: 56000, 60000 */
+
+ .hdisplay = 1024,
+ .hsync_start = 1024 + 140,
+ .hsync_end = 1024 + 140 + 40,
+ .htotal = 1024 + 140 + 40 + 140,
+
+ .vdisplay = 600,
+ .vsync_start = 600 + 15,
+ .vsync_end = 600 + 15 + 5,
+ .vtotal = 600 + 15 + 5 + 75,
+#endif
+};
+
+static int hikey_panel_get_modes(struct drm_panel *panel)
+{
+ struct drm_display_mode *mode;
+
+ mode = drm_mode_duplicate(panel->drm, &default_mode);
+ if (!mode) {
+ DRM_ERROR("failed to add mode %ux%ux@%u\n",
+ default_mode.hdisplay, default_mode.vdisplay,
+ default_mode.vrefresh);
+ return -ENOMEM;
+ }
+
+ drm_mode_set_name(mode);
+
+ drm_mode_probed_add(panel->connector, mode);
+
+ panel->connector->display_info.width_mm = 94;
+ panel->connector->display_info.height_mm = 150;
+
+ return 1;
+}
+
+static const struct drm_panel_funcs hikey_panel_funcs = {
+ .get_modes = hikey_panel_get_modes,
+ .enable = hikey_panel_enable,
+ .disable = hikey_panel_disable,
+ .prepare = hikey_panel_prepare,
+ .unprepare = hikey_panel_unprepare,
+};
+
+static int hikey_panel_add(struct hikey_panel *panel)
+{
+ struct device *dev = &panel->dsi->dev;
+ int ret;
+
+ drm_panel_init(&panel->base);
+ panel->base.funcs = &hikey_panel_funcs;
+ panel->base.dev = dev;
+
+ ret = drm_panel_add(&panel->base);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void hikey_panel_del(struct hikey_panel *panel)
+{
+ if (panel->base.dev)
+ drm_panel_remove(&panel->base);
+}
+
+static int hikey_panel_parse_dt(struct hikey_panel *panel)
+{
+ struct device *dev = &panel->dsi->dev;
+
+ panel->gpio_pwr_en =
+ devm_gpiod_get_optional(dev, "pwr-en", GPIOD_OUT_HIGH);
+ if (IS_ERR(panel->gpio_pwr_en))
+ return PTR_ERR(panel->gpio_pwr_en);
+
+ panel->gpio_bl_en =
+ devm_gpiod_get_optional(dev, "bl-en", GPIOD_OUT_LOW);
+ if (IS_ERR(panel->gpio_bl_en))
+ return PTR_ERR(panel->gpio_bl_en);
+
+ panel->gpio_pwm =
+ devm_gpiod_get_optional(dev, "pwm", GPIOD_OUT_LOW);
+ if (IS_ERR(panel->gpio_pwm))
+ return PTR_ERR(panel->gpio_pwm);
+
+ return 0;
+}
+
+static int hikey_panel_attach_dsi(struct mipi_dsi_device *dsi)
+{
+ int ret;
+
+ dsi->phy_clock = 480000; /* in kHz */
+ dsi->lanes = 4;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+ MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_VIDEO_HSE |
+ MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM;
+
+ ret = mipi_dsi_attach(dsi);
+ if (ret) {
+ DRM_ERROR("failed to attach dsi to host\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hikey_panel_probe(struct mipi_dsi_device *dsi)
+{
+ struct device *dev = &dsi->dev;
+ struct hikey_panel *panel;
+ int ret;
+
+ DRM_INFO("hikey_panel_probe enter\n");
+
+ panel = devm_kzalloc(dev, sizeof(*panel), GFP_KERNEL);
+ if (!panel)
+ return -ENOMEM;
+
+ panel->dsi = dsi;
+ ret = hikey_panel_parse_dt(panel);
+ if (ret)
+ return ret;
+
+ ret = hikey_panel_add(panel);
+ if (ret)
+ return ret;
+
+ ret = hikey_panel_attach_dsi(dsi);
+ if (ret) {
+ hikey_panel_del(panel);
+ return ret;
+ }
+
+ mipi_dsi_set_drvdata(dsi, panel);
+
+ DRM_INFO("hikey_panel_probe exit\n");
+ return 0;
+}
+
+static int hikey_panel_remove(struct mipi_dsi_device *dsi)
+{
+ struct hikey_panel *panel = mipi_dsi_get_drvdata(dsi);
+ int ret;
+
+ ret = hikey_panel_disable(&panel->base);
+ if (ret < 0)
+ DRM_ERROR("failed to disable panel: %d\n", ret);
+
+ ret = mipi_dsi_detach(dsi);
+ if (ret < 0)
+ DRM_ERROR("failed to detach from DSI host: %d\n", ret);
+
+ drm_panel_detach(&panel->base);
+ hikey_panel_del(panel);
+
+ return 0;
+}
+
+static void hikey_panel_shutdown(struct mipi_dsi_device *dsi)
+{
+ struct hikey_panel *panel = mipi_dsi_get_drvdata(dsi);
+
+ hikey_panel_disable(&panel->base);
+}
+
+static const struct of_device_id panel_of_match[] = {
+ { .compatible = "innolux,n070icn-pb1", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, panel_of_match);
+
+static struct mipi_dsi_driver hikey_panel_driver = {
+ .driver = {
+ .name = "hikey-lcd-panel",
+ .of_match_table = panel_of_match,
+ },
+ .probe = hikey_panel_probe,
+ .remove = hikey_panel_remove,
+ .shutdown = hikey_panel_shutdown,
+};
+module_mipi_dsi_driver(hikey_panel_driver);
+
+MODULE_AUTHOR("support@lemaker.org");
+MODULE_DESCRIPTION("INNOLUX N070ICN-PB1 (800x1280) video mode panel driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index de7fbbb374cd..6974a99034c3 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -271,6 +271,17 @@ static void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable)
enable ? "en" : "dis");
}
+static unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev)
+{
+ /*
+ * Clock is not necessary if we got LCNT/HCNT values directly from
+ * the platform code.
+ */
+ if (WARN_ON_ONCE(!dev->get_clk_rate_khz))
+ return 0;
+ return dev->get_clk_rate_khz(dev);
+}
+
/**
* i2c_dw_init() - initialize the designware i2c master hardware
* @dev: device private data
@@ -281,7 +292,6 @@ static void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable)
*/
int i2c_dw_init(struct dw_i2c_dev *dev)
{
- u32 input_clock_khz;
u32 hcnt, lcnt;
u32 reg;
u32 sda_falling_time, scl_falling_time;
@@ -295,8 +305,6 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
}
}
- input_clock_khz = dev->get_clk_rate_khz(dev);
-
reg = dw_readl(dev, DW_IC_COMP_TYPE);
if (reg == ___constant_swab32(DW_IC_COMP_TYPE_VALUE)) {
/* Configure register endianess access */
@@ -325,12 +333,12 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
hcnt = dev->ss_hcnt;
lcnt = dev->ss_lcnt;
} else {
- hcnt = i2c_dw_scl_hcnt(input_clock_khz,
+ hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
4000, /* tHD;STA = tHIGH = 4.0 us */
sda_falling_time,
0, /* 0: DW default, 1: Ideal */
0); /* No offset */
- lcnt = i2c_dw_scl_lcnt(input_clock_khz,
+ lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
4700, /* tLOW = 4.7 us */
scl_falling_time,
0); /* No offset */
@@ -344,12 +352,12 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
hcnt = dev->fs_hcnt;
lcnt = dev->fs_lcnt;
} else {
- hcnt = i2c_dw_scl_hcnt(input_clock_khz,
+ hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
600, /* tHD;STA = tHIGH = 0.6 us */
sda_falling_time,
0, /* 0: DW default, 1: Ideal */
0); /* No offset */
- lcnt = i2c_dw_scl_lcnt(input_clock_khz,
+ lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
1300, /* tLOW = 1.3 us */
scl_falling_time,
0); /* No offset */
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 6b00061c3746..9e094478b9b3 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -209,16 +209,17 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
DW_IC_CON_RESTART_EN | DW_IC_CON_SPEED_FAST;
dev->clk = devm_clk_get(&pdev->dev, NULL);
- dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
- if (IS_ERR(dev->clk))
- return PTR_ERR(dev->clk);
- clk_prepare_enable(dev->clk);
+ if (!IS_ERR(dev->clk)) {
+ r = clk_prepare_enable(dev->clk);
+ if (r)
+ return r;
- if (!dev->sda_hold_time && ht) {
- u32 ic_clk = dev->get_clk_rate_khz(dev);
+ dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
- dev->sda_hold_time = div_u64((u64)ic_clk * ht + 500000,
- 1000000);
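+ /*
+ * sda_hold_time is given in ns; convert it to ic_clk cycles,
+ * rounding to the nearest cycle (the clock rate is in kHz,
+ * hence the +500000 before dividing by 10^6).
+ */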
+ if (!dev->sda_hold_time && ht)
+ dev->sda_hold_time = div_u64(
+ (u64)dev->get_clk_rate_khz(dev) * ht + 500000,
+ 1000000);
}
if (!dev->tx_fifo_depth) {
@@ -235,18 +236,26 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev));
adap->dev.of_node = pdev->dev.of_node;
- if (dev->pm_runtime_disabled) {
- pm_runtime_forbid(&pdev->dev);
- } else {
- pm_runtime_set_autosuspend_delay(&pdev->dev, 1000);
- pm_runtime_use_autosuspend(&pdev->dev);
- pm_runtime_set_active(&pdev->dev);
- pm_runtime_enable(&pdev->dev);
- }
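+ /*
+ * Hold a runtime PM reference for the rest of probe; it is
+ * dropped at the end of the function.  If runtime PM is
+ * disabled for this device, a second noresume reference keeps
+ * the device permanently active.
+ */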
+ pm_runtime_get_noresume(&pdev->dev);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, 1000);
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+ if (dev->pm_runtime_disabled)
+ pm_runtime_get_noresume(&pdev->dev);
r = i2c_dw_probe(dev);
- if (r && !dev->pm_runtime_disabled)
+ if (r) {
+ if (!IS_ERR(dev->clk))
+ clk_disable_unprepare(dev->clk);
pm_runtime_disable(&pdev->dev);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ if (dev->pm_runtime_disabled)
+ pm_runtime_put_noidle(&pdev->dev);
+ }
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put(&pdev->dev);
return r;
}
@@ -261,10 +270,16 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
i2c_dw_disable(dev);
+ if (!IS_ERR(dev->clk))
+ clk_disable_unprepare(dev->clk);
+
+ pm_runtime_disable(&pdev->dev);
pm_runtime_dont_use_autosuspend(&pdev->dev);
- pm_runtime_put_sync(&pdev->dev);
- if (!dev->pm_runtime_disabled)
- pm_runtime_disable(&pdev->dev);
+ if (dev->pm_runtime_disabled)
+ pm_runtime_put_noidle(&pdev->dev);
+
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
return 0;
}
@@ -277,52 +292,47 @@ static const struct of_device_id dw_i2c_of_match[] = {
MODULE_DEVICE_TABLE(of, dw_i2c_of_match);
#endif
-#ifdef CONFIG_PM_SLEEP
-static int dw_i2c_plat_prepare(struct device *dev)
+#ifdef CONFIG_PM
+static int i2c_dw_plat_prepare_clk(struct dw_i2c_dev *i_dev, bool prepare)
{
- return pm_runtime_suspended(dev);
-}
+ if (IS_ERR(i_dev->clk))
+ return PTR_ERR(i_dev->clk);
-static void dw_i2c_plat_complete(struct device *dev)
-{
- if (dev->power.direct_complete)
- pm_request_resume(dev);
+ if (prepare)
+ return clk_prepare_enable(i_dev->clk);
+
+ clk_disable_unprepare(i_dev->clk);
+ return 0;
}
-#else
-#define dw_i2c_plat_prepare NULL
-#define dw_i2c_plat_complete NULL
-#endif
-#ifdef CONFIG_PM
-static int dw_i2c_plat_suspend(struct device *dev)
+static int dw_i2c_plat_runtime_suspend(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev);
i2c_dw_disable(i_dev);
- clk_disable_unprepare(i_dev->clk);
+ i2c_dw_plat_prepare_clk(i_dev, false);
return 0;
}
-static int dw_i2c_plat_resume(struct device *dev)
+static int dw_i2c_plat_runtime_resume(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev);
- clk_prepare_enable(i_dev->clk);
-
- if (!i_dev->pm_runtime_disabled)
- i2c_dw_init(i_dev);
+ i2c_dw_plat_prepare_clk(i_dev, true);
+ i2c_dw_init(i_dev);
return 0;
}
static const struct dev_pm_ops dw_i2c_dev_pm_ops = {
- .prepare = dw_i2c_plat_prepare,
- .complete = dw_i2c_plat_complete,
- SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume)
- SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend,
+ dw_i2c_plat_runtime_resume,
+ NULL)
};
#define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops)
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index ae33da7ab51f..8e53f8e9aae9 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -1106,4 +1106,16 @@ config TOUCHSCREEN_ROHM_BU21023
To compile this driver as a module, choose M here: the
module will be called bu21023_ts.
+config TOUCHSCREEN_FT5X0X
+ tristate "FocalTech ft5x0x TouchScreen driver"
+ depends on I2C
+ help
+ Say Y here to support the FocalTech ft5x0x touchscreen on
+ the HiKey development board.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called ft5x06_ts.
+
endif
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index cbaa6abb08da..1b9c5f36094c 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -91,3 +91,6 @@ obj-$(CONFIG_TOUCHSCREEN_TPS6507X) += tps6507x-ts.o
obj-$(CONFIG_TOUCHSCREEN_ZFORCE) += zforce_ts.o
obj-$(CONFIG_TOUCHSCREEN_COLIBRI_VF50) += colibri-vf50-ts.o
obj-$(CONFIG_TOUCHSCREEN_ROHM_BU21023) += rohm_bu21023.o
+
+obj-$(CONFIG_TOUCHSCREEN_FT5X0X) += ft5x06_ts.o
+
diff --git a/drivers/input/touchscreen/ft5x06_ts.c b/drivers/input/touchscreen/ft5x06_ts.c
new file mode 100644
index 000000000000..a4f08475ab64
--- /dev/null
+++ b/drivers/input/touchscreen/ft5x06_ts.c
@@ -0,0 +1,498 @@
+/* drivers/input/touchscreen/ft5x06_ts.c
+ *
+ * FocalTech ft5x0x TouchScreen driver.
+ *
+ * Copyright (c) 2010 Focal tech Ltd.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/ft5x06_ts.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/semaphore.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#define FT5X0X_DEBUG 1
+/*#define SUPPORT_MULTI_TOUCH 1 */
+
+struct ts_event {
+ u16 au16_x[CFG_MAX_TOUCH_POINTS]; /*x coordinate */
+ u16 au16_y[CFG_MAX_TOUCH_POINTS]; /*y coordinate */
+ /* touch event:
+ * 0 -- down;
+ * 1 -- up;
+ * 2 -- contact
+ */
+ u8 au8_touch_event[CFG_MAX_TOUCH_POINTS];
+ u8 au8_finger_id[CFG_MAX_TOUCH_POINTS]; /*touch ID */
+ u16 pressure;
+ u8 touch_point;
+};
+
+struct ft5x0x_ts_data {
+ unsigned int irq;
+ unsigned int x_max;
+ unsigned int y_max;
+ struct i2c_client *client;
+ struct input_dev *input_dev;
+ struct ts_event event;
+ struct ft5x0x_platform_data *pdata;
+#ifdef CONFIG_PM
+ struct early_suspend *early_suspend;
+#endif
+};
+
+#define FTS_POINT_UP 0x01
+#define FTS_POINT_DOWN 0x00
+#define FTS_POINT_CONTACT 0x02
+
+#ifdef CONFIG_DRM_ICN_6201
+#define FT5X06_X_MAX 1024
+#define FT5X06_Y_MAX 600
+#else
+#define FT5X06_X_MAX 800
+#define FT5X06_Y_MAX 1280
+#endif
+
+
+/*
+ * ft5x0x_i2c_Read - write then read data over i2c
+ * @client: handle of i2c
+ * @writebuf: data that will be written to the slave
+ * @writelen: how many bytes to write
+ * @readbuf: where to store data read from the slave
+ * @readlen: how many bytes to read
+ *
+ * Returns negative errno, else the number of messages executed.
+ */
+int ft5x0x_i2c_Read(struct i2c_client *client, char *writebuf,
+ int writelen, char *readbuf, int readlen)
+{
+ int ret;
+
+ if (writelen > 0) {
+ struct i2c_msg msgs[] = {
+ {
+ .addr = client->addr,
+ .flags = 0,
+ .len = writelen,
+ .buf = writebuf,
+ },
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = readlen,
+ .buf = readbuf,
+ },
+ };
+ ret = i2c_transfer(client->adapter, msgs, 2);
+ if (ret < 0)
+ dev_err(&client->dev, "%s: i2c read error.\n",
+ __func__);
+ } else {
+ struct i2c_msg msgs[] = {
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = readlen,
+ .buf = readbuf,
+ },
+ };
+ ret = i2c_transfer(client->adapter, msgs, 1);
+ if (ret < 0)
+ dev_err(&client->dev, "%s:i2c read error.\n", __func__);
+ }
+ return ret;
+}
+
+/*write data by i2c*/
+int ft5x0x_i2c_Write(struct i2c_client *client, char *writebuf, int writelen)
+{
+ int ret;
+ struct i2c_msg msgs[] = {
+ {
+ .addr = client->addr,
+ .flags = 0,
+ .len = writelen,
+ .buf = writebuf,
+ },
+ };
+ ret = i2c_transfer(client->adapter, msgs, 1);
+ if (ret < 0)
+ dev_err(&client->dev, "%s: i2c write error.\n", __func__);
+ return ret;
+}
+
+/*release the point*/
+static void ft5x0x_ts_release(struct ft5x0x_ts_data *data)
+{
+#ifdef SUPPORT_MULTI_TOUCH
+ input_report_abs(data->input_dev, ABS_MT_TOUCH_MAJOR, 0);
+#else
+ input_report_abs(data->input_dev, ABS_PRESSURE, 0);
+#endif
+ input_sync(data->input_dev);
+}
+
+/* report the point information */
+static void ft5x0x_report_value(struct ft5x0x_ts_data *data)
+{
+ struct ts_event *event = &data->event;
+ int i = 0;
+
+ for (i = 0; i < event->touch_point; i++) {
+ /* LCD view area */
+#ifdef SUPPORT_MULTI_TOUCH
+ if (event->au16_x[i] < data->x_max
+ && event->au16_y[i] < data->y_max) {
+ input_report_abs(data->input_dev, ABS_MT_POSITION_X,
+ event->au16_x[i]);
+ input_report_abs(data->input_dev, ABS_MT_POSITION_Y,
+ event->au16_y[i]);
+ input_report_abs(data->input_dev, ABS_MT_PRESSURE,
+ event->pressure);
+ input_report_abs(data->input_dev, ABS_MT_TRACKING_ID,
+ event->au8_finger_id[i]);
+ if (event->au8_touch_event[i] == FTS_POINT_DOWN
+ || event->au8_touch_event[i] == FTS_POINT_CONTACT) {
+ input_report_abs(data->input_dev,
+ ABS_MT_TOUCH_MAJOR, event->pressure);
+ input_report_abs(data->input_dev,
+ ABS_MT_PRESSURE, event->pressure);
+ input_report_key(data->input_dev,
+ BTN_TOUCH, 1);
+ } else {
+ input_report_abs(data->input_dev,
+ ABS_MT_TOUCH_MAJOR, 0);
+ input_report_abs(data->input_dev,
+ ABS_MT_PRESSURE, 0);
+ input_report_key(data->input_dev,
+ BTN_TOUCH, 0);
+ }
+ }
+ input_mt_sync(data->input_dev);
+#else
+ if (event->au16_x[i] < data->x_max
+ && event->au16_y[i] < data->y_max) {
+ input_report_abs(data->input_dev, ABS_X,
+ event->au16_x[i]);
+ input_report_abs(data->input_dev, ABS_Y,
+ event->au16_y[i]);
+ if (event->au8_touch_event[i] == FTS_POINT_DOWN
+ || event->au8_touch_event[i] == FTS_POINT_CONTACT) {
+ input_report_abs(data->input_dev,
+ ABS_PRESSURE, 1);
+ input_report_key(data->input_dev,
+ BTN_TOUCH, 1);
+ } else {
+ input_report_abs(data->input_dev,
+ ABS_PRESSURE, 0);
+ input_report_key(data->input_dev,
+ BTN_TOUCH, 0);
+ }
+ }
+
+ input_sync(data->input_dev);
+#endif
+ }
+ input_sync(data->input_dev);
+
+ if (event->touch_point == 0)
+ ft5x0x_ts_release(data);
+
+}
+
+/*Read touch point information when the interrupt is asserted.*/
+static int ft5x0x_read_Touchdata(struct ft5x0x_ts_data *data)
+{
+ struct ts_event *event = &data->event;
+ u8 buf[POINT_READ_BUF] = { 0 };
+ int ret = -1;
+ int i = 0;
+ u8 pointid = FT_MAX_ID;
+
+ ret = ft5x0x_i2c_Read(data->client, buf, 1, buf, POINT_READ_BUF);
+ if (ret < 0) {
+ dev_err(&data->client->dev, "%s read touchdata failed.\n",
+ __func__);
+ return ret;
+ }
+ memset(event, 0, sizeof(struct ts_event));
+
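+ /*
+ * Each point occupies FT_TOUCH_STEP bytes: the low nibbles of the
+ * X/Y high bytes extend the coordinates to 12 bits, while the top
+ * bits carry the event type (>> 6) and the finger id (>> 4).
+ */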
+ event->touch_point = 0;
+ for (i = 0; i < CFG_MAX_TOUCH_POINTS; i++) {
+ pointid = (buf[FT_TOUCH_ID_POS + FT_TOUCH_STEP * i]) >> 4;
+ if (pointid >= FT_MAX_ID)
+ break;
+ else
+ event->touch_point++;
+ event->au16_x[i] =
+ (s16)(buf[FT_TOUCH_X_H_POS + FT_TOUCH_STEP * i] & 0x0F)
+ << 8 | (s16)buf[FT_TOUCH_X_L_POS + FT_TOUCH_STEP * i];
+ event->au16_y[i] =
+ (s16)(buf[FT_TOUCH_Y_H_POS + FT_TOUCH_STEP * i] & 0x0F)
+ << 8 | (s16)buf[FT_TOUCH_Y_L_POS + FT_TOUCH_STEP * i];
+ event->au8_touch_event[i] =
+ buf[FT_TOUCH_EVENT_POS + FT_TOUCH_STEP * i] >> 6;
+ event->au8_finger_id[i] =
+ (buf[FT_TOUCH_ID_POS + FT_TOUCH_STEP * i]) >> 4;
+ }
+
+ event->pressure = FT_PRESS;
+
+ return 0;
+}
+
+
+/*
+ * The ft5x0x device signals the host with a falling-edge interrupt;
+ * touch data is read and reported when the interrupt is asserted.
+ */
+static irqreturn_t ft5x0x_ts_interrupt(int irq, void *dev_id)
+{
+ struct ft5x0x_ts_data *ft5x0x_ts = dev_id;
+ int ret = 0;
+
+ ret = ft5x0x_read_Touchdata(ft5x0x_ts);
+ if (ret == 0)
+ ft5x0x_report_value(ft5x0x_ts);
+
+ return IRQ_HANDLED;
+}
+
+static int ft5x0x_ts_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct ft5x0x_ts_data *ft5x0x_ts;
+ struct input_dev *input_dev;
+ int err = 0;
+ unsigned char uc_reg_value;
+ unsigned char uc_reg_addr;
+ /*unsigned int irq_no;*/
+ struct device *dev = &client->dev;
+ struct gpio_desc *gpio_rst;
+
+
+ gpio_rst = devm_gpiod_get_optional(dev, "rst", GPIOD_OUT_HIGH);
+ if (IS_ERR(gpio_rst))
+ return PTR_ERR(gpio_rst);
+
+ if (gpio_rst) {
+ mdelay(5);
+ gpiod_set_value_cansleep(gpio_rst, 0);
+ mdelay(5);
+ gpiod_set_value_cansleep(gpio_rst, 1);
+ mdelay(50);
+ gpiod_set_value_cansleep(gpio_rst, 0);
+
+ }
+
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+ err = -ENODEV;
+ goto exit_check_functionality_failed;
+ }
+
+ ft5x0x_ts = devm_kzalloc(dev, sizeof(*ft5x0x_ts), GFP_KERNEL);
+
+ if (!ft5x0x_ts) {
+ err = -ENOMEM;
+ goto exit_alloc_data_failed;
+ }
+
+ i2c_set_clientdata(client, ft5x0x_ts);
+
+ ft5x0x_ts->irq = client->irq;
+ ft5x0x_ts->client = client;
+
+ ft5x0x_ts->x_max = FT5X06_X_MAX;
+ ft5x0x_ts->y_max = FT5X06_Y_MAX;
+
+ if (client->irq) {
+ err = devm_request_threaded_irq(dev, client->irq, NULL,
+ ft5x0x_ts_interrupt,
+ IRQF_ONESHOT, dev_name(dev),
+ ft5x0x_ts);
+ if (err)
+ goto exit_irq_request_failed;
+ }
+
+
+ input_dev = input_allocate_device();
+ if (!input_dev) {
+ err = -ENOMEM;
+ dev_err(&client->dev, "failed to allocate input device\n");
+ goto exit_input_dev_alloc_failed;
+ }
+
+ ft5x0x_ts->input_dev = input_dev;
+
+#ifdef SUPPORT_MULTI_TOUCH
+ set_bit(ABS_MT_TOUCH_MAJOR, input_dev->absbit);
+ set_bit(ABS_MT_POSITION_X, input_dev->absbit);
+ set_bit(ABS_MT_POSITION_Y, input_dev->absbit);
+ set_bit(ABS_MT_PRESSURE, input_dev->absbit);
+
+ input_set_abs_params(input_dev, ABS_MT_POSITION_X, 0,
+ ft5x0x_ts->x_max, 0, 0);
+ input_set_abs_params(input_dev, ABS_MT_POSITION_Y, 0,
+ ft5x0x_ts->y_max, 0, 0);
+ input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0,
+ PRESS_MAX, 0, 0);
+ input_set_abs_params(input_dev, ABS_MT_PRESSURE, 0,
+ PRESS_MAX, 0, 0);
+ input_set_abs_params(input_dev, ABS_MT_TRACKING_ID, 0,
+ CFG_MAX_TOUCH_POINTS, 0, 0);
+#else
+ set_bit(BTN_TOUCH, input_dev->keybit);
+ input_set_abs_params(input_dev, ABS_X, 0, FT5X06_X_MAX, 0, 0);
+ input_set_abs_params(input_dev, ABS_Y, 0, FT5X06_Y_MAX, 0, 0);
+ input_set_abs_params(input_dev, ABS_PRESSURE, 0, 1, 0, 0);
+
+#endif
+
+ set_bit(EV_KEY, input_dev->evbit);
+ set_bit(EV_ABS, input_dev->evbit);
+
+ input_dev->name = FT5X0X_NAME;
+ err = input_register_device(input_dev);
+ if (err) {
+ dev_err(&client->dev,
+ "%s: failed to register: %s\n", __func__,
+ dev_name(&client->dev));
+ goto exit_input_register_device_failed;
+ }
+
+ /* make sure the touch controller has finished its startup process */
+ msleep(150);
+
+ /* get some register information */
+ uc_reg_addr = FT5x0x_REG_FW_VER;
+ ft5x0x_i2c_Read(client, &uc_reg_addr, 1, &uc_reg_value, 1);
+ dev_dbg(&client->dev, "[FTS] Firmware version = 0x%x\n", uc_reg_value);
+
+ uc_reg_addr = FT5x0x_REG_POINT_RATE;
+ ft5x0x_i2c_Read(client, &uc_reg_addr, 1, &uc_reg_value, 1);
+ dev_dbg(&client->dev, "[FTS] report rate is %dHz.\n",
+ uc_reg_value * 10);
+
+ uc_reg_addr = FT5X0X_REG_THGROUP;
+ ft5x0x_i2c_Read(client, &uc_reg_addr, 1, &uc_reg_value, 1);
+ dev_dbg(&client->dev, "[FTS] touch threshold is %d.\n",
+ uc_reg_value * 4);
+
+#ifdef FT5X0X_DEBUG
+ dev_dbg(&client->dev, "ft5x0x_ts_probe succeeded!\n");
+#endif
+
+ return 0;
+
+exit_input_register_device_failed:
+ input_free_device(input_dev);
+
+exit_input_dev_alloc_failed:
+
+exit_irq_request_failed:
+ i2c_set_clientdata(client, NULL);
+
+exit_alloc_data_failed:
+exit_check_functionality_failed:
+ return err;
+}
+
+static int ft5x0x_ts_remove(struct i2c_client *client)
+{
+ struct ft5x0x_ts_data *ft5x0x_ts;
+
+ ft5x0x_ts = i2c_get_clientdata(client);
+ input_unregister_device(ft5x0x_ts->input_dev);
+ i2c_set_clientdata(client, NULL);
+ return 0;
+}
+
+static const struct i2c_device_id ft5x0x_ts_id[] = {
+ {FT5X0X_NAME, 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, ft5x0x_ts_id);
+
+static const struct of_device_id lemaker_tp_of_match[] = {
+ { .compatible = "FocalTech,ft5506", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, lemaker_tp_of_match);
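+
+/*
+ * Illustrative device tree node for this driver (an assumed example, shown
+ * for reference only; the "rst" GPIO name matches the probe code above and
+ * 0x38 is a typical FocalTech slave address):
+ *
+ * touchscreen@38 {
+ * compatible = "FocalTech,ft5506";
+ * reg = <0x38>;
+ * rst-gpios = <&gpio_chip 0 0>;
+ * };
+ */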
+
+static struct i2c_driver ft5x0x_ts_driver = {
+ .probe = ft5x0x_ts_probe,
+ .remove = ft5x0x_ts_remove,
+ .id_table = ft5x0x_ts_id,
+ .driver = {
+ .name = FT5X0X_NAME,
+ .owner = THIS_MODULE,
+ .of_match_table = lemaker_tp_of_match,
+ },
+};
+
+static int __init ft5x0x_ts_init(void)
+{
+ int ret;
+
+ ret = i2c_add_driver(&ft5x0x_ts_driver);
+ if (ret) {
+ pr_err("Adding ft5x0x driver failed, error = %d\n", ret);
+ } else {
+ pr_info("Successfully added driver %s\n",
+ ft5x0x_ts_driver.driver.name);
+ }
+
+ return ret;
+}
+
+static void __exit ft5x0x_ts_exit(void)
+{
+ i2c_del_driver(&ft5x0x_ts_driver);
+}
+
+module_init(ft5x0x_ts_init);
+module_exit(ft5x0x_ts_exit);
+
+MODULE_AUTHOR("<luowj>");
+MODULE_DESCRIPTION("FocalTech ft5x0x TouchScreen driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index 546d05f4358a..d55d656c8d5a 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -78,6 +78,14 @@ config STI_MBOX
Mailbox implementation for STMicroelectonics family chips with
hardware for interprocessor communication.
+config HI6220_MBOX
+ tristate "Hi6220 Mailbox"
+ depends on ARCH_HISI
+ help
+ An implementation of the Hi6220 mailbox. It is used to send messages
+ between the application processors and the MCU. Say Y here if you want
+ to build the Hi6220 mailbox controller driver.
+
config MAILBOX_TEST
tristate "Mailbox Test Client"
depends on OF
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 92435ef11f26..565adda57e52 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -17,3 +17,5 @@ obj-$(CONFIG_ALTERA_MBOX) += mailbox-altera.o
obj-$(CONFIG_BCM2835_MBOX) += bcm2835-mailbox.o
obj-$(CONFIG_STI_MBOX) += mailbox-sti.o
+
+obj-$(CONFIG_HI6220_MBOX) += hi6220-mailbox.o
diff --git a/drivers/mailbox/hi6220-mailbox.c b/drivers/mailbox/hi6220-mailbox.c
new file mode 100644
index 000000000000..c0e19d574a57
--- /dev/null
+++ b/drivers/mailbox/hi6220-mailbox.c
@@ -0,0 +1,371 @@
+/*
+ * Hisilicon's Hi6220 mailbox driver
+ *
+ * Copyright (c) 2015 Hisilicon Limited.
+ * Copyright (c) 2015 Linaro Limited.
+ *
+ * Author: Leo Yan <leo.yan@linaro.org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kfifo.h>
+#include <linux/mailbox_controller.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define MBOX_CHAN_MAX 32
+#define MBOX_CHAN_NUM 2
+
+#define MBOX_RX 0x0
+#define MBOX_TX 0x1
+
+/* Mailbox message length: 8 words */
+#define MBOX_MSG_LEN 8
+
+/* Mailbox Registers */
+#define MBOX_OFF(m) (0x40 * (m))
+#define MBOX_MODE_REG(m) (MBOX_OFF(m) + 0x0)
+#define MBOX_DATA_REG(m) (MBOX_OFF(m) + 0x4)
+
+#define MBOX_STATE_MASK (0xF << 4)
+#define MBOX_STATE_IDLE (0x1 << 4)
+#define MBOX_STATE_TX (0x2 << 4)
+#define MBOX_STATE_RX (0x4 << 4)
+#define MBOX_STATE_ACK (0x8 << 4)
+#define MBOX_ACK_CONFIG_MASK (0x1 << 0)
+#define MBOX_ACK_AUTOMATIC (0x1 << 0)
+#define MBOX_ACK_IRQ (0x0 << 0)
+
+/* IPC registers */
+#define ACK_INT_RAW_REG(i) ((i) + 0x400)
+#define ACK_INT_MSK_REG(i) ((i) + 0x404)
+#define ACK_INT_STAT_REG(i) ((i) + 0x408)
+#define ACK_INT_CLR_REG(i) ((i) + 0x40c)
+#define ACK_INT_ENA_REG(i) ((i) + 0x500)
+#define ACK_INT_DIS_REG(i) ((i) + 0x504)
+#define DST_INT_RAW_REG(i) ((i) + 0x420)
+
+
+struct hi6220_mbox_chan {
+ /*
+ * Description for channel's hardware info:
+ * - direction: tx or rx
+ * - dst irq: peer core's irq number
+ * - ack irq: local irq number
+ * - slot number
+ */
+ unsigned int dir, dst_irq, ack_irq;
+ unsigned int slot;
+
+ struct hi6220_mbox *parent;
+};
+
+struct hi6220_mbox {
+ struct device *dev;
+
+ int irq;
+
+ /* flag of enabling tx's irq mode */
+ bool tx_irq_mode;
+
+ /* region for ipc event */
+ void __iomem *ipc;
+
+ /* region for mailbox */
+ void __iomem *base;
+
+ unsigned int chan_num;
+ struct hi6220_mbox_chan *mchan;
+
+ void *irq_map_chan[MBOX_CHAN_MAX];
+ struct mbox_chan *chan;
+ struct mbox_controller controller;
+};
+
+static void mbox_set_state(struct hi6220_mbox *mbox,
+ unsigned int slot, u32 val)
+{
+ u32 status;
+
+ status = readl(mbox->base + MBOX_MODE_REG(slot));
+ status = (status & ~MBOX_STATE_MASK) | val;
+ writel(status, mbox->base + MBOX_MODE_REG(slot));
+}
+
+static void mbox_set_mode(struct hi6220_mbox *mbox,
+ unsigned int slot, u32 val)
+{
+ u32 mode;
+
+ mode = readl(mbox->base + MBOX_MODE_REG(slot));
+ mode = (mode & ~MBOX_ACK_CONFIG_MASK) | val;
+ writel(mode, mbox->base + MBOX_MODE_REG(slot));
+}
+
+static bool hi6220_mbox_last_tx_done(struct mbox_chan *chan)
+{
+ struct hi6220_mbox_chan *mchan = chan->con_priv;
+ struct hi6220_mbox *mbox = mchan->parent;
+ u32 state;
+
+ /* last_tx_done is only used in polling mode */
+ BUG_ON(mbox->tx_irq_mode);
+
+ state = readl(mbox->base + MBOX_MODE_REG(mchan->slot));
+ return ((state & MBOX_STATE_MASK) == MBOX_STATE_IDLE);
+}
+
+static int hi6220_mbox_send_data(struct mbox_chan *chan, void *msg)
+{
+ struct hi6220_mbox_chan *mchan = chan->con_priv;
+ struct hi6220_mbox *mbox = mchan->parent;
+ unsigned int slot = mchan->slot;
+ u32 *buf = msg;
+ int i;
+
+ mbox_set_state(mbox, slot, MBOX_STATE_TX);
+
+ if (mbox->tx_irq_mode)
+ mbox_set_mode(mbox, slot, MBOX_ACK_IRQ);
+ else
+ mbox_set_mode(mbox, slot, MBOX_ACK_AUTOMATIC);
+
+ for (i = 0; i < MBOX_MSG_LEN; i++)
+ writel(buf[i], mbox->base + MBOX_DATA_REG(slot) + i * 4);
+
+ /* trigger remote request */
+ writel(BIT(mchan->dst_irq), DST_INT_RAW_REG(mbox->ipc));
+ return 0;
+}
+
+static irqreturn_t hi6220_mbox_interrupt(int irq, void *p)
+{
+ struct hi6220_mbox *mbox = p;
+ struct hi6220_mbox_chan *mchan;
+ struct mbox_chan *chan;
+ unsigned int state, intr_bit, i;
+ u32 msg[MBOX_MSG_LEN];
+
+ state = readl(ACK_INT_STAT_REG(mbox->ipc));
+ if (!state) {
+ dev_warn(mbox->dev, "%s: spurious interrupt\n",
+ __func__);
+ return IRQ_HANDLED;
+ }
+
+ while (state) {
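+ /* service the lowest pending ack bit, then strip it from state */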
+ intr_bit = __ffs(state);
+ state &= (state - 1);
+
+ chan = mbox->irq_map_chan[intr_bit];
+ if (!chan) {
+ dev_warn(mbox->dev, "%s: unexpected irq vector %d\n",
+ __func__, intr_bit);
+ continue;
+ }
+
+ mchan = chan->con_priv;
+ if (mchan->dir == MBOX_TX)
+ mbox_chan_txdone(chan, 0);
+ else {
+ for (i = 0; i < MBOX_MSG_LEN; i++)
+ msg[i] = readl(mbox->base +
+ MBOX_DATA_REG(mchan->slot) + i * 4);
+
+ mbox_chan_received_data(chan, (void *)msg);
+ }
+
+ /* clear IRQ source */
+ writel(BIT(mchan->ack_irq), ACK_INT_CLR_REG(mbox->ipc));
+ mbox_set_state(mbox, mchan->slot, MBOX_STATE_IDLE);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int hi6220_mbox_startup(struct mbox_chan *chan)
+{
+ struct hi6220_mbox_chan *mchan = chan->con_priv;
+ struct hi6220_mbox *mbox = mchan->parent;
+
+ mbox->irq_map_chan[mchan->ack_irq] = (void *)chan;
+
+ /* enable interrupt */
+ writel(BIT(mchan->ack_irq), ACK_INT_ENA_REG(mbox->ipc));
+ return 0;
+}
+
+static void hi6220_mbox_shutdown(struct mbox_chan *chan)
+{
+ struct hi6220_mbox_chan *mchan = chan->con_priv;
+ struct hi6220_mbox *mbox = mchan->parent;
+
+ /* disable interrupt */
+ writel(BIT(mchan->ack_irq), ACK_INT_DIS_REG(mbox->ipc));
+ mbox->irq_map_chan[mchan->ack_irq] = NULL;
+}
+
+static const struct mbox_chan_ops hi6220_mbox_chan_ops = {
+ .send_data = hi6220_mbox_send_data,
+ .startup = hi6220_mbox_startup,
+ .shutdown = hi6220_mbox_shutdown,
+ .last_tx_done = hi6220_mbox_last_tx_done,
+};
+
+static void hi6220_mbox_init_hw(struct hi6220_mbox *mbox)
+{
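+ /* { dir, dst_irq, ack_irq } per channel; slot and parent are set below */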
+ struct hi6220_mbox_chan init_data[MBOX_CHAN_NUM] = {
+ { MBOX_RX, 1, 10 },
+ { MBOX_TX, 0, 11 },
+ };
+ struct hi6220_mbox_chan *mchan = mbox->mchan;
+ int i;
+
+ for (i = 0; i < MBOX_CHAN_NUM; i++) {
+ memcpy(&mchan[i], &init_data[i], sizeof(*mchan));
+ mchan[i].slot = i;
+ mchan[i].parent = mbox;
+ }
+
+ /* mask and clear all interrupt vectors */
+ writel(0x0, ACK_INT_MSK_REG(mbox->ipc));
+ writel(~0x0, ACK_INT_CLR_REG(mbox->ipc));
+
+ /* use interrupt for tx's ack */
+ mbox->tx_irq_mode = true;
+}
+
+static const struct of_device_id hi6220_mbox_of_match[] = {
+ { .compatible = "hisilicon,hi6220-mbox", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, hi6220_mbox_of_match);
+
+static int hi6220_mbox_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct hi6220_mbox *mbox;
+ struct resource *res;
+ int i, err;
+
+ mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL);
+ if (!mbox)
+ return -ENOMEM;
+
+ mbox->dev = dev;
+ mbox->chan_num = MBOX_CHAN_NUM;
+ mbox->mchan = devm_kzalloc(dev,
+ mbox->chan_num * sizeof(*mbox->mchan), GFP_KERNEL);
+ if (!mbox->mchan)
+ return -ENOMEM;
+
+ mbox->chan = devm_kzalloc(dev,
+ mbox->chan_num * sizeof(*mbox->chan), GFP_KERNEL);
+ if (!mbox->chan)
+ return -ENOMEM;
+
+ mbox->irq = platform_get_irq(pdev, 0);
+ if (mbox->irq < 0)
+ return mbox->irq;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ mbox->ipc = devm_ioremap_resource(dev, res);
+ if (IS_ERR(mbox->ipc)) {
+ dev_err(dev, "ioremap ipc failed\n");
+ return PTR_ERR(mbox->ipc);
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ mbox->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(mbox->base)) {
+ dev_err(dev, "ioremap buffer failed\n");
+ return PTR_ERR(mbox->base);
+ }
+
+ err = devm_request_irq(dev, mbox->irq, hi6220_mbox_interrupt, 0,
+ dev_name(dev), mbox);
+ if (err) {
+ dev_err(dev, "Failed to register a mailbox IRQ handler: %d\n",
+ err);
+ return -ENODEV;
+ }
+
+ /* init hardware parameters */
+ hi6220_mbox_init_hw(mbox);
+
+ for (i = 0; i < mbox->chan_num; i++) {
+ mbox->chan[i].con_priv = &mbox->mchan[i];
+ mbox->irq_map_chan[i] = NULL;
+ }
+
+ mbox->controller.dev = dev;
+ mbox->controller.chans = &mbox->chan[0];
+ mbox->controller.num_chans = mbox->chan_num;
+ mbox->controller.ops = &hi6220_mbox_chan_ops;
+
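+ /*
+ * In irq mode the ack interrupt signals tx completion; otherwise the
+ * core polls last_tx_done() every txpoll_period (5 ms).
+ */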
+ if (mbox->tx_irq_mode) {
+ mbox->controller.txdone_irq = true;
+ } else {
+ mbox->controller.txdone_poll = true;
+ mbox->controller.txpoll_period = 5;
+ }
+
+ err = mbox_controller_register(&mbox->controller);
+ if (err) {
+ dev_err(dev, "Failed to register mailbox %d\n", err);
+ return err;
+ }
+
+ platform_set_drvdata(pdev, mbox);
+ dev_info(dev, "Mailbox enabled\n");
+ return 0;
+}
+
+static int hi6220_mbox_remove(struct platform_device *pdev)
+{
+ struct hi6220_mbox *mbox = platform_get_drvdata(pdev);
+
+ mbox_controller_unregister(&mbox->controller);
+ return 0;
+}
+
+static struct platform_driver hi6220_mbox_driver = {
+ .driver = {
+ .name = "hi6220-mbox",
+ .owner = THIS_MODULE,
+ .of_match_table = hi6220_mbox_of_match,
+ },
+ .probe = hi6220_mbox_probe,
+ .remove = hi6220_mbox_remove,
+};
+
+static int __init hi6220_mbox_init(void)
+{
+ return platform_driver_register(&hi6220_mbox_driver);
+}
+core_initcall(hi6220_mbox_init);
+
+static void __exit hi6220_mbox_exit(void)
+{
+ platform_driver_unregister(&hi6220_mbox_driver);
+}
+module_exit(hi6220_mbox_exit);
+
+MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>");
+MODULE_DESCRIPTION("Hi6220 mailbox driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 7398262a2fab..0226aacb2f0c 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -284,6 +284,16 @@ config MFD_HI6421_PMIC
menus in order to enable them.
We communicate with the Hi6421 via memory-mapped I/O.
+config MFD_HI655X_PMIC
+ tristate "HiSilicon Hi655X series PMU/Codec IC"
+ depends on ARCH_HISI || COMPILE_TEST
+ depends on OF
+ select MFD_CORE
+ select REGMAP_MMIO
+ select REGMAP_IRQ
+ help
+ Select this option to enable the Hisilicon Hi655x series PMIC driver.
+
config HTC_EGPIO
bool "HTC EGPIO support"
depends on GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index a8b76b81b467..6a7b0e1fe6ba 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -186,6 +186,7 @@ obj-$(CONFIG_MFD_STW481X) += stw481x.o
obj-$(CONFIG_MFD_IPAQ_MICRO) += ipaq-micro.o
obj-$(CONFIG_MFD_MENF21BMC) += menf21bmc.o
obj-$(CONFIG_MFD_HI6421_PMIC) += hi6421-pmic-core.o
+obj-$(CONFIG_MFD_HI655X_PMIC) += hi655x-pmic.o
obj-$(CONFIG_MFD_DLN2) += dln2.o
obj-$(CONFIG_MFD_RT5033) += rt5033.o
obj-$(CONFIG_MFD_SKY81452) += sky81452.o
diff --git a/drivers/mfd/hi655x-pmic.c b/drivers/mfd/hi655x-pmic.c
new file mode 100644
index 000000000000..20607af75927
--- /dev/null
+++ b/drivers/mfd/hi655x-pmic.c
@@ -0,0 +1,176 @@
+/*
+ * Device driver for MFD hi655x PMIC
+ *
+ * Copyright (c) 2016 Hisilicon.
+ *
+ * Authors:
+ * Chen Feng <puck.chen@hisilicon.com>
+ * Fei Wang <w.f@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/hi655x-pmic.h>
+#include <linux/module.h>
+#include <linux/of_gpio.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+static const struct mfd_cell hi655x_pmic_devs[] = {
+ { .name = "hi655x-regulator", },
+};
+
+static const struct regmap_irq hi655x_irqs[] = {
+ { .reg_offset = 0, .mask = OTMP_D1R_INT },
+ { .reg_offset = 0, .mask = VSYS_2P5_R_INT },
+ { .reg_offset = 0, .mask = VSYS_UV_D3R_INT },
+ { .reg_offset = 0, .mask = VSYS_6P0_D200UR_INT },
+ { .reg_offset = 0, .mask = PWRON_D4SR_INT },
+ { .reg_offset = 0, .mask = PWRON_D20F_INT },
+ { .reg_offset = 0, .mask = PWRON_D20R_INT },
+ { .reg_offset = 0, .mask = RESERVE_INT },
+};
+
+static const struct of_device_id of_hi655x_pmic_child_match_tbl[] = {
+ { .compatible = "hisilicon,hi6552-regulator-pmic", },
+ { .compatible = "hisilicon,hi6552-powerkey", },
+ { .compatible = "hisilicon,hi6552-usbvbus", },
+ { .compatible = "hisilicon,hi6552-coul", },
+ { .compatible = "hisilicon,hi6552-pmu-rtc", },
+ { .compatible = "hisilicon,hi6552-pmic-mntn", },
+ { /* end */ }
+};
+
+static const struct regmap_irq_chip hi655x_irq_chip = {
+ .name = "hi655x-pmic",
+ .irqs = hi655x_irqs,
+ .num_regs = 1,
+ .num_irqs = ARRAY_SIZE(hi655x_irqs),
+ .status_base = HI655X_IRQ_STAT_BASE,
+ .ack_base = HI655X_IRQ_STAT_BASE,
+ .mask_base = HI655X_IRQ_MASK_BASE,
+};
+
+static const struct regmap_config hi655x_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = HI655X_STRIDE,
+ .val_bits = 8,
+ .max_register = HI655X_BUS_ADDR(0xFFF),
+};
+
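+/* Clear any latched PMIC interrupt status before the irq chip is registered. */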
+static void hi655x_local_irq_clear(struct regmap *map)
+{
+ int i;
+
+ regmap_write(map, HI655X_ANA_IRQM_BASE, HI655X_IRQ_CLR);
+ for (i = 0; i < HI655X_IRQ_ARRAY; i++) {
+ regmap_write(map, HI655X_IRQ_STAT_BASE + i * HI655X_STRIDE,
+ HI655X_IRQ_CLR);
+ }
+}
+
+static int hi655x_pmic_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct hi655x_pmic *pmic;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ void __iomem *base;
+
+ pmic = devm_kzalloc(dev, sizeof(*pmic), GFP_KERNEL);
+ if (!pmic)
+ return -ENOMEM;
+ pmic->dev = dev;
+
+ pmic->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!pmic->res)
+ return -ENOENT;
+
+ base = devm_ioremap_resource(dev, pmic->res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ pmic->regmap = devm_regmap_init_mmio_clk(dev, NULL, base,
+ &hi655x_regmap_config);
+ if (IS_ERR(pmic->regmap))
+ return PTR_ERR(pmic->regmap);
+
+ regmap_read(pmic->regmap, HI655X_BUS_ADDR(HI655X_VER_REG), &pmic->ver);
+ if ((pmic->ver < PMU_VER_START) || (pmic->ver > PMU_VER_END)) {
+ dev_warn(dev, "PMU version %d unsupported\n", pmic->ver);
+ return -EINVAL;
+ }
+
+ hi655x_local_irq_clear(pmic->regmap);
+
+ pmic->gpio = of_get_named_gpio(np, "pmic-gpios", 0);
+ if (!gpio_is_valid(pmic->gpio)) {
+ dev_err(dev, "Failed to get the pmic-gpios\n");
+ return -ENODEV;
+ }
+
+ ret = devm_gpio_request_one(dev, pmic->gpio, GPIOF_IN,
+ "hi655x_pmic_irq");
+ if (ret < 0) {
+ dev_err(dev, "Failed to request gpio %d ret = %d\n",
+ pmic->gpio, ret);
+ return ret;
+ }
+
+ ret = regmap_add_irq_chip(pmic->regmap, gpio_to_irq(pmic->gpio),
+ IRQF_TRIGGER_LOW | IRQF_NO_SUSPEND, 0,
+ &hi655x_irq_chip, &pmic->irq_data);
+ if (ret) {
+ dev_err(dev, "Failed to obtain 'hi655x_pmic_irq' %d\n", ret);
+ return ret;
+ }
+
+ platform_set_drvdata(pdev, pmic);
+
+ /* populate sub nodes */
+ of_platform_populate(np, of_hi655x_pmic_child_match_tbl, NULL, dev);
+
+ ret = mfd_add_devices(dev, PLATFORM_DEVID_AUTO, hi655x_pmic_devs,
+ ARRAY_SIZE(hi655x_pmic_devs), NULL, 0, NULL);
+ if (ret) {
+ dev_err(dev, "Failed to register device %d\n", ret);
+ regmap_del_irq_chip(gpio_to_irq(pmic->gpio), pmic->irq_data);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hi655x_pmic_remove(struct platform_device *pdev)
+{
+ struct hi655x_pmic *pmic = platform_get_drvdata(pdev);
+
+ regmap_del_irq_chip(gpio_to_irq(pmic->gpio), pmic->irq_data);
+ mfd_remove_devices(&pdev->dev);
+ return 0;
+}
+
+static const struct of_device_id hi655x_pmic_match[] = {
+ { .compatible = "hisilicon,hi655x-pmic", },
+ {},
+};
+
+static struct platform_driver hi655x_pmic_driver = {
+ .driver = {
+ .name = "hi655x-pmic",
+ .of_match_table = of_match_ptr(hi655x_pmic_match),
+ },
+ .probe = hi655x_pmic_probe,
+ .remove = hi655x_pmic_remove,
+};
+module_platform_driver(hi655x_pmic_driver);
+
+MODULE_AUTHOR("Chen Feng <puck.chen@hisilicon.com>");
+MODULE_DESCRIPTION("Hisilicon hi655x PMIC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 4bf7d50b1bc7..c4bbd7280e9d 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -466,6 +466,14 @@ config BMP085_SPI
To compile this driver as a module, choose M here: the
module will be called bmp085-spi.
+config HI6220_SYSCFG
+ bool "Hisilicon HI6220 System Configuration driver"
+ depends on ARCH_HISI
+ default y
+ help
+ The Hisilicon HI6220 uses a block of system-configuration registers to
+ enable or disable on-chip hosts, e.g. to deassert the UART host resets
+ so that the UARTs can be used.
+
config PCH_PHUB
tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB"
select GENERIC_NET_UTILS
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 537d7f3b78da..b84697a9010b 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
obj-$(CONFIG_BMP085) += bmp085.o
obj-$(CONFIG_BMP085_I2C) += bmp085-i2c.o
obj-$(CONFIG_BMP085_SPI) += bmp085-spi.o
+obj-$(CONFIG_HI6220_SYSCFG) += hi6220-sysconfig.o
obj-$(CONFIG_DUMMY_IRQ) += dummy-irq.o
obj-$(CONFIG_ICS932S401) += ics932s401.o
obj-$(CONFIG_LKDTM) += lkdtm.o
diff --git a/drivers/misc/hi6220-sysconfig.c b/drivers/misc/hi6220-sysconfig.c
new file mode 100644
index 000000000000..9b9d5d8bff19
--- /dev/null
+++ b/drivers/misc/hi6220-sysconfig.c
@@ -0,0 +1,72 @@
+/*
+ * On the Hisilicon Hi6220 SoC, the reset of some hosts (e.g. the UARTs)
+ * must be deasserted before they can be used; this driver handles that
+ * reset deassertion.
+ *
+ * Copyright (C) 2015 Hisilicon Ltd.
+ * Author: Bintian Wang <bintian.wang@huawei.com>
+ *
+ */
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#define RESET_OFFSET 0x334
+#define PCLK_OFFSET 0x230
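+/* PMU SSI registers are word addressed: byte offset = register index << 2 */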
+#define PMUSSI_REG_EX(pmu_base, reg_addr) (((reg_addr) << 2) + (char __iomem *)(pmu_base))
+
+static int __init hi6220_sysconf(void)
+{
+ void __iomem *base;
+ struct device_node *node;
+ void __iomem *base1;
+ struct device_node *node1;
+ unsigned char val;
+
+ node = of_find_compatible_node(NULL, NULL, "hisilicon,hi6220-sysctrl");
+ if (!node)
+ return -ENOENT;
+
+ base = of_iomap(node, 0);
+ if (!base) {
+ pr_err("hi6220: sysctrl reg iomap failed!\n");
+ return -ENOMEM;
+ }
+
+ /* Deassert UART1 reset and enable its pclk */
+ writel(BIT(5), base + RESET_OFFSET);
+ writel(BIT(5), base + PCLK_OFFSET);
+
+ /* Deassert UART2 reset and enable its pclk */
+ writel(BIT(6), base + RESET_OFFSET);
+ writel(BIT(6), base + PCLK_OFFSET);
+
+ /* Deassert UART3 reset and enable its pclk */
+ writel(BIT(7), base + RESET_OFFSET);
+ writel(BIT(7), base + PCLK_OFFSET);
+
+ /* Deassert UART4 reset and enable its pclk */
+ writel(BIT(8), base + RESET_OFFSET);
+ writel(BIT(8), base + PCLK_OFFSET);
+
+ iounmap(base);
+
+ node1 = of_find_compatible_node(NULL, NULL, "hisilicon,hi655x-pmic");
+ if (!node1)
+ return -ENOENT;
+
+ base1 = of_iomap(node1, 0);
+ if (!base1) {
+ pr_err("hi6220: pmic reg iomap failed!\n");
+ return -ENOMEM;
+ }
+
+ /* enable the clock for BT/WiFi */
+ val = readb(PMUSSI_REG_EX(base1, 0x1c));
+ val |= 0x40;
+ writeb(val, PMUSSI_REG_EX(base1, 0x1c));
+
+ iounmap(base1);
+ return 0;
+}
+postcore_initcall(hi6220_sysconf);
diff --git a/drivers/misc/ti-st/Kconfig b/drivers/misc/ti-st/Kconfig
index f34dcc514730..f2df2c7352e2 100644
--- a/drivers/misc/ti-st/Kconfig
+++ b/drivers/misc/ti-st/Kconfig
@@ -14,4 +14,12 @@ config TI_ST
are returned to relevant protocol drivers based on their
packet types.
+config ST_HCI
+ tristate "HCI TTY emulation driver for Bluetooth"
+ depends on TI_ST
+ help
+ This enables a TTY-like device emulation for HCI, used by user-space
+ Bluetooth stacks.
+ It provides a character device that a user-space Bluetooth stack can
+ use to send/receive data over the shared transport.
endmenu
diff --git a/drivers/misc/ti-st/Makefile b/drivers/misc/ti-st/Makefile
index 78d7ebb14749..8fa936fa8f51 100644
--- a/drivers/misc/ti-st/Makefile
+++ b/drivers/misc/ti-st/Makefile
@@ -4,3 +4,6 @@
#
obj-$(CONFIG_TI_ST) += st_drv.o
st_drv-objs := st_core.o st_kim.o st_ll.o
+obj-$(CONFIG_ST_HCI) += tty_hci.o
+
+ccflags-y += -Inet/bluetooth
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index 71b64550b591..ed36722da9e9 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -36,6 +36,8 @@
#include <linux/skbuff.h>
#include <linux/ti_wilink_st.h>
#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
#define MAX_ST_DEVICES 3 /* Imagine 1 on each UART for now */
static struct platform_device *st_kim_devices[MAX_ST_DEVICES];
@@ -43,6 +45,9 @@ static struct platform_device *st_kim_devices[MAX_ST_DEVICES];
/**********************************************************************/
/* internal functions */
+struct ti_st_plat_data *dt_pdata;
+static struct ti_st_plat_data *get_platform_data(struct device *dev);
+
/**
* st_get_plat_device -
* function which returns the reference to the platform device
@@ -464,7 +469,12 @@ long st_kim_start(void *kim_data)
struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data;
pr_info(" %s", __func__);
- pdata = kim_gdata->kim_pdev->dev.platform_data;
+ if (kim_gdata->kim_pdev->dev.of_node) {
+ pr_debug("use device tree data");
+ pdata = dt_pdata;
+ } else {
+ pdata = kim_gdata->kim_pdev->dev.platform_data;
+ }
do {
/* platform specific enabling code here */
@@ -524,12 +534,18 @@ long st_kim_stop(void *kim_data)
{
long err = 0;
struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data;
- struct ti_st_plat_data *pdata =
- kim_gdata->kim_pdev->dev.platform_data;
+ struct ti_st_plat_data *pdata;
struct tty_struct *tty = kim_gdata->core_data->tty;
reinit_completion(&kim_gdata->ldisc_installed);
+ if (kim_gdata->kim_pdev->dev.of_node) {
+ pr_debug("use device tree data");
+ pdata = dt_pdata;
+ } else {
+ pdata = kim_gdata->kim_pdev->dev.platform_data;
+ }
+
if (tty) { /* can be called before ldisc is installed */
/* Flush any pending characters in the driver and discipline. */
tty_ldisc_flush(tty);
@@ -721,13 +737,52 @@ static const struct file_operations list_debugfs_fops = {
* board-*.c file
*/
+static const struct of_device_id kim_of_match[] = {
+ {
+ .compatible = "kim",
+ },
+ {}
+};
+MODULE_DEVICE_TABLE(of, kim_of_match);
+
+static struct ti_st_plat_data *get_platform_data(struct device *dev)
+{
+ struct device_node *np = dev->of_node;
+ const u32 *dt_property;
+ int len;
+
+ dt_pdata = kzalloc(sizeof(*dt_pdata), GFP_KERNEL);
+ if (!dt_pdata)
+ return NULL;
+
+ dt_property = of_get_property(np, "dev_name", &len);
+ if (dt_property && len <= sizeof(dt_pdata->dev_name))
+ memcpy(&dt_pdata->dev_name, dt_property, len);
+ of_property_read_u32(np, "nshutdown_gpio",
+ &dt_pdata->nshutdown_gpio);
+ of_property_read_u32(np, "flow_cntrl", &dt_pdata->flow_cntrl);
+ of_property_read_u32(np, "baud_rate", &dt_pdata->baud_rate);
+
+ return dt_pdata;
+}
+
static struct dentry *kim_debugfs_dir;
static int kim_probe(struct platform_device *pdev)
{
struct kim_data_s *kim_gdata;
- struct ti_st_plat_data *pdata = pdev->dev.platform_data;
+ struct ti_st_plat_data *pdata;
int err;
+ if (pdev->dev.of_node)
+ pdata = get_platform_data(&pdev->dev);
+ else
+ pdata = pdev->dev.platform_data;
+
+ if (pdata == NULL) {
+ dev_err(&pdev->dev, "Platform Data is missing\n");
+ return -ENXIO;
+ }
+
if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) {
/* multiple devices could exist */
st_kim_devices[pdev->id] = pdev;
@@ -808,9 +863,16 @@ err_core_init:
static int kim_remove(struct platform_device *pdev)
{
/* free the GPIOs requested */
- struct ti_st_plat_data *pdata = pdev->dev.platform_data;
+ struct ti_st_plat_data *pdata;
struct kim_data_s *kim_gdata;
+ if (pdev->dev.of_node) {
+ pr_debug("use device tree data");
+ pdata = dt_pdata;
+ } else {
+ pdata = pdev->dev.platform_data;
+ }
+
kim_gdata = platform_get_drvdata(pdev);
/* Free the Bluetooth/FM/GPIO
@@ -828,12 +890,22 @@ static int kim_remove(struct platform_device *pdev)
kfree(kim_gdata);
kim_gdata = NULL;
+ kfree(dt_pdata);
+ dt_pdata = NULL;
+
return 0;
}
static int kim_suspend(struct platform_device *pdev, pm_message_t state)
{
- struct ti_st_plat_data *pdata = pdev->dev.platform_data;
+ struct ti_st_plat_data *pdata;
+
+ if (pdev->dev.of_node) {
+ pr_debug("use device tree data");
+ pdata = dt_pdata;
+ } else {
+ pdata = pdev->dev.platform_data;
+ }
if (pdata->suspend)
return pdata->suspend(pdev, state);
@@ -843,7 +915,14 @@ static int kim_suspend(struct platform_device *pdev, pm_message_t state)
static int kim_resume(struct platform_device *pdev)
{
- struct ti_st_plat_data *pdata = pdev->dev.platform_data;
+ struct ti_st_plat_data *pdata;
+
+ if (pdev->dev.of_node) {
+ pr_debug("use device tree data");
+ pdata = dt_pdata;
+ } else {
+ pdata = pdev->dev.platform_data;
+ }
if (pdata->resume)
return pdata->resume(pdev);
@@ -860,6 +939,7 @@ static struct platform_driver kim_platform_driver = {
.resume = kim_resume,
.driver = {
.name = "kim",
+ .of_match_table = of_match_ptr(kim_of_match),
},
};
diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c
index 93b4d67cc4a3..518e1b7f2f95 100644
--- a/drivers/misc/ti-st/st_ll.c
+++ b/drivers/misc/ti-st/st_ll.c
@@ -26,6 +26,7 @@
#include <linux/ti_wilink_st.h>
/**********************************************************************/
+
/* internal functions */
static void send_ll_cmd(struct st_data_s *st_data,
unsigned char cmd)
@@ -53,7 +54,13 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data)
/* communicate to platform about chip asleep */
kim_data = st_data->kim_data;
- pdata = kim_data->kim_pdev->dev.platform_data;
+ if (kim_data->kim_pdev->dev.of_node) {
+ pr_debug("use device tree data");
+ pdata = dt_pdata;
+ } else {
+ pdata = kim_data->kim_pdev->dev.platform_data;
+ }
+
if (pdata->chip_asleep)
pdata->chip_asleep(NULL);
}
@@ -86,7 +93,13 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data)
/* communicate to platform about chip wakeup */
kim_data = st_data->kim_data;
- pdata = kim_data->kim_pdev->dev.platform_data;
+ if (kim_data->kim_pdev->dev.of_node) {
+ pr_debug("use device tree data");
+ pdata = dt_pdata;
+ } else {
+ pdata = kim_data->kim_pdev->dev.platform_data;
+ }
+
if (pdata->chip_awake)
pdata->chip_awake(NULL);
}
diff --git a/drivers/misc/ti-st/tty_hci.c b/drivers/misc/ti-st/tty_hci.c
new file mode 100644
index 000000000000..8a1c96bf5e74
--- /dev/null
+++ b/drivers/misc/ti-st/tty_hci.c
@@ -0,0 +1,593 @@
+/*
+ * TTY emulation for user-space Bluetooth stacks over HCI-H4
+ * Copyright (C) 2011-2012 Texas Instruments
+ * Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * define one of the following for debugging:
+ * #define DEBUG
+ * #define VERBOSE
+ */
+
+#define pr_fmt(fmt) "(hci_tty): " fmt
+#include <linux/module.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+
+#include <linux/uaccess.h>
+#include <linux/tty.h>
+#include <linux/sched.h>
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+
+#include <linux/ti_wilink_st.h>
+#include "led.h"
+
+/* Number of seconds to wait for registration completion
+ * when ST returns PENDING status.
+ */
+#define BT_REGISTER_TIMEOUT 6000 /* 6 sec */
+
+/**
+ * struct ti_st - driver operation structure
+ * @hdev: hci device pointer which binds to bt driver
+ * @reg_status: ST registration callback status
+ * @st_write: write function provided by the ST driver
+ * to be used by the driver during send_frame.
+ * @wait_reg_completion: completion to synchronize ti_st_open()
+ * and st_reg_completion_cb().
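+ * @data_q: waitqueue woken whenever new rx data is queued
+ * @rx_list: queue of skbs received from ST, drained by read()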
+ */
+struct ti_st {
+ struct hci_dev *hdev;
+ char reg_status;
+ long (*st_write)(struct sk_buff *);
+ struct completion wait_reg_completion;
+ wait_queue_head_t data_q;
+ struct sk_buff_head rx_list;
+};
+
+#define DEVICE_NAME "hci_tty"
+
+/***********Functions called from ST driver**********************************/
+/* Called by Shared Transport layer when receive data is
+ * available */
+static long st_receive(void *priv_data, struct sk_buff *skb)
+{
+ struct ti_st *hst = (void *)priv_data;
+
+ pr_debug("@ %s", __func__);
+#ifdef VERBOSE
+ print_hex_dump(KERN_INFO, ">rx>", DUMP_PREFIX_NONE,
+ 16, 1, skb->data, skb->len, 0);
+#endif
+ skb_queue_tail(&hst->rx_list, skb);
+ wake_up_interruptible(&hst->data_q);
+ return 0;
+}
+
+/* Called by the ST layer to indicate protocol registration completion
+ * status. ti_st_open() waits for a signal from this
+ * callback when st_register() returns ST_PENDING.
+ */
+static void st_reg_completion_cb(void *priv_data, char data)
+{
+ struct ti_st *lhst = (void *)priv_data;
+
+ pr_info("@ %s\n", __func__);
+ /* Save registration status for use in ti_st_open() */
+ lhst->reg_status = data;
+ /* complete the wait in ti_st_open() */
+ complete(&lhst->wait_reg_completion);
+}
+
+/* protocol structure registered with shared transport */
+#define MAX_BT_CHNL_IDS 3
+static struct st_proto_s ti_st_proto[MAX_BT_CHNL_IDS] = {
+ {
+ .chnl_id = 0x04, /* HCI Events */
+ .hdr_len = 2,
+ .offset_len_in_hdr = 1,
+ .len_size = 1, /* sizeof(plen) in struct hci_event_hdr */
+ .reserve = 8,
+ },
+ {
+ .chnl_id = 0x02, /* ACL */
+ .hdr_len = 4,
+ .offset_len_in_hdr = 2,
+ .len_size = 2, /* sizeof(dlen) in struct hci_acl_hdr */
+ .reserve = 8,
+ },
+ {
+ .chnl_id = 0x03, /* SCO */
+ .hdr_len = 3,
+ .offset_len_in_hdr = 2,
+ .len_size = 1, /* sizeof(dlen) in struct hci_sco_hdr */
+ .reserve = 8,
+ },
+};
+
+static struct hci_dev *hdev; /* hci_dev during hci_tty_init */
+/* a dumb callback function for hci_dev open and close */
+static int dumb_hci_callback(struct hci_dev *hdev)
+{
+ pr_info("called %s\n", __func__);
+ return 0;
+}
+
+/* a dumb callback function for hci_dev send */
+static int dumb_hci_send_callback(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ pr_info("called %s\n", __func__);
+ return 0;
+}
+
+/** hci_tty_open Function
+ * This function registers the HCI channels with the ST driver.
+ *
+ * Parameters :
+ * @file : File pointer for BT char driver
+ * @inod :
+ * Returns 0 - on success
+ * else suitable error code
+ */
+int hci_tty_open(struct inode *inod, struct file *file)
+{
+ int i = 0, err = 0;
+ unsigned long timeleft;
+ struct ti_st *hst;
+
+ pr_info("inside %s (%p, %p)\n", __func__, inod, file);
+
+ hst = kzalloc(sizeof(*hst), GFP_KERNEL);
+ if (!hst)
+ return -ENOMEM;
+ file->private_data = hst;
+
+ hst->hdev = hdev;
+ for (i = 0; i < MAX_BT_CHNL_IDS; i++) {
+ ti_st_proto[i].priv_data = hst;
+ ti_st_proto[i].max_frame_size = 1026;
+ ti_st_proto[i].recv = st_receive;
+ ti_st_proto[i].reg_complete_cb = st_reg_completion_cb;
+
+ /* Prepare wait-for-completion handler */
+ init_completion(&hst->wait_reg_completion);
+ /* Reset ST registration callback status flag,
+ * this value will be updated in
+ * st_reg_completion_cb()
+ * function whenever it called from ST driver.
+ */
+ hst->reg_status = -EINPROGRESS;
+
+ err = st_register(&ti_st_proto[i]);
+ if (!err)
+ goto done;
+
+ if (err != -EINPROGRESS) {
+ pr_err("st_register failed %d", err);
+ goto error;
+ }
+
+ /* ST is busy with either protocol
+ * registration or firmware download.
+ */
+ pr_debug("waiting for registration completion signal from ST");
+ timeleft = wait_for_completion_timeout
+ (&hst->wait_reg_completion,
+ msecs_to_jiffies(BT_REGISTER_TIMEOUT));
+ if (!timeleft) {
+ pr_err("Timeout(%d sec),didn't get reg completion signal from ST",
+ BT_REGISTER_TIMEOUT / 1000);
+ err = -ETIMEDOUT;
+ goto error;
+ }
+
+ /* Is ST registration callback
+ * called with ERROR status? */
+ if (hst->reg_status != 0) {
+ pr_err("ST registration completed with invalid status %d",
+ hst->reg_status);
+ err = -EAGAIN;
+ goto error;
+ }
+
+done:
+ hst->st_write = ti_st_proto[i].write;
+ if (!hst->st_write) {
+ pr_err("undefined ST write function");
+ for (i = 0; i < MAX_BT_CHNL_IDS; i++) {
+ /* Undo registration with ST */
+ err = st_unregister(&ti_st_proto[i]);
+ if (err)
+ pr_err("st_unregister() failed with error %d",
+ err);
+ hst->st_write = NULL;
+ }
+ return -EIO;
+ }
+ }
+
+ skb_queue_head_init(&hst->rx_list);
+ init_waitqueue_head(&hst->data_q);
+
+ return 0;
+
+error:
+ kfree(hst);
+ return err;
+}
+
+/** hci_tty_release Function
+ * This function unregisters from the ST driver.
+ *
+ * Parameters :
+ * @file : File pointer for BT char driver
+ * @inod :
+ * Returns 0 - on success
+ * else suitable error code
+ */
+int hci_tty_release(struct inode *inod, struct file *file)
+{
+ int err, i;
+ struct ti_st *hst = file->private_data;
+
+ pr_info("inside %s (%p, %p)\n", __func__, inod, file);
+
+ for (i = 0; i < MAX_BT_CHNL_IDS; i++) {
+ err = st_unregister(&ti_st_proto[i]);
+ if (err)
+ pr_err("st_unregister(%d) failed with error %d",
+ ti_st_proto[i].chnl_id, err);
+ }
+
+ hst->st_write = NULL;
+ skb_queue_purge(&hst->rx_list);
+ kfree(hst);
+ return err;
+}
+
+/** hci_tty_read Function
+ *
+ * Parameters :
+ * @file : File pointer for BT char driver
+ * @data : Data which needs to be passed to APP
+ * @size : Length of the data passed
+ * offset :
+ * Returns Size of packet received - on success
+ * else suitable error code
+ */
+ssize_t hci_tty_read(struct file *file, char __user *data, size_t size,
+ loff_t *offset)
+{
+ int len = 0, tout;
+ struct sk_buff *skb = NULL, *rskb = NULL;
+ struct ti_st *hst;
+
+ pr_debug("inside %s (%p, %p, %zu, %p)\n",
+ __func__, file, data, size, offset);
+
+ /* Validate input parameters */
+ if (!file || !data || !size) {
+ pr_err("Invalid input params passed to %s", __func__);
+ return -EINVAL;
+ }
+
+ hst = file->private_data;
+
+ /* cannot come here if poll-ed before reading
+ * if not poll-ed wait on the same wait_q
+ */
+ tout = wait_event_interruptible_timeout(hst->data_q,
+ !skb_queue_empty(&hst->rx_list),
+ msecs_to_jiffies(1000));
+ /* Check for timed out condition */
+ if (!tout) {
+ pr_err("Device Read timed out\n");
+ return -ETIMEDOUT;
+ }
+
+ /* hst->rx_list not empty skb already present */
+ skb = skb_dequeue(&hst->rx_list);
+ if (!skb) {
+ pr_err("dequeued skb is NULL?\n");
+ return -EIO;
+ }
+
+#ifdef VERBOSE
+ print_hex_dump(KERN_INFO, ">in>", DUMP_PREFIX_NONE,
+ 16, 1, skb->data, skb->len, 0);
+#endif
+
+ /* Forward the data to the user */
+ if (skb->len >= size) {
+ pr_err("FIONREAD not done before read\n");
+ /* put the skb back so the pending data is not lost */
+ skb_queue_head(&hst->rx_list, skb);
+ return -ENOMEM;
+ } else {
+ /* returning skb */
+ rskb = alloc_skb(size, GFP_KERNEL);
+ if (!rskb) {
+ pr_err("alloc_skb error\n");
+ return -ENOMEM;
+ }
+
+ /* cb[0] has the pkt_type 0x04 or 0x02 or 0x03 */
+ memcpy(skb_put(rskb, 1), &skb->cb[0], 1);
+ memcpy(skb_put(rskb, skb->len), skb->data, skb->len);
+
+ bluetooth_led_rx(hst->hdev);
+ if (copy_to_user(data, rskb->data, rskb->len)) {
+ pr_err("unable to copy to user space\n");
+ /* Queue the skb back to head */
+ skb_queue_head(&hst->rx_list, skb);
+ kfree_skb(rskb);
+ return -EIO;
+ }
+ }
+
+ len = rskb->len; /* len of returning skb */
+ kfree_skb(skb);
+ kfree_skb(rskb);
+ pr_debug("total size read= %d\n", len);
+ return len;
+}
+
+/* hci_tty_write Function
+ *
+ * Parameters :
+ * @file : File pointer for BT char driver
+ * @data : packet data from BT application
+ * @size : Size of the packet data
+ * @offset :
+ * Returns Size of packet on success
+ * else suitable error code
+ */
+ssize_t hci_tty_write(struct file *file, const char __user *data,
+ size_t size, loff_t *offset)
+{
+ struct ti_st *hst = file->private_data;
+ struct sk_buff *skb;
+
+ pr_debug("inside %s (%p, %p, %zu, %p)\n",
+ __func__, file, data, size, offset);
+
+ if (!hst->st_write) {
+ pr_err("Can't write to ST, hci_tty->st_write is NULL\n");
+ return -EINVAL;
+ }
+
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb) {
+ pr_err("Error allocating skb\n");
+ return -ENOMEM;
+ }
+
+ /* Forward the data from the user space to ST core */
+ if (copy_from_user(skb_put(skb, size), data, size)) {
+ pr_err("Unable to copy from user space\n");
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ bluetooth_led_tx(hst->hdev);
+
+#ifdef VERBOSE
+ pr_debug("start data..");
+ print_hex_dump(KERN_INFO, "<out<", DUMP_PREFIX_NONE,
+ 16, 1, skb->data, size, 0);
+ pr_debug("\n..end data");
+#endif
+
+ hst->st_write(skb);
+ return size;
+}
+
+/** hci_tty_ioctl Function
+ * This performs the operation requested by the ioctl command and its
+ * argument.
+ *
+ * Parameters :
+ * @file : File pointer for BT char driver
+ * @cmd : IOCTL Command
+ * @arg : Command argument for IOCTL command
+ * Returns 0 on success
+ * else suitable error code
+ */
+static long hci_tty_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct sk_buff *skb = NULL;
+ int retcode = 0;
+ struct ti_st *hst;
+
+ pr_debug("inside %s (%p, %u, %lx)", __func__, file, cmd, arg);
+
+ /* Validate input parameters */
+ if (!file || !cmd) {
+ pr_err("invalid input parameters passed to %s", __func__);
+ return -EINVAL;
+ }
+
+ hst = file->private_data;
+
+ switch (cmd) {
+ case FIONREAD:
+ /* Peek at the skb at the head of rx_list, if any, and report
+ * its length plus 1 for the PKT_TYPE byte that is not part of
+ * the data provided by TI-ST.
+ */
+ skb = skb_dequeue(&hst->rx_list);
+ if (skb) {
+ retcode = put_user(skb->len + 1,
+ (unsigned int __user *)arg);
+ /* re-store the skb for future read operations */
+ skb_queue_head(&hst->rx_list, skb);
+ } else {
+ retcode = put_user(0, (unsigned int __user *)arg);
+ }
+ break;
+ default:
+ pr_debug("Unidentified ioctl %d\n", cmd);
+ retcode = 0;
+ break;
+ }
+
+ return retcode;
+}
+
+/** hci_tty_poll Function
+ * This function waits until some data is received by the hci_tty driver from ST.
+ *
+ * Parameters :
+ * @file : File pointer for BT char driver
+ * @wait : POLL wait information
+ * Returns status of POLL on success
+ * else suitable error code
+ */
+static unsigned int hci_tty_poll(struct file *file, poll_table *wait)
+{
+ struct ti_st *hst = file->private_data;
+ unsigned long mask = 0;
+
+ pr_debug("@ %s\n", __func__);
+
+ /* wait to be completed by st_receive */
+ poll_wait(file, &hst->data_q, wait);
+ pr_debug("poll broke\n");
+
+ if (!skb_queue_empty(&hst->rx_list)) {
+ pr_debug("rx list que !empty\n");
+ mask |= POLLIN; /* TODO: check app for mask */
+ }
+
+ return mask;
+}
+
+/* BT Char driver function pointers
+ * These functions are called from user space by performing file operations
+ * on the /dev/hci_tty node exposed by this driver during init.
+ */
+const struct file_operations hci_tty_chrdev_ops = {
+ .owner = THIS_MODULE,
+ .open = hci_tty_open,
+ .read = hci_tty_read,
+ .write = hci_tty_write,
+ .unlocked_ioctl = hci_tty_ioctl,
+ .poll = hci_tty_poll,
+ .release = hci_tty_release,
+};
+
+/*********Functions called during insmod and delmod****************************/
+
+static int hci_tty_major; /* major number */
+static struct class *hci_tty_class; /* class during class_create */
+static struct device *hci_tty_dev; /* dev during device_create */
+/** hci_tty_init Function
+ * This function initializes the hci_tty driver parameters and exposes
+ * the /dev/hci_tty node to user space.
+ *
+ * Parameters : NULL
+ * Returns 0 on success
+ * else suitable error code
+ */
+static int __init hci_tty_init(void)
+{
+ int err = 0;
+ pr_info("inside %s\n", __func__);
+
+ /* Expose the device DEVICE_NAME to user space
+ * And obtain the major number for the device
+ */
+ hci_tty_major = register_chrdev(0, DEVICE_NAME, &hci_tty_chrdev_ops);
+
+ if (hci_tty_major < 0) {
+ pr_err("Error when registering to char dev");
+ return hci_tty_major;
+ }
+
+ /* udev */
+ hci_tty_class = class_create(THIS_MODULE, DEVICE_NAME);
+ if (IS_ERR(hci_tty_class)) {
+ pr_err("Something went wrong in class_create");
+ unregister_chrdev(hci_tty_major, DEVICE_NAME);
+ return -1;
+ }
+
+ hci_tty_dev =
+ device_create(hci_tty_class, NULL, MKDEV(hci_tty_major, 0),
+ NULL, DEVICE_NAME);
+ if (IS_ERR(hci_tty_dev)) {
+ pr_err("Error in device create");
+ unregister_chrdev(hci_tty_major, DEVICE_NAME);
+ class_destroy(hci_tty_class);
+ return -1;
+ }
+ pr_info("allocated %d, %d\n", hci_tty_major, 0);
+
+ /* Alloc and register "hciX" device so we can use hci
+ * LED trigger feature
+ */
+ hdev = hci_alloc_dev();
+ if (!hdev) {
+ err = -ENOMEM;
+ goto err_undo_chrdev;
+ }
+
+ pr_info("hdev %p\n", hdev);
+
+ hdev->bus = HCI_UART;
+ hci_set_drvdata(hdev, NULL);
+ hdev->open = dumb_hci_callback;
+ hdev->close = dumb_hci_callback;
+ hdev->flush = NULL;
+ hdev->send = dumb_hci_send_callback;
+ hdev->dev_type = HCI_AMP;
+ err = hci_register_dev(hdev);
+ if (err < 0) {
+ pr_err("Can't register HCI device, error %d\n", err);
+ hci_free_dev(hdev);
+ goto err_undo_chrdev;
+ }
+
+ pr_info("HCI device registered (hdev %p)", hdev);
+
+ return 0;
+
+err_undo_chrdev:
+ /* undo the char device registration done earlier in this function */
+ device_destroy(hci_tty_class, MKDEV(hci_tty_major, 0));
+ class_destroy(hci_tty_class);
+ unregister_chrdev(hci_tty_major, DEVICE_NAME);
+ return err;
+}
+
+/** hci_tty_exit Function
+ * This function destroys the hci_tty driver parameters and the /dev/hci_tty node.
+ *
+ * Parameters : NULL
+ * Returns NULL
+ */
+static void __exit hci_tty_exit(void)
+{
+ pr_info("inside %s\n", __func__);
+ hci_unregister_dev(hdev);
+ hci_free_dev(hdev);
+
+ pr_info("bye.. freeing up %d\n", hci_tty_major);
+
+ device_destroy(hci_tty_class, MKDEV(hci_tty_major, 0));
+ class_destroy(hci_tty_class);
+ unregister_chrdev(hci_tty_major, DEVICE_NAME);
+}
+
+module_init(hci_tty_init);
+module_exit(hci_tty_exit);
+
+MODULE_AUTHOR("Pavan Savoy <pavan_savoy@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index f2b733275a0a..5fb7ee57663a 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -47,13 +47,10 @@
#include "queue.h"
MODULE_ALIAS("mmc:block");
-
-#ifdef KERNEL
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "mmcblk."
-#endif
#define INAND_CMD38_ARG_EXT_CSD 113
#define INAND_CMD38_ARG_ERASE 0x00
diff --git a/drivers/mmc/host/dw_mmc-k3.c b/drivers/mmc/host/dw_mmc-k3.c
index 63c2e2ed1288..6aa6d0d63f7f 100644
--- a/drivers/mmc/host/dw_mmc-k3.c
+++ b/drivers/mmc/host/dw_mmc-k3.c
@@ -32,6 +32,12 @@ struct k3_priv {
struct regmap *reg;
};
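+/* per-controller capabilities, indexed by the dw-mmc controller id */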
+static unsigned long dw_mci_hi6220_caps[] = {
+ MMC_CAP_CMD23,
+ MMC_CAP_CMD23,
+ 0
+};
+
static void dw_mci_k3_set_ios(struct dw_mci *host, struct mmc_ios *ios)
{
int ret;
@@ -125,10 +131,17 @@ static void dw_mci_hi6220_set_ios(struct dw_mci *host, struct mmc_ios *ios)
host->bus_hz = clk_get_rate(host->biu_clk);
}
+static void dw_mci_hi6220_prepare_command(struct dw_mci *host, u32 *cmdr)
+{
+ *cmdr |= SDMMC_CMD_USE_HOLD_REG;
+}
+
static const struct dw_mci_drv_data hi6220_data = {
+ .caps = dw_mci_hi6220_caps,
.switch_voltage = dw_mci_hi6220_switch_voltage,
.set_ios = dw_mci_hi6220_set_ios,
.parse_dt = dw_mci_hi6220_parse_dt,
+ .prepare_command = dw_mci_hi6220_prepare_command,
};
static const struct of_device_id dw_mci_k3_match[] = {
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index fb204ee6ff89..900a4941eab5 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -250,7 +250,7 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
if (cmd->opcode == SD_SWITCH_VOLTAGE) {
- u32 clk_en_a;
+ /*u32 clk_en_a;*/
/* Special bit makes CMD11 not die */
cmdr |= SDMMC_CMD_VOLT_SWITCH;
@@ -270,11 +270,11 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
* ever called with a non-zero clock. That shouldn't happen
* until the voltage change is all done.
*/
- clk_en_a = mci_readl(host, CLKENA);
- clk_en_a &= ~(SDMMC_CLKEN_LOW_PWR << slot->id);
- mci_writel(host, CLKENA, clk_en_a);
- mci_send_cmd(slot, SDMMC_CMD_UPD_CLK |
- SDMMC_CMD_PRV_DAT_WAIT, 0);
+ /*clk_en_a = mci_readl(host, CLKENA);*/
+ /*clk_en_a &= ~(SDMMC_CLKEN_LOW_PWR << slot->id);*/
+ /*mci_writel(host, CLKENA, clk_en_a);*/
+ /*mci_send_cmd(slot, SDMMC_CMD_UPD_CLK |*/
+ /*SDMMC_CMD_PRV_DAT_WAIT, 0);*/
}
if (cmd->flags & MMC_RSP_PRESENT) {
@@ -1124,7 +1124,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit)
/* enable clock; only low power if no SDIO */
clk_en_a = SDMMC_CLKEN_ENABLE << slot->id;
- if (!test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags))
+ if (!test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags) &&
+ (slot->mmc->index != 1))
clk_en_a |= SDMMC_CLKEN_LOW_PWR << slot->id;
mci_writel(host, CLKENA, clk_en_a);
@@ -1298,6 +1298,8 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
regs |= ((0x1 << slot->id) << 16);
else
regs &= ~((0x1 << slot->id) << 16);
+ if (mmc->index == 1)
+ regs |= (0x1 << slot->id);
mci_writel(slot->host, UHS_REG, regs);
slot->host->timing = ios->timing;
@@ -1540,6 +1542,8 @@ static int dw_mci_execute_tuning(struct mmc_host *mmc, u32 opcode)
if (drv_data && drv_data->execute_tuning)
err = drv_data->execute_tuning(slot, opcode);
+ else
+ err = 0;
return err;
}
@@ -2879,6 +2883,15 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host)
if (!pdata)
return ERR_PTR(-ENOMEM);
+ /* look up the optional reset controller */
+ pdata->rstc = devm_reset_control_get_optional(dev, NULL);
+ if (IS_ERR(pdata->rstc)) {
+ if (PTR_ERR(pdata->rstc) == -EPROBE_DEFER)
+ return ERR_PTR(-EPROBE_DEFER);
+ pdata->rstc = NULL;
+ }
+
/* find out number of slots supported */
if (of_property_read_u32(dev->of_node, "num-slots",
&pdata->num_slots)) {
@@ -2959,7 +2972,9 @@ int dw_mci_probe(struct dw_mci *host)
if (!host->pdata) {
host->pdata = dw_mci_parse_dt(host);
- if (IS_ERR(host->pdata)) {
+ if (PTR_ERR(host->pdata) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+ else if (IS_ERR(host->pdata)) {
dev_err(host->dev, "platform data not available\n");
return -EINVAL;
}
@@ -3028,6 +3043,9 @@ int dw_mci_probe(struct dw_mci *host)
}
}
+ if (host->pdata->rstc != NULL)
+ reset_control_deassert(host->pdata->rstc);
+
setup_timer(&host->cmd11_timer,
dw_mci_cmd11_timer, (unsigned long)host);
@@ -3174,6 +3192,9 @@ err_dmaunmap:
if (host->use_dma && host->dma_ops->exit)
host->dma_ops->exit(host);
+ if (host->pdata->rstc != NULL)
+ reset_control_assert(host->pdata->rstc);
+
err_clk_ciu:
if (!IS_ERR(host->ciu_clk))
clk_disable_unprepare(host->ciu_clk);
@@ -3206,11 +3227,15 @@ void dw_mci_remove(struct dw_mci *host)
if (host->use_dma && host->dma_ops->exit)
host->dma_ops->exit(host);
+ if (host->pdata->rstc != NULL)
+ reset_control_assert(host->pdata->rstc);
+
if (!IS_ERR(host->ciu_clk))
clk_disable_unprepare(host->ciu_clk);
if (!IS_ERR(host->biu_clk))
clk_disable_unprepare(host->biu_clk);
}
EXPORT_SYMBOL(dw_mci_remove);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1a14ca8965e6..1a78410eeec3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2079,7 +2079,8 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev)
if (!pm_runtime_suspended(dev)
|| pci_target_state(pci_dev) != pci_dev->current_state
- || platform_pci_need_resume(pci_dev))
+ || platform_pci_need_resume(pci_dev)
+ || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME))
return false;
/*
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index b5679fb67591..1f08800a6473 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -215,6 +215,15 @@ config PHY_MT65XX_USB3
for mt65xx SoCs. it supports two usb2.0 ports and
one usb3.0 port.
+config PHY_HI6220_USB
+ tristate "hi6220 USB PHY support"
+ select GENERIC_PHY
+ select MFD_SYSCON
+ help
+ Enable this to support the HISILICON HI6220 USB PHY.
+
+ To compile this driver as a module, choose M here.
+
config PHY_SUN4I_USB
tristate "Allwinner sunxi SoC USB PHY driver"
depends on ARCH_SUNXI && HAS_IOMEM && OF
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index 075db1a81aa5..faccda1f237f 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_TI_PIPE3) += phy-ti-pipe3.o
obj-$(CONFIG_TWL4030_USB) += phy-twl4030-usb.o
obj-$(CONFIG_PHY_EXYNOS5250_SATA) += phy-exynos5250-sata.o
obj-$(CONFIG_PHY_HIX5HD2_SATA) += phy-hix5hd2-sata.o
+obj-$(CONFIG_PHY_HI6220_USB) += phy-hi6220-usb.o
obj-$(CONFIG_PHY_MT65XX_USB3) += phy-mt65xx-usb3.o
obj-$(CONFIG_PHY_SUN4I_USB) += phy-sun4i-usb.o
obj-$(CONFIG_PHY_SUN9I_USB) += phy-sun9i-usb.o
diff --git a/drivers/phy/phy-hi6220-usb.c b/drivers/phy/phy-hi6220-usb.c
new file mode 100644
index 000000000000..b2141cbd4cf6
--- /dev/null
+++ b/drivers/phy/phy-hi6220-usb.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015 Linaro Ltd.
+ * Copyright (c) 2015 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/phy/phy.h>
+#include <linux/regmap.h>
+
+#define SC_PERIPH_CTRL4 0x00c
+
+#define CTRL4_PICO_SIDDQ BIT(6)
+#define CTRL4_PICO_OGDISABLE BIT(8)
+#define CTRL4_PICO_VBUSVLDEXT BIT(10)
+#define CTRL4_PICO_VBUSVLDEXTSEL BIT(11)
+#define CTRL4_OTG_PHY_SEL BIT(21)
+
+#define SC_PERIPH_CTRL5 0x010
+
+#define CTRL5_USBOTG_RES_SEL BIT(3)
+#define CTRL5_PICOPHY_ACAENB BIT(4)
+#define CTRL5_PICOPHY_BC_MODE BIT(5)
+#define CTRL5_PICOPHY_CHRGSEL BIT(6)
+#define CTRL5_PICOPHY_VDATSRCEND BIT(7)
+#define CTRL5_PICOPHY_VDATDETENB BIT(8)
+#define CTRL5_PICOPHY_DCDENB BIT(9)
+#define CTRL5_PICOPHY_IDDIG BIT(10)
+
+#define SC_PERIPH_CTRL8 0x018
+#define SC_PERIPH_RSTEN0 0x300
+#define SC_PERIPH_RSTDIS0 0x304
+
+#define RST0_USBOTG_BUS BIT(4)
+#define RST0_POR_PICOPHY BIT(5)
+#define RST0_USBOTG BIT(6)
+#define RST0_USBOTG_32K BIT(7)
+
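+/* USB PHY eye-diagram tuning value (an opaque, SoC-specific magic constant) */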
+#define EYE_PATTERN_PARA 0x7053348c
+
+struct hi6220_priv {
+ struct regmap *reg;
+ struct device *dev;
+};
+
+static void hi6220_phy_init(struct hi6220_priv *priv)
+{
+ struct regmap *reg = priv->reg;
+ u32 val, mask;
+
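+ /* assert then deassert the USB OTG resets to cycle the PHY block */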
+ val = RST0_USBOTG_BUS | RST0_POR_PICOPHY |
+ RST0_USBOTG | RST0_USBOTG_32K;
+ mask = val;
+ regmap_update_bits(reg, SC_PERIPH_RSTEN0, mask, val);
+ regmap_update_bits(reg, SC_PERIPH_RSTDIS0, mask, val);
+}
+
+static int hi6220_phy_setup(struct hi6220_priv *priv, bool on)
+{
+ struct regmap *reg = priv->reg;
+ u32 val, mask;
+ int ret;
+
+ if (on) {
+ val = CTRL5_USBOTG_RES_SEL | CTRL5_PICOPHY_ACAENB;
+ mask = val | CTRL5_PICOPHY_BC_MODE;
+ ret = regmap_update_bits(reg, SC_PERIPH_CTRL5, mask, val);
+ if (ret)
+ goto out;
+
+ val = CTRL4_PICO_VBUSVLDEXT | CTRL4_PICO_VBUSVLDEXTSEL |
+ CTRL4_OTG_PHY_SEL;
+ mask = val | CTRL4_PICO_SIDDQ | CTRL4_PICO_OGDISABLE;
+ ret = regmap_update_bits(reg, SC_PERIPH_CTRL4, mask, val);
+ if (ret)
+ goto out;
+
+ ret = regmap_write(reg, SC_PERIPH_CTRL8, EYE_PATTERN_PARA);
+ if (ret)
+ goto out;
+ } else {
+ val = CTRL4_PICO_SIDDQ;
+ mask = val;
+ ret = regmap_update_bits(reg, SC_PERIPH_CTRL4, mask, val);
+ if (ret)
+ goto out;
+ }
+
+ return 0;
+out:
+ dev_err(priv->dev, "failed to setup phy ret: %d\n", ret);
+ return ret;
+}
+
+static int hi6220_phy_start(struct phy *phy)
+{
+ struct hi6220_priv *priv = phy_get_drvdata(phy);
+
+ return hi6220_phy_setup(priv, true);
+}
+
+static int hi6220_phy_exit(struct phy *phy)
+{
+ struct hi6220_priv *priv = phy_get_drvdata(phy);
+
+ return hi6220_phy_setup(priv, false);
+}
+
+static const struct phy_ops hi6220_phy_ops = {
+ .init = hi6220_phy_start,
+ .exit = hi6220_phy_exit,
+ .owner = THIS_MODULE,
+};
+
+static int hi6220_phy_probe(struct platform_device *pdev)
+{
+ struct phy_provider *phy_provider;
+ struct device *dev = &pdev->dev;
+ struct phy *phy;
+ struct hi6220_priv *priv;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dev = dev;
+ priv->reg = syscon_regmap_lookup_by_phandle(dev->of_node,
+ "hisilicon,peripheral-syscon");
+ if (IS_ERR(priv->reg)) {
+ dev_err(dev, "no hisilicon,peripheral-syscon\n");
+ return PTR_ERR(priv->reg);
+ }
+
+ hi6220_phy_init(priv);
+
+ phy = devm_phy_create(dev, NULL, &hi6220_phy_ops);
+ if (IS_ERR(phy))
+ return PTR_ERR(phy);
+
+ phy_set_drvdata(phy, priv);
+ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+ return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct of_device_id hi6220_phy_of_match[] = {
+ {.compatible = "hisilicon,hi6220-usb-phy",},
+ { },
+};
+MODULE_DEVICE_TABLE(of, hi6220_phy_of_match);
+
+static struct platform_driver hi6220_phy_driver = {
+ .probe = hi6220_phy_probe,
+ .driver = {
+ .name = "hi6220-usb-phy",
+ .of_match_table = hi6220_phy_of_match,
+ }
+};
+module_platform_driver(hi6220_phy_driver);
+
+MODULE_DESCRIPTION("HISILICON HI6220 USB PHY driver");
+MODULE_ALIAS("platform:hi6220-usb-phy");
+MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 00676208080e..10c46e236e8b 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -261,6 +261,21 @@ config REGULATOR_HI6421
21 general purpose LDOs, 3 dedicated LDOs, and 5 BUCKs. All
of them come with support to either ECO (idle) or sleep mode.
+config REGULATOR_HI6220_MTCMOS
+ bool "Hisilicon Hi6220 MTCMOS support"
+ depends on ARCH_HISI
+ help
+ This driver provides support for the MTCMOS regulators of the
+ Hi6220 SoC.
+
+config REGULATOR_HI655X
+ tristate "Hisilicon HI655X PMIC regulators support"
+ depends on ARCH_HISI || COMPILE_TEST
+ depends on MFD_HI655X_PMIC && OF
+ help
+ This driver provides support for the voltage regulators of the
+ Hisilicon Hi655x PMIC device.
+
config REGULATOR_ISL9305
tristate "Intersil ISL9305 regulator"
depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 0f8174913c17..7345d43551ee 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -34,6 +34,8 @@ obj-$(CONFIG_REGULATOR_DB8500_PRCMU) += db8500-prcmu.o
obj-$(CONFIG_REGULATOR_FAN53555) += fan53555.o
obj-$(CONFIG_REGULATOR_GPIO) += gpio-regulator.o
obj-$(CONFIG_REGULATOR_HI6421) += hi6421-regulator.o
+obj-$(CONFIG_REGULATOR_HI6220_MTCMOS) += hi6220-mtcmos.o
+obj-$(CONFIG_REGULATOR_HI655X) += hi655x-regulator.o
obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o
obj-$(CONFIG_REGULATOR_ISL9305) += isl9305.o
obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
diff --git a/drivers/regulator/hi6220-mtcmos.c b/drivers/regulator/hi6220-mtcmos.c
new file mode 100644
index 000000000000..492be7adfaa2
--- /dev/null
+++ b/drivers/regulator/hi6220-mtcmos.c
@@ -0,0 +1,269 @@
+/*
+ * MTCMOS regulator driver for the Hisilicon Hi6220 SoC
+ *
+ * Copyright (c) 2011 Hisilicon Co. Ltd
+ *
+ */
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+
+enum {
+ HI6220_MTCMOS1,
+ HI6220_MTCMOS2,
+ HI6220_RG_MAX,
+};
+
+struct hi6220_mtcmos_ctrl_regs {
+ unsigned int enable_reg;
+ unsigned int disable_reg;
+ unsigned int status_reg;
+};
+
+struct hi6220_mtcmos_ctrl_data {
+ int shift;
+ unsigned int mask;
+};
+
+struct hi6220_mtcmos_info {
+ struct regulator_desc rdesc;
+ struct hi6220_mtcmos_ctrl_regs ctrl_regs;
+ struct hi6220_mtcmos_ctrl_data ctrl_data;
+};
+
+struct hi6220_mtcmos {
+ struct regulator_dev *rdev[HI6220_RG_MAX];
+ void __iomem *sc_on_regs;
+ unsigned int mtcmos_steady_time;
+ spinlock_t mtcmos_spin_lock;
+};
+
+static int hi6220_mtcmos_is_on(struct hi6220_mtcmos *mtcmos,
+ unsigned int regs, unsigned int mask, int shift)
+{
+ unsigned int val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mtcmos->mtcmos_spin_lock, flags);
+ val = readl(mtcmos->sc_on_regs + regs);
+ spin_unlock_irqrestore(&mtcmos->mtcmos_spin_lock, flags);
+
+ return !!(val & (mask << shift));
+}
+
+static int hi6220_mtcmos_on(struct hi6220_mtcmos *mtcmos,
+ unsigned int regs, unsigned int mask, int shift)
+{
+ unsigned long mtcmos_spin_flag = 0;
+
+ spin_lock_irqsave(&mtcmos->mtcmos_spin_lock, mtcmos_spin_flag);
+ writel(mask << shift, mtcmos->sc_on_regs + regs);
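+ /* Busy-wait for the power domain to settle before dropping the lock. */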
+ udelay(mtcmos->mtcmos_steady_time);
+ spin_unlock_irqrestore(&mtcmos->mtcmos_spin_lock, mtcmos_spin_flag);
+
+ return 0;
+}
+
+static int hi6220_mtcmos_off(struct hi6220_mtcmos *mtcmos,
+ unsigned int regs, unsigned int mask, int shift)
+{
+ unsigned long mtcmos_spin_flag = 0;
+
+ spin_lock_irqsave(&mtcmos->mtcmos_spin_lock, mtcmos_spin_flag);
+ writel(mask << shift, mtcmos->sc_on_regs + regs);
+ udelay(mtcmos->mtcmos_steady_time);
+ spin_unlock_irqrestore(&mtcmos->mtcmos_spin_lock,
+ mtcmos_spin_flag);
+
+ return 0;
+}
+
+static int hi6220_regulator_mtcmos_is_enabled(struct regulator_dev *rdev)
+{
+ int ret;
+ struct hi6220_mtcmos_info *sreg = rdev_get_drvdata(rdev);
+ struct platform_device *pdev =
+ container_of(rdev->dev.parent, struct platform_device, dev);
+ struct hi6220_mtcmos *mtcmos = platform_get_drvdata(pdev);
+ struct hi6220_mtcmos_ctrl_regs *ctrl_regs = &(sreg->ctrl_regs);
+ struct hi6220_mtcmos_ctrl_data *ctrl_data = &(sreg->ctrl_data);
+
+ ret = hi6220_mtcmos_is_on(mtcmos, ctrl_regs->status_reg,
+ ctrl_data->mask, ctrl_data->shift);
+ return ret;
+}
+
+static int hi6220_regulator_mtcmos_enable(struct regulator_dev *rdev)
+{
+ int ret;
+ struct hi6220_mtcmos_info *sreg = rdev_get_drvdata(rdev);
+ struct platform_device *pdev =
+ container_of(rdev->dev.parent, struct platform_device, dev);
+ struct hi6220_mtcmos *mtcmos = platform_get_drvdata(pdev);
+ struct hi6220_mtcmos_ctrl_regs *ctrl_regs = &(sreg->ctrl_regs);
+ struct hi6220_mtcmos_ctrl_data *ctrl_data = &(sreg->ctrl_data);
+
+ ret = hi6220_mtcmos_on(mtcmos, ctrl_regs->enable_reg,
+ ctrl_data->mask, ctrl_data->shift);
+ if (!hi6220_mtcmos_is_on(mtcmos, ctrl_regs->status_reg,
+ ctrl_data->mask, ctrl_data->shift))
+ return -EIO;
+ return ret;
+}
+
+static int hi6220_regulator_mtcmos_disable(struct regulator_dev *rdev)
+{
+ int ret;
+ struct hi6220_mtcmos_info *sreg = rdev_get_drvdata(rdev);
+ struct platform_device *pdev =
+ container_of(rdev->dev.parent, struct platform_device, dev);
+ struct hi6220_mtcmos *mtcmos = platform_get_drvdata(pdev);
+ struct hi6220_mtcmos_ctrl_regs *ctrl_regs = &(sreg->ctrl_regs);
+ struct hi6220_mtcmos_ctrl_data *ctrl_data = &(sreg->ctrl_data);
+
+ ret = hi6220_mtcmos_off(mtcmos, ctrl_regs->disable_reg,
+ ctrl_data->mask, ctrl_data->shift);
+
+ return ret;
+}
+
+static const struct regulator_ops hi6220_mtcmos_mtcmos_rops = {
+ .is_enabled = hi6220_regulator_mtcmos_is_enabled,
+ .enable = hi6220_regulator_mtcmos_enable,
+ .disable = hi6220_regulator_mtcmos_disable,
+};
+
+#define HI6220_MTCMOS(vreg) \
+{ \
+ .rdesc = { \
+ .name = #vreg, \
+ .ops = &hi6220_mtcmos_mtcmos_rops, \
+ .type = REGULATOR_VOLTAGE, \
+ .owner = THIS_MODULE, \
+ }, \
+}
+
+static struct hi6220_mtcmos_info hi6220_mtcmos_info[] = {
+ HI6220_MTCMOS(MTCMOS1),
+ HI6220_MTCMOS(MTCMOS2),
+};
+
+static struct of_regulator_match hi6220_mtcmos_matches[] = {
+ { .name = "mtcmos1",
+ .driver_data = &hi6220_mtcmos_info[HI6220_MTCMOS1], },
+ { .name = "mtcmos2",
+ .driver_data = &hi6220_mtcmos_info[HI6220_MTCMOS2], },
+};
+
+static int hi6220_mtcmos_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct hi6220_mtcmos *mtcmos;
+ const __be32 *sc_on_regs = NULL;
+ void __iomem *regs;
+ struct device *dev;
+ struct device_node *np, *child;
+ int count, i;
+ struct regulator_config config = { };
+ struct regulator_init_data *init_data;
+ struct hi6220_mtcmos_info *sreg;
+
+ dev = &pdev->dev;
+ np = dev->of_node;
+ mtcmos = devm_kzalloc(dev,
+ sizeof(struct hi6220_mtcmos), GFP_KERNEL);
+ if (!mtcmos)
+ return -ENOMEM;
+
+ spin_lock_init(&mtcmos->mtcmos_spin_lock);
+ sc_on_regs = of_get_property(np, "hisilicon,mtcmos-sc-on-base", NULL);
+ if (sc_on_regs) {
+ regs = ioremap(be32_to_cpu(*sc_on_regs), 0x1000);
+ if (!regs)
+ return -ENOMEM;
+ mtcmos->sc_on_regs = regs;
+ }
+ ret = of_property_read_u32(np, "hisilicon,mtcmos-steady-us",
+ &mtcmos->mtcmos_steady_time);
+
+ count = of_regulator_match(&pdev->dev, np,
+ hi6220_mtcmos_matches,
+ ARRAY_SIZE(hi6220_mtcmos_matches));
+
+ for (i = 0; i < HI6220_RG_MAX; i++) {
+ init_data = hi6220_mtcmos_matches[i].init_data;
+ if (!init_data)
+ continue;
+ sreg = hi6220_mtcmos_matches[i].driver_data;
+ config.dev = &pdev->dev;
+ config.init_data = init_data;
+ config.driver_data = sreg;
+ config.of_node = hi6220_mtcmos_matches[i].of_node;
+ child = config.of_node;
+
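+ /*
+ * "hisilicon,ctrl-regs" holds the enable/disable/status register
+ * offsets and "hisilicon,ctrl-data" the bit shift and mask, read
+ * straight into the control structs above.
+ */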
+ ret = of_property_read_u32_array(child, "hisilicon,ctrl-regs",
+ (unsigned int *)(&sreg->ctrl_regs), 0x3);
+ ret = of_property_read_u32_array(child, "hisilicon,ctrl-data",
+ (unsigned int *)(&sreg->ctrl_data), 0x2);
+
+ mtcmos->rdev[i] = regulator_register(&sreg->rdesc, &config);
+ if (IS_ERR(mtcmos->rdev[i])) {
+ ret = PTR_ERR(mtcmos->rdev[i]);
+ dev_err(&pdev->dev, "failed to register mtcmos %s\n",
+ sreg->rdesc.name);
+ while (--i >= 0)
+ regulator_unregister(mtcmos->rdev[i]);
+
+ return ret;
+ }
+ }
+
+ platform_set_drvdata(pdev, mtcmos);
+
+ return 0;
+}
+
+static const struct of_device_id of_hi6220_mtcmos_match_tbl[] = {
+ { .compatible = "hisilicon,hi6220-mtcmos-driver", },
+ {}
+};
+
+static struct platform_driver mtcmos_driver = {
+ .driver = {
+ .name = "hisi_hi6220_mtcmos",
+ .owner = THIS_MODULE,
+ .of_match_table = of_hi6220_mtcmos_match_tbl,
+ },
+ .probe = hi6220_mtcmos_probe,
+};
+
+static int __init hi6220_mtcmos_init(void)
+{
+ return platform_driver_register(&mtcmos_driver);
+}
+
+static void __exit hi6220_mtcmos_exit(void)
+{
+ platform_driver_unregister(&mtcmos_driver);
+}
+
+fs_initcall(hi6220_mtcmos_init);
+module_exit(hi6220_mtcmos_exit);
+
+MODULE_AUTHOR("Baixing Quan<quanbaixing@huawei.com>");
+MODULE_DESCRIPTION("HI6220 MTCMOS interface driver");
+MODULE_LICENSE("GPL V2");
diff --git a/drivers/regulator/hi655x-regulator.c b/drivers/regulator/hi655x-regulator.c
new file mode 100644
index 000000000000..9074355791a2
--- /dev/null
+++ b/drivers/regulator/hi655x-regulator.c
@@ -0,0 +1,226 @@
+/*
+ * Device driver for regulators in hi655x IC
+ *
+ * Copyright (c) 2016 Hisilicon.
+ *
+ * Authors:
+ * Chen Feng <puck.chen@hisilicon.com>
+ * Fei Wang <w.f@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/mfd/hi655x-pmic.h>
+
+struct hi655x_regulator {
+ unsigned int disable_reg;
+ unsigned int status_reg;
+ unsigned int ctrl_regs;
+ unsigned int ctrl_mask;
+ struct regulator_desc rdesc;
+};
+
+static const unsigned int ldo7_voltages[] = {
+ 1800000, 1850000, 2850000, 2900000,
+ 3000000, 3100000, 3200000, 3300000,
+};
+
+static const unsigned int ldo19_voltages[] = {
+ 1800000, 1850000, 1900000, 1750000,
+ 2800000, 2850000, 2900000, 3000000,
+};
+
+static const unsigned int ldo22_voltages[] = {
+ 900000, 1000000, 1050000, 1100000,
+ 1150000, 1175000, 1185000, 1200000,
+};
+
+enum hi655x_regulator_id {
+ HI655X_LDO0,
+ HI655X_LDO1,
+ HI655X_LDO2,
+ HI655X_LDO3,
+ HI655X_LDO4,
+ HI655X_LDO5,
+ HI655X_LDO6,
+ HI655X_LDO7,
+ HI655X_LDO8,
+ HI655X_LDO9,
+ HI655X_LDO10,
+ HI655X_LDO11,
+ HI655X_LDO12,
+ HI655X_LDO13,
+ HI655X_LDO14,
+ HI655X_LDO15,
+ HI655X_LDO16,
+ HI655X_LDO17,
+ HI655X_LDO18,
+ HI655X_LDO19,
+ HI655X_LDO20,
+ HI655X_LDO21,
+ HI655X_LDO22,
+};
+
+static int hi655x_is_enabled(struct regulator_dev *rdev)
+{
+ struct hi655x_regulator *regulator = rdev_get_drvdata(rdev);
+ unsigned int value = 0;
+
+ regmap_read(rdev->regmap, regulator->status_reg, &value);
+ return !!(value & BIT(regulator->ctrl_mask));
+}
+
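+/*
+ * The enable, disable and status bits live in three separate registers
+ * (a write-to-set/write-to-clear style bank), so disabling means writing
+ * the control bit to disable_reg rather than using
+ * regulator_disable_regmap().
+ */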
+static int hi655x_disable(struct regulator_dev *rdev)
+{
+ struct hi655x_regulator *regulator = rdev_get_drvdata(rdev);
+
+ return regmap_write(rdev->regmap, regulator->disable_reg,
+ BIT(regulator->ctrl_mask));
+}
+
+static const struct regulator_ops hi655x_regulator_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = hi655x_disable,
+ .is_enabled = hi655x_is_enabled,
+ .list_voltage = regulator_list_voltage_table,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+};
+
+static const struct regulator_ops hi655x_ldo_linear_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = hi655x_disable,
+ .is_enabled = hi655x_is_enabled,
+ .list_voltage = regulator_list_voltage_linear,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+};
+
+#define HI655X_LDO(_ID, vreg, vmask, ereg, dreg, \
+ sreg, cmask, vtable) { \
+ .rdesc = { \
+ .name = #_ID, \
+ .of_match = of_match_ptr(#_ID), \
+ .ops = &hi655x_regulator_ops, \
+ .regulators_node = of_match_ptr("regulators"), \
+ .type = REGULATOR_VOLTAGE, \
+ .id = HI655X_##_ID, \
+ .owner = THIS_MODULE, \
+ .n_voltages = ARRAY_SIZE(vtable), \
+ .volt_table = vtable, \
+ .vsel_reg = HI655X_BUS_ADDR(vreg), \
+ .vsel_mask = vmask, \
+ .enable_reg = HI655X_BUS_ADDR(ereg), \
+ .enable_mask = BIT(cmask), \
+ }, \
+ .disable_reg = HI655X_BUS_ADDR(dreg), \
+ .status_reg = HI655X_BUS_ADDR(sreg), \
+ .ctrl_mask = cmask, \
+}
+
+#define HI655X_LDO_LINEAR(_ID, vreg, vmask, ereg, dreg, \
+ sreg, cmask, minv, nvolt, vstep) { \
+ .rdesc = { \
+ .name = #_ID, \
+ .of_match = of_match_ptr(#_ID), \
+ .ops = &hi655x_ldo_linear_ops, \
+ .regulators_node = of_match_ptr("regulators"), \
+ .type = REGULATOR_VOLTAGE, \
+ .id = HI655X_##_ID, \
+ .owner = THIS_MODULE, \
+ .min_uV = minv, \
+ .n_voltages = nvolt, \
+ .uV_step = vstep, \
+ .vsel_reg = HI655X_BUS_ADDR(vreg), \
+ .vsel_mask = vmask, \
+ .enable_reg = HI655X_BUS_ADDR(ereg), \
+ .enable_mask = BIT(cmask), \
+ }, \
+ .disable_reg = HI655X_BUS_ADDR(dreg), \
+ .status_reg = HI655X_BUS_ADDR(sreg), \
+ .ctrl_mask = cmask, \
+}
+
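+/*
+ * For example, the LDO2 entry below describes a 2.5 V to 3.2 V regulator
+ * in 100 mV steps: its selector sits in bits [2:0] of PMIC register 0x72
+ * and its enable/disable/status bit is bit 1 of registers 0x29/0x2a/0x2b.
+ */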
+static struct hi655x_regulator regulators[] = {
+ HI655X_LDO_LINEAR(LDO2, 0x72, 0x07, 0x29, 0x2a, 0x2b, 0x01,
+ 2500000, 8, 100000),
+ HI655X_LDO(LDO7, 0x78, 0x07, 0x29, 0x2a, 0x2b, 0x06, ldo7_voltages),
+ HI655X_LDO(LDO10, 0x78, 0x07, 0x29, 0x2a, 0x2b, 0x01, ldo7_voltages),
+ HI655X_LDO_LINEAR(LDO13, 0x7e, 0x07, 0x2c, 0x2d, 0x2e, 0x04,
+ 1600000, 8, 50000),
+ HI655X_LDO_LINEAR(LDO14, 0x7f, 0x07, 0x2c, 0x2d, 0x2e, 0x05,
+ 2500000, 8, 100000),
+ HI655X_LDO_LINEAR(LDO15, 0x80, 0x07, 0x2c, 0x2d, 0x2e, 0x06,
+ 1600000, 8, 50000),
+ HI655X_LDO_LINEAR(LDO17, 0x82, 0x07, 0x2f, 0x30, 0x31, 0x00,
+ 2500000, 8, 100000),
+ HI655X_LDO(LDO19, 0x84, 0x07, 0x2f, 0x30, 0x31, 0x02, ldo19_voltages),
+ HI655X_LDO_LINEAR(LDO21, 0x86, 0x07, 0x2f, 0x30, 0x31, 0x04,
+ 1650000, 8, 50000),
+ HI655X_LDO(LDO22, 0x87, 0x07, 0x2f, 0x30, 0x31, 0x05, ldo22_voltages),
+};
+
+static int hi655x_regulator_probe(struct platform_device *pdev)
+{
+ unsigned int i;
+ struct hi655x_regulator *regulator;
+ struct hi655x_pmic *pmic;
+ struct regulator_config config = { };
+ struct regulator_dev *rdev;
+
+ pmic = dev_get_drvdata(pdev->dev.parent);
+ if (!pmic) {
+ dev_err(&pdev->dev, "no pmic in the regulator parent node\n");
+ return -ENODEV;
+ }
+
+ regulator = devm_kzalloc(&pdev->dev, sizeof(*regulator), GFP_KERNEL);
+ if (!regulator)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, regulator);
+
+ config.dev = pdev->dev.parent;
+ config.regmap = pmic->regmap;
+ for (i = 0; i < ARRAY_SIZE(regulators); i++) {
+ /* Each rdev must see its own descriptor's control registers. */
+ config.driver_data = &regulators[i];
+ rdev = devm_regulator_register(&pdev->dev,
+ &regulators[i].rdesc,
+ &config);
+ if (IS_ERR(rdev)) {
+ dev_err(&pdev->dev, "failed to register regulator %s\n",
+ regulators[i].rdesc.name);
+ return PTR_ERR(rdev);
+ }
+ }
+ return 0;
+}
+
+static struct platform_driver hi655x_regulator_driver = {
+ .driver = {
+ .name = "hi655x-regulator",
+ },
+ .probe = hi655x_regulator_probe,
+};
+module_platform_driver(hi655x_regulator_driver);
+
+MODULE_AUTHOR("Chen Feng <puck.chen@hisilicon.com>");
+MODULE_DESCRIPTION("Hisilicon hi655x regulator driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig
index 0615f50a14cd..df37212a5cbd 100644
--- a/drivers/reset/Kconfig
+++ b/drivers/reset/Kconfig
@@ -13,3 +13,4 @@ menuconfig RESET_CONTROLLER
If unsure, say no.
source "drivers/reset/sti/Kconfig"
+source "drivers/reset/hisilicon/Kconfig"
diff --git a/drivers/reset/Makefile b/drivers/reset/Makefile
index 85d5904e5480..99e18c875e8a 100644
--- a/drivers/reset/Makefile
+++ b/drivers/reset/Makefile
@@ -4,5 +4,6 @@ obj-$(CONFIG_ARCH_SOCFPGA) += reset-socfpga.o
obj-$(CONFIG_ARCH_BERLIN) += reset-berlin.o
obj-$(CONFIG_ARCH_SUNXI) += reset-sunxi.o
obj-$(CONFIG_ARCH_STI) += sti/
+obj-$(CONFIG_ARCH_HISI) += hisilicon/
obj-$(CONFIG_ARCH_ZYNQ) += reset-zynq.o
obj-$(CONFIG_ATH79) += reset-ath79.o
diff --git a/drivers/reset/hisilicon/Kconfig b/drivers/reset/hisilicon/Kconfig
new file mode 100644
index 000000000000..26bf95a83a8e
--- /dev/null
+++ b/drivers/reset/hisilicon/Kconfig
@@ -0,0 +1,5 @@
+config COMMON_RESET_HI6220
+ tristate "Hi6220 Reset Driver"
+ depends on (ARCH_HISI && RESET_CONTROLLER)
+ help
+ Build the Hisilicon Hi6220 reset driver.
diff --git a/drivers/reset/hisilicon/Makefile b/drivers/reset/hisilicon/Makefile
new file mode 100644
index 000000000000..c932f86e2f10
--- /dev/null
+++ b/drivers/reset/hisilicon/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_COMMON_RESET_HI6220) += hi6220_reset.o
diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c
new file mode 100644
index 000000000000..ad708bdb2448
--- /dev/null
+++ b/drivers/reset/hisilicon/hi6220_reset.c
@@ -0,0 +1,164 @@
+/*
+ * Hisilicon Hi6220 reset controller driver
+ *
+ * Copyright (c) 2015-2016 Hisilicon Limited.
+ *
+ * Author: Feng Chen <puck.chen@hisilicon.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/reset-controller.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+
+/*
+ * peripheral ctrl regs
+ */
+#define PERIPH_ASSERT_OFFSET 0x300
+#define PERIPH_DEASSERT_OFFSET 0x304
+#define PERIPH_MAX_INDEX 0x509
+
+/*
+ * media ctrl regs
+ */
+#define SC_MEDIA_RSTEN 0x052C
+#define SC_MEDIA_RSTDIS 0x0530
+#define MEDIA_MAX_INDEX 8
+
+enum hi6220_reset_ctrl_type {
+ PERIPHERAL,
+ MEDIA,
+};
+
+#define to_reset_data(x) container_of(x, struct hi6220_reset_data, rc_dev)
+
+struct hi6220_reset_data {
+ struct reset_controller_dev rc_dev;
+ enum hi6220_reset_ctrl_type type;
+ struct regmap *regmap;
+};
+
+static int hi6220_media_assert(struct reset_controller_dev *rc_dev,
+ unsigned long idx)
+{
+ struct hi6220_reset_data *data = to_reset_data(rc_dev);
+ struct regmap *regmap = data->regmap;
+
+ return regmap_write(regmap, SC_MEDIA_RSTEN, BIT(idx));
+}
+
+static int hi6220_media_deassert(struct reset_controller_dev *rc_dev,
+ unsigned long idx)
+{
+ struct hi6220_reset_data *data = to_reset_data(rc_dev);
+ struct regmap *regmap = data->regmap;
+
+ return regmap_write(regmap, SC_MEDIA_RSTDIS, BIT(idx));
+}
+
+static int hi6220_peripheral_assert(struct reset_controller_dev *rc_dev,
+ unsigned long idx)
+{
+ struct hi6220_reset_data *data = to_reset_data(rc_dev);
+ struct regmap *regmap = data->regmap;
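+ /*
+ * A reset index encodes the register bank in bits [15:8] and the
+ * bit position in bits [7:0]; e.g. index 0x502 maps to bit 2 of
+ * the bank-5 register at 0x300 + 5 * 0x10 = 0x350.
+ */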
+ u32 bank = idx >> 8;
+ u32 offset = idx & 0xff;
+ u32 reg = PERIPH_ASSERT_OFFSET + bank * 0x10;
+
+ return regmap_write(regmap, reg, BIT(offset));
+}
+
+static int hi6220_peripheral_deassert(struct reset_controller_dev *rc_dev,
+ unsigned long idx)
+{
+ struct hi6220_reset_data *data = to_reset_data(rc_dev);
+ struct regmap *regmap = data->regmap;
+ u32 bank = idx >> 8;
+ u32 offset = idx & 0xff;
+ u32 reg = PERIPH_DEASSERT_OFFSET + bank * 0x10;
+
+ return regmap_write(regmap, reg, BIT(offset));
+}
+
+static const struct reset_control_ops hi6220_media_reset_ops = {
+ .assert = hi6220_media_assert,
+ .deassert = hi6220_media_deassert,
+};
+
+static const struct reset_control_ops hi6220_peripheral_reset_ops = {
+ .assert = hi6220_peripheral_assert,
+ .deassert = hi6220_peripheral_deassert,
+};
+
+static int hi6220_reset_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct device *dev = &pdev->dev;
+ enum hi6220_reset_ctrl_type type;
+ struct hi6220_reset_data *data;
+ struct regmap *regmap;
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ type = (enum hi6220_reset_ctrl_type)of_device_get_match_data(dev);
+
+ regmap = syscon_node_to_regmap(np);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "failed to get reset controller regmap\n");
+ return PTR_ERR(regmap);
+ }
+
+ data->type = type;
+ data->regmap = regmap;
+ data->rc_dev.of_node = np;
+ if (type == MEDIA) {
+ data->rc_dev.ops = &hi6220_media_reset_ops;
+ data->rc_dev.nr_resets = MEDIA_MAX_INDEX;
+ } else {
+ data->rc_dev.ops = &hi6220_peripheral_reset_ops;
+ data->rc_dev.nr_resets = PERIPH_MAX_INDEX;
+ }
+
+ return reset_controller_register(&data->rc_dev);
+}
+
+static const struct of_device_id hi6220_reset_match[] = {
+ {
+ .compatible = "hisilicon,hi6220-sysctrl",
+ .data = (void *)PERIPHERAL,
+ },
+ {
+ .compatible = "hisilicon,hi6220-mediactrl",
+ .data = (void *)MEDIA,
+ },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, hi6220_reset_match);
+
+static struct platform_driver hi6220_reset_driver = {
+ .probe = hi6220_reset_probe,
+ .driver = {
+ .name = "reset-hi6220",
+ .of_match_table = hi6220_reset_match,
+ },
+};
+
+static int __init hi6220_reset_init(void)
+{
+ return platform_driver_register(&hi6220_reset_driver);
+}
+
+postcore_initcall(hi6220_reset_init);
diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig
new file mode 100644
index 000000000000..72280459a021
--- /dev/null
+++ b/drivers/tee/Kconfig
@@ -0,0 +1,19 @@
+# Generic Trusted Execution Environment Configuration
+config TEE
+ bool "Trusted Execution Environment support"
+ default n
+ select DMA_SHARED_BUFFER
+ select GENERIC_ALLOCATOR
+ help
+ This implements a generic interface towards a Trusted Execution
+ Environment (TEE).
+
+if TEE
+
+menu "TEE drivers"
+
+source "drivers/tee/optee/Kconfig"
+
+endmenu
+
+endif
diff --git a/drivers/tee/Makefile b/drivers/tee/Makefile
new file mode 100644
index 000000000000..53f3c7614f2c
--- /dev/null
+++ b/drivers/tee/Makefile
@@ -0,0 +1,4 @@
+obj-y += tee.o
+obj-y += tee_shm.o
+obj-y += tee_shm_pool.o
+obj-$(CONFIG_OPTEE) += optee/
diff --git a/drivers/tee/optee/Kconfig b/drivers/tee/optee/Kconfig
new file mode 100644
index 000000000000..a7a8b71828bd
--- /dev/null
+++ b/drivers/tee/optee/Kconfig
@@ -0,0 +1,8 @@
+# OP-TEE Trusted Execution Environment Configuration
+config OPTEE
+ tristate "OP-TEE"
+ default n
+ depends on HAVE_ARM_SMCCC
+ help
+ This implements the OP-TEE Trusted Execution Environment (TEE)
+ driver.
diff --git a/drivers/tee/optee/Makefile b/drivers/tee/optee/Makefile
new file mode 100644
index 000000000000..92fe5789bcce
--- /dev/null
+++ b/drivers/tee/optee/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_OPTEE) += optee.o
+optee-objs += core.o
+optee-objs += call.o
+optee-objs += rpc.o
+optee-objs += supp.o
diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c
new file mode 100644
index 000000000000..c567f98ec4c7
--- /dev/null
+++ b/drivers/tee/optee/call.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/device.h>
+#include <linux/tee_drv.h>
+#include <linux/arm-smccc.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
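+/*
+ * A normal world thread parks in one of these queue entries when secure
+ * world runs out of threads (OPTEE_SMC_RETURN_ETHREAD_LIMIT) and is
+ * completed once a secure world thread frees up.
+ */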
+struct optee_call_waiter {
+ struct list_head list_node;
+ struct completion c;
+ bool completed;
+};
+
+static void optee_cq_wait_init(struct optee_call_queue *cq,
+ struct optee_call_waiter *w)
+{
+ mutex_lock(&cq->mutex);
+ /*
+ * We add ourselves to the queue, but we don't wait. This
+ * guarantees that we don't lose a completion if secure world
+ * returns busy and another thread just exited and tried to
+ * complete someone.
+ */
+ w->completed = false;
+ init_completion(&w->c);
+ list_add_tail(&w->list_node, &cq->waiters);
+ mutex_unlock(&cq->mutex);
+}
+
+static void optee_cq_wait_for_completion(struct optee_call_queue *cq,
+ struct optee_call_waiter *w)
+{
+ wait_for_completion(&w->c);
+
+ mutex_lock(&cq->mutex);
+
+ /* Move to end of list to get out of the way for other waiters */
+ list_del(&w->list_node);
+ w->completed = false;
+ reinit_completion(&w->c);
+ list_add_tail(&w->list_node, &cq->waiters);
+
+ mutex_unlock(&cq->mutex);
+}
+
+static void optee_cq_complete_one(struct optee_call_queue *cq)
+{
+ struct optee_call_waiter *w;
+
+ list_for_each_entry(w, &cq->waiters, list_node) {
+ if (!w->completed) {
+ complete(&w->c);
+ w->completed = true;
+ break;
+ }
+ }
+}
+
+static void optee_cq_wait_final(struct optee_call_queue *cq,
+ struct optee_call_waiter *w)
+{
+ mutex_lock(&cq->mutex);
+
+ /* Get out of the list */
+ list_del(&w->list_node);
+
+ optee_cq_complete_one(cq);
+ /*
+ * If we're completed we've got a completion that some other task
+ * could have used instead.
+ */
+ if (w->completed)
+ optee_cq_complete_one(cq);
+
+ mutex_unlock(&cq->mutex);
+}
+
+/* Requires the filpstate mutex to be held */
+static struct optee_session *find_session(struct optee_context_data *ctxdata,
+ u32 session_id)
+{
+ struct optee_session *sess;
+
+ list_for_each_entry(sess, &ctxdata->sess_list, list_node)
+ if (sess->session_id == session_id)
+ return sess;
+ return NULL;
+}
+
+u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg)
+{
+ struct optee *optee = tee_get_drvdata(ctx->teedev);
+ struct optee_call_waiter w;
+ struct optee_rpc_param param = { };
+ u32 ret;
+
+ param.a0 = OPTEE_SMC_CALL_WITH_ARG;
+ reg_pair_from_64(&param.a1, &param.a2, parg);
+ /* Initialize waiter */
+ optee_cq_wait_init(&optee->call_queue, &w);
+ while (true) {
+ struct arm_smccc_res res;
+
+ optee->invoke_fn(param.a0, param.a1, param.a2, param.a3,
+ param.a4, param.a5, param.a6, param.a7,
+ &res);
+
+ if (res.a0 == OPTEE_SMC_RETURN_ETHREAD_LIMIT) {
+ /*
+ * Out of threads in secure world, wait for a thread to
+ * become available.
+ */
+ optee_cq_wait_for_completion(&optee->call_queue, &w);
+ } else if (OPTEE_SMC_RETURN_IS_RPC(res.a0)) {
+ param.a0 = res.a0;
+ param.a1 = res.a1;
+ param.a2 = res.a2;
+ param.a3 = res.a3;
+ optee_handle_rpc(ctx, &param);
+ } else {
+ ret = res.a0;
+ break;
+ }
+ }
+ /*
+ * We're done with our thread in secure world; if there are
+ * any waiters, wake one of them up.
+ */
+ optee_cq_wait_final(&optee->call_queue, &w);
+ return ret;
+}
+
+static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params,
+ struct optee_msg_arg **msg_arg, phys_addr_t *msg_parg)
+{
+ int rc;
+ struct tee_shm *shm;
+ struct optee_msg_arg *ma;
+
+ shm = tee_shm_alloc(ctx->teedev, OPTEE_MSG_GET_ARG_SIZE(num_params),
+ TEE_SHM_MAPPED);
+ if (IS_ERR(shm))
+ return shm;
+ ma = tee_shm_get_va(shm, 0);
+ if (IS_ERR(ma)) {
+ rc = PTR_ERR(ma);
+ goto out;
+ }
+ rc = tee_shm_get_pa(shm, 0, msg_parg);
+ if (rc)
+ goto out;
+
+ memset(ma, 0, OPTEE_MSG_GET_ARG_SIZE(num_params));
+ ma->num_params = num_params;
+ *msg_arg = ma;
+out:
+ if (rc) {
+ tee_shm_free(shm);
+ return ERR_PTR(rc);
+ }
+ return shm;
+}
+
+int optee_open_session(struct tee_context *ctx,
+ struct tee_ioctl_open_session_arg *arg,
+ struct tee_param *param)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ int rc;
+ struct tee_shm *shm;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ struct optee_msg_param *msg_param;
+ struct optee_session *sess = NULL;
+
+ /* +2 for the meta parameters added below */
+ shm = get_msg_arg(ctx, arg->num_params + 2, &msg_arg, &msg_parg);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ msg_arg->cmd = OPTEE_MSG_CMD_OPEN_SESSION;
+ msg_arg->cancel_id = arg->cancel_id;
+ msg_param = OPTEE_MSG_GET_PARAMS(msg_arg);
+
+ /*
+ * Initialize and add the meta parameters needed when opening a
+ * session.
+ */
+ msg_param[0].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT |
+ OPTEE_MSG_ATTR_META;
+ msg_param[1].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT |
+ OPTEE_MSG_ATTR_META;
+ memcpy(&msg_param[0].u.value, arg->uuid, sizeof(arg->uuid));
+ memcpy(&msg_param[1].u.value, arg->clnt_uuid, sizeof(arg->clnt_uuid));
+ msg_param[1].u.value.c = arg->clnt_login;
+
+ rc = optee_to_msg_param(msg_param + 2, arg->num_params, param);
+ if (rc)
+ goto out;
+
+ sess = kzalloc(sizeof(*sess), GFP_KERNEL);
+ if (!sess) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (optee_do_call_with_arg(ctx, msg_parg)) {
+ msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+ msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+ }
+
+ if (msg_arg->ret == TEEC_SUCCESS) {
+ /* A new session has been created, add it to the list. */
+ sess->session_id = msg_arg->session;
+ mutex_lock(&ctxdata->mutex);
+ list_add(&sess->list_node, &ctxdata->sess_list);
+ mutex_unlock(&ctxdata->mutex);
+ sess = NULL;
+ }
+
+ if (optee_from_msg_param(param, arg->num_params, msg_param + 2)) {
+ arg->ret = TEEC_ERROR_COMMUNICATION;
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+ /* Close session again to avoid leakage */
+ optee_close_session(ctx, msg_arg->session);
+ } else {
+ arg->session = msg_arg->session;
+ arg->ret = msg_arg->ret;
+ arg->ret_origin = msg_arg->ret_origin;
+ }
+out:
+ kfree(sess);
+ tee_shm_free(shm);
+ return rc;
+}
+
+int optee_close_session(struct tee_context *ctx, u32 session)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ struct tee_shm *shm;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ struct optee_session *sess;
+
+ /* Check that the session is valid and remove it from the list */
+ mutex_lock(&ctxdata->mutex);
+ sess = find_session(ctxdata, session);
+ if (sess)
+ list_del(&sess->list_node);
+ mutex_unlock(&ctxdata->mutex);
+ if (!sess)
+ return -EINVAL;
+ kfree(sess);
+
+ shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ msg_arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION;
+ msg_arg->session = session;
+ optee_do_call_with_arg(ctx, msg_parg);
+
+ tee_shm_free(shm);
+ return 0;
+}
+
+int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg,
+ struct tee_param *param)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ struct tee_shm *shm;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ struct optee_msg_param *msg_param;
+ struct optee_session *sess;
+ int rc;
+
+ /* Check that the session is valid */
+ mutex_lock(&ctxdata->mutex);
+ sess = find_session(ctxdata, arg->session);
+ mutex_unlock(&ctxdata->mutex);
+ if (!sess)
+ return -EINVAL;
+
+ shm = get_msg_arg(ctx, arg->num_params, &msg_arg, &msg_parg);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+ msg_arg->cmd = OPTEE_MSG_CMD_INVOKE_COMMAND;
+ msg_arg->func = arg->func;
+ msg_arg->session = arg->session;
+ msg_arg->cancel_id = arg->cancel_id;
+ msg_param = OPTEE_MSG_GET_PARAMS(msg_arg);
+
+ rc = optee_to_msg_param(msg_param, arg->num_params, param);
+ if (rc)
+ goto out;
+
+ if (optee_do_call_with_arg(ctx, msg_parg)) {
+ msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+ msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+ }
+
+ if (optee_from_msg_param(param, arg->num_params, msg_param)) {
+ msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+ msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+ }
+
+ arg->ret = msg_arg->ret;
+ arg->ret_origin = msg_arg->ret_origin;
+out:
+ tee_shm_free(shm);
+ return rc;
+}
+
+int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ struct tee_shm *shm;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ struct optee_session *sess;
+
+ /* Check that the session is valid */
+ mutex_lock(&ctxdata->mutex);
+ sess = find_session(ctxdata, session);
+ mutex_unlock(&ctxdata->mutex);
+ if (!sess)
+ return -EINVAL;
+
+ shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ msg_arg->cmd = OPTEE_MSG_CMD_CANCEL;
+ msg_arg->session = session;
+ msg_arg->cancel_id = cancel_id;
+ optee_do_call_with_arg(ctx, msg_parg);
+
+ tee_shm_free(shm);
+ return 0;
+}
+
+void optee_enable_shm_cache(struct optee *optee)
+{
+ struct optee_call_waiter w;
+
+ /* We need to retry until secure world isn't busy. */
+ optee_cq_wait_init(&optee->call_queue, &w);
+ while (true) {
+ struct arm_smccc_res res;
+
+ optee->invoke_fn(OPTEE_SMC_ENABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0,
+ 0, &res);
+ if (res.a0 == OPTEE_SMC_RETURN_OK)
+ break;
+ optee_cq_wait_for_completion(&optee->call_queue, &w);
+ }
+ optee_cq_wait_final(&optee->call_queue, &w);
+}
+
+void optee_disable_shm_cache(struct optee *optee)
+{
+ struct optee_call_waiter w;
+
+ /* We need to retry until secure world isn't busy. */
+ optee_cq_wait_init(&optee->call_queue, &w);
+ while (true) {
+ struct arm_smccc_res res;
+
+ optee->invoke_fn(OPTEE_SMC_DISABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0,
+ 0, &res);
+ if (res.a0 == OPTEE_SMC_RETURN_ENOTAVAIL)
+ break; /* All shm's freed */
+ if (res.a0 == OPTEE_SMC_RETURN_OK) {
+ struct tee_shm *shm;
+
+ shm = reg_pair_to_ptr(res.a1, res.a2);
+ tee_shm_free(shm);
+ } else
+ optee_cq_wait_for_completion(&optee->call_queue, &w);
+ }
+ optee_cq_wait_final(&optee->call_queue, &w);
+}
diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
new file mode 100644
index 000000000000..874550c84f30
--- /dev/null
+++ b/drivers/tee/optee/core.c
@@ -0,0 +1,522 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/tee_drv.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
+#define DRIVER_NAME "optee"
+
+#define OPTEE_SHM_NUM_PRIV_PAGES 1
+
+int optee_from_msg_param(struct tee_param *params, size_t num_params,
+ const struct optee_msg_param *msg_params)
+{
+ int rc;
+ size_t n;
+ struct tee_shm *shm;
+ phys_addr_t pa;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_param *p = params + n;
+ const struct optee_msg_param *mp = msg_params + n;
+ u32 attr = mp->attr & OPTEE_MSG_ATTR_TYPE_MASK;
+
+ switch (attr) {
+ case OPTEE_MSG_ATTR_TYPE_NONE:
+ p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE;
+ memset(&p->u, 0, sizeof(p->u));
+ break;
+ case OPTEE_MSG_ATTR_TYPE_VALUE_INPUT:
+ case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT:
+ case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT:
+ p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT +
+ attr - OPTEE_MSG_ATTR_TYPE_VALUE_INPUT;
+ p->u.value.a = mp->u.value.a;
+ p->u.value.b = mp->u.value.b;
+ p->u.value.c = mp->u.value.c;
+ break;
+ case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT:
+ case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT:
+ case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT:
+ p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT +
+ attr - OPTEE_MSG_ATTR_TYPE_TMEM_INPUT;
+ p->u.memref.size = mp->u.tmem.size;
+ shm = (struct tee_shm *)(unsigned long)
+ mp->u.tmem.shm_ref;
+ if (!shm) {
+ p->u.memref.shm_offs = 0;
+ p->u.memref.shm = NULL;
+ break;
+ }
+ rc = tee_shm_get_pa(shm, 0, &pa);
+ if (rc)
+ return rc;
+ p->u.memref.shm_offs = pa - mp->u.tmem.buf_ptr;
+ p->u.memref.shm = shm;
+
+ /* Check that the memref is covered by the shm object */
+ if (p->u.memref.size) {
+ size_t o = p->u.memref.shm_offs +
+ p->u.memref.size - 1;
+
+ rc = tee_shm_get_pa(shm, o, NULL);
+ if (rc)
+ return rc;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params,
+ const struct tee_param *params)
+{
+ int rc;
+ size_t n;
+ phys_addr_t pa;
+
+ for (n = 0; n < num_params; n++) {
+ const struct tee_param *p = params + n;
+ struct optee_msg_param *mp = msg_params + n;
+
+ switch (p->attr) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_NONE:
+ mp->attr = OPTEE_MSG_ATTR_TYPE_NONE;
+ memset(&mp->u, 0, sizeof(mp->u));
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ mp->attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT + p->attr -
+ TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+ mp->u.value.a = p->u.value.a;
+ mp->u.value.b = p->u.value.b;
+ mp->u.value.c = p->u.value.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ mp->attr = OPTEE_MSG_ATTR_TYPE_TMEM_INPUT +
+ p->attr -
+ TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+ mp->u.tmem.shm_ref = (unsigned long)p->u.memref.shm;
+ mp->u.tmem.size = p->u.memref.size;
+ if (!p->u.memref.shm) {
+ mp->u.tmem.buf_ptr = 0;
+ break;
+ }
+ rc = tee_shm_get_pa(p->u.memref.shm,
+ p->u.memref.shm_offs, &pa);
+ if (rc)
+ return rc;
+ mp->u.tmem.buf_ptr = pa;
+ mp->attr |= OPTEE_MSG_ATTR_CACHE_PREDEFINED <<
+ OPTEE_MSG_ATTR_CACHE_SHIFT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static void optee_get_version(struct tee_device *teedev,
+ struct tee_ioctl_version_data *vers)
+{
+ struct tee_ioctl_version_data v = {
+ .impl_id = TEE_IMPL_ID_OPTEE,
+ .impl_caps = TEE_OPTEE_CAP_TZ,
+ .gen_caps = TEE_GEN_CAP_GP,
+ };
+ *vers = v;
+}
+
+static int optee_open(struct tee_context *ctx)
+{
+ struct optee_context_data *ctxdata;
+
+ ctxdata = kzalloc(sizeof(*ctxdata), GFP_KERNEL);
+ if (!ctxdata)
+ return -ENOMEM;
+
+ mutex_init(&ctxdata->mutex);
+ INIT_LIST_HEAD(&ctxdata->sess_list);
+
+ ctx->data = ctxdata;
+ return 0;
+}
+
+static void optee_release(struct tee_context *ctx)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ struct tee_shm *shm;
+ struct optee_msg_arg *arg = NULL;
+ phys_addr_t parg;
+
+ if (!ctxdata)
+ return;
+
+ shm = tee_shm_alloc(ctx->teedev, sizeof(struct optee_msg_arg),
+ TEE_SHM_MAPPED);
+ if (!IS_ERR(shm)) {
+ arg = tee_shm_get_va(shm, 0);
+ /*
+ * If va2pa fails for some reason, we can't call
+ * optee_close_session(), only free the memory. The Secure OS
+ * will leak sessions and finally refuse to open new ones, but
+ * we will at least let normal world reclaim its memory.
+ */
+ if (!IS_ERR(arg))
+ tee_shm_va2pa(shm, arg, &parg);
+ }
+
+ while (true) {
+ struct optee_session *sess;
+
+ sess = list_first_entry_or_null(&ctxdata->sess_list,
+ struct optee_session,
+ list_node);
+ if (!sess)
+ break;
+ list_del(&sess->list_node);
+ if (!IS_ERR_OR_NULL(arg)) {
+ memset(arg, 0, sizeof(*arg));
+ arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION;
+ arg->session = sess->session_id;
+ optee_do_call_with_arg(ctx, parg);
+ }
+ kfree(sess);
+ }
+ kfree(ctxdata);
+
+ if (!IS_ERR(shm))
+ tee_shm_free(shm);
+
+ ctx->data = NULL;
+}
+
+static struct tee_driver_ops optee_ops = {
+ .get_version = optee_get_version,
+ .open = optee_open,
+ .release = optee_release,
+ .open_session = optee_open_session,
+ .close_session = optee_close_session,
+ .invoke_func = optee_invoke_func,
+ .cancel_req = optee_cancel_req,
+};
+
+static struct tee_desc optee_desc = {
+ .name = DRIVER_NAME "-clnt",
+ .ops = &optee_ops,
+ .owner = THIS_MODULE,
+};
+
+static struct tee_driver_ops optee_supp_ops = {
+ .get_version = optee_get_version,
+ .open = optee_open,
+ .release = optee_release,
+ .supp_recv = optee_supp_recv,
+ .supp_send = optee_supp_send,
+};
+
+static struct tee_desc optee_supp_desc = {
+ .name = DRIVER_NAME "-supp",
+ .ops = &optee_supp_ops,
+ .owner = THIS_MODULE,
+ .flags = TEE_DESC_PRIVILEGED,
+};
+
+static bool optee_msg_api_uid_is_optee_api(optee_invoke_fn *invoke_fn)
+{
+ struct arm_smccc_res res;
+
+ invoke_fn(OPTEE_SMC_CALLS_UID, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ if (res.a0 == OPTEE_MSG_UID_0 && res.a1 == OPTEE_MSG_UID_1 &&
+ res.a2 == OPTEE_MSG_UID_2 && res.a3 == OPTEE_MSG_UID_3)
+ return true;
+ return false;
+}
+
+static bool optee_msg_api_revision_is_compatible(optee_invoke_fn *invoke_fn)
+{
+ struct arm_smccc_res res;
+
+ invoke_fn(OPTEE_SMC_CALLS_REVISION, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ if (res.a0 == OPTEE_MSG_REVISION_MAJOR &&
+ (int)res.a1 >= OPTEE_MSG_REVISION_MINOR)
+ return true;
+ return false;
+}
+
+static bool optee_msg_exchange_capabilities(optee_invoke_fn *invoke_fn,
+ u32 *sec_caps)
+{
+ struct arm_smccc_res res;
+ u32 a1 = 0;
+
+ /*
+ * TODO This isn't enough to tell if it's a UP system (from the
+ * kernel's point of view) or not; is_smp() returns the
+ * information needed, but can't be called directly from here.
+ */
+#ifndef CONFIG_SMP
+ a1 |= OPTEE_SMC_NSEC_CAP_UNIPROCESSOR;
+#endif
+
+ invoke_fn(OPTEE_SMC_EXCHANGE_CAPABILITIES, a1, 0, 0, 0, 0, 0, 0, &res);
+
+ if (res.a0 != OPTEE_SMC_RETURN_OK)
+ return false;
+
+ *sec_caps = res.a1;
+ return true;
+}
+
+static struct tee_shm_pool *optee_config_shm_ioremap(struct device *dev,
+ optee_invoke_fn *invoke_fn,
+ void __iomem **ioremaped_shm)
+{
+ struct arm_smccc_res res;
+ struct tee_shm_pool *pool;
+ unsigned long vaddr;
+ phys_addr_t paddr;
+ size_t size;
+ phys_addr_t begin;
+ phys_addr_t end;
+ void __iomem *va;
+ struct tee_shm_pool_mem_info priv_info;
+ struct tee_shm_pool_mem_info dmabuf_info;
+
+ invoke_fn(OPTEE_SMC_GET_SHM_CONFIG, 0, 0, 0, 0, 0, 0, 0, &res);
+ if (res.a0 != OPTEE_SMC_RETURN_OK) {
+ dev_info(dev, "shm service not available\n");
+ return ERR_PTR(-ENOENT);
+ }
+
+ if (res.a3 != OPTEE_SMC_SHM_CACHED) {
+ dev_err(dev, "only normal cached shared memory supported\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ begin = roundup(res.a1, PAGE_SIZE);
+ end = rounddown(res.a1 + res.a2, PAGE_SIZE);
+ paddr = begin;
+ size = end - begin;
+
+ if (size < 2 * OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE) {
+ dev_err(dev, "too small shared memory area\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ va = ioremap_cache(paddr, size);
+ if (!va) {
+ dev_err(dev, "shared memory ioremap failed\n");
+ return ERR_PTR(-EINVAL);
+ }
+ vaddr = (unsigned long)va;
+
+ priv_info.vaddr = vaddr;
+ priv_info.paddr = paddr;
+ priv_info.size = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+ dmabuf_info.vaddr = vaddr + OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+ dmabuf_info.paddr = paddr + OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+ dmabuf_info.size = size - OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+
+ pool = tee_shm_pool_alloc_res_mem(dev, &priv_info, &dmabuf_info);
+ if (IS_ERR(pool))
+ iounmap(va);
+ else
+ *ioremaped_shm = va;
+ return pool;
+}
+
+static int get_invoke_func(struct device *dev, optee_invoke_fn **invoke_fn)
+{
+ struct device_node *np = dev->of_node;
+ const char *method;
+
+ dev_info(dev, "probing for conduit method from DT.\n");
+
+ if (of_property_read_string(np, "method", &method)) {
+ dev_warn(dev, "missing \"method\" property\n");
+ return -ENXIO;
+ }
+
+ if (!strcmp("hvc", method)) {
+ *invoke_fn = arm_smccc_hvc;
+ } else if (!strcmp("smc", method)) {
+ *invoke_fn = arm_smccc_smc;
+ } else {
+ dev_warn(dev, "invalid \"method\" property: %s\n", method);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int optee_probe(struct platform_device *pdev)
+{
+ optee_invoke_fn *invoke_fn;
+ struct tee_shm_pool *pool;
+ struct optee *optee = NULL;
+ void __iomem *ioremaped_shm = NULL;
+ struct tee_device *teedev;
+ u32 sec_caps;
+ int rc;
+
+ rc = get_invoke_func(&pdev->dev, &invoke_fn);
+ if (rc)
+ return rc;
+
+ if (!optee_msg_api_uid_is_optee_api(invoke_fn) ||
+ !optee_msg_api_revision_is_compatible(invoke_fn) ||
+ !optee_msg_exchange_capabilities(invoke_fn, &sec_caps))
+ return -EINVAL;
+
+ /*
+ * We have no other option for shared memory; if secure world
+ * doesn't have any reserved memory we can use, we can't continue.
+ */
+ if (!(sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVERED_SHM))
+ return -EINVAL;
+
+ pool = optee_config_shm_ioremap(&pdev->dev, invoke_fn, &ioremaped_shm);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ optee = devm_kzalloc(&pdev->dev, sizeof(*optee), GFP_KERNEL);
+ if (!optee) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ optee->dev = &pdev->dev;
+ optee->invoke_fn = invoke_fn;
+
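+ /*
+ * Register two devices: "optee-clnt" for regular clients and the
+ * privileged "optee-supp" used by tee-supplicant to serve RPCs.
+ */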
+ teedev = tee_device_alloc(&optee_desc, &pdev->dev, pool, optee);
+ if (IS_ERR(teedev)) {
+ rc = PTR_ERR(teedev);
+ goto err;
+ }
+ optee->teedev = teedev;
+
+ teedev = tee_device_alloc(&optee_supp_desc, &pdev->dev, pool, optee);
+ if (IS_ERR(teedev)) {
+ rc = PTR_ERR(teedev);
+ goto err;
+ }
+ optee->supp_teedev = teedev;
+
+ rc = tee_device_register(optee->teedev);
+ if (rc)
+ goto err;
+
+ rc = tee_device_register(optee->supp_teedev);
+ if (rc)
+ goto err;
+
+ mutex_init(&optee->call_queue.mutex);
+ INIT_LIST_HEAD(&optee->call_queue.waiters);
+ optee_wait_queue_init(&optee->wait_queue);
+ optee_supp_init(&optee->supp);
+ optee->ioremaped_shm = ioremaped_shm;
+ optee->pool = pool;
+
+ platform_set_drvdata(pdev, optee);
+
+ optee_enable_shm_cache(optee);
+
+ dev_info(&pdev->dev, "initialized driver\n");
+ return 0;
+err:
+ if (optee) {
+ /*
+ * tee_device_unregister() copes with devices that were
+ * never allocated, so this also covers partial init.
+ */
+ tee_device_unregister(optee->teedev);
+ tee_device_unregister(optee->supp_teedev);
+ }
+ if (pool)
+ tee_shm_pool_free(pool);
+ if (ioremaped_shm)
+ iounmap(ioremaped_shm);
+ return rc;
+}
+
+static int optee_remove(struct platform_device *pdev)
+{
+ struct optee *optee = platform_get_drvdata(pdev);
+
+ optee_disable_shm_cache(optee);
+
+ tee_device_unregister(optee->teedev);
+ tee_device_unregister(optee->supp_teedev);
+ tee_shm_pool_free(optee->pool);
+ if (optee->ioremaped_shm)
+ iounmap(optee->ioremaped_shm);
+ optee_wait_queue_exit(&optee->wait_queue);
+ optee_supp_uninit(&optee->supp);
+ mutex_destroy(&optee->call_queue.mutex);
+ return 0;
+}
+
+static const struct of_device_id optee_match[] = {
+ { .compatible = "linaro,optee-tz" },
+ {},
+};
+
+static struct platform_driver optee_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = optee_match,
+ },
+ .probe = optee_probe,
+ .remove = optee_remove,
+};
+
+static int __init optee_driver_init(void)
+{
+ struct device_node *node;
+
+ /*
+ * Preferred path is /firmware/optee, but it's the matching that
+ * matters.
+ */
+ for_each_matching_node(node, optee_match)
+ of_platform_device_create(node, NULL, NULL);
+
+ return platform_driver_register(&optee_driver);
+}
+module_init(optee_driver_init);
+
+static void __exit optee_driver_exit(void)
+{
+ platform_driver_unregister(&optee_driver);
+}
+module_exit(optee_driver_exit);
+
+MODULE_AUTHOR("Linaro");
+MODULE_DESCRIPTION("OP-TEE driver");
+MODULE_SUPPORTED_DEVICE("");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tee/optee/optee_msg.h b/drivers/tee/optee/optee_msg.h
new file mode 100644
index 000000000000..ca2f8aa54f5e
--- /dev/null
+++ b/drivers/tee/optee/optee_msg.h
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _OPTEE_MSG_H
+#define _OPTEE_MSG_H
+
+#include <linux/types.h>
+
+/*
+ * This file defines the OP-TEE message protocol used to communicate
+ * with an instance of OP-TEE running in secure world.
+ *
+ * This file is divided into three sections.
+ * 1. Formatting of messages.
+ * 2. Requests from normal world.
+ * 3. Requests from secure world, Remote Procedure Call (RPC), handled by
+ * tee-supplicant.
+ */
+
+/*****************************************************************************
+ * Part 1 - formatting of messages
+ *****************************************************************************/
+
+#define OPTEE_MSG_ATTR_TYPE_NONE 0x0
+#define OPTEE_MSG_ATTR_TYPE_VALUE_INPUT 0x1
+#define OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT 0x2
+#define OPTEE_MSG_ATTR_TYPE_VALUE_INOUT 0x3
+#define OPTEE_MSG_ATTR_TYPE_RMEM_INPUT 0x5
+#define OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT 0x6
+#define OPTEE_MSG_ATTR_TYPE_RMEM_INOUT 0x7
+#define OPTEE_MSG_ATTR_TYPE_TMEM_INPUT 0x9
+#define OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT 0xa
+#define OPTEE_MSG_ATTR_TYPE_TMEM_INOUT 0xb
+
+#define OPTEE_MSG_ATTR_TYPE_MASK 0xff
+
+/*
+ * Meta parameter to be absorbed by the Secure OS and not passed
+ * to the Trusted Application.
+ *
+ * Currently only used with OPTEE_MSG_CMD_OPEN_SESSION.
+ */
+#define OPTEE_MSG_ATTR_META (1 << 8)
+
+/*
+ * The temporary shared memory object is not physically contiguous and this
+ * temp memref is followed by another fragment until the last temp memref,
+ * which doesn't have this bit set.
+ */
+#define OPTEE_MSG_ATTR_FRAGMENT (1 << 9)
+
+/*
+ * Memory attributes for caching passed with temp memrefs. The actual value
+ * used is defined outside the message protocol with the exception of
+ * OPTEE_MSG_ATTR_CACHE_PREDEFINED which means the attributes already
+ * defined for the memory range should be used. If optee_smc.h is used as
+ * bearer of this protocol OPTEE_SMC_SHM_* is used for values.
+ */
+#define OPTEE_MSG_ATTR_CACHE_SHIFT 16
+#define OPTEE_MSG_ATTR_CACHE_MASK 0x7
+#define OPTEE_MSG_ATTR_CACHE_PREDEFINED 0
+
+/*
+ * Same values as TEE_LOGIN_* from TEE Internal API
+ */
+#define OPTEE_MSG_LOGIN_PUBLIC 0x00000000
+#define OPTEE_MSG_LOGIN_USER 0x00000001
+#define OPTEE_MSG_LOGIN_GROUP 0x00000002
+#define OPTEE_MSG_LOGIN_APPLICATION 0x00000004
+#define OPTEE_MSG_LOGIN_APPLICATION_USER 0x00000005
+#define OPTEE_MSG_LOGIN_APPLICATION_GROUP 0x00000006
+
+/**
+ * struct optee_msg_param_tmem - temporary memory reference
+ * @buf_ptr: Address of the buffer
+ * @size: Size of the buffer
+ * @shm_ref: Temporary shared memory reference, pointer to a struct tee_shm
+ *
+ * Secure and normal world communicate pointers as physical addresses
+ * instead of virtual addresses. This is because secure and normal world
+ * have completely independent memory mappings. Normal world can even have
+ * a hypervisor which needs to translate the guest physical address (AKA
+ * IPA in ARM documentation) to a real physical address before passing the
+ * structure to secure world.
+ */
+struct optee_msg_param_tmem {
+ u64 buf_ptr;
+ u64 size;
+ u64 shm_ref;
+};
+
+/**
+ * struct optee_msg_param_rmem - registered memory reference
+ * @offs: Offset into shared memory reference
+ * @size: Size of the buffer
+ * @shm_ref: Shared memory reference, pointer to a struct tee_shm
+ */
+struct optee_msg_param_rmem {
+ u64 offs;
+ u64 size;
+ u64 shm_ref;
+};
+
+/**
+ * struct optee_msg_param_value - values
+ * @a: first value
+ * @b: second value
+ * @c: third value
+ */
+struct optee_msg_param_value {
+ u64 a;
+ u64 b;
+ u64 c;
+};
+
+/**
+ * struct optee_msg_param - parameter
+ * @attr: attributes
+ * @memref: a memory reference
+ * @value: a value
+ *
+ * @attr & OPTEE_MSG_ATTR_TYPE_MASK indicates if tmem, rmem or value is used in
+ * the union. OPTEE_MSG_ATTR_TYPE_VALUE_* indicates value,
+ * OPTEE_MSG_ATTR_TYPE_TMEM_* indicates tmem and
+ * OPTEE_MSG_ATTR_TYPE_RMEM_* indicates rmem.
+ * OPTEE_MSG_ATTR_TYPE_NONE indicates that none of the members are used.
+ */
+struct optee_msg_param {
+ u64 attr;
+ union {
+ struct optee_msg_param_tmem tmem;
+ struct optee_msg_param_rmem rmem;
+ struct optee_msg_param_value value;
+ } u;
+};
+
+/**
+ * struct optee_msg_arg - call argument
+ * @cmd: Command, one of OPTEE_MSG_CMD_* or OPTEE_MSG_RPC_CMD_*
+ * @func: Trusted Application function, specific to the Trusted Application,
+ * used if cmd == OPTEE_MSG_CMD_INVOKE_COMMAND
+ * @session: In parameter for all OPTEE_MSG_CMD_* except
+ * OPTEE_MSG_CMD_OPEN_SESSION where it's an output parameter instead
+ * @cancel_id: Cancellation id, a unique value to identify this request
+ * @ret: return value
+ * @ret_origin: origin of the return value
+ * @num_params: number of parameters supplied to the OS Command
+ * @params: the parameters supplied to the OS Command
+ *
+ * All normal calls to the Trusted OS use this struct. If cmd requires more
+ * information than these fields hold, it can be passed as a parameter
+ * tagged as meta (by setting the OPTEE_MSG_ATTR_META bit in the
+ * corresponding attr field). All parameters tagged as meta have to come
+ * first.
+ *
+ * Temp memref parameters can be fragmented if supported by the Trusted OS
+ * (when optee_smc.h is the bearer of this protocol this is indicated with
+ * OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM). If a logical memref parameter is
+ * fragmented, all but the last fragment have the OPTEE_MSG_ATTR_FRAGMENT
+ * bit set in attrs. Even if a memref is fragmented it will still be
+ * presented as a single logical memref to the Trusted Application.
+ */
+struct optee_msg_arg {
+ u32 cmd;
+ u32 func;
+ u32 session;
+ u32 cancel_id;
+ u32 pad;
+ u32 ret;
+ u32 ret_origin;
+ u32 num_params;
+
+ /*
+ * this struct is 8 byte aligned since the 'struct optee_msg_param'
+ * which follows requires 8 byte alignment.
+ *
+ * The commented-out element visualizes the dynamic part of the
+ * struct's layout. This field is not available at all if
+ * num_params == 0.
+ *
+ * params is accessed through the macro OPTEE_MSG_GET_PARAMS
+ *
+ * struct optee_msg_param params[num_params];
+ */
+} __aligned(8);
+
+/**
+ * OPTEE_MSG_GET_PARAMS - return pointer to struct optee_msg_param *
+ *
+ * @x: Pointer to a struct optee_msg_arg
+ *
+ * Returns a pointer to the params[] inside a struct optee_msg_arg.
+ */
+#define OPTEE_MSG_GET_PARAMS(x) \
+ (struct optee_msg_param *)(((struct optee_msg_arg *)(x)) + 1)
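+/* For example: struct optee_msg_param *params = OPTEE_MSG_GET_PARAMS(arg); */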
+
+/**
+ * OPTEE_MSG_GET_ARG_SIZE - return size of struct optee_msg_arg
+ *
+ * @num_params: Number of parameters embedded in the struct optee_msg_arg
+ *
+ * Returns the size of the struct optee_msg_arg together with the number
+ * of embedded parameters.
+ */
+#define OPTEE_MSG_GET_ARG_SIZE(num_params) \
+ (sizeof(struct optee_msg_arg) + \
+ sizeof(struct optee_msg_param) * (num_params))
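+/*
+ * With the 8-byte-aligned layouts above this evaluates to
+ * 32 + 32 * num_params bytes, e.g. OPTEE_MSG_GET_ARG_SIZE(2) == 96.
+ */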
+
+/*****************************************************************************
+ * Part 2 - requests from normal world
+ *****************************************************************************/
+
+/*
+ * Return the following UID if using the API specified in this file without
+ * further extensions:
+ * 384fb3e0-e7f8-11e3-af63-0002a5d5c51b.
+ * Represented in 4 32-bit words in OPTEE_MSG_UID_0, OPTEE_MSG_UID_1,
+ * OPTEE_MSG_UID_2, OPTEE_MSG_UID_3.
+ */
+#define OPTEE_MSG_UID_0 0x384fb3e0
+#define OPTEE_MSG_UID_1 0xe7f811e3
+#define OPTEE_MSG_UID_2 0xaf630002
+#define OPTEE_MSG_UID_3 0xa5d5c51b
+#define OPTEE_MSG_FUNCID_CALLS_UID 0xFF01
+
+/*
+ * Returns 2.0 if using the API specified in this file without further
+ * extensions. Represented in 2 32-bit words in OPTEE_MSG_REVISION_MAJOR
+ * and OPTEE_MSG_REVISION_MINOR.
+ */
+#define OPTEE_MSG_REVISION_MAJOR 2
+#define OPTEE_MSG_REVISION_MINOR 0
+#define OPTEE_MSG_FUNCID_CALLS_REVISION 0xFF03
+
+/*
+ * Get UUID of Trusted OS.
+ *
+ * Used by non-secure world to figure out which Trusted OS is installed.
+ * Note that returned UUID is the UUID of the Trusted OS, not of the API.
+ *
+ * Returns UUID in 4 32-bit words in the same way as
+ * OPTEE_MSG_FUNCID_CALLS_UID described above.
+ */
+#define OPTEE_MSG_OS_OPTEE_UUID_0 0x486178e0
+#define OPTEE_MSG_OS_OPTEE_UUID_1 0xe7f811e3
+#define OPTEE_MSG_OS_OPTEE_UUID_2 0xbc5e0002
+#define OPTEE_MSG_OS_OPTEE_UUID_3 0xa5d5c51b
+#define OPTEE_MSG_FUNCID_GET_OS_UUID 0x0000
+
+/*
+ * Get revision of Trusted OS.
+ *
+ * Used by non-secure world to figure out which version of the Trusted OS
+ * is installed. Note that the returned revision is the revision of the
+ * Trusted OS, not of the API.
+ *
+ * Returns revision in 2 32-bit words in the same way as
+ * OPTEE_MSG_FUNCID_CALLS_REVISION described above.
+ */
+#define OPTEE_MSG_OS_OPTEE_REVISION_MAJOR 1
+#define OPTEE_MSG_OS_OPTEE_REVISION_MINOR 0
+#define OPTEE_MSG_FUNCID_GET_OS_REVISION 0x0001
+
+/*
+ * Do a secure call with struct optee_msg_arg as argument
+ * The OPTEE_MSG_CMD_* below defines what goes in struct optee_msg_arg::cmd
+ *
+ * OPTEE_MSG_CMD_OPEN_SESSION opens a session to a Trusted Application.
+ * The first two parameters are tagged as meta, holding two value
+ * parameters to pass the following information:
+ * param[0].u.value.a-b uuid of Trusted Application
+ * param[1].u.value.a-b uuid of Client
+ * param[1].u.value.c Login class of client OPTEE_MSG_LOGIN_*
+ *
+ * OPTEE_MSG_CMD_INVOKE_COMMAND invokes a command in a previously opened
+ * session to a Trusted Application. struct optee_msg_arg::func is the
+ * Trusted Application function, specific to the Trusted Application.
+ *
+ * OPTEE_MSG_CMD_CLOSE_SESSION closes a previously opened session to
+ * Trusted Application.
+ *
+ * OPTEE_MSG_CMD_CANCEL cancels a currently invoked command.
+ *
+ * OPTEE_MSG_CMD_REGISTER_SHM registers a shared memory reference. The
+ * information is passed as:
+ * [in] param[0].attr OPTEE_MSG_ATTR_TYPE_TMEM_INPUT
+ * [| OPTEE_MSG_ATTR_FRAGMENT]
+ * [in] param[0].u.tmem.buf_ptr physical address (of first fragment)
+ * [in] param[0].u.tmem.size size (of first fragment)
+ * [in] param[0].u.tmem.shm_ref holds shared memory reference
+ * ...
+ * The shared memory can optionally be fragmented; temp memrefs can follow
+ * each other, all but the last having the OPTEE_MSG_ATTR_FRAGMENT bit set.
+ *
+ * OPTEE_MSG_CMD_UNREGISTER_SHM unregisters a previously registered shared
+ * memory reference. The information is passed as:
+ * [in] param[0].attr OPTEE_MSG_ATTR_TYPE_RMEM_INPUT
+ * [in] param[0].u.rmem.shm_ref holds shared memory reference
+ * [in] param[0].u.rmem.offs 0
+ * [in] param[0].u.rmem.size 0
+ */
+#define OPTEE_MSG_CMD_OPEN_SESSION 0
+#define OPTEE_MSG_CMD_INVOKE_COMMAND 1
+#define OPTEE_MSG_CMD_CLOSE_SESSION 2
+#define OPTEE_MSG_CMD_CANCEL 3
+#define OPTEE_MSG_CMD_REGISTER_SHM 4
+#define OPTEE_MSG_CMD_UNREGISTER_SHM 5
+#define OPTEE_MSG_FUNCID_CALL_WITH_ARG 0x0004
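+
+/*
+ * Illustrative layout of an OPTEE_MSG_CMD_OPEN_SESSION message, sketch
+ * only: the two meta parameters described above come first, followed by
+ * any regular parameters for the Trusted Application.
+ *
+ *	arg->cmd = OPTEE_MSG_CMD_OPEN_SESSION;
+ *	arg->num_params = 2 + num_ta_params;
+ *	params[0].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT | OPTEE_MSG_ATTR_META;
+ *	params[1].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT | OPTEE_MSG_ATTR_META;
+ */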
+
+/*****************************************************************************
+ * Part 3 - Requests from secure world, RPC
+ *****************************************************************************/
+
+/*
+ * All RPC is done with a struct optee_msg_arg as bearer of information,
+ * struct optee_msg_arg::cmd holds values defined by OPTEE_MSG_RPC_CMD_*
+ * below.
+ *
+ * RPC communication with tee-supplicant is reversed compared to the normal
+ * client communication described above. The supplicant receives requests
+ * and sends responses.
+ */
+
+/*
+ * Load a TA into memory, defined in tee-supplicant
+ */
+#define OPTEE_MSG_RPC_CMD_LOAD_TA 0
+
+/*
+ * Reserved
+ */
+#define OPTEE_MSG_RPC_CMD_RPMB 1
+
+/*
+ * File system access, defined in tee-supplicant
+ */
+#define OPTEE_MSG_RPC_CMD_FS 2
+
+/*
+ * Get time
+ *
+ * Returns number of seconds and nanoseconds since the Epoch,
+ * 1970-01-01 00:00:00 +0000 (UTC).
+ *
+ * [out] param[0].u.value.a Number of seconds
+ * [out] param[0].u.value.b Number of nanoseconds
+ */
+#define OPTEE_MSG_RPC_CMD_GET_TIME 3
+
+/*
+ * Wait queue primitive, helper for secure world to implement a wait queue
+ *
+ * Waiting on a key
+ * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP
+ * [in] param[0].u.value.b wait key
+ *
+ * Waking up a key
+ * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP
+ * [in] param[0].u.value.b wakeup key
+ */
+#define OPTEE_MSG_RPC_CMD_WAIT_QUEUE 4
+#define OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP 0
+#define OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP 1
+
+/*
+ * Suspend execution
+ *
+ * [in] param[0].u.value.a number of milliseconds to suspend
+ */
+#define OPTEE_MSG_RPC_CMD_SUSPEND 5
+
+/*
+ * Allocate a piece of shared memory
+ *
+ * Shared memory can optionally be fragmented; to support that, additional
+ * spare param entries are allocated to make room for eventual fragments.
+ * The spare param entries have .attr = OPTEE_MSG_ATTR_TYPE_NONE when
+ * unused. All returned temp memrefs except the last should have the
+ * OPTEE_MSG_ATTR_FRAGMENT bit set in the attr field.
+ *
+ * [in] param[0].u.value.a type of memory one of
+ * OPTEE_MSG_RPC_SHM_TYPE_* below
+ * [in] param[0].u.value.b requested size
+ * [in] param[0].u.value.c required alignment
+ *
+ * [out] param[0].u.tmem.buf_ptr physical address (of first fragment)
+ * [out] param[0].u.tmem.size size (of first fragment)
+ * [out] param[0].u.tmem.shm_ref shared memory reference
+ * ...
+ * [out] param[n].u.tmem.buf_ptr physical address
+ * [out] param[n].u.tmem.size size
+ * [out] param[n].u.tmem.shm_ref shared memory reference (same value
+ * as in param[n-1].u.tmem.shm_ref)
+ */
+#define OPTEE_MSG_RPC_CMD_SHM_ALLOC 6
+/* Memory that can be shared with a non-secure user space application */
+#define OPTEE_MSG_RPC_SHM_TYPE_APPL 0
+/* Memory only shared with non-secure kernel */
+#define OPTEE_MSG_RPC_SHM_TYPE_KERNEL 1
+
+/*
+ * Free shared memory previously allocated with OPTEE_MSG_RPC_CMD_SHM_ALLOC
+ *
+ * [in] param[0].u.value.a type of memory one of
+ * OPTEE_MSG_RPC_SHM_TYPE_* above
+ * [in] param[0].u.value.b value of shared memory reference
+ * returned in param[0].u.tmem.shm_ref
+ * above
+ */
+#define OPTEE_MSG_RPC_CMD_SHM_FREE 7
+
+#endif /* _OPTEE_MSG_H */
diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
new file mode 100644
index 000000000000..7f13cb59da4a
--- /dev/null
+++ b/drivers/tee/optee/optee_private.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef OPTEE_PRIVATE_H
+#define OPTEE_PRIVATE_H
+
+#include <linux/arm-smccc.h>
+#include <linux/semaphore.h>
+#include <linux/tee_drv.h>
+#include <linux/types.h>
+#include "optee_msg.h"
+
+#define OPTEE_MAX_ARG_SIZE 1024
+
+/* Some Global Platform error codes used in this driver */
+#define TEEC_SUCCESS 0x00000000
+#define TEEC_ERROR_BAD_PARAMETERS 0xFFFF0006
+#define TEEC_ERROR_COMMUNICATION 0xFFFF000E
+#define TEEC_ERROR_OUT_OF_MEMORY 0xFFFF000C
+
+#define TEEC_ORIGIN_COMMS 0x00000002
+
+typedef void (optee_invoke_fn)(unsigned long, unsigned long, unsigned long,
+ unsigned long, unsigned long, unsigned long,
+ unsigned long, unsigned long,
+ struct arm_smccc_res *);
+
+struct optee_call_queue {
+ struct mutex mutex;
+ struct list_head waiters;
+};
+
+struct optee_wait_queue {
+ struct mutex mu;
+ struct list_head db;
+};
+
+struct optee_supp {
+ u32 func;
+ u32 ret;
+ size_t num_params;
+ struct tee_param *param;
+
+ bool req_posted;
+ bool supp_next_send;
+ struct mutex thrd_mutex;
+ struct mutex supp_mutex;
+ struct completion data_to_supp;
+ struct completion data_from_supp;
+};
+
+struct optee {
+ struct tee_device *supp_teedev;
+ struct tee_device *teedev;
+ struct device *dev;
+ optee_invoke_fn *invoke_fn;
+ struct optee_call_queue call_queue;
+ struct optee_wait_queue wait_queue;
+ struct optee_supp supp;
+ struct tee_shm_pool *pool;
+ void __iomem *ioremaped_shm;
+};
+
+struct optee_session {
+ struct list_head list_node;
+ u32 session_id;
+};
+
+struct optee_context_data {
+ struct mutex mutex;
+ struct list_head sess_list;
+};
+
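+/*
+ * Mirrors the eight SMC argument registers a0-a7 used when entering and
+ * resuming secure world, see optee_smc.h for the register usage.
+ */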
+struct optee_rpc_param {
+ u32 a0;
+ u32 a1;
+ u32 a2;
+ u32 a3;
+ u32 a4;
+ u32 a5;
+ u32 a6;
+ u32 a7;
+};
+
+void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param);
+
+void optee_wait_queue_init(struct optee_wait_queue *wq);
+void optee_wait_queue_exit(struct optee_wait_queue *wq);
+
+u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
+ struct tee_param *param);
+
+int optee_supp_read(struct tee_context *ctx, void __user *buf, size_t len);
+int optee_supp_write(struct tee_context *ctx, void __user *buf, size_t len);
+void optee_supp_init(struct optee_supp *supp);
+void optee_supp_uninit(struct optee_supp *supp);
+
+int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
+ struct tee_param *param);
+int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params,
+ struct tee_param *param);
+
+u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg);
+int optee_open_session(struct tee_context *ctx,
+ struct tee_ioctl_open_session_arg *arg,
+ struct tee_param *param);
+int optee_close_session(struct tee_context *ctx, u32 session);
+int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg,
+ struct tee_param *param);
+int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session);
+
+void optee_enable_shm_cache(struct optee *optee);
+void optee_disable_shm_cache(struct optee *optee);
+
+int optee_from_msg_param(struct tee_param *params, size_t num_params,
+ const struct optee_msg_param *msg_params);
+int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params,
+ const struct tee_param *params);
+
+/*
+ * Small helpers
+ */
+
+static inline void *reg_pair_to_ptr(u32 reg0, u32 reg1)
+{
+ return (void *)(unsigned long)(((u64)reg0 << 32) | reg1);
+}
+
+static inline void reg_pair_from_64(u32 *reg0, u32 *reg1, u64 val)
+{
+ *reg0 = val >> 32;
+ *reg1 = val;
+}
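+
+/*
+ * Illustrative only: a 64-bit physical address is split over two 32-bit
+ * registers on the way to secure world and joined again on the way back:
+ *
+ *	reg_pair_from_64(&param->a1, &param->a2, pa);
+ *	...
+ *	shm = reg_pair_to_ptr(param->a1, param->a2);
+ */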
+
+#endif /* OPTEE_PRIVATE_H */
diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h
new file mode 100644
index 000000000000..60d75f1791cd
--- /dev/null
+++ b/drivers/tee/optee/optee_smc.h
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef OPTEE_SMC_H
+#define OPTEE_SMC_H
+
+#include <linux/arm-smccc.h>
+
+#define OPTEE_SMC_STD_CALL_VAL(func_num) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))
+#define OPTEE_SMC_FAST_CALL_VAL(func_num) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))
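+
+/*
+ * For example (illustrative), OPTEE_SMC_FAST_CALL_VAL(0x0000) yields
+ * 0xb2000000: fast call bit set, 32-bit calling convention, Trusted OS
+ * owner, function number 0.
+ */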
+
+/*
+ * Function specified by SMC Calling convention.
+ */
+#define OPTEE_SMC_FUNCID_CALLS_COUNT 0xFF00
+#define OPTEE_SMC_CALLS_COUNT \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_COUNT)
+
+/*
+ * Normal cached memory (write-back), shareable for SMP systems and not
+ * shareable for UP systems.
+ */
+#define OPTEE_SMC_SHM_CACHED 1
+
+/*
+ * a0..a7 are used as register names in the descriptions below, on arm32
+ * that translates to r0..r7 and on arm64 to w0..w7. In both cases they
+ * are 32-bit registers.
+ */
+
+/*
+ * Function specified by SMC Calling convention
+ *
+ * Return the following UID if using the API specified in this file
+ * without further extensions:
+ * 384fb3e0-e7f8-11e3-af63-0002a5d5c51b
+ * see also OPTEE_MSG_UID_* in optee_msg.h
+ */
+#define OPTEE_SMC_FUNCID_CALLS_UID OPTEE_MSG_FUNCID_CALLS_UID
+#define OPTEE_SMC_CALLS_UID \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_UID)
+
+/*
+ * Function specified by SMC Calling convention
+ *
+ * Returns 2.0 if using the API specified in this file without further
+ * extensions, see also OPTEE_MSG_REVISION_* in optee_msg.h.
+ */
+#define OPTEE_SMC_FUNCID_CALLS_REVISION OPTEE_MSG_FUNCID_CALLS_REVISION
+#define OPTEE_SMC_CALLS_REVISION \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_REVISION)
+
+/*
+ * Get UUID of Trusted OS.
+ *
+ * Used by non-secure world to figure out which Trusted OS is installed.
+ * Note that the returned UUID is the UUID of the Trusted OS, not of the API.
+ *
+ * Returns UUID in a0-3 in the same way as OPTEE_SMC_CALLS_UID
+ * described above.
+ */
+#define OPTEE_SMC_FUNCID_GET_OS_UUID OPTEE_MSG_FUNCID_GET_OS_UUID
+#define OPTEE_SMC_CALL_GET_OS_UUID \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_UUID)
+
+/*
+ * Get revision of Trusted OS.
+ *
+ * Used by non-secure world to figure out which version of the Trusted OS
+ * is installed. Note that the returned revision is the revision of the
+ * Trusted OS, not of the API.
+ *
+ * Returns revision in a0-1 in the same way as OPTEE_SMC_CALLS_REVISION
+ * described above.
+ */
+#define OPTEE_SMC_FUNCID_GET_OS_REVISION OPTEE_MSG_FUNCID_GET_OS_REVISION
+#define OPTEE_SMC_CALL_GET_OS_REVISION \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_REVISION)
+
+/*
+ * Call with struct optee_msg_arg as argument
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC*CALL_WITH_ARG
+ * a1 Upper 32bit of a 64bit physical pointer to a struct optee_msg_arg
+ * a2 Lower 32bit of a 64bit physical pointer to a struct optee_msg_arg
+ * a3 Cache settings, not used if the physical pointer is in a predefined
+ * shared memory area, otherwise per OPTEE_SMC_SHM_*
+ * a4-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_*
+ * a1-3 Not used
+ * a4-7 Preserved
+ *
+ * OPTEE_SMC_RETURN_ETHREAD_LIMIT return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_ETHREAD_LIMIT
+ * a1-3 Preserved
+ * a4-7 Preserved
+ *
+ * RPC return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_IS_RPC(val)
+ * a1-2 RPC parameters
+ * a3-7 Resume information, must be preserved
+ *
+ * Possible return values:
+ * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION Trusted OS does not recognize this
+ * function.
+ * OPTEE_SMC_RETURN_OK Call completed, result updated in
+ * the previously supplied struct
+ * optee_msg_arg.
+ * OPTEE_SMC_RETURN_ETHREAD_LIMIT Number of Trusted OS threads exceeded,
+ * try again later.
+ * OPTEE_SMC_RETURN_EBADADDR Bad physical pointer to struct
+ * optee_msg_arg.
+ * OPTEE_SMC_RETURN_EBADCMD Bad/unknown cmd in struct optee_msg_arg
+ * OPTEE_SMC_RETURN_IS_RPC() Call suspended by RPC call to normal
+ * world.
+ */
+#define OPTEE_SMC_FUNCID_CALL_WITH_ARG OPTEE_MSG_FUNCID_CALL_WITH_ARG
+#define OPTEE_SMC_CALL_WITH_ARG \
+ OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_CALL_WITH_ARG)
+
+/*
+ * Get Shared Memory Config
+ *
+ * Returns the Secure/Non-secure shared memory config.
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_GET_SHM_CONFIG
+ * a1-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Have config return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1 Physical address of start of SHM
+ * a2 Size of SHM
+ * a3 Cache settings of memory, as defined by the
+ * OPTEE_SMC_SHM_* values above
+ * a4-7 Preserved
+ *
+ * Not available register usage:
+ * a0 OPTEE_SMC_RETURN_ENOTAVAIL
+ * a1-3 Not used
+ * a4-7 Preserved
+ */
+#define OPTEE_SMC_FUNCID_GET_SHM_CONFIG 7
+#define OPTEE_SMC_GET_SHM_CONFIG \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_SHM_CONFIG)
+
+/*
+ * Exchanges capabilities between normal world and secure world
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_EXCHANGE_CAPABILITIES
+ * a1 bitfield of normal world capabilities OPTEE_SMC_NSEC_CAP_*
+ * a2-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1 bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_*
+ * a2-7 Preserved
+ *
+ * Error return register usage:
+ * a0 OPTEE_SMC_RETURN_ENOTAVAIL, can't use the capabilities from normal world
+ * a1 bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_*
+ * a2-7 Preserved
+ */
+/* Normal world works as a uniprocessor system */
+#define OPTEE_SMC_NSEC_CAP_UNIPROCESSOR (1 << 0)
+/* Secure world has reserved shared memory for normal world to use */
+#define OPTEE_SMC_SEC_CAP_HAVE_RESERVERED_SHM (1 << 0)
+/* Secure world can communicate via previously unregistered shared memory */
+#define OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM (1 << 1)
+#define OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES 9
+#define OPTEE_SMC_EXCHANGE_CAPABILITIES \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES)
+
+/*
+ * Disables and empties the cache of shared memory objects
+ *
+ * Secure world can cache frequently used shared memory objects, for
+ * example objects used as RPC arguments. When secure world is idle this
+ * function returns one shared memory reference to free. To disable the
+ * cache and free all cached objects this function has to be called until
+ * it returns OPTEE_SMC_RETURN_ENOTAVAIL.
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_DISABLE_SHM_CACHE
+ * a1-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1 Upper 32bit of a 64bit Shared memory cookie
+ * a2 Lower 32bit of a 64bit Shared memory cookie
+ * a3-7 Preserved
+ *
+ * Cache empty return register usage:
+ * a0 OPTEE_SMC_RETURN_ENOTAVAIL
+ * a1-7 Preserved
+ *
+ * Not idle return register usage:
+ * a0 OPTEE_SMC_RETURN_EBUSY
+ * a1-7 Preserved
+ */
+#define OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE 10
+#define OPTEE_SMC_DISABLE_SHM_CACHE \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE)
+
+/*
+ * Enable cache of shared memory objects
+ *
+ * Secure world can cache frequently used shared memory objects, for
+ * example objects used as RPC arguments. When secure world is idle this
+ * function returns OPTEE_SMC_RETURN_OK and the cache is enabled. If
+ * secure world isn't idle OPTEE_SMC_RETURN_EBUSY is returned.
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_ENABLE_SHM_CACHE
+ * a1-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1-7 Preserved
+ *
+ * Not idle return register usage:
+ * a0 OPTEE_SMC_RETURN_EBUSY
+ * a1-7 Preserved
+ */
+#define OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE 11
+#define OPTEE_SMC_ENABLE_SHM_CACHE \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE)
+
+/*
+ * Resume from RPC (for example after processing an IRQ)
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC
+ * a1-3 Value of a1-3 when OPTEE_SMC_CALL_WITH_ARG returned
+ * OPTEE_SMC_RETURN_RPC in a0
+ *
+ * Return register usage is the same as for OPTEE_SMC_*CALL_WITH_ARG above.
+ *
+ * Possible return values
+ * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION Trusted OS does not recognize this
+ * function.
+ * OPTEE_SMC_RETURN_OK Original call completed, result
+ * updated in the previously supplied
+ * struct optee_msg_arg.
+ * OPTEE_SMC_RETURN_RPC Call suspended by RPC call to normal
+ * world.
+ * OPTEE_SMC_RETURN_ERESUME Resume failed, the opaque resume
+ * information was corrupt.
+ */
+#define OPTEE_SMC_FUNCID_RETURN_FROM_RPC 3
+#define OPTEE_SMC_CALL_RETURN_FROM_RPC \
+ OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_RETURN_FROM_RPC)
+
+#define OPTEE_SMC_RETURN_RPC_PREFIX_MASK 0xFFFF0000
+#define OPTEE_SMC_RETURN_RPC_PREFIX 0xFFFF0000
+#define OPTEE_SMC_RETURN_RPC_FUNC_MASK 0x0000FFFF
+
+#define OPTEE_SMC_RETURN_GET_RPC_FUNC(ret) \
+ ((ret) & OPTEE_SMC_RETURN_RPC_FUNC_MASK)
+
+#define OPTEE_SMC_RPC_VAL(func) ((func) | OPTEE_SMC_RETURN_RPC_PREFIX)
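+
+/*
+ * Illustrative dispatch on a value returned from secure world, sketch
+ * only, mirroring what optee_handle_rpc() in rpc.c does:
+ *
+ *	if (OPTEE_SMC_RETURN_IS_RPC(param->a0)) {
+ *		switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) {
+ *		case OPTEE_SMC_RPC_FUNC_ALLOC:
+ *			...
+ *		}
+ *	}
+ */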
+
+/*
+ * Allocate memory for RPC parameter passing. The memory is used to hold a
+ * struct optee_msg_arg.
+ *
+ * "Call" register usage:
+ * a0 This value, OPTEE_SMC_RETURN_RPC_ALLOC
+ * a1 Size in bytes of required argument memory
+ * a2 Not used
+ * a3 Resume information, must be preserved
+ * a4-5 Not used
+ * a6-7 Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1 Upper 32bits of 64bit physical pointer to allocated
+ * memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't
+ * be allocated.
+ * a2 Lower 32bits of 64bit physical pointer to allocated
+ * memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't
+ * be allocated
+ * a3 Preserved
+ * a4 Upper 32bits of 64bit Shared memory cookie used when freeing
+ * the memory or doing an RPC
+ * a5 Lower 32bits of 64bit Shared memory cookie used when freeing
+ * the memory or doing an RPC
+ * a6-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_ALLOC 0
+#define OPTEE_SMC_RETURN_RPC_ALLOC \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_ALLOC)
+
+/*
+ * Free memory previously allocated by OPTEE_SMC_RETURN_RPC_ALLOC
+ *
+ * "Call" register usage:
+ * a0 This value, OPTEE_SMC_RETURN_RPC_FREE
+ * a1 Upper 32bits of 64bit shared memory cookie belonging to this
+ * argument memory
+ * a2 Lower 32bits of 64bit shared memory cookie belonging to this
+ * argument memory
+ * a3-7 Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-2 Not used
+ * a3-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_FREE 2
+#define OPTEE_SMC_RETURN_RPC_FREE \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FREE)
+
+/*
+ * Deliver an IRQ in normal world.
+ *
+ * "Call" register usage:
+ * a0 OPTEE_SMC_RETURN_RPC_IRQ
+ * a1-7 Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_IRQ 4
+#define OPTEE_SMC_RETURN_RPC_IRQ \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_IRQ)
+
+/*
+ * Do an RPC request. The supplied struct optee_msg_arg tells which
+ * request to do and the parameters for the request. The following fields
+ * are used (the rest are unused):
+ * - cmd the Request ID
+ * - ret return value of the request, filled in by normal world
+ * - num_params number of parameters for the request
+ * - params the parameters
+ * - param_attrs attributes of the parameters
+ *
+ * "Call" register usage:
+ * a0 OPTEE_SMC_RETURN_RPC_CMD
+ * a1 Upper 32bit of a 64bit Shared memory cookie holding a
+ * struct optee_msg_arg, must be preserved, only the data should
+ * be updated
+ * a2 Lower 32bit of a 64bit Shared memory cookie holding a
+ * struct optee_msg_arg, must be preserved, only the data should
+ * be updated
+ * a3-7 Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-2 Not used
+ * a3-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_CMD 5
+#define OPTEE_SMC_RETURN_RPC_CMD \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_CMD)
+
+/* Returned in a0 */
+#define OPTEE_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF
+
+/* Returned in a0 only from Trusted OS functions */
+#define OPTEE_SMC_RETURN_OK 0x0
+#define OPTEE_SMC_RETURN_ETHREAD_LIMIT 0x1
+#define OPTEE_SMC_RETURN_EBUSY 0x2
+#define OPTEE_SMC_RETURN_ERESUME 0x3
+#define OPTEE_SMC_RETURN_EBADADDR 0x4
+#define OPTEE_SMC_RETURN_EBADCMD 0x5
+#define OPTEE_SMC_RETURN_ENOMEM 0x6
+#define OPTEE_SMC_RETURN_ENOTAVAIL 0x7
+#define OPTEE_SMC_RETURN_IS_RPC(ret) \
+ (((ret) != OPTEE_SMC_RETURN_UNKNOWN_FUNCTION) && \
+ ((((ret) & OPTEE_SMC_RETURN_RPC_PREFIX_MASK) == \
+ OPTEE_SMC_RETURN_RPC_PREFIX)))
+
+#endif /* OPTEE_SMC_H */
diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c
new file mode 100644
index 000000000000..94b6ef8e00c7
--- /dev/null
+++ b/drivers/tee/optee/rpc.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/tee_drv.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
+struct wq_entry {
+ struct list_head link;
+ struct completion c;
+ u32 key;
+};
+
+void optee_wait_queue_init(struct optee_wait_queue *priv)
+{
+ mutex_init(&priv->mu);
+ INIT_LIST_HEAD(&priv->db);
+}
+
+void optee_wait_queue_exit(struct optee_wait_queue *priv)
+{
+ mutex_destroy(&priv->mu);
+}
+
+static void handle_rpc_func_cmd_get_time(struct optee_msg_arg *arg)
+{
+ struct optee_msg_param *params;
+ struct timespec64 ts;
+
+ if (arg->num_params != 1)
+ goto bad;
+ params = OPTEE_MSG_GET_PARAMS(arg);
+ if ((params->attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT)
+ goto bad;
+
+ getnstimeofday64(&ts);
+ params->u.value.a = ts.tv_sec;
+ params->u.value.b = ts.tv_nsec;
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
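+/*
+ * Finds the wait queue entry matching @key, creating it on demand. A
+ * WAKEUP request may arrive before the matching SLEEP; the entry created
+ * here then carries the already completed completion until the sleeper
+ * picks it up.
+ */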
+static struct wq_entry *wq_entry_get(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w;
+
+ mutex_lock(&wq->mu);
+
+ list_for_each_entry(w, &wq->db, link)
+ if (w->key == key)
+ goto out;
+
+ w = kmalloc(sizeof(struct wq_entry), GFP_KERNEL);
+ if (w) {
+ init_completion(&w->c);
+ w->key = key;
+ list_add_tail(&w->link, &wq->db);
+ }
+out:
+ mutex_unlock(&wq->mu);
+ return w;
+}
+
+static void wq_sleep(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w = wq_entry_get(wq, key);
+
+ if (w) {
+ wait_for_completion(&w->c);
+ mutex_lock(&wq->mu);
+ list_del(&w->link);
+ mutex_unlock(&wq->mu);
+ kfree(w);
+ }
+}
+
+static void wq_wakeup(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w = wq_entry_get(wq, key);
+
+ if (w)
+ complete(&w->c);
+}
+
+static void handle_rpc_func_cmd_wq(struct optee *optee,
+ struct optee_msg_arg *arg)
+{
+ struct optee_msg_param *params;
+
+ if (arg->num_params != 1)
+ goto bad;
+
+ params = OPTEE_MSG_GET_PARAMS(arg);
+ if ((params->attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_INPUT)
+ goto bad;
+
+ switch (params->u.value.a) {
+ case OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP:
+ wq_sleep(&optee->wait_queue, params->u.value.b);
+ break;
+ case OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP:
+ wq_wakeup(&optee->wait_queue, params->u.value.b);
+ break;
+ default:
+ goto bad;
+ }
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static void handle_rpc_func_cmd_wait(struct optee_msg_arg *arg)
+{
+ struct optee_msg_param *params;
+ u32 msec_to_wait;
+
+ if (arg->num_params != 1)
+ goto bad;
+
+ params = OPTEE_MSG_GET_PARAMS(arg);
+ if ((params->attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_INPUT)
+ goto bad;
+
+ msec_to_wait = params->u.value.a;
+
+ /* set task's state to interruptible sleep */
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* take a nap */
+ schedule_timeout(msecs_to_jiffies(msec_to_wait));
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static void handle_rpc_supp_cmd(struct tee_context *ctx,
+ struct optee_msg_arg *arg)
+{
+ struct tee_param *params;
+ struct optee_msg_param *msg_params = OPTEE_MSG_GET_PARAMS(arg);
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ params = kmalloc_array(arg->num_params, sizeof(struct tee_param),
+ GFP_KERNEL);
+ if (!params) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ if (optee_from_msg_param(params, arg->num_params, msg_params)) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ goto out;
+ }
+
+ arg->ret = optee_supp_thrd_req(ctx, arg->cmd, arg->num_params, params);
+
+ if (optee_to_msg_param(msg_params, arg->num_params, params))
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+out:
+ kfree(params);
+}
+
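+/*
+ * Delegates the allocation to tee-supplicant via an RPC request. The
+ * supplicant returns the shared memory identifier in value.c, which is
+ * resolved to a struct tee_shm below.
+ */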
+static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz)
+{
+ u32 ret;
+ struct tee_param param;
+ struct optee *optee = tee_get_drvdata(ctx->teedev);
+
+ param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT;
+ param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL;
+ param.u.value.b = sz;
+ param.u.value.c = 0;
+
+ ret = optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_ALLOC, 1, &param);
+ if (ret)
+ return ERR_PTR(-ENOMEM);
+
+ /* Increases count as secure world doesn't have a reference */
+ return tee_shm_get_from_id(optee->supp_teedev, param.u.value.c);
+}
+
+static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
+ struct optee_msg_arg *arg)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee_msg_param *params = OPTEE_MSG_GET_PARAMS(arg);
+ phys_addr_t pa;
+ struct tee_shm *shm;
+ size_t sz;
+ size_t n;
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ if (!arg->num_params ||
+ params->attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ for (n = 1; n < arg->num_params; n++) {
+ if (params[n].attr != OPTEE_MSG_ATTR_TYPE_NONE) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+ }
+
+ sz = params->u.value.b;
+ switch (params->u.value.a) {
+ case OPTEE_MSG_RPC_SHM_TYPE_APPL:
+ shm = cmd_alloc_suppl(ctx, sz);
+ break;
+ case OPTEE_MSG_RPC_SHM_TYPE_KERNEL:
+ shm = tee_shm_alloc(teedev, sz, TEE_SHM_MAPPED);
+ break;
+ default:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ if (IS_ERR(shm)) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ if (tee_shm_get_pa(shm, 0, &pa)) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ goto bad;
+ }
+
+ params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT;
+ params[0].u.tmem.buf_ptr = pa;
+ params[0].u.tmem.size = sz;
+ params[0].u.tmem.shm_ref = (unsigned long)shm;
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ tee_shm_free(shm);
+}
+
+static void cmd_free_suppl(struct tee_context *ctx, struct tee_shm *shm)
+{
+ struct tee_param param;
+
+ param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT;
+ param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL;
+ param.u.value.b = tee_shm_get_id(shm);
+ param.u.value.c = 0;
+
+ optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_FREE, 1, &param);
+ /*
+ * Match the tee_shm_get_from_id() in cmd_alloc_suppl() as secure
+ * world has released its reference.
+ */
+ tee_shm_put(shm);
+}
+
+static void handle_rpc_func_cmd_shm_free(struct tee_context *ctx,
+ struct optee_msg_arg *arg)
+{
+ struct optee_msg_param *params = OPTEE_MSG_GET_PARAMS(arg);
+ struct tee_shm *shm;
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ if (arg->num_params != 1 ||
+ params->attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ shm = (struct tee_shm *)(unsigned long)params->u.value.b;
+ switch (params->u.value.a) {
+ case OPTEE_MSG_RPC_SHM_TYPE_APPL:
+ cmd_free_suppl(ctx, shm);
+ break;
+ case OPTEE_MSG_RPC_SHM_TYPE_KERNEL:
+ tee_shm_free(shm);
+ break;
+ default:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+ arg->ret = TEEC_SUCCESS;
+}
+
+static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee,
+ struct tee_shm *shm)
+{
+ struct optee_msg_arg *arg;
+
+ arg = tee_shm_get_va(shm, 0);
+ if (IS_ERR(arg)) {
+ dev_err(optee->dev, "%s: tee_shm_get_va %p failed\n",
+ __func__, shm);
+ return;
+ }
+
+ switch (arg->cmd) {
+ case OPTEE_MSG_RPC_CMD_GET_TIME:
+ handle_rpc_func_cmd_get_time(arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_WAIT_QUEUE:
+ handle_rpc_func_cmd_wq(optee, arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_SUSPEND:
+ handle_rpc_func_cmd_wait(arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_SHM_ALLOC:
+ handle_rpc_func_cmd_shm_alloc(ctx, arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_SHM_FREE:
+ handle_rpc_func_cmd_shm_free(ctx, arg);
+ break;
+ default:
+ handle_rpc_supp_cmd(ctx, arg);
+ }
+}
+
+void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct tee_shm *shm;
+ phys_addr_t pa;
+
+ switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) {
+ case OPTEE_SMC_RPC_FUNC_ALLOC:
+ shm = tee_shm_alloc(teedev, param->a1, TEE_SHM_MAPPED);
+ if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) {
+ reg_pair_from_64(&param->a1, &param->a2, pa);
+ reg_pair_from_64(&param->a4, &param->a5,
+ (unsigned long)shm);
+ } else {
+ param->a1 = 0;
+ param->a2 = 0;
+ param->a4 = 0;
+ param->a5 = 0;
+ }
+ break;
+ case OPTEE_SMC_RPC_FUNC_FREE:
+ shm = reg_pair_to_ptr(param->a1, param->a2);
+ tee_shm_free(shm);
+ break;
+ case OPTEE_SMC_RPC_FUNC_IRQ:
+ /*
+ * An IRQ was raised while secure world was executing,
+ * since all IRQs are handled in Linux a dummy RPC is
+ * performed to let Linux take the IRQ through the normal
+ * vector.
+ */
+ break;
+ case OPTEE_SMC_RPC_FUNC_CMD:
+ shm = reg_pair_to_ptr(param->a1, param->a2);
+ handle_rpc_func_cmd(ctx, optee, shm);
+ break;
+ default:
+ dev_warn(optee->dev, "Unknown RPC func 0x%x\n",
+ (u32)OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0));
+ break;
+ }
+
+ param->a0 = OPTEE_SMC_CALL_RETURN_FROM_RPC;
+}
diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c
new file mode 100644
index 000000000000..b6478b63d379
--- /dev/null
+++ b/drivers/tee/optee/supp.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "optee_private.h"
+
+void optee_supp_init(struct optee_supp *supp)
+{
+ memset(supp, 0, sizeof(*supp));
+ mutex_init(&supp->thrd_mutex);
+ mutex_init(&supp->supp_mutex);
+ init_completion(&supp->data_to_supp);
+ init_completion(&supp->data_from_supp);
+}
+
+void optee_supp_uninit(struct optee_supp *supp)
+{
+ mutex_destroy(&supp->thrd_mutex);
+ mutex_destroy(&supp->supp_mutex);
+}
+
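+/*
+ * Called from a thread servicing a secure world RPC: posts the request
+ * to tee-supplicant and blocks until the supplicant has written back a
+ * result through optee_supp_send().
+ */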
+u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
+ struct tee_param *param)
+{
+ struct optee *optee = tee_get_drvdata(ctx->teedev);
+ struct optee_supp *supp = &optee->supp;
+ u32 ret;
+
+ /*
+ * Other threads block here until we've copied our answer from
+ * the supplicant.
+ */
+ mutex_lock(&supp->thrd_mutex);
+
+ /*
+ * We have exclusive access now since the supplicant at this
+ * point is either doing a
+ * wait_for_completion_interruptible(data_to_supp) or is in
+ * userspace still about to do the ioctl() to enter
+ * optee_supp_recv() below.
+ */
+
+ supp->func = func;
+ supp->num_params = num_params;
+ supp->param = param;
+ supp->req_posted = true;
+
+ /* Let supplicant get the data */
+ complete(&supp->data_to_supp);
+
+ /*
+ * Wait for supplicant to process and return result, once we've
+ * returned from wait_for_completion(data_from_supp) we have
+ * exclusive access again.
+ */
+ wait_for_completion(&supp->data_from_supp);
+
+ ret = supp->ret;
+ supp->param = NULL;
+ supp->req_posted = false;
+
+ /* We're done, let someone else talk to the supplicant now. */
+ mutex_unlock(&supp->thrd_mutex);
+
+ return ret;
+}
+
+int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
+ struct tee_param *param)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct optee_supp *supp = &optee->supp;
+ int rc;
+
+ /*
+ * In case two supplicants or two threads in one supplicant are
+ * calling this function simultaneously we need to protect the
+ * data with a mutex which we'll release before returning.
+ */
+ mutex_lock(&supp->supp_mutex);
+
+ if (supp->supp_next_send) {
+ /*
+ * optee_supp_recv() has been called again without
+ * an optee_supp_send() in between. The supplicant has
+ * probably been restarted before it was able to
+ * write back the last result. Abort the last request
+ * and wait for a new one.
+ */
+ if (supp->req_posted) {
+ supp->ret = TEEC_ERROR_COMMUNICATION;
+ supp->supp_next_send = false;
+ complete(&supp->data_from_supp);
+ }
+ }
+
+ /*
+ * This is where the supplicant will be hanging most of the
+ * time, let's make this interruptible so we can easily
+ * restart the supplicant if needed.
+ */
+ if (wait_for_completion_interruptible(&supp->data_to_supp)) {
+ rc = -ERESTARTSYS;
+ goto out;
+ }
+
+ /* We have exclusive access to the data */
+
+ if (*num_params < supp->num_params) {
+ /*
+ * Not enough room for parameters, tell supplicant
+ * it failed and abort last request.
+ */
+ supp->ret = TEEC_ERROR_COMMUNICATION;
+ rc = -EINVAL;
+ complete(&supp->data_from_supp);
+ goto out;
+ }
+
+ *func = supp->func;
+ *num_params = supp->num_params;
+ memcpy(param, supp->param,
+ sizeof(struct tee_param) * supp->num_params);
+
+ /* Allow optee_supp_send() below to do its work */
+ supp->supp_next_send = true;
+
+ rc = 0;
+out:
+ mutex_unlock(&supp->supp_mutex);
+ return rc;
+}
+
+int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params,
+ struct tee_param *param)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct optee_supp *supp = &optee->supp;
+ size_t n;
+ int rc = 0;
+
+ /*
+ * We still have exclusive access to the data since that's how we
+ * left it when returning from optee_supp_recv().
+ */
+
+ /* See comment on mutex in optee_supp_recv() above */
+ mutex_lock(&supp->supp_mutex);
+
+ if (!supp->supp_next_send) {
+ /*
+ * Something strange is going on, supplicant shouldn't
+ * enter optee_supp_send() in this state
+ */
+ rc = -ENOENT;
+ goto out;
+ }
+
+ if (num_params != supp->num_params) {
+ /*
+ * Something is wrong, let supplicant restart. Next call to
+ * optee_supp_recv() will give an error to the requesting
+ * thread and release it.
+ */
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* Update out and in/out parameters */
+ for (n = 0; n < num_params; n++) {
+ struct tee_param *p = supp->param + n;
+
+ switch (p->attr) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ p->u.value.a = param[n].u.value.a;
+ p->u.value.b = param[n].u.value.b;
+ p->u.value.c = param[n].u.value.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ p->u.memref.size = param[n].u.memref.size;
+ break;
+ default:
+ break;
+ }
+ }
+ supp->ret = ret;
+
+ /* Allow optee_supp_recv() above to do its work */
+ supp->supp_next_send = false;
+
+ /* Let the requesting thread continue */
+ complete(&supp->data_from_supp);
+out:
+ mutex_unlock(&supp->supp_mutex);
+ return rc;
+}
diff --git a/drivers/tee/tee.c b/drivers/tee/tee.c
new file mode 100644
index 000000000000..b99b9b45416d
--- /dev/null
+++ b/drivers/tee/tee.c
@@ -0,0 +1,946 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+#define TEE_NUM_DEVICES 32
+
+#define TEE_IOCTL_PARAM_SIZE(x) (sizeof(struct tee_param) * (x))
+
+/*
+ * Unprivileged devices in the lower half range and privileged
+ * devices in the upper half range.
+ */
+static DECLARE_BITMAP(dev_mask, TEE_NUM_DEVICES);
+static DEFINE_SPINLOCK(driver_lock);
+
+static struct class *tee_class;
+static dev_t tee_devt;
+
+static struct tee_context *teedev_open(struct tee_device *teedev)
+{
+ int rc;
+ struct tee_context *ctx;
+
+ if (!tee_device_get(teedev))
+ return ERR_PTR(-EINVAL);
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ ctx->teedev = teedev;
+ rc = teedev->desc->ops->open(ctx);
+ if (rc)
+ goto err;
+
+ return ctx;
+err:
+ kfree(ctx);
+ tee_device_put(teedev);
+ return ERR_PTR(rc);
+}
+
+static void teedev_close_context(struct tee_context *ctx)
+{
+ ctx->teedev->desc->ops->release(ctx);
+ tee_device_put(ctx->teedev);
+ kfree(ctx);
+}
+
+static int tee_open(struct inode *inode, struct file *filp)
+{
+ struct tee_context *ctx;
+
+ ctx = teedev_open(container_of(inode->i_cdev, struct tee_device, cdev));
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ filp->private_data = ctx;
+ return 0;
+}
+
+static int tee_release(struct inode *inode, struct file *filp)
+{
+ teedev_close_context(filp->private_data);
+ return 0;
+}
+
+static int tee_ioctl_version(struct tee_context *ctx,
+ struct tee_ioctl_version_data __user *uvers)
+{
+ struct tee_ioctl_version_data vers;
+
+ ctx->teedev->desc->ops->get_version(ctx->teedev, &vers);
+ if (copy_to_user(uvers, &vers, sizeof(vers)))
+ return -EFAULT;
+ return 0;
+}
+
+static int tee_ioctl_shm_alloc(struct tee_context *ctx,
+ struct tee_ioctl_shm_alloc_data __user *udata)
+{
+ long ret;
+ struct tee_ioctl_shm_alloc_data data;
+ struct tee_shm *shm;
+
+ if (copy_from_user(&data, udata, sizeof(data)))
+ return -EFAULT;
+
+ /* Currently no input flags are supported */
+ if (data.flags)
+ return -EINVAL;
+
+ data.id = -1;
+
+ shm = tee_shm_alloc(ctx->teedev, data.size,
+ TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ data.id = shm->id;
+ data.flags = shm->flags;
+ data.size = shm->size;
+
+ if (copy_to_user(udata, &data, sizeof(data)))
+ ret = -EFAULT;
+ else
+ ret = tee_shm_get_fd(shm);
+
+ /*
+ * When user space closes the file descriptor the shared memory
+ * will be freed, or, if tee_shm_get_fd() failed, it is freed
+ * immediately here.
+ */
+ tee_shm_put(shm);
+ return ret;
+}
+
+static int params_from_user(struct tee_context *ctx, struct tee_param *params,
+ size_t num_params, struct tee_ioctl_param __user *uparams)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_shm *shm;
+ struct tee_ioctl_param ip;
+
+ if (copy_from_user(&ip, uparams + n, sizeof(ip)))
+ return -EFAULT;
+
+ /* All unused attribute bits have to be zero */
+ if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK)
+ return -EINVAL;
+
+ params[n].attr = ip.attr;
+ switch (ip.attr) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_NONE:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ params[n].u.value.a = ip.u.value.a;
+ params[n].u.value.b = ip.u.value.b;
+ params[n].u.value.c = ip.u.value.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ /*
+ * If we fail to get a pointer to a shared memory
+ * object (and increase the ref count) from an
+ * identifier we return an error. All pointers that
+ * have been added to params have an increased ref
+ * count. It's the caller's responsibility to do
+ * tee_shm_put() on all resolved pointers.
+ */
+ shm = tee_shm_get_from_id(ctx->teedev,
+ ip.u.memref.shm_id);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ params[n].u.memref.shm_offs = ip.u.memref.shm_offs;
+ params[n].u.memref.size = ip.u.memref.size;
+ params[n].u.memref.shm = shm;
+ break;
+ default:
+ /* Unknown attribute */
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int params_to_user(struct tee_ioctl_param __user *uparams,
+ size_t num_params, struct tee_param *params)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_ioctl_param __user *up = uparams + n;
+ struct tee_param *p = params + n;
+
+ switch (p->attr) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ if (put_user(p->u.value.a, &up->u.value.a) ||
+ put_user(p->u.value.b, &up->u.value.b) ||
+ put_user(p->u.value.c, &up->u.value.c))
+ return -EFAULT;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ if (put_user((u64)p->u.memref.size, &up->u.memref.size))
+ return -EFAULT;
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+static bool param_is_memref(struct tee_param *param)
+{
+ switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int tee_ioctl_open_session(struct tee_context *ctx,
+ struct tee_ioctl_buf_data __user *ubuf)
+{
+ int rc;
+ size_t n;
+ struct tee_ioctl_buf_data buf;
+ struct tee_ioctl_open_session_arg __user *uarg;
+ struct tee_ioctl_open_session_arg arg;
+ struct tee_ioctl_param __user *uparams = NULL;
+ struct tee_param *params = NULL;
+ bool have_session = false;
+
+ if (!ctx->teedev->desc->ops->open_session)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+
+ if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+ buf.buf_len < sizeof(struct tee_ioctl_open_session_arg))
+ return -EINVAL;
+
+ uarg = (struct tee_ioctl_open_session_arg __user *)(unsigned long)
+ buf.buf_ptr;
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len)
+ return -EINVAL;
+
+ if (arg.num_params) {
+ params = kcalloc(arg.num_params, sizeof(struct tee_param),
+ GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+ uparams = (struct tee_ioctl_param __user *)(uarg + 1);
+ rc = params_from_user(ctx, params, arg.num_params, uparams);
+ if (rc)
+ goto out;
+ }
+
+ rc = ctx->teedev->desc->ops->open_session(ctx, &arg, params);
+ if (rc)
+ goto out;
+ have_session = true;
+
+ if (put_user(arg.session, &uarg->session) ||
+ put_user(arg.ret, &uarg->ret) ||
+ put_user(arg.ret_origin, &uarg->ret_origin)) {
+ rc = -EFAULT;
+ goto out;
+ }
+ rc = params_to_user(uparams, arg.num_params, params);
+out:
+ /*
+ * If we've succeeded to open the session but failed to communicate
+ * it back to user space, close the session again to avoid leakage.
+ */
+ if (rc && have_session && ctx->teedev->desc->ops->close_session)
+ ctx->teedev->desc->ops->close_session(ctx, arg.session);
+
+ if (params) {
+ /* Decrease ref count for all valid shared memory pointers */
+ for (n = 0; n < arg.num_params; n++)
+ if (param_is_memref(params + n) &&
+ params[n].u.memref.shm)
+ tee_shm_put(params[n].u.memref.shm);
+ kfree(params);
+ }
+
+ return rc;
+}
+
+static int tee_ioctl_invoke(struct tee_context *ctx,
+ struct tee_ioctl_buf_data __user *ubuf)
+{
+ int rc;
+ size_t n;
+ struct tee_ioctl_buf_data buf;
+ struct tee_ioctl_invoke_arg __user *uarg;
+ struct tee_ioctl_invoke_arg arg;
+ struct tee_ioctl_param __user *uparams = NULL;
+ struct tee_param *params = NULL;
+
+ if (!ctx->teedev->desc->ops->invoke_func)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+
+ if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+ buf.buf_len < sizeof(struct tee_ioctl_invoke_arg))
+ return -EINVAL;
+
+ uarg = (struct tee_ioctl_invoke_arg __user *)(unsigned long)buf.buf_ptr;
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len)
+ return -EINVAL;
+
+ if (arg.num_params) {
+ params = kcalloc(arg.num_params, sizeof(struct tee_param),
+ GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+ uparams = (struct tee_ioctl_param __user *)(uarg + 1);
+ rc = params_from_user(ctx, params, arg.num_params, uparams);
+ if (rc)
+ goto out;
+ }
+
+ rc = ctx->teedev->desc->ops->invoke_func(ctx, &arg, params);
+ if (rc)
+ goto out;
+
+ if (put_user(arg.ret, &uarg->ret) ||
+ put_user(arg.ret_origin, &uarg->ret_origin)) {
+ rc = -EFAULT;
+ goto out;
+ }
+ rc = params_to_user(uparams, arg.num_params, params);
+out:
+ if (params) {
+ /* Decrease ref count for all valid shared memory pointers */
+ for (n = 0; n < arg.num_params; n++)
+ if (param_is_memref(params + n) &&
+ params[n].u.memref.shm)
+ tee_shm_put(params[n].u.memref.shm);
+ kfree(params);
+ }
+ return rc;
+}
+
+static int tee_ioctl_cancel(struct tee_context *ctx,
+ struct tee_ioctl_cancel_arg __user *uarg)
+{
+ struct tee_ioctl_cancel_arg arg;
+
+ if (!ctx->teedev->desc->ops->cancel_req)
+ return -EINVAL;
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ return ctx->teedev->desc->ops->cancel_req(ctx, arg.cancel_id,
+ arg.session);
+}
+
+static int tee_ioctl_close_session(struct tee_context *ctx,
+ struct tee_ioctl_close_session_arg __user *uarg)
+{
+ struct tee_ioctl_close_session_arg arg;
+
+ if (!ctx->teedev->desc->ops->close_session)
+ return -EINVAL;
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ return ctx->teedev->desc->ops->close_session(ctx, arg.session);
+}
+
+static int params_to_supp(struct tee_context *ctx,
+ struct tee_ioctl_param __user *uparams,
+ size_t num_params, struct tee_param *params)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_ioctl_param ip;
+ struct tee_param *p = params + n;
+
+ ip.attr = p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK;
+ switch (p->attr) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ ip.u.value.a = p->u.value.a;
+ ip.u.value.b = p->u.value.b;
+ ip.u.value.c = p->u.value.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ ip.u.memref.size = p->u.memref.size;
+ if (!p->u.memref.shm) {
+ ip.u.memref.shm_offs = 0;
+ ip.u.memref.shm_id = -1;
+ break;
+ }
+ ip.u.memref.shm_offs = p->u.memref.shm_offs;
+ ip.u.memref.shm_id = p->u.memref.shm->id;
+ break;
+ default:
+ memset(&ip.u, 0, sizeof(ip.u));
+ break;
+ }
+
+ if (copy_to_user(uparams + n, &ip, sizeof(ip)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int tee_ioctl_supp_recv(struct tee_context *ctx,
+ struct tee_ioctl_buf_data __user *ubuf)
+{
+ int rc;
+ struct tee_ioctl_buf_data buf;
+ struct tee_iocl_supp_recv_arg __user *uarg;
+ struct tee_param *params;
+ struct tee_ioctl_param __user *uparams;
+ u32 num_params;
+ u32 func;
+
+ if (!ctx->teedev->desc->ops->supp_recv)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+
+ if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+ buf.buf_len < sizeof(struct tee_iocl_supp_recv_arg))
+ return -EINVAL;
+
+ uarg = (struct tee_iocl_supp_recv_arg __user *)(unsigned long)
+ buf.buf_ptr;
+ if (get_user(num_params, &uarg->num_params))
+ return -EFAULT;
+
+ if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) != buf.buf_len)
+ return -EINVAL;
+
+ params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params);
+ if (rc)
+ goto out;
+
+ if (put_user(func, &uarg->func) ||
+ put_user(num_params, &uarg->num_params)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ uparams = (struct tee_ioctl_param __user *)(uarg + 1);
+ rc = params_to_supp(ctx, uparams, num_params, params);
+out:
+ kfree(params);
+ return rc;
+}
+
+static int params_from_supp(struct tee_param *params,
+ size_t num_params, struct tee_ioctl_param __user *uparams)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_param *p = params + n;
+ struct tee_ioctl_param ip;
+
+ if (copy_from_user(&ip, uparams + n, sizeof(ip)))
+ return -EFAULT;
+
+ /* All unused attribute bits have to be zero */
+ if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_TYPE_MASK)
+ return -EINVAL;
+
+ p->attr = ip.attr;
+ switch (ip.attr) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ /* Only out and in/out values can be updated */
+ p->u.value.a = ip.u.value.a;
+ p->u.value.b = ip.u.value.b;
+ p->u.value.c = ip.u.value.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ /*
+ * Only the size of the memref can be updated.
+ * Since we don't have access to the original
+ * parameters here, only store the supplied size.
+ * The driver will copy the updated size into the
+ * original parameters.
+ */
+ p->u.memref.shm = NULL;
+ p->u.memref.shm_offs = 0;
+ p->u.memref.size = ip.u.memref.size;
+ break;
+ default:
+ memset(&p->u, 0, sizeof(p->u));
+ break;
+ }
+ }
+ return 0;
+}
+
+static int tee_ioctl_supp_send(struct tee_context *ctx,
+ struct tee_ioctl_buf_data __user *ubuf)
+{
+ long rc;
+ struct tee_ioctl_buf_data buf;
+ struct tee_iocl_supp_send_arg __user *uarg;
+ struct tee_param *params;
+ struct tee_ioctl_param __user *uparams;
+ u32 num_params;
+ u32 ret;
+
+ /* Not valid for this driver */
+ if (!ctx->teedev->desc->ops->supp_send)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+
+ if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+ buf.buf_len < sizeof(struct tee_iocl_supp_send_arg))
+ return -EINVAL;
+
+ uarg = (struct tee_iocl_supp_send_arg __user *)(unsigned long)
+ buf.buf_ptr;
+ if (get_user(ret, &uarg->ret) ||
+ get_user(num_params, &uarg->num_params))
+ return -EFAULT;
+
+ if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) > buf.buf_len)
+ return -EINVAL;
+
+ params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ uparams = (struct tee_ioctl_param __user *)(uarg + 1);
+ rc = params_from_supp(params, num_params, uparams);
+ if (rc)
+ goto out;
+
+ rc = ctx->teedev->desc->ops->supp_send(ctx, ret, num_params, params);
+out:
+ kfree(params);
+ return rc;
+}
+
+static long tee_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct tee_context *ctx = filp->private_data;
+ void __user *uarg = (void __user *)arg;
+
+ switch (cmd) {
+ case TEE_IOC_VERSION:
+ return tee_ioctl_version(ctx, uarg);
+ case TEE_IOC_SHM_ALLOC:
+ return tee_ioctl_shm_alloc(ctx, uarg);
+ case TEE_IOC_OPEN_SESSION:
+ return tee_ioctl_open_session(ctx, uarg);
+ case TEE_IOC_INVOKE:
+ return tee_ioctl_invoke(ctx, uarg);
+ case TEE_IOC_CANCEL:
+ return tee_ioctl_cancel(ctx, uarg);
+ case TEE_IOC_CLOSE_SESSION:
+ return tee_ioctl_close_session(ctx, uarg);
+ case TEE_IOC_SUPPL_RECV:
+ return tee_ioctl_supp_recv(ctx, uarg);
+ case TEE_IOC_SUPPL_SEND:
+ return tee_ioctl_supp_send(ctx, uarg);
+ default:
+ return -EINVAL;
+ }
+}
+
+static const struct file_operations tee_fops = {
+ .open = tee_open,
+ .release = tee_release,
+ .unlocked_ioctl = tee_ioctl,
+ .compat_ioctl = tee_ioctl,
+};
+
+static void tee_release_device(struct device *dev)
+{
+ struct tee_device *teedev = container_of(dev, struct tee_device, dev);
+
+ spin_lock(&driver_lock);
+ clear_bit(teedev->id, dev_mask);
+ spin_unlock(&driver_lock);
+ mutex_destroy(&teedev->mutex);
+ kfree(teedev);
+}
+
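+/*
+ * Allocates and initializes a struct tee_device for the driver described
+ * by @teedesc. The device is made visible with tee_device_register() and
+ * released through tee_device_unregister().
+ */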
+struct tee_device *tee_device_alloc(const struct tee_desc *teedesc,
+ struct device *dev, struct tee_shm_pool *pool,
+ void *driver_data)
+{
+ struct tee_device *teedev;
+ void *ret;
+ int rc;
+ int offs = 0;
+
+ if (!teedesc || !teedesc->name || !teedesc->ops ||
+ !teedesc->ops->get_version || !teedesc->ops->open ||
+ !teedesc->ops->release || !dev || !pool)
+ return ERR_PTR(-EINVAL);
+
+ teedev = kzalloc(sizeof(*teedev), GFP_KERNEL);
+ if (!teedev) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ if (teedesc->flags & TEE_DESC_PRIVILEGED)
+ offs = TEE_NUM_DEVICES / 2;
+
+ spin_lock(&driver_lock);
+ teedev->id = find_next_zero_bit(dev_mask, TEE_NUM_DEVICES, offs);
+ if (teedev->id < TEE_NUM_DEVICES)
+ set_bit(teedev->id, dev_mask);
+ spin_unlock(&driver_lock);
+
+ if (teedev->id >= TEE_NUM_DEVICES) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ snprintf(teedev->name, sizeof(teedev->name), "tee%s%d",
+ teedesc->flags & TEE_DESC_PRIVILEGED ? "priv" : "",
+ teedev->id - offs);
+
+ teedev->dev.class = tee_class;
+ teedev->dev.release = tee_release_device;
+ teedev->dev.parent = dev;
+ teedev->dev.devt = MKDEV(MAJOR(tee_devt), teedev->id);
+
+ rc = dev_set_name(&teedev->dev, "%s", teedev->name);
+ if (rc) {
+ ret = ERR_PTR(rc);
+ goto err;
+ }
+
+ cdev_init(&teedev->cdev, &tee_fops);
+ teedev->cdev.owner = teedesc->owner;
+ teedev->cdev.kobj.parent = &teedev->dev.kobj;
+
+ dev_set_drvdata(&teedev->dev, driver_data);
+ device_initialize(&teedev->dev);
+
+ /* 1 as tee_device_unregister() does one final tee_device_put() */
+ teedev->num_users = 1;
+ init_completion(&teedev->c_no_users);
+ mutex_init(&teedev->mutex);
+
+ teedev->desc = teedesc;
+ teedev->pool = pool;
+
+ return teedev;
+err:
+ dev_err(dev, "could not register %s driver\n",
+ teedesc->flags & TEE_DESC_PRIVILEGED ? "privileged" : "client");
+ if (teedev && teedev->id < TEE_NUM_DEVICES) {
+ spin_lock(&driver_lock);
+ clear_bit(teedev->id, dev_mask);
+ spin_unlock(&driver_lock);
+ }
+ kfree(teedev);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tee_device_alloc);
+
+static ssize_t implementation_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct tee_device *teedev = container_of(dev, struct tee_device, dev);
+ struct tee_ioctl_version_data vers;
+
+ teedev->desc->ops->get_version(teedev, &vers);
+ return scnprintf(buf, PAGE_SIZE, "%d\n", vers.impl_id);
+}
+static DEVICE_ATTR_RO(implementation_id);
+
+static struct attribute *tee_dev_attrs[] = {
+ &dev_attr_implementation_id.attr,
+ NULL
+};
+
+static const struct attribute_group tee_dev_group = {
+ .attrs = tee_dev_attrs,
+};
+
+int tee_device_register(struct tee_device *teedev)
+{
+ int rc;
+
+ /*
+	 * If the teedev is already registered, don't do it again. It's
+ * obviously an error to try to register twice, but if we return
+ * an error we'll force the driver to remove the teedev.
+ */
+ if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) {
+ dev_err(&teedev->dev, "attempt to register twice\n");
+ return 0;
+ }
+
+ rc = cdev_add(&teedev->cdev, teedev->dev.devt, 1);
+ if (rc) {
+ dev_err(&teedev->dev,
+ "unable to cdev_add() %s, major %d, minor %d, err=%d\n",
+ teedev->name, MAJOR(teedev->dev.devt),
+ MINOR(teedev->dev.devt), rc);
+ return rc;
+ }
+
+ rc = device_add(&teedev->dev);
+ if (rc) {
+ dev_err(&teedev->dev,
+ "unable to device_add() %s, major %d, minor %d, err=%d\n",
+ teedev->name, MAJOR(teedev->dev.devt),
+ MINOR(teedev->dev.devt), rc);
+ goto err_device_add;
+ }
+
+ rc = sysfs_create_group(&teedev->dev.kobj, &tee_dev_group);
+ if (rc) {
+ dev_err(&teedev->dev,
+ "failed to create sysfs attributes, err=%d\n", rc);
+ goto err_sysfs_create_group;
+ }
+
+ teedev->flags |= TEE_DEVICE_FLAG_REGISTERED;
+ return 0;
+
+err_sysfs_create_group:
+ device_del(&teedev->dev);
+err_device_add:
+ cdev_del(&teedev->cdev);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(tee_device_register);
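Taken together, tee_device_alloc() and tee_device_register() give a specific TEE driver a two-step bring-up. A minimal sketch of that sequence from a probe function, where my_ops, my_desc and my_create_pool() are illustrative placeholders rather than anything defined by this patch:

    #include <linux/platform_device.h>
    #include <linux/tee_drv.h>

    /* my_ops, my_desc and my_create_pool() are hypothetical */
    static const struct tee_desc my_desc = {
            .name = "mydrv",
            .ops = &my_ops,         /* must provide get_version/open/release */
            .owner = THIS_MODULE,
    };

    static int my_probe(struct platform_device *pdev)
    {
            struct tee_shm_pool *pool = my_create_pool(pdev);
            struct tee_device *teedev;
            int rc;

            if (IS_ERR(pool))
                    return PTR_ERR(pool);

            teedev = tee_device_alloc(&my_desc, &pdev->dev, pool, NULL);
            if (IS_ERR(teedev))
                    return PTR_ERR(teedev);

            rc = tee_device_register(teedev);
            if (rc)
                    /* documented as safe even after a failed register */
                    tee_device_unregister(teedev);
            return rc;
    }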
+
+void tee_device_put(struct tee_device *teedev)
+{
+ mutex_lock(&teedev->mutex);
+ /* Shouldn't put in this state */
+ if (!WARN_ON(!teedev->desc)) {
+ teedev->num_users--;
+ if (!teedev->num_users) {
+ teedev->desc = NULL;
+ complete(&teedev->c_no_users);
+ }
+ }
+ mutex_unlock(&teedev->mutex);
+}
+
+bool tee_device_get(struct tee_device *teedev)
+{
+ mutex_lock(&teedev->mutex);
+ if (!teedev->desc) {
+ mutex_unlock(&teedev->mutex);
+ return false;
+ }
+ teedev->num_users++;
+ mutex_unlock(&teedev->mutex);
+ return true;
+}
+
+void tee_device_unregister(struct tee_device *teedev)
+{
+ if (!teedev)
+ return;
+
+ if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) {
+ sysfs_remove_group(&teedev->dev.kobj, &tee_dev_group);
+ cdev_del(&teedev->cdev);
+ device_del(&teedev->dev);
+ }
+
+ tee_device_put(teedev);
+ wait_for_completion(&teedev->c_no_users);
+
+ /*
+ * No need to take a mutex any longer now since teedev->desc was
+ * set to NULL before teedev->c_no_users was completed.
+ */
+
+ teedev->pool = NULL;
+
+ put_device(&teedev->dev);
+}
+EXPORT_SYMBOL_GPL(tee_device_unregister);
+
+void *tee_get_drvdata(struct tee_device *teedev)
+{
+ return dev_get_drvdata(&teedev->dev);
+}
+EXPORT_SYMBOL_GPL(tee_get_drvdata);
+
+struct match_dev_data {
+ struct tee_ioctl_version_data *vers;
+ const void *data;
+ int (*match)(struct tee_ioctl_version_data *, const void *);
+};
+
+static int match_dev(struct device *dev, const void *data)
+{
+ const struct match_dev_data *match_data = data;
+ struct tee_device *teedev = container_of(dev, struct tee_device, dev);
+
+ teedev->desc->ops->get_version(teedev, match_data->vers);
+ return match_data->match(match_data->vers, match_data->data);
+}
+
+struct tee_context *tee_client_open_context(struct tee_context *start,
+ int (*match)(struct tee_ioctl_version_data *,
+ const void *),
+ const void *data, struct tee_ioctl_version_data *vers)
+{
+ struct device *dev = NULL;
+ struct device *put_dev = NULL;
+ struct tee_context *ctx = NULL;
+ struct tee_ioctl_version_data v;
+ struct match_dev_data match_data = { vers ? vers : &v, data, match };
+
+ if (start)
+ dev = &start->teedev->dev;
+
+ do {
+ dev = class_find_device(tee_class, dev, &match_data, match_dev);
+ if (!dev) {
+ ctx = ERR_PTR(-ENOENT);
+ break;
+ }
+
+ put_device(put_dev);
+ put_dev = dev;
+
+ ctx = teedev_open(container_of(dev, struct tee_device, dev));
+ } while (IS_ERR(ctx) && PTR_ERR(ctx) != -ENOMEM);
+
+ put_device(put_dev);
+ return ctx;
+}
+EXPORT_SYMBOL_GPL(tee_client_open_context);
+
+void tee_client_close_context(struct tee_context *ctx)
+{
+ teedev_close_context(ctx);
+}
+EXPORT_SYMBOL_GPL(tee_client_close_context);
+
+void tee_client_get_version(struct tee_context *ctx,
+ struct tee_ioctl_version_data *vers)
+{
+ ctx->teedev->desc->ops->get_version(ctx->teedev, vers);
+}
+EXPORT_SYMBOL_GPL(tee_client_get_version);
+
+int tee_client_open_session(struct tee_context *ctx,
+ struct tee_ioctl_open_session_arg *arg,
+ struct tee_param *param)
+{
+ if (!ctx->teedev->desc->ops->open_session)
+ return -EINVAL;
+ return ctx->teedev->desc->ops->open_session(ctx, arg, param);
+}
+EXPORT_SYMBOL_GPL(tee_client_open_session);
+
+int tee_client_close_session(struct tee_context *ctx, u32 session)
+{
+ if (!ctx->teedev->desc->ops->close_session)
+ return -EINVAL;
+ return ctx->teedev->desc->ops->close_session(ctx, session);
+}
+EXPORT_SYMBOL_GPL(tee_client_close_session);
+
+int tee_client_invoke_func(struct tee_context *ctx,
+ struct tee_ioctl_invoke_arg *arg,
+ struct tee_param *param)
+{
+ if (!ctx->teedev->desc->ops->invoke_func)
+ return -EINVAL;
+ return ctx->teedev->desc->ops->invoke_func(ctx, arg, param);
+}
+EXPORT_SYMBOL_GPL(tee_client_invoke_func);
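A sketch of an in-kernel client tying the exported calls above together. The match callback here simply accepts the first TEE device found; a real client would check vers->impl_id against the implementation it targets:

    #include <linux/err.h>
    #include <linux/tee_drv.h>

    static int match_any(struct tee_ioctl_version_data *vers, const void *data)
    {
            return 1;       /* accept the first TEE device found */
    }

    static void client_example(void)
    {
            struct tee_ioctl_version_data vers;
            struct tee_context *ctx;

            ctx = tee_client_open_context(NULL, match_any, NULL, &vers);
            if (IS_ERR(ctx))
                    return; /* -ENOENT if nothing matched */

            /* sessions and invocations would go here, see the calls above */
            tee_client_close_context(ctx);
    }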
+
+static int __init tee_init(void)
+{
+ int rc;
+
+ tee_class = class_create(THIS_MODULE, "tee");
+ if (IS_ERR(tee_class)) {
+ pr_err("couldn't create class\n");
+ return PTR_ERR(tee_class);
+ }
+
+ rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee");
+ if (rc < 0) {
+ pr_err("failed to allocate char dev region\n");
+ class_destroy(tee_class);
+ tee_class = NULL;
+ }
+
+ return rc;
+}
+
+subsys_initcall(tee_init);
diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h
new file mode 100644
index 000000000000..41193e4b4f08
--- /dev/null
+++ b/drivers/tee/tee_private.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef TEE_PRIVATE_H
+#define TEE_PRIVATE_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/kref.h>
+
+struct tee_device;
+
+struct tee_shm {
+ struct tee_device *teedev;
+ phys_addr_t paddr;
+ void *kaddr;
+ size_t size;
+ struct dma_buf *dmabuf;
+ u32 flags;
+ int id;
+};
+
+struct tee_shm_pool_mgr;
+struct tee_shm_pool_mgr_ops {
+ int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm,
+ size_t size);
+ void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm);
+};
+
+struct tee_shm_pool_mgr {
+ const struct tee_shm_pool_mgr_ops *ops;
+ void *private_data;
+};
+
+struct tee_shm_pool {
+ struct tee_shm_pool_mgr private_mgr;
+ struct tee_shm_pool_mgr dma_buf_mgr;
+ void (*destroy)(struct tee_shm_pool *pool);
+ void *private_data;
+};
+
+#define TEE_DEVICE_FLAG_REGISTERED 0x1
+#define TEE_MAX_DEV_NAME_LEN 32
+
+struct tee_device {
+ char name[TEE_MAX_DEV_NAME_LEN];
+ const struct tee_desc *desc;
+ int id;
+ unsigned flags;
+
+ struct device dev;
+ struct cdev cdev;
+
+ size_t num_users;
+ struct completion c_no_users;
+ struct mutex mutex;
+
+ struct idr idr;
+ struct tee_shm_pool *pool;
+};
+
+int tee_shm_init(void);
+
+/**
+ * tee_shm_get_fd() - Increase reference count and return file descriptor
+ * @shm: Shared memory handle
+ * @returns user space file descriptor to shared memory
+ */
+int tee_shm_get_fd(struct tee_shm *shm);
+
+bool tee_device_get(struct tee_device *teedev);
+void tee_device_put(struct tee_device *teedev);
+
+#endif /*TEE_PRIVATE_H*/
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
new file mode 100644
index 000000000000..41c2bcce5ea3
--- /dev/null
+++ b/drivers/tee/tee_shm.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/fdtable.h>
+#include <linux/sched.h>
+#include <linux/dma-buf.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+static void tee_shm_release(struct tee_shm *shm)
+{
+ struct tee_device *teedev = shm->teedev;
+ struct tee_shm_pool_mgr *poolm;
+
+ mutex_lock(&teedev->mutex);
+ idr_remove(&teedev->idr, shm->id);
+ mutex_unlock(&teedev->mutex);
+
+ if (shm->flags & TEE_SHM_DMA_BUF)
+ poolm = &teedev->pool->dma_buf_mgr;
+ else
+ poolm = &teedev->pool->private_mgr;
+
+ poolm->ops->free(poolm, shm);
+ kfree(shm);
+
+ tee_device_put(teedev);
+}
+
+static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment
+ *attach, enum dma_data_direction dir)
+{
+ return NULL;
+}
+
+static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach,
+ struct sg_table *table, enum dma_data_direction dir)
+{
+}
+
+static void tee_shm_op_release(struct dma_buf *dmabuf)
+{
+ struct tee_shm *shm = dmabuf->priv;
+
+ tee_shm_release(shm);
+}
+
+static void *tee_shm_op_kmap_atomic(struct dma_buf *dmabuf,
+ unsigned long pgnum)
+{
+ return NULL;
+}
+
+static void *tee_shm_op_kmap(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+ return NULL;
+}
+
+static int tee_shm_op_mmap(struct dma_buf *dmabuf,
+ struct vm_area_struct *vma)
+{
+ struct tee_shm *shm = dmabuf->priv;
+ size_t size = vma->vm_end - vma->vm_start;
+
+ return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT,
+ size, vma->vm_page_prot);
+}
+
+static struct dma_buf_ops tee_shm_dma_buf_ops = {
+ .map_dma_buf = tee_shm_op_map_dma_buf,
+ .unmap_dma_buf = tee_shm_op_unmap_dma_buf,
+ .release = tee_shm_op_release,
+ .kmap_atomic = tee_shm_op_kmap_atomic,
+ .kmap = tee_shm_op_kmap,
+ .mmap = tee_shm_op_mmap,
+};
+
+struct tee_shm *tee_shm_alloc(struct tee_device *teedev, size_t size,
+ u32 flags)
+{
+ struct tee_shm_pool_mgr *poolm = NULL;
+ struct tee_shm *shm;
+ void *ret;
+ int rc;
+
+ if (!(flags & TEE_SHM_MAPPED)) {
+ dev_err(teedev->dev.parent,
+ "only mapped allocations supported\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if ((flags & ~(TEE_SHM_MAPPED|TEE_SHM_DMA_BUF))) {
+ dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!tee_device_get(teedev))
+ return ERR_PTR(-EINVAL);
+
+ if (!teedev->pool) {
+ /* teedev has been detached from driver */
+ ret = ERR_PTR(-EINVAL);
+ goto err_dev_put;
+ }
+
+ shm = kzalloc(sizeof(struct tee_shm), GFP_KERNEL);
+ if (!shm) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err_dev_put;
+ }
+
+ shm->flags = flags;
+ shm->teedev = teedev;
+ if (flags & TEE_SHM_DMA_BUF)
+ poolm = &teedev->pool->dma_buf_mgr;
+ else
+ poolm = &teedev->pool->private_mgr;
+
+ rc = poolm->ops->alloc(poolm, shm, size);
+ if (rc) {
+ ret = ERR_PTR(rc);
+ goto err_kfree;
+ }
+
+ mutex_lock(&teedev->mutex);
+ shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL);
+ mutex_unlock(&teedev->mutex);
+ if (shm->id < 0) {
+ ret = ERR_PTR(shm->id);
+ goto err_pool_free;
+ }
+
+ if (flags & TEE_SHM_DMA_BUF) {
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+ exp_info.ops = &tee_shm_dma_buf_ops;
+ exp_info.size = shm->size;
+ exp_info.flags = O_RDWR;
+ exp_info.priv = shm;
+
+ shm->dmabuf = dma_buf_export(&exp_info);
+ if (IS_ERR(shm->dmabuf)) {
+ ret = ERR_CAST(shm->dmabuf);
+ goto err_rem;
+ }
+ }
+
+ return shm;
+
+err_rem:
+ mutex_lock(&teedev->mutex);
+ idr_remove(&teedev->idr, shm->id);
+ mutex_unlock(&teedev->mutex);
+err_pool_free:
+ poolm->ops->free(poolm, shm);
+err_kfree:
+ kfree(shm);
+err_dev_put:
+ tee_device_put(teedev);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tee_shm_alloc);
+
+int tee_shm_get_fd(struct tee_shm *shm)
+{
+ u32 req_flags = TEE_SHM_MAPPED | TEE_SHM_DMA_BUF;
+ int fd;
+
+ if ((shm->flags & req_flags) != req_flags)
+ return -EINVAL;
+
+ fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
+ if (fd >= 0)
+ get_dma_buf(shm->dmabuf);
+ return fd;
+}
+
+void tee_shm_free(struct tee_shm *shm)
+{
+ /*
+ * dma_buf_put() decreases the dmabuf reference counter and will
+ * call tee_shm_release() when the last reference is gone.
+ *
+ * In the case of driver private memory we call tee_shm_release
+	 * directly instead, as it doesn't have a reference counter.
+ */
+ if (shm->flags & TEE_SHM_DMA_BUF)
+ dma_buf_put(shm->dmabuf);
+ else
+ tee_shm_release(shm);
+}
+EXPORT_SYMBOL_GPL(tee_shm_free);
+
+int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa)
+{
+ /* Check that we're in the range of the shm */
+ if ((char *)va < (char *)shm->kaddr)
+ return -EINVAL;
+ if ((char *)va >= ((char *)shm->kaddr + shm->size))
+ return -EINVAL;
+
+ return tee_shm_get_pa(
+ shm, (unsigned long)va - (unsigned long)shm->kaddr, pa);
+}
+EXPORT_SYMBOL_GPL(tee_shm_va2pa);
+
+int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va)
+{
+ /* Check that we're in the range of the shm */
+ if (pa < shm->paddr)
+ return -EINVAL;
+ if (pa >= (shm->paddr + shm->size))
+ return -EINVAL;
+
+ if (va) {
+ void *v = tee_shm_get_va(shm, pa - shm->paddr);
+
+ if (IS_ERR(v))
+ return PTR_ERR(v);
+ *va = v;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tee_shm_pa2va);
+
+void *tee_shm_get_va(struct tee_shm *shm, size_t offs)
+{
+ if (offs >= shm->size)
+ return ERR_PTR(-EINVAL);
+ return (char *)shm->kaddr + offs;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_va);
+
+int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa)
+{
+ if (offs >= shm->size)
+ return -EINVAL;
+ if (pa)
+ *pa = shm->paddr + offs;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_pa);
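A sketch of the allocation and address helpers above from a driver's point of view; teedev is assumed to come from a successful tee_device_alloc() with a pool attached, and 4096 is an arbitrary example size:

    #include <linux/err.h>
    #include <linux/string.h>
    #include <linux/tee_drv.h>

    static int shm_example(struct tee_device *teedev)
    {
            struct tee_shm *shm;
            phys_addr_t pa;
            void *va;

            shm = tee_shm_alloc(teedev, 4096, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
            if (IS_ERR(shm))
                    return PTR_ERR(shm);

            va = tee_shm_get_va(shm, 0);
            if (!IS_ERR(va) && !tee_shm_get_pa(shm, 0, &pa))
                    memset(va, 0, 4096);    /* va/pa refer to the same buffer */

            tee_shm_free(shm);
            return 0;
    }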
+
+struct tee_shm *tee_shm_get_from_id(struct tee_device *teedev, int id)
+{
+ struct tee_shm *shm;
+
+ mutex_lock(&teedev->mutex);
+ shm = idr_find(&teedev->idr, id);
+ if (!shm)
+ shm = ERR_PTR(-EINVAL);
+ else if (shm->flags & TEE_SHM_DMA_BUF)
+ get_dma_buf(shm->dmabuf);
+ mutex_unlock(&teedev->mutex);
+ return shm;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_from_id);
+
+int tee_shm_get_id(struct tee_shm *shm)
+{
+ return shm->id;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_id);
+
+void tee_shm_put(struct tee_shm *shm)
+{
+ if (shm->flags & TEE_SHM_DMA_BUF)
+ dma_buf_put(shm->dmabuf);
+}
+EXPORT_SYMBOL_GPL(tee_shm_put);
diff --git a/drivers/tee/tee_shm_pool.c b/drivers/tee/tee_shm_pool.c
new file mode 100644
index 000000000000..2ef22bc5ef4d
--- /dev/null
+++ b/drivers/tee/tee_shm_pool.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/slab.h>
+#include <linux/genalloc.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+static int pool_op_gen_alloc(struct tee_shm_pool_mgr *poolm,
+ struct tee_shm *shm, size_t size)
+{
+ unsigned long va;
+ struct gen_pool *genpool = poolm->private_data;
+ size_t s = roundup(size, 1 << genpool->min_alloc_order);
+
+ va = gen_pool_alloc(genpool, s);
+ if (!va)
+ return -ENOMEM;
+ shm->kaddr = (void *)va;
+ shm->paddr = gen_pool_virt_to_phys(genpool, va);
+ shm->size = s;
+ return 0;
+}
+
+static void pool_op_gen_free(struct tee_shm_pool_mgr *poolm,
+ struct tee_shm *shm)
+{
+ gen_pool_free(poolm->private_data, (unsigned long)shm->kaddr,
+ shm->size);
+ shm->kaddr = NULL;
+}
+
+static const struct tee_shm_pool_mgr_ops pool_ops_generic = {
+ .alloc = pool_op_gen_alloc,
+ .free = pool_op_gen_free,
+};
+
+static void pool_res_mem_destroy(struct tee_shm_pool *pool)
+{
+ gen_pool_destroy(pool->private_mgr.private_data);
+ gen_pool_destroy(pool->dma_buf_mgr.private_data);
+}
+
+static int pool_res_mem_mgr_init(struct tee_shm_pool_mgr *mgr,
+ struct tee_shm_pool_mem_info *info, int min_alloc_order)
+{
+ size_t page_mask = PAGE_SIZE - 1;
+ struct gen_pool *genpool = NULL;
+ int rc;
+
+ /*
+ * Start and end must be page aligned
+ */
+ if ((info->vaddr & page_mask) || (info->paddr & page_mask) ||
+ (info->size & page_mask))
+ return -EINVAL;
+
+ genpool = gen_pool_create(min_alloc_order, -1);
+ if (!genpool)
+ return -ENOMEM;
+
+ gen_pool_set_algo(genpool, gen_pool_best_fit, NULL);
+ rc = gen_pool_add_virt(genpool, info->vaddr, info->paddr, info->size,
+ -1);
+ if (rc) {
+ gen_pool_destroy(genpool);
+ return rc;
+ }
+
+ mgr->private_data = genpool;
+ mgr->ops = &pool_ops_generic;
+ return 0;
+}
+
+struct tee_shm_pool *tee_shm_pool_alloc_res_mem(struct device *dev,
+ struct tee_shm_pool_mem_info *priv_info,
+ struct tee_shm_pool_mem_info *dmabuf_info)
+{
+ struct tee_shm_pool *pool = NULL;
+ int ret;
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /*
+ * Create the pool for driver private shared memory
+ */
+ ret = pool_res_mem_mgr_init(&pool->private_mgr, priv_info,
+ 3 /* 8 byte aligned */);
+ if (ret)
+ goto err;
+
+ /*
+ * Create the pool for dma_buf shared memory
+ */
+ ret = pool_res_mem_mgr_init(&pool->dma_buf_mgr, dmabuf_info,
+ PAGE_SHIFT);
+ if (ret)
+ goto err;
+
+ pool->destroy = pool_res_mem_destroy;
+ return pool;
+err:
+ if (ret == -ENOMEM)
+ dev_err(dev, "can't allocate memory for res_mem shared memory pool\n");
+ if (pool && pool->private_mgr.private_data)
+ gen_pool_destroy(pool->private_mgr.private_data);
+ kfree(pool);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem);
+
+void tee_shm_pool_free(struct tee_shm_pool *pool)
+{
+ pool->destroy(pool);
+ kfree(pool);
+}
+EXPORT_SYMBOL_GPL(tee_shm_pool_free);
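A sketch of creating a pool from one reserved, contiguous memory range. The even split between the private and dma-buf regions is an arbitrary choice for illustration; both halves stay page aligned as long as va, pa and size are page aligned and size is a multiple of two pages:

    #include <linux/tee_drv.h>

    static struct tee_shm_pool *pool_example(struct device *dev, void *va,
                                             unsigned long pa, size_t size)
    {
            struct tee_shm_pool_mem_info priv_info = {
                    .vaddr = (unsigned long)va,
                    .paddr = pa,
                    .size = size / 2,
            };
            struct tee_shm_pool_mem_info dmabuf_info = {
                    .vaddr = (unsigned long)va + size / 2,
                    .paddr = pa + size / 2,
                    .size = size / 2,
            };

            return tee_shm_pool_alloc_res_mem(dev, &priv_info, &dmabuf_info);
    }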
diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 36d07295f8e3..a74b769fe85d 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -68,12 +68,12 @@ static inline int _step_to_temp(int step)
* Every step equals (1 * 200) / 255 celsius, and the result
* is finally converted to millicelsius.
*/
- return (HISI_TEMP_BASE + (step * 200 / 255)) * 1000;
+ return (HISI_TEMP_BASE * 1000 + (step * 200000 / 255));
}
static inline long _temp_to_step(long temp)
{
- return ((temp / 1000 - HISI_TEMP_BASE) * 255 / 200);
+ return ((temp - HISI_TEMP_BASE * 1000) * 255) / 200000;
}
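The reworked conversion keeps the fractional degrees that the old one threw away by truncating to whole celsius before scaling. A quick stand-alone check of the difference, assuming an illustrative base of -60 C (the actual value is the driver's HISI_TEMP_BASE):

    #include <stdio.h>

    int main(void)
    {
            int base = -60, step = 100;     /* -60 C base is an assumption */
            int old_mc = (base + (step * 200 / 255)) * 1000;   /* 18000 */
            int new_mc = base * 1000 + (step * 200000 / 255);  /* 18431 */

            printf("old=%d new=%d\n", old_mc, new_mc);
            return 0;
    }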
static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,
@@ -160,7 +160,7 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp)
struct hisi_thermal_sensor *sensor = _sensor;
struct hisi_thermal_data *data = sensor->thermal;
- int sensor_id = 0, i;
+ int sensor_id = -1, i;
long max_temp = 0;
*temp = hisi_thermal_get_sensor_temp(data, sensor);
@@ -168,12 +168,19 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp)
sensor->sensor_temp = *temp;
for (i = 0; i < HISI_MAX_SENSORS; i++) {
+ if (!data->sensors[i].tzd)
+ continue;
+
if (data->sensors[i].sensor_temp >= max_temp) {
max_temp = data->sensors[i].sensor_temp;
sensor_id = i;
}
}
+	/* If no sensor has been enabled, skip re-enabling the irq */
+ if (sensor_id == -1)
+ return 0;
+
mutex_lock(&data->thermal_lock);
data->irq_bind_sensor = sensor_id;
mutex_unlock(&data->thermal_lock);
@@ -190,7 +197,7 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp)
return 0;
}
- if (max_temp < sensor->thres_temp) {
+ if (!data->irq_enabled && max_temp < sensor->thres_temp) {
data->irq_enabled = true;
hisi_thermal_enable_bind_irq_sensor(data);
enable_irq(data->irq);
@@ -226,8 +233,12 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
sensor->thres_temp / 1000);
mutex_unlock(&data->thermal_lock);
- for (i = 0; i < HISI_MAX_SENSORS; i++)
+ for (i = 0; i < HISI_MAX_SENSORS; i++) {
+ if (!data->sensors[i].tzd)
+ continue;
+
thermal_zone_device_update(data->sensors[i].tzd);
+ }
return IRQ_HANDLED;
}
@@ -247,7 +258,8 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev,
sensor, &hisi_of_thermal_ops);
if (IS_ERR(sensor->tzd)) {
ret = PTR_ERR(sensor->tzd);
- dev_err(&pdev->dev, "failed to register sensor id %d: %d\n",
+ sensor->tzd = NULL;
+ dev_dbg(&pdev->dev, "failed to register sensor id %d: %d\n",
sensor->id, ret);
return ret;
}
@@ -331,28 +343,18 @@ static int hisi_thermal_probe(struct platform_device *pdev)
return ret;
}
+ hisi_thermal_enable_bind_irq_sensor(data);
+ irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED,
+ &data->irq_enabled);
+
for (i = 0; i < HISI_MAX_SENSORS; ++i) {
ret = hisi_thermal_register_sensor(pdev, data,
&data->sensors[i], i);
- if (ret) {
- dev_err(&pdev->dev,
- "failed to register thermal sensor: %d\n", ret);
- goto err_get_sensor_data;
- }
+ if (!ret)
+ hisi_thermal_toggle_sensor(&data->sensors[i], true);
}
- hisi_thermal_enable_bind_irq_sensor(data);
- data->irq_enabled = true;
-
- for (i = 0; i < HISI_MAX_SENSORS; i++)
- hisi_thermal_toggle_sensor(&data->sensors[i], true);
-
return 0;
-
-err_get_sensor_data:
- clk_disable_unprepare(data->clk);
-
- return ret;
}
static int hisi_thermal_remove(struct platform_device *pdev)
@@ -363,6 +365,9 @@ static int hisi_thermal_remove(struct platform_device *pdev)
for (i = 0; i < HISI_MAX_SENSORS; i++) {
struct hisi_thermal_sensor *sensor = &data->sensors[i];
+ if (!sensor->tzd)
+ continue;
+
hisi_thermal_toggle_sensor(sensor, false);
thermal_zone_of_sensor_unregister(&pdev->dev, sensor->tzd);
}
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b627392ad52a..4a9a96056fa9 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2086,6 +2086,12 @@ void usb_disconnect(struct usb_device **pdev)
*/
pm_runtime_barrier(&udev->dev);
+ if (udev->speed < USB_SPEED_HIGH) {
+ struct usb_hcd *hcd = bus_to_hcd(udev->bus);
+
+ if (hcd->driver->change_bus_speed)
+ hcd->driver->change_bus_speed(hcd, 0);
+ }
+
usb_lock_device(udev);
hub_disconnect_children(udev);
@@ -4852,9 +4858,19 @@ loop:
if (hub->hdev->parent ||
!hcd->driver->port_handed_over ||
!(hcd->driver->port_handed_over)(hcd, port1)) {
- if (status != -ENOTCONN && status != -ENODEV)
+ if (status != -ENOTCONN && status != -ENODEV) {
dev_err(&port_dev->dev,
- "unable to enumerate USB device\n");
+ "unable to enumerate USB device"
+ " at %s while bus at %s \n",
+ usb_speed_string(udev->speed),
+ hdev->descriptor.bDeviceProtocol == USB_HUB_PR_FS ?
+ "FULL_SPEED" : "HIGH_SPEED");
+
+ if (udev->speed < USB_SPEED_HIGH &&
+ hdev->descriptor.bDeviceProtocol > USB_HUB_PR_FS &&
+ hcd->driver->change_bus_speed)
+ hcd->driver->change_bus_speed(hcd, 1);
+ }
}
done:
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index f8bbd0b6d9fe..a717ddc3eae3 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -66,7 +66,7 @@ int usb_disabled(void)
EXPORT_SYMBOL_GPL(usb_disabled);
#ifdef CONFIG_PM
-static int usb_autosuspend_delay = 2; /* Default delay value,
+static int usb_autosuspend_delay = -1; /* Default delay value,
* in seconds */
module_param_named(autosuspend, usb_autosuspend_delay, int, 0644);
MODULE_PARM_DESC(autosuspend, "default autosuspend delay");
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 571c21727ff9..f4823c4856c5 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -54,6 +54,19 @@
#include "core.h"
#include "hcd.h"
+struct wrapper_priv_data {
+ struct dwc2_hsotg *hsotg;
+};
+
+/* Gets the dwc2_hsotg from a usb_hcd */
+static struct dwc2_hsotg *dwc2_hcd_to_hsotg(struct usb_hcd *hcd)
+{
+ struct wrapper_priv_data *p;
+
+ p = (struct wrapper_priv_data *) &hcd->hcd_priv;
+ return p->hsotg;
+}
+
/**
* dwc2_dump_channel_info() - Prints the state of a host channel
*
@@ -1350,6 +1363,24 @@ void dwc2_hcd_queue_transactions(struct dwc2_hsotg *hsotg,
}
}
+/*
+ * speed: 0 = high speed, 1 = full speed
+ */
+static void dwc2_change_bus_speed(struct usb_hcd *hcd, int speed)
+{
+ struct dwc2_hsotg *hsotg = dwc2_hcd_to_hsotg(hcd);
+ static int last_speed;
+
+ if (speed == last_speed)
+ return;
+
+ hsotg->core_params->speed = speed;
+ queue_work(hsotg->wq_otg, &hsotg->wf_otg);
+
+ last_speed = speed;
+}
+
static void dwc2_conn_id_status_change(struct work_struct *work)
{
struct dwc2_hsotg *hsotg = container_of(work, struct dwc2_hsotg,
@@ -2121,19 +2152,6 @@ void dwc2_hcd_dump_frrem(struct dwc2_hsotg *hsotg)
#endif
}
-struct wrapper_priv_data {
- struct dwc2_hsotg *hsotg;
-};
-
-/* Gets the dwc2_hsotg from a usb_hcd */
-static struct dwc2_hsotg *dwc2_hcd_to_hsotg(struct usb_hcd *hcd)
-{
- struct wrapper_priv_data *p;
-
- p = (struct wrapper_priv_data *) &hcd->hcd_priv;
- return p->hsotg;
-}
-
static int _dwc2_hcd_start(struct usb_hcd *hcd);
void dwc2_host_start(struct dwc2_hsotg *hsotg)
@@ -2905,6 +2923,7 @@ static struct hc_driver dwc2_hc_driver = {
.bus_suspend = _dwc2_hcd_suspend,
.bus_resume = _dwc2_hcd_resume,
+ .change_bus_speed = dwc2_change_bus_speed,
};
/*
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 39c1cbf0e75d..912590c3bd74 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -54,6 +54,37 @@
static const char dwc2_driver_name[] = "dwc2";
+static const struct dwc2_core_params params_hi6220 = {
+ .otg_cap = 2, /* No HNP/SRP capable */
+ .otg_ver = 0, /* 1.3 */
+ .dma_enable = 1,
+ .dma_desc_enable = 0,
+ .speed = 0, /* High Speed */
+ .enable_dynamic_fifo = 1,
+ .en_multiple_tx_fifo = 1,
+ .host_rx_fifo_size = 512,
+ .host_nperio_tx_fifo_size = 512,
+ .host_perio_tx_fifo_size = 512,
+ .max_transfer_size = 65535,
+ .max_packet_count = 511,
+ .host_channels = 16,
+ .phy_type = 1, /* UTMI */
+ .phy_utmi_width = 8,
+ .phy_ulpi_ddr = 0, /* Single */
+ .phy_ulpi_ext_vbus = 0,
+ .i2c_enable = 0,
+ .ulpi_fs_ls = 0,
+ .host_support_fs_ls_low_power = 0,
+ .host_ls_low_power_phy_clk = 0, /* 48 MHz */
+ .ts_dline = 0,
+ .reload_ctl = 0,
+ .ahbcfg = GAHBCFG_HBSTLEN_INCR16 <<
+ GAHBCFG_HBSTLEN_SHIFT,
+ .uframe_sched = 0,
+ .external_id_pin_ctl = -1,
+ .hibernation = -1,
+};
+
static const struct dwc2_core_params params_bcm2835 = {
.otg_cap = 0, /* HNP/SRP capable */
.otg_ver = 0, /* 1.3 */
@@ -308,6 +339,7 @@ static int dwc2_driver_remove(struct platform_device *dev)
static const struct of_device_id dwc2_of_match_table[] = {
{ .compatible = "brcm,bcm2835-usb", .data = &params_bcm2835 },
+ { .compatible = "hisilicon,hi6220-usb", .data = &params_hi6220 },
{ .compatible = "rockchip,rk3066-usb", .data = &params_rk3066 },
{ .compatible = "snps,dwc2", .data = NULL },
{ .compatible = "samsung,s3c6400-hsotg", .data = NULL},
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index e0606c01e8ac..410d368437d1 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -22,6 +22,7 @@ source "drivers/gpu/vga/Kconfig"
source "drivers/gpu/host1x/Kconfig"
source "drivers/gpu/ipu-v3/Kconfig"
+source "drivers/gpu/arm/Kconfig"
source "drivers/gpu/drm/Kconfig"
menu "Frame buffer Devices"
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 905caba36529..4c287c57e8fd 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -34,6 +34,8 @@
#include <linux/slab.h>
#include <linux/compiler.h>
#include <linux/pstore_ram.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#define RAMOOPS_KERNMSG_HDR "===="
#define MIN_MEM_SIZE 4096UL
@@ -466,15 +468,112 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
return 0;
}
+static int ramoops_parse_dt_size(struct platform_device *pdev,
+ const char *propname, unsigned long *val)
+{
+ u64 val64;
+ int ret;
+
+ ret = of_property_read_u64(pdev->dev.of_node, propname, &val64);
+ if (ret == -EINVAL) {
+ *val = 0;
+ return 0;
+ } else if (ret != 0) {
+ dev_err(&pdev->dev, "failed to parse property %s: %d\n",
+ propname, ret);
+ return ret;
+ }
+
+ if (val64 > ULONG_MAX) {
+ dev_err(&pdev->dev, "invalid %s %llu\n", propname, val64);
+ return -EOVERFLOW;
+ }
+
+ *val = val64;
+ return 0;
+}
+
+static int ramoops_parse_dt(struct platform_device *pdev,
+ struct ramoops_platform_data *pdata)
+{
+ struct device_node *of_node = pdev->dev.of_node;
+ struct device_node *mem_region;
+ struct resource res;
+ u32 ecc_size;
+ int ret;
+
+ dev_dbg(&pdev->dev, "using Device Tree\n");
+
+ mem_region = of_parse_phandle(of_node, "memory-region", 0);
+ if (!mem_region) {
+ dev_err(&pdev->dev, "no memory-region phandle\n");
+ return -ENODEV;
+ }
+
+ ret = of_address_to_resource(mem_region, 0, &res);
+ of_node_put(mem_region);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to translate memory-region to resource: %d\n",
+ ret);
+ return ret;
+ }
+
+ pdata->mem_size = resource_size(&res);
+ pdata->mem_address = res.start;
+ pdata->mem_type = of_property_read_bool(of_node, "unbuffered");
+ pdata->dump_oops = !of_property_read_bool(of_node, "no-dump-oops");
+
+ ret = ramoops_parse_dt_size(pdev, "record-size", &pdata->record_size);
+ if (ret < 0)
+ return ret;
+
+ ret = ramoops_parse_dt_size(pdev, "console-size", &pdata->console_size);
+ if (ret < 0)
+ return ret;
+
+ ret = ramoops_parse_dt_size(pdev, "ftrace-size", &pdata->ftrace_size);
+ if (ret < 0)
+ return ret;
+
+ ret = ramoops_parse_dt_size(pdev, "pmsg-size", &pdata->pmsg_size);
+ if (ret < 0)
+ return ret;
+
+ ret = of_property_read_u32(of_node, "ecc-size", &ecc_size);
+ if (ret == 0) {
+ if (ecc_size > INT_MAX) {
+ dev_err(&pdev->dev, "invalid ecc-size %u\n", ecc_size);
+ return -EOVERFLOW;
+ }
+ pdata->ecc_info.ecc_size = ecc_size;
+ } else if (ret != -EINVAL) {
+ return ret;
+ }
+
+ return 0;
+}
+
static int ramoops_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct ramoops_platform_data *pdata = pdev->dev.platform_data;
+ struct ramoops_platform_data *pdata = platform_get_drvdata(pdev);
struct ramoops_context *cxt = &oops_cxt;
size_t dump_mem_sz;
phys_addr_t paddr;
int err = -EINVAL;
+ if (dev->of_node && !pdata) {
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata) {
+ err = -ENOMEM;
+ goto fail_out;
+ }
+
+ err = ramoops_parse_dt(pdev, pdata);
+ if (err < 0)
+ goto fail_out;
+ }
+
/* Only a single ramoops area allowed at a time, so fail extra
* probes.
*/
@@ -569,6 +668,7 @@ static int ramoops_probe(struct platform_device *pdev)
cxt->size, (unsigned long long)cxt->phys_addr,
cxt->ecc_info.ecc_size, cxt->ecc_info.block_size);
+ platform_set_drvdata(pdev, pdata);
return 0;
fail_buf:
@@ -602,12 +702,17 @@ static int ramoops_remove(struct platform_device *pdev)
return 0;
}
+static const struct of_device_id dt_match[] = {
+ { .compatible = "ramoops" },
+ {}
+};
static struct platform_driver ramoops_driver = {
.probe = ramoops_probe,
.remove = ramoops_remove,
.driver = {
- .name = "ramoops",
+ .name = "ramoops",
+ .of_match_table = dt_match,
},
};
diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index f1d8d0dbb4f1..7f7b7c134bf4 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -100,10 +100,12 @@ struct mipi_dsi_host_ops {
struct mipi_dsi_host {
struct device *dev;
const struct mipi_dsi_host_ops *ops;
+ struct list_head list;
};
int mipi_dsi_host_register(struct mipi_dsi_host *host);
void mipi_dsi_host_unregister(struct mipi_dsi_host *host);
+struct mipi_dsi_host *of_find_mipi_dsi_host_by_node(struct device_node *node);
/* DSI mode flags */
@@ -139,23 +141,55 @@ enum mipi_dsi_pixel_format {
MIPI_DSI_FMT_RGB565,
};
+#define DSI_DEV_NAME_SIZE 20
+
+/**
+ * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any
+ * given pixel format defined by the MIPI DSI
+ * specification
+ * @fmt: MIPI DSI pixel format
+ *
+ * Returns: The number of bits per pixel of the given pixel format.
+ */
+static inline int mipi_dsi_pixel_format_to_bpp(enum mipi_dsi_pixel_format fmt)
+{
+ switch (fmt) {
+ case MIPI_DSI_FMT_RGB888:
+ case MIPI_DSI_FMT_RGB666:
+ return 24;
+
+ case MIPI_DSI_FMT_RGB666_PACKED:
+ return 18;
+
+ case MIPI_DSI_FMT_RGB565:
+ return 16;
+ }
+
+ return -EINVAL;
+}
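A typical consumer of the helper above derives the per-lane bit rate a DSI host must sustain for a given video mode. A minimal sketch, where the example mode (RGB888, 74250 kHz pixel clock, 4 lanes) is illustrative:

    #include <linux/types.h>
    #include <drm/drm_mipi_dsi.h>

    static inline u32 dsi_lane_bit_rate_khz(enum mipi_dsi_pixel_format fmt,
                                            u32 pclk_khz, u32 lanes)
    {
            int bpp = mipi_dsi_pixel_format_to_bpp(fmt);

            if (bpp < 0 || !lanes)
                    return 0;
            /* RGB888: 74250 * 24 / 4 = 445500 kHz per lane */
            return pclk_khz * bpp / lanes;
    }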
+
/**
* struct mipi_dsi_device - DSI peripheral device
* @host: DSI host for this peripheral
* @dev: driver model device node for this peripheral
+ * @name: name of the dsi peripheral
* @channel: virtual channel assigned to the peripheral
* @format: pixel format for video mode
* @lanes: number of active data lanes
* @mode_flags: DSI operation mode related flags
+ * @phy_clock: phy clock assigned to this peripheral in kHz
*/
struct mipi_dsi_device {
struct mipi_dsi_host *host;
struct device dev;
+ char name[DSI_DEV_NAME_SIZE];
+
unsigned int channel;
unsigned int lanes;
enum mipi_dsi_pixel_format format;
unsigned long mode_flags;
+ u32 phy_clock; /* in kHz */
};
static inline struct mipi_dsi_device *to_mipi_dsi_device(struct device *dev)
@@ -163,6 +197,8 @@ static inline struct mipi_dsi_device *to_mipi_dsi_device(struct device *dev)
return container_of(dev, struct mipi_dsi_device, dev);
}
+struct mipi_dsi_device *mipi_dsi_new_dummy(struct mipi_dsi_host *host, u32 reg);
+void mipi_dsi_unregister_device(struct mipi_dsi_device *dsi);
struct mipi_dsi_device *of_find_mipi_dsi_device_by_node(struct device_node *np);
int mipi_dsi_attach(struct mipi_dsi_device *dsi);
int mipi_dsi_detach(struct mipi_dsi_device *dsi);
diff --git a/include/dt-bindings/clock/hi6220-clock.h b/include/dt-bindings/clock/hi6220-clock.h
index 70ee3833a7a0..8df5a24b6f9a 100644
--- a/include/dt-bindings/clock/hi6220-clock.h
+++ b/include/dt-bindings/clock/hi6220-clock.h
@@ -55,8 +55,8 @@
#define HI6220_TIMER7_PCLK 34
#define HI6220_TIMER8_PCLK 35
#define HI6220_UART0_PCLK 36
-
-#define HI6220_AO_NR_CLKS 37
+#define HI6220_RTC0_PCLK 41
+#define HI6220_AO_NR_CLKS 48
/* clk in Hi6220 systrl */
/* gate clock */
diff --git a/include/dt-bindings/pinctrl/hisi.h b/include/dt-bindings/pinctrl/hisi.h
new file mode 100755
index 000000000000..38f1ea879ea1
--- /dev/null
+++ b/include/dt-bindings/pinctrl/hisi.h
@@ -0,0 +1,59 @@
+/*
+ * This header provides constants for hisilicon pinctrl bindings.
+ *
+ * Copyright (c) 2015 Hisilicon Limited.
+ * Copyright (c) 2015 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_PINCTRL_HISI_H
+#define _DT_BINDINGS_PINCTRL_HISI_H
+
+/* iomg bit definition */
+#define MUX_M0 0
+#define MUX_M1 1
+#define MUX_M2 2
+#define MUX_M3 3
+#define MUX_M4 4
+#define MUX_M5 5
+#define MUX_M6 6
+#define MUX_M7 7
+
+/* iocg bit definition */
+#define PULL_MASK (3)
+#define PULL_DIS (0)
+#define PULL_UP (1 << 0)
+#define PULL_DOWN (1 << 1)
+
+/* drive strength definition */
+#define DRIVE_MASK (7 << 4)
+#define DRIVE1_02MA (0 << 4)
+#define DRIVE1_04MA (1 << 4)
+#define DRIVE1_08MA (2 << 4)
+#define DRIVE1_10MA (3 << 4)
+#define DRIVE2_02MA (0 << 4)
+#define DRIVE2_04MA (1 << 4)
+#define DRIVE2_08MA (2 << 4)
+#define DRIVE2_10MA (3 << 4)
+#define DRIVE3_04MA (0 << 4)
+#define DRIVE3_08MA (1 << 4)
+#define DRIVE3_12MA (2 << 4)
+#define DRIVE3_16MA (3 << 4)
+#define DRIVE3_20MA (4 << 4)
+#define DRIVE3_24MA (5 << 4)
+#define DRIVE3_32MA (6 << 4)
+#define DRIVE3_40MA (7 << 4)
+#define DRIVE4_02MA (0 << 4)
+#define DRIVE4_04MA (2 << 4)
+#define DRIVE4_08MA (4 << 4)
+#define DRIVE4_10MA (6 << 4)
+
+#endif
diff --git a/include/dt-bindings/reset/hisi,hi6220-resets.h b/include/dt-bindings/reset/hisi,hi6220-resets.h
new file mode 100644
index 000000000000..322ec5335b65
--- /dev/null
+++ b/include/dt-bindings/reset/hisi,hi6220-resets.h
@@ -0,0 +1,75 @@
+/*
+ * This header provides indexes for the reset controller
+ * based on the hi6220 SoC.
+ */
+#ifndef _DT_BINDINGS_RESET_CONTROLLER_HI6220
+#define _DT_BINDINGS_RESET_CONTROLLER_HI6220
+
+#define PERIPH_RSTDIS0_MMC0 0x000
+#define PERIPH_RSTDIS0_MMC1 0x001
+#define PERIPH_RSTDIS0_MMC2 0x002
+#define PERIPH_RSTDIS0_NANDC 0x003
+#define PERIPH_RSTDIS0_USBOTG_BUS 0x004
+#define PERIPH_RSTDIS0_POR_PICOPHY 0x005
+#define PERIPH_RSTDIS0_USBOTG 0x006
+#define PERIPH_RSTDIS0_USBOTG_32K 0x007
+#define PERIPH_RSTDIS1_HIFI 0x100
+#define PERIPH_RSTDIS1_DIGACODEC 0x105
+#define PERIPH_RSTEN2_IPF 0x200
+#define PERIPH_RSTEN2_SOCP 0x201
+#define PERIPH_RSTEN2_DMAC 0x202
+#define PERIPH_RSTEN2_SECENG 0x203
+#define PERIPH_RSTEN2_ABB 0x204
+#define PERIPH_RSTEN2_HPM0 0x205
+#define PERIPH_RSTEN2_HPM1 0x206
+#define PERIPH_RSTEN2_HPM2 0x207
+#define PERIPH_RSTEN2_HPM3 0x208
+#define PERIPH_RSTEN3_CSSYS 0x300
+#define PERIPH_RSTEN3_I2C0 0x301
+#define PERIPH_RSTEN3_I2C1 0x302
+#define PERIPH_RSTEN3_I2C2 0x303
+#define PERIPH_RSTEN3_I2C3 0x304
+#define PERIPH_RSTEN3_UART1 0x305
+#define PERIPH_RSTEN3_UART2 0x306
+#define PERIPH_RSTEN3_UART3 0x307
+#define PERIPH_RSTEN3_UART4 0x308
+#define PERIPH_RSTEN3_SSP 0x309
+#define PERIPH_RSTEN3_PWM 0x30a
+#define PERIPH_RSTEN3_BLPWM 0x30b
+#define PERIPH_RSTEN3_TSENSOR 0x30c
+#define PERIPH_RSTEN3_DAPB 0x312
+#define PERIPH_RSTEN3_HKADC 0x313
+#define PERIPH_RSTEN3_CODEC_SSI 0x314
+#define PERIPH_RSTEN3_PMUSSI1 0x316
+#define PERIPH_RSTEN8_RS0 0x400
+#define PERIPH_RSTEN8_RS2 0x401
+#define PERIPH_RSTEN8_RS3 0x402
+#define PERIPH_RSTEN8_MS0 0x403
+#define PERIPH_RSTEN8_MS2 0x405
+#define PERIPH_RSTEN8_XG2RAM0 0x406
+#define PERIPH_RSTEN8_X2SRAM_TZMA 0x407
+#define PERIPH_RSTEN8_SRAM 0x408
+#define PERIPH_RSTEN8_HARQ 0x40a
+#define PERIPH_RSTEN8_DDRC 0x40c
+#define PERIPH_RSTEN8_DDRC_APB 0x40d
+#define PERIPH_RSTEN8_DDRPACK_APB 0x40e
+#define PERIPH_RSTEN8_DDRT 0x411
+#define PERIPH_RSDIST9_CARM_DAP 0x500
+#define PERIPH_RSDIST9_CARM_ATB 0x501
+#define PERIPH_RSDIST9_CARM_LBUS 0x502
+#define PERIPH_RSDIST9_CARM_POR 0x503
+#define PERIPH_RSDIST9_CARM_CORE 0x504
+#define PERIPH_RSDIST9_CARM_DBG 0x505
+#define PERIPH_RSDIST9_CARM_L2 0x506
+#define PERIPH_RSDIST9_CARM_SOCDBG 0x507
+#define PERIPH_RSDIST9_CARM_ETM 0x508
+
+#define MEDIA_G3D 0
+#define MEDIA_CODEC_VPU 2
+#define MEDIA_CODEC_JPEG 3
+#define MEDIA_ISP 4
+#define MEDIA_ADE 5
+#define MEDIA_MMU 6
+#define MEDIA_XG2RAM1 7
+
+#endif /*_DT_BINDINGS_RESET_CONTROLLER_HI6220*/
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
new file mode 100644
index 000000000000..b5abfda80465
--- /dev/null
+++ b/include/linux/arm-smccc.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __LINUX_ARM_SMCCC_H
+#define __LINUX_ARM_SMCCC_H
+
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+/*
+ * This file provides common defines for ARM SMC Calling Convention as
+ * specified in
+ * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
+ */
+
+#define ARM_SMCCC_STD_CALL 0
+#define ARM_SMCCC_FAST_CALL 1
+#define ARM_SMCCC_TYPE_SHIFT 31
+
+#define ARM_SMCCC_SMC_32 0
+#define ARM_SMCCC_SMC_64 1
+#define ARM_SMCCC_CALL_CONV_SHIFT 30
+
+#define ARM_SMCCC_OWNER_MASK 0x3F
+#define ARM_SMCCC_OWNER_SHIFT 24
+
+#define ARM_SMCCC_FUNC_MASK 0xFFFF
+
+#define ARM_SMCCC_IS_FAST_CALL(smc_val) \
+ ((smc_val) & (ARM_SMCCC_FAST_CALL << ARM_SMCCC_TYPE_SHIFT))
+#define ARM_SMCCC_IS_64(smc_val) \
+ ((smc_val) & (ARM_SMCCC_SMC_64 << ARM_SMCCC_CALL_CONV_SHIFT))
+#define ARM_SMCCC_FUNC_NUM(smc_val) ((smc_val) & ARM_SMCCC_FUNC_MASK)
+#define ARM_SMCCC_OWNER_NUM(smc_val) \
+ (((smc_val) >> ARM_SMCCC_OWNER_SHIFT) & ARM_SMCCC_OWNER_MASK)
+
+#define ARM_SMCCC_CALL_VAL(type, calling_convention, owner, func_num) \
+ (((type) << ARM_SMCCC_TYPE_SHIFT) | \
+ ((calling_convention) << ARM_SMCCC_CALL_CONV_SHIFT) | \
+ (((owner) & ARM_SMCCC_OWNER_MASK) << ARM_SMCCC_OWNER_SHIFT) | \
+ ((func_num) & ARM_SMCCC_FUNC_MASK))
+
+#define ARM_SMCCC_OWNER_ARCH 0
+#define ARM_SMCCC_OWNER_CPU 1
+#define ARM_SMCCC_OWNER_SIP 2
+#define ARM_SMCCC_OWNER_OEM 3
+#define ARM_SMCCC_OWNER_STANDARD 4
+#define ARM_SMCCC_OWNER_TRUSTED_APP 48
+#define ARM_SMCCC_OWNER_TRUSTED_APP_END 49
+#define ARM_SMCCC_OWNER_TRUSTED_OS 50
+#define ARM_SMCCC_OWNER_TRUSTED_OS_END 63
+
+/**
+ * struct arm_smccc_res - Result from SMC/HVC call
+ * @a0-a3: result values from registers 0 to 3
+ */
+struct arm_smccc_res {
+ unsigned long a0;
+ unsigned long a1;
+ unsigned long a2;
+ unsigned long a3;
+};
+
+/**
+ * arm_smccc_smc() - make SMC calls
+ * @a0-a7: arguments passed in registers 0 to 7
+ * @res: result values from registers 0 to 3
+ *
+ * This function is used to make SMC calls following the SMC Calling
+ * Convention. The contents of the supplied parameters are copied to
+ * registers 0 to 7 prior to the SMC instruction. The return values
+ * are updated with the contents of registers 0 to 3 on return from
+ * the SMC instruction.
+ */
+asmlinkage void arm_smccc_smc(unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3, unsigned long a4,
+ unsigned long a5, unsigned long a6, unsigned long a7,
+ struct arm_smccc_res *res);
+
+/**
+ * arm_smccc_hvc() - make HVC calls
+ * @a0-a7: arguments passed in registers 0 to 7
+ * @res: result values from registers 0 to 3
+ *
+ * This function is used to make HVC calls following the SMC Calling
+ * Convention. The contents of the supplied parameters are copied to
+ * registers 0 to 7 prior to the HVC instruction. The return values
+ * are updated with the contents of registers 0 to 3 on return from
+ * the HVC instruction.
+ */
+asmlinkage void arm_smccc_hvc(unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3, unsigned long a4,
+ unsigned long a5, unsigned long a6, unsigned long a7,
+ struct arm_smccc_res *res);
+
+#endif /*__LINUX_ARM_SMCCC_H*/
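An example of composing a function ID with the macros above. The resulting value, 0x84000000, is PSCI 0.2's PSCI_VERSION: a fast SMC32 call in the standard service owner range, which makes it a convenient sanity check of the encoding:

    #include <linux/arm-smccc.h>

    /* (1 << 31) | (0 << 30) | (4 << 24) | 0 = 0x84000000 */
    #define EXAMPLE_PSCI_VERSION                                        \
            ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32,   \
                               ARM_SMCCC_OWNER_STANDARD, 0)

    static void psci_version_example(void)
    {
            struct arm_smccc_res res;

            arm_smccc_smc(EXAMPLE_PSCI_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
            /* res.a0 now holds the version, e.g. 0x00010000 for PSCI 1.0 */
    }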
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 1a96fdaa33d5..e133705d794a 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -26,6 +26,10 @@ SUBSYS(cpu)
SUBSYS(cpuacct)
#endif
+#if IS_ENABLED(CONFIG_CGROUP_SCHEDTUNE)
+SUBSYS(schedtune)
+#endif
+
#if IS_ENABLED(CONFIG_BLK_CGROUP)
SUBSYS(io)
#endif
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 177c7680c1a8..b5433349938a 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -160,6 +160,7 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
int cpufreq_update_policy(unsigned int cpu);
bool have_governor_per_policy(void);
+bool cpufreq_driver_is_slow(void);
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
#else
static inline unsigned int cpufreq_get(unsigned int cpu)
@@ -317,6 +318,14 @@ struct cpufreq_driver {
*/
#define CPUFREQ_NEED_INITIAL_FREQ_CHECK (1 << 5)
+/*
+ * Indicates that it is safe to call cpufreq_driver_target from
+ * non-interruptable context in scheduler hot paths. Drivers must
+ * opt-in to this flag, as the safe default is that they might sleep
+ * or be too slow for hot path use.
+ */
+#define CPUFREQ_DRIVER_FAST (1 << 6)
+
int cpufreq_register_driver(struct cpufreq_driver *driver_data);
int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
@@ -487,6 +496,9 @@ extern struct cpufreq_governor cpufreq_gov_ondemand;
#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
extern struct cpufreq_governor cpufreq_gov_conservative;
#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED)
+extern struct cpufreq_governor cpufreq_gov_sched;
+#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_sched)
#endif
/*********************************************************************
@@ -616,4 +628,8 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
int cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+
+struct sched_domain;
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
+unsigned long cpufreq_scale_max_freq_capacity(int cpu);
#endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 786ad32631a6..6eae1576499e 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -204,7 +204,7 @@ static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv,
#endif
/* kernel/sched/idle.c */
-extern void sched_idle_set_state(struct cpuidle_state *idle_state);
+extern void sched_idle_set_state(struct cpuidle_state *idle_state, int index);
extern void default_idle_call(void);
#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
diff --git a/include/linux/ft5x06_ts.h b/include/linux/ft5x06_ts.h
new file mode 100644
index 000000000000..8ce1a42866ef
--- /dev/null
+++ b/include/linux/ft5x06_ts.h
@@ -0,0 +1,42 @@
+#ifndef __LINUX_FT5X0X_TS_H__
+#define __LINUX_FT5X0X_TS_H__
+
+/* -- driver configuration -- */
+#define CFG_MAX_TOUCH_POINTS 2
+
+#define PRESS_MAX 0xFF
+#define FT_PRESS 0x7F
+
+#define FT5X0X_NAME "ft5x0x_ts"
+
+#define FT_MAX_ID 0x0F
+#define FT_TOUCH_STEP 6
+#define FT_TOUCH_X_H_POS 3
+#define FT_TOUCH_X_L_POS 4
+#define FT_TOUCH_Y_H_POS 5
+#define FT_TOUCH_Y_L_POS 6
+#define FT_TOUCH_EVENT_POS 3
+#define FT_TOUCH_ID_POS 5
+
+#define POINT_READ_BUF (3 + FT_TOUCH_STEP * CFG_MAX_TOUCH_POINTS)
+
+/*register address*/
+#define FT5x0x_REG_FW_VER 0xA6
+#define FT5x0x_REG_POINT_RATE 0x88
+#define FT5X0X_REG_THGROUP 0x80
+
+int ft5x0x_i2c_Read(struct i2c_client *client, char *writebuf, int writelen,
+ char *readbuf, int readlen);
+int ft5x0x_i2c_Write(struct i2c_client *client, char *writebuf, int writelen);
+
+/* The platform data for the Focaltech ft5x0x touchscreen driver */
+struct ft5x0x_platform_data {
+ unsigned int x_max;
+ unsigned int y_max;
+ unsigned long irqflags;
+ unsigned int irq;
+ unsigned int irq_cfg;
+ unsigned int reset;
+};
+
+#endif
diff --git a/include/linux/mfd/hi655x-pmic.h b/include/linux/mfd/hi655x-pmic.h
new file mode 100644
index 000000000000..dbbe9a644622
--- /dev/null
+++ b/include/linux/mfd/hi655x-pmic.h
@@ -0,0 +1,55 @@
+/*
+ * Device driver for regulators in hi655x IC
+ *
+ * Copyright (c) 2016 Hisilicon.
+ *
+ * Authors:
+ * Chen Feng <puck.chen@hisilicon.com>
+ * Fei Wang <w.f@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __HI655X_PMIC_H
+#define __HI655X_PMIC_H
+
+/* Hi655x registers are mapped to the memory bus with a 4-byte stride */
+#define HI655X_STRIDE 4
+#define HI655X_BUS_ADDR(x) ((x) << 2)
+
+#define HI655X_BITS 8
+
+#define HI655X_NR_IRQ 32
+
+#define HI655X_IRQ_STAT_BASE (0x003 << 2)
+#define HI655X_IRQ_MASK_BASE (0x007 << 2)
+#define HI655X_ANA_IRQM_BASE (0x1b5 << 2)
+#define HI655X_IRQ_ARRAY 4
+#define HI655X_IRQ_MASK 0xFF
+#define HI655X_IRQ_CLR 0xFF
+#define HI655X_VER_REG 0x00
+
+#define PMU_VER_START 0x10
+#define PMU_VER_END 0x38
+
+#define RESERVE_INT BIT(7)
+#define PWRON_D20R_INT BIT(6)
+#define PWRON_D20F_INT BIT(5)
+#define PWRON_D4SR_INT BIT(4)
+#define VSYS_6P0_D200UR_INT BIT(3)
+#define VSYS_UV_D3R_INT BIT(2)
+#define VSYS_2P5_R_INT BIT(1)
+#define OTMP_D1R_INT BIT(0)
+
+struct hi655x_pmic {
+ struct resource *res;
+ struct device *dev;
+ struct regmap *regmap;
+ int gpio;
+ unsigned int ver;
+ struct regmap_irq_chip_data *irq_data;
+};
+
+#endif
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 7776afb0ffa5..b4295be358ec 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -14,9 +14,10 @@
#ifndef LINUX_MMC_DW_MMC_H
#define LINUX_MMC_DW_MMC_H
-#include <linux/scatterlist.h>
-#include <linux/mmc/core.h>
#include <linux/dmaengine.h>
+#include <linux/mmc/core.h>
+#include <linux/reset.h>
+#include <linux/scatterlist.h>
#define MAX_MCI_SLOTS 2
@@ -276,6 +277,7 @@ struct dw_mci_board {
/* delay in mS before detecting cards after interrupt */
u32 detect_delay_ms;
+ struct reset_control *rstc;
struct dw_mci_dma_ops *dma_ops;
struct dma_pdata *data;
};
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5f37614f2451..7860329d7ce8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -182,6 +182,11 @@ enum pci_dev_flags {
PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7),
/* Get VPD from function 0 VPD */
PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8),
+ /*
+ * Resume before calling the driver's system suspend hooks, disabling
+ * the direct_complete optimization.
+ */
+ PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
};
enum pci_irq_reroute_variant {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 352213b360d7..01c9413dbd87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -929,6 +929,14 @@ enum cpu_idle_type {
#define SCHED_CAPACITY_SHIFT 10
#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
+struct sched_capacity_reqs {
+ unsigned long cfs;
+ unsigned long rt;
+ unsigned long dl;
+
+ unsigned long total;
+};
+
/*
* Wake-queues are lists of tasks with a pending wakeup, whose
* callers have already marked the task as woken internally,
@@ -991,6 +999,7 @@ extern void wake_up_q(struct wake_q_head *head);
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
#define SD_NUMA 0x4000 /* cross-node balancing */
+#define SD_SHARE_CAP_STATES 0x8000 /* Domain members share capacity state */
#ifdef CONFIG_SCHED_SMT
static inline int cpu_smt_flags(void)
@@ -1023,6 +1032,24 @@ struct sched_domain_attr {
extern int sched_domain_level_max;
+struct capacity_state {
+ unsigned long cap; /* compute capacity */
+ unsigned long power; /* power consumption at this compute capacity */
+};
+
+struct idle_state {
+ unsigned long power; /* power consumption in this idle state */
+};
+
+struct sched_group_energy {
+ unsigned int nr_idle_states; /* number of idle states */
+ struct idle_state *idle_states; /* ptr to idle state array */
+ unsigned int nr_cap_states; /* number of capacity states */
+ struct capacity_state *cap_states; /* ptr to capacity state array */
+};
+
+unsigned long capacity_curr_of(int cpu);
+
struct sched_group;
struct sched_domain {
@@ -1121,6 +1148,8 @@ bool cpus_share_cache(int this_cpu, int that_cpu);
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
typedef int (*sched_domain_flags_f)(void);
+typedef
+const struct sched_group_energy * const(*sched_domain_energy_f)(int cpu);
#define SDTL_OVERLAP 0x01
@@ -1133,6 +1162,7 @@ struct sd_data {
struct sched_domain_topology_level {
sched_domain_mask_f mask;
sched_domain_flags_f sd_flags;
+ sched_domain_energy_f energy;
int flags;
int numa_level;
struct sd_data data;
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index c9e4731cf10b..4479e48c7712 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -77,6 +77,22 @@ extern int sysctl_sched_rt_runtime;
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
#endif
+#ifdef CONFIG_SCHED_TUNE
+extern unsigned int sysctl_sched_cfs_boost;
+int sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length,
+ loff_t *ppos);
+static inline unsigned int get_sysctl_sched_cfs_boost(void)
+{
+ return sysctl_sched_cfs_boost;
+}
+#else
+static inline unsigned int get_sysctl_sched_cfs_boost(void)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;
#endif
diff --git a/include/linux/sched_energy.h b/include/linux/sched_energy.h
new file mode 100644
index 000000000000..a3f1627ac609
--- /dev/null
+++ b/include/linux/sched_energy.h
@@ -0,0 +1,36 @@
+#ifndef _LINUX_SCHED_ENERGY_H
+#define _LINUX_SCHED_ENERGY_H
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+/*
+ * There doesn't seem to be an NR_CPUS style max number of sched domain
+ * levels so here's an arbitrary constant one for the moment.
+ *
+ * The levels alluded to here correspond to entries in struct
+ * sched_domain_topology_level that are meant to be populated by arch
+ * specific code (topology.c).
+ */
+#define NR_SD_LEVELS 8
+
+#define SD_LEVEL0 0
+#define SD_LEVEL1 1
+#define SD_LEVEL2 2
+#define SD_LEVEL3 3
+#define SD_LEVEL4 4
+#define SD_LEVEL5 5
+#define SD_LEVEL6 6
+#define SD_LEVEL7 7
+
+/*
+ * Convenience macro for iterating through said sd levels.
+ */
+#define for_each_possible_sd_level(level) \
+ for (level = 0; level < NR_SD_LEVELS; level++)
+
+extern struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS];
+
+void init_sched_energy_costs(void);
+
+#endif
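For orientation, this is the shape of the tables behind sge_array: one sched_group_energy per topology level, built from capacity/power pairs ordered from lowest to highest OPP. The numbers below are made up for illustration; real platforms supply them through DT or architecture topology code:

    #include <linux/kernel.h>
    #include <linux/sched.h>

    static struct idle_state example_idle_states[] = {
            { .power = 10 },        /* WFI */
            { .power = 0 },         /* deeper, power-gated state */
    };

    static struct capacity_state example_cap_states[] = {
            { .cap = 430, .power = 100 },   /* lowest OPP */
            { .cap = 1024, .power = 616 },  /* highest OPP = SCHED_CAPACITY_SCALE */
    };

    static struct sched_group_energy example_energy = {
            .nr_idle_states = ARRAY_SIZE(example_idle_states),
            .idle_states = example_idle_states,
            .nr_cap_states = ARRAY_SIZE(example_cap_states),
            .cap_states = example_cap_states,
    };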
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
new file mode 100644
index 000000000000..3f4e7683361c
--- /dev/null
+++ b/include/linux/tee_drv.h
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __TEE_DRV_H
+#define __TEE_DRV_H
+
+#include <linux/types.h>
+#include <linux/idr.h>
+#include <linux/list.h>
+#include <linux/tee.h>
+
+/*
+ * The file describes the API provided by the generic TEE driver to the
+ * specific TEE driver.
+ */
+
+#define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */
+#define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */
+
+struct tee_device;
+struct tee_shm;
+struct tee_shm_pool;
+
+/**
+ * struct tee_context - driver specific context on file pointer data
+ * @teedev: pointer to this drivers struct tee_device
+ * @data: driver specific context data, managed by the driver
+ */
+struct tee_context {
+ struct tee_device *teedev;
+ void *data;
+};
+
+struct tee_param_memref {
+ size_t shm_offs;
+ size_t size;
+ struct tee_shm *shm;
+};
+
+struct tee_param_value {
+ u64 a;
+ u64 b;
+ u64 c;
+};
+
+struct tee_param {
+ u64 attr;
+ union {
+ struct tee_param_memref memref;
+ struct tee_param_value value;
+ } u;
+};
+
+/**
+ * struct tee_driver_ops - driver operations vtable
+ * @get_version: returns version of driver
+ * @open: called when the device file is opened
+ * @release: release this open file
+ */
+struct tee_driver_ops {
+ void (*get_version)(struct tee_device *teedev,
+ struct tee_ioctl_version_data *vers);
+ int (*open)(struct tee_context *ctx);
+ void (*release)(struct tee_context *ctx);
+ int (*open_session)(struct tee_context *ctx,
+ struct tee_ioctl_open_session_arg *arg,
+ struct tee_param *param);
+ int (*close_session)(struct tee_context *ctx, u32 session);
+ int (*invoke_func)(struct tee_context *ctx,
+ struct tee_ioctl_invoke_arg *arg,
+ struct tee_param *param);
+ int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session);
+ int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params,
+ struct tee_param *param);
+ int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params,
+ struct tee_param *param);
+};
+
+/**
+ * struct tee_desc - Describes the TEE driver to the subsystem
+ * @name: name of driver
+ * @ops: driver operations vtable
+ * @owner: module providing the driver
+ * @flags: Extra properties of driver, defined by TEE_DESC_* below
+ */
+#define TEE_DESC_PRIVILEGED 0x1
+struct tee_desc {
+ const char *name;
+ const struct tee_driver_ops *ops;
+ struct module *owner;
+ u32 flags;
+};
+
+
+/**
+ * tee_device_alloc() - Allocate a new struct tee_device instance
+ * @teedesc: Descriptor for this driver
+ * @dev: Parent device for this device
+ * @pool: Shared memory pool, NULL if not used
+ * @driver_data: Private driver data for this device
+ *
+ * Allocates a new struct tee_device instance. The device is
+ * removed by tee_device_unregister().
+ *
+ * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure
+ */
+struct tee_device *tee_device_alloc(const struct tee_desc *teedesc,
+ struct device *dev, struct tee_shm_pool *pool,
+ void *driver_data);
+
+/**
+ * tee_device_register() - Registers a TEE device
+ * @teedev: Device to register
+ *
+ * tee_device_unregister() needs to be called to remove the @teedev if
+ * this function fails.
+ *
+ * @returns < 0 on failure
+ */
+int tee_device_register(struct tee_device *teedev);
+
+/**
+ * tee_device_unregister() - Removes a TEE device
+ * @teedev: Device to unregister
+ *
+ * This function should be called to remove the @teedev even if
+ * tee_device_register() hasn't been called yet. Does nothing if
+ * @teedev is NULL.
+ */
+void tee_device_unregister(struct tee_device *teedev);
+
+/**
+ * struct tee_shm_pool_mem_info - holds information needed to create a shared memory pool
+ * @vaddr: Virtual address of start of pool
+ * @paddr: Physical address of start of pool
+ * @size: Size in bytes of the pool
+ */
+struct tee_shm_pool_mem_info {
+ unsigned long vaddr;
+ unsigned long paddr;
+ size_t size;
+};
+
+/**
+ * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved memory range
+ * @dev: Device allocating the pool
+ * @priv_info: Information for driver private shared memory pool
+ * @dmabuf_info: Information for dma-buf shared memory pool
+ *
+ * Start and end of pools must be page aligned.
+ *
+ * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied
+ * in @dmabuf_info; others will use the range provided by @priv_info.
+ *
+ * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
+ */
+struct tee_shm_pool *tee_shm_pool_alloc_res_mem(struct device *dev,
+ struct tee_shm_pool_mem_info *priv_info,
+ struct tee_shm_pool_mem_info *dmabuf_info);
+
+/**
+ * tee_shm_pool_free() - Free a shared memory pool
+ * @pool: The shared memory pool to free
+ *
+ * There must be no remaining shared memory allocated from this pool when
+ * this function is called.
+ */
+void tee_shm_pool_free(struct tee_shm_pool *pool);
+
+/**
+ * tee_get_drvdata() - Return driver_data pointer
+ * @returns the driver_data pointer supplied to tee_device_alloc().
+ */
+void *tee_get_drvdata(struct tee_device *teedev);
+
+/**
+ * tee_shm_alloc() - Allocate shared memory
+ * @teedev: Driver that allocates the shared memory
+ * @size: Requested size of shared memory
+ * @flags: Flags setting properties for the requested shared memory.
+ *
+ * Memory allocated as global shared memory is automatically freed when the
+ * TEE file pointer is closed. The @flags field uses the bits defined by
+ * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set. If
+ * TEE_SHM_DMA_BUF is set, global shared memory will be allocated and
+ * associated with a dma-buf handle; otherwise driver private memory is
+ * allocated.
+ *
+ * @returns a pointer to 'struct tee_shm'
+ */
+struct tee_shm *tee_shm_alloc(struct tee_device *teedev, size_t size,
+ u32 flags);
+
+/**
+ * tee_shm_free() - Free shared memory
+ * @shm: Handle to shared memory to free
+ */
+void tee_shm_free(struct tee_shm *shm);
+
+/**
+ * tee_shm_put() - Decrease reference count on a shared memory handle
+ * @shm: Shared memory handle
+ */
+void tee_shm_put(struct tee_shm *shm);
+
+/**
+ * tee_shm_va2pa() - Get physical address of a virtual address
+ * @shm: Shared memory handle
+ * @va: Virtual address to translate
+ * @pa: Returned physical address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa);
+
+/**
+ * tee_shm_pa2va() - Get virtual address of a physical address
+ * @shm: Shared memory handle
+ * @pa: Physical address to translate
+ * @va: Returned virtual address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va);
+
+/**
+ * tee_shm_get_va() - Get virtual address of a shared memory plus an offset
+ * @shm: Shared memory handle
+ * @offs: Offset from start of this shared memory
+ * @returns virtual address of the shared memory + offs if offs is within
+ * the bounds of this shared memory, else an ERR_PTR
+ */
+void *tee_shm_get_va(struct tee_shm *shm, size_t offs);
+
+/**
+ * tee_shm_get_pa() - Get physical address of a shared memory plus an offset
+ * @shm: Shared memory handle
+ * @offs: Offset from start of this shared memory
+ * @pa: Physical address to return
+ * @returns 0 if offs is within the bounds of this shared memory, else an
+ * error code.
+ */
+int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa);
+
+/**
+ * tee_shm_get_id() - Get id of a shared memory object
+ * @shm: Shared memory handle
+ * @returns id
+ */
+int tee_shm_get_id(struct tee_shm *shm);
+
+/**
+ * tee_shm_get_from_id() - Find shared memory object and increase reference count
+ * @teedev: Driver owning the shared memory
+ * @id: Id of shared memory object
+ * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
+ */
+struct tee_shm *tee_shm_get_from_id(struct tee_device *teedev, int id);
+
+struct tee_context *tee_client_open_context(struct tee_context *start,
+ int (*match)(struct tee_ioctl_version_data *,
+ const void *),
+ const void *data, struct tee_ioctl_version_data *vers);
+
+void tee_client_close_context(struct tee_context *ctx);
+
+void tee_client_get_version(struct tee_context *ctx,
+ struct tee_ioctl_version_data *vers);
+
+int tee_client_open_session(struct tee_context *ctx,
+ struct tee_ioctl_open_session_arg *arg,
+ struct tee_param *param);
+
+int tee_client_close_session(struct tee_context *ctx, u32 session);
+
+int tee_client_invoke_func(struct tee_context *ctx,
+ struct tee_ioctl_invoke_arg *arg,
+ struct tee_param *param);
+
+#endif /*__TEE_DRV_H*/
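
A hedged driver-side sketch of the registration sequence this header implies. The "acme" names and stub ops are illustrative, not part of the API:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/tee_drv.h>

static void acme_get_version(struct tee_device *teedev,
			     struct tee_ioctl_version_data *vers)
{
	vers->impl_id = TEE_IMPL_ID_OPTEE;	/* illustrative value */
	vers->impl_caps = 0;
	vers->gen_caps = 0;
}

static int acme_open(struct tee_context *ctx) { return 0; }
static void acme_release(struct tee_context *ctx) { }

static const struct tee_driver_ops acme_ops = {
	.get_version = acme_get_version,
	.open = acme_open,
	.release = acme_release,
};

static const struct tee_desc acme_desc = {
	.name = "acme-tee",
	.ops = &acme_ops,
	.owner = THIS_MODULE,
};

/* Allocate and register a TEE device; unregister cleans up on failure. */
static int acme_register(struct device *dev, struct tee_shm_pool *pool)
{
	struct tee_device *teedev;
	int rc;

	teedev = tee_device_alloc(&acme_desc, dev, pool, NULL);
	if (IS_ERR(teedev))
		return PTR_ERR(teedev);

	rc = tee_device_register(teedev);
	if (rc)
		tee_device_unregister(teedev);
	return rc;
}
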
diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h
index 0a0d56834c8e..9b366ee2f8f4 100644
--- a/include/linux/ti_wilink_st.h
+++ b/include/linux/ti_wilink_st.h
@@ -86,6 +86,7 @@ struct st_proto_s {
extern long st_register(struct st_proto_s *);
extern long st_unregister(struct st_proto_s *);
+extern struct ti_st_plat_data *dt_pdata;
/*
* header information used by st_core.c
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 3993b21f3d11..3ada32f24310 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -396,6 +396,7 @@ struct hc_driver {
int (*find_raw_port_number)(struct usb_hcd *, int);
/* Call for power on/off the port if necessary */
int (*port_power)(struct usb_hcd *hcd, int portnum, bool enable);
+ void (*change_bus_speed)(struct usb_hcd *, int);
};
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 1878d0a96333..2d9d19c3e051 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -370,6 +370,11 @@ struct hci_dev {
DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS);
+#ifdef CONFIG_BT_LEDS
+ struct led_trigger *tx_led, *rx_led;
+ char tx_led_name[32], rx_led_name[32];
+#endif
+
struct delayed_work le_scan_disable;
struct delayed_work le_scan_restart;
diff --git a/include/trace/events/cpufreq_sched.h b/include/trace/events/cpufreq_sched.h
new file mode 100644
index 000000000000..a46cd088e969
--- /dev/null
+++ b/include/trace/events/cpufreq_sched.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 Steve Muckle <smuckle@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cpufreq_sched
+
+#if !defined(_TRACE_CPUFREQ_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CPUFREQ_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(cpufreq_sched_throttled,
+ TP_PROTO(unsigned int rem),
+ TP_ARGS(rem),
+ TP_STRUCT__entry(
+ __field( unsigned int, rem)
+ ),
+ TP_fast_assign(
+ __entry->rem = rem;
+ ),
+ TP_printk("throttled - %d usec remaining", __entry->rem)
+);
+
+TRACE_EVENT(cpufreq_sched_request_opp,
+ TP_PROTO(int cpu,
+ unsigned long capacity,
+ unsigned int freq_new,
+ unsigned int requested_freq),
+ TP_ARGS(cpu, capacity, freq_new, requested_freq),
+ TP_STRUCT__entry(
+ __field( int, cpu)
+ __field( unsigned long, capacity)
+ __field( unsigned int, freq_new)
+ __field( unsigned int, requested_freq)
+ ),
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->capacity = capacity;
+ __entry->freq_new = freq_new;
+ __entry->requested_freq = requested_freq;
+ ),
+ TP_printk("cpu %d cap change, cluster cap request %ld => OPP %d "
+ "(cur %d)",
+ __entry->cpu, __entry->capacity, __entry->freq_new,
+ __entry->requested_freq)
+);
+
+TRACE_EVENT(cpufreq_sched_update_capacity,
+ TP_PROTO(int cpu,
+ bool request,
+ struct sched_capacity_reqs *scr,
+ unsigned long new_capacity),
+ TP_ARGS(cpu, request, scr, new_capacity),
+ TP_STRUCT__entry(
+ __field( int, cpu)
+ __field( bool, request)
+ __field( unsigned long, cfs)
+ __field( unsigned long, rt)
+ __field( unsigned long, dl)
+ __field( unsigned long, total)
+ __field( unsigned long, new_total)
+ ),
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->request = request;
+ __entry->cfs = scr->cfs;
+ __entry->rt = scr->rt;
+ __entry->dl = scr->dl;
+ __entry->total = scr->total;
+ __entry->new_total = new_capacity;
+ ),
+ TP_printk("cpu=%d set_cap=%d cfs=%ld rt=%ld dl=%ld old_tot=%ld "
+ "new_tot=%ld",
+ __entry->cpu, __entry->request, __entry->cfs, __entry->rt,
+ __entry->dl, __entry->total, __entry->new_total)
+);
+
+#endif /* _TRACE_CPUFREQ_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
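
For reference, the TP_printk() formats above render in the trace buffer roughly as follows (all values illustrative):

/*
 *   cpufreq_sched_throttled: throttled - 120 usec remaining
 *   cpufreq_sched_request_opp: cpu 1 cap change, cluster cap request 512 => OPP 600000 (cur 432000)
 *   cpufreq_sched_update_capacity: cpu=1 set_cap=1 cfs=300 rt=0 dl=0 old_tot=375 new_tot=380
 */
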
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 284244ebfe8d..f4be04e44252 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -120,6 +120,13 @@ DEFINE_EVENT(cpu, cpu_frequency,
TP_ARGS(frequency, cpu_id)
);
+DEFINE_EVENT(cpu, cpu_capacity,
+
+ TP_PROTO(unsigned int capacity, unsigned int cpu_id),
+
+ TP_ARGS(capacity, cpu_id)
+);
+
TRACE_EVENT(device_pm_callback_start,
TP_PROTO(struct device *dev, const char *pm_ops, int event),
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9b90c57517a9..7ec9dcfc701a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -562,6 +562,208 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
TP_printk("cpu=%d", __entry->cpu)
);
+
+TRACE_EVENT(sched_contrib_scale_f,
+
+ TP_PROTO(int cpu, unsigned long freq_scale_factor,
+ unsigned long cpu_scale_factor),
+
+ TP_ARGS(cpu, freq_scale_factor, cpu_scale_factor),
+
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(unsigned long, freq_scale_factor)
+ __field(unsigned long, cpu_scale_factor)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->freq_scale_factor = freq_scale_factor;
+ __entry->cpu_scale_factor = cpu_scale_factor;
+ ),
+
+ TP_printk("cpu=%d freq_scale_factor=%lu cpu_scale_factor=%lu",
+ __entry->cpu, __entry->freq_scale_factor,
+ __entry->cpu_scale_factor)
+);
+
+/*
+ * Tracepoint for accounting sched averages for tasks.
+ */
+TRACE_EVENT(sched_load_avg_task,
+
+ TP_PROTO(struct task_struct *tsk, struct sched_avg *avg),
+
+ TP_ARGS(tsk, avg),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( pid_t, pid )
+ __field( int, cpu )
+ __field( unsigned long, load_avg )
+ __field( unsigned long, util_avg )
+ __field( u64, load_sum )
+ __field( u32, util_sum )
+ __field( u32, period_contrib )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->cpu = task_cpu(tsk);
+ __entry->load_avg = avg->load_avg;
+ __entry->util_avg = avg->util_avg;
+ __entry->load_sum = avg->load_sum;
+ __entry->util_sum = avg->util_sum;
+ __entry->period_contrib = avg->period_contrib;
+ ),
+
+ TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu util_avg=%lu load_sum=%llu"
+ " util_sum=%u period_contrib=%u",
+ __entry->comm,
+ __entry->pid,
+ __entry->cpu,
+ __entry->load_avg,
+ __entry->util_avg,
+ (u64)__entry->load_sum,
+ (u32)__entry->util_sum,
+ (u32)__entry->period_contrib)
+);
+
+/*
+ * Tracepoint for accounting sched averages for cpus.
+ */
+TRACE_EVENT(sched_load_avg_cpu,
+
+ TP_PROTO(int cpu, struct cfs_rq *cfs_rq),
+
+ TP_ARGS(cpu, cfs_rq),
+
+ TP_STRUCT__entry(
+ __field( int, cpu )
+ __field( unsigned long, load_avg )
+ __field( unsigned long, util_avg )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->load_avg = cfs_rq->avg.load_avg;
+ __entry->util_avg = cfs_rq->avg.util_avg;
+ ),
+
+ TP_printk("cpu=%d load_avg=%lu util_avg=%lu",
+ __entry->cpu, __entry->load_avg, __entry->util_avg)
+);
+
+/*
+ * Tracepoint for sched_tune_config settings
+ */
+TRACE_EVENT(sched_tune_config,
+
+ TP_PROTO(int boost),
+
+ TP_ARGS(boost),
+
+ TP_STRUCT__entry(
+ __field( int, boost )
+ ),
+
+ TP_fast_assign(
+ __entry->boost = boost;
+ ),
+
+ TP_printk("boost=%d ", __entry->boost)
+);
+
+/*
+ * Tracepoint for accounting CPU boosted utilization
+ */
+TRACE_EVENT(sched_boost_cpu,
+
+ TP_PROTO(int cpu, unsigned long util, unsigned long margin),
+
+ TP_ARGS(cpu, util, margin),
+
+ TP_STRUCT__entry(
+ __field( int, cpu )
+ __field( unsigned long, util )
+ __field( unsigned long, margin )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->util = util;
+ __entry->margin = margin;
+ ),
+
+ TP_printk("cpu=%d util=%lu margin=%lu",
+ __entry->cpu,
+ __entry->util,
+ __entry->margin)
+);
+
+/*
+ * Tracepoint for schedtune_tasks_update
+ */
+TRACE_EVENT(sched_tune_tasks_update,
+
+ TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx,
+ unsigned int boost, unsigned int max_boost),
+
+ TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( pid_t, pid )
+ __field( int, cpu )
+ __field( int, tasks )
+ __field( int, idx )
+ __field( unsigned int, boost )
+ __field( unsigned int, max_boost )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->cpu = cpu;
+ __entry->tasks = tasks;
+ __entry->idx = idx;
+ __entry->boost = boost;
+ __entry->max_boost = max_boost;
+ ),
+
+ TP_printk("pid=%d comm=%s "
+ "cpu=%d tasks=%d idx=%d boost=%u max_boost=%u",
+ __entry->pid, __entry->comm,
+ __entry->cpu, __entry->tasks, __entry->idx,
+ __entry->boost, __entry->max_boost)
+);
+
+/*
+ * Tracepoint for schedtune_boostgroup_update
+ */
+TRACE_EVENT(sched_tune_boostgroup_update,
+
+ TP_PROTO(int cpu, int variation, int max_boost),
+
+ TP_ARGS(cpu, variation, max_boost),
+
+ TP_STRUCT__entry(
+ __field( int, cpu )
+ __field( int, variation )
+ __field( int, max_boost )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->variation = variation;
+ __entry->max_boost = max_boost;
+ ),
+
+ TP_printk("cpu=%d variation=%d max_boost=%d",
+ __entry->cpu, __entry->variation, __entry->max_boost)
+);
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
new file mode 100644
index 000000000000..ae07ed7f705f
--- /dev/null
+++ b/include/uapi/linux/tee.h
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __TEE_H
+#define __TEE_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * This file describes the API provided by a TEE driver to user space.
+ *
+ * Each TEE driver defines a TEE specific protocol which is used for the
+ * data passed back and forth using TEE_IOC_CMD.
+ */
+
+
+/* Helpers to make the ioctl defines */
+#define TEE_IOC_MAGIC 0xa4
+#define TEE_IOC_BASE 0
+
+/* Flags relating to shared memory */
+#define TEE_IOCTL_SHM_MAPPED 0x1 /* memory mapped in normal world */
+#define TEE_IOCTL_SHM_DMA_BUF 0x2 /* dma-buf handle on shared memory */
+
+#define TEE_MAX_ARG_SIZE 1024
+
+#define TEE_GEN_CAP_GP (1 << 0)/* Global Platform compliant TEE */
+
+/*
+ * TEE Implementation ID
+ */
+#define TEE_IMPL_ID_OPTEE 1
+
+/*
+ * OP-TEE specific capabilities
+ */
+#define TEE_OPTEE_CAP_TZ (1 << 0)
+
+/**
+ * struct tee_ioctl_version_data - TEE version
+ * @impl_id: [out] TEE implementation id
+ * @impl_caps: [out] Implementation specific capabilities
+ * @gen_caps: [out] Generic capabilities, defined by TEE_GEN_CAP_* above
+ *
+ * Identifies the TEE implementation, @impl_id is one of TEE_IMPL_ID_* above.
+ * @impl_caps is implementation specific, for example TEE_OPTEE_CAP_*
+ * is valid when @impl_id == TEE_IMPL_ID_OPTEE.
+ */
+struct tee_ioctl_version_data {
+ __u32 impl_id;
+ __u32 impl_caps;
+ __u32 gen_caps;
+};
+/**
+ * TEE_IOC_VERSION - query version of TEE
+ *
+ * Takes a tee_version struct and returns with the TEE version data filled
+ * in.
+ */
+#define TEE_IOC_VERSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 0, \
+ struct tee_ioctl_version_data)
+
+/**
+ * struct tee_ioctl_shm_alloc_data - Shared memory allocate argument
+ * @size: [in/out] Size of shared memory to allocate
+ * @flags: [in/out] Flags to/from allocation.
+ * @id: [out] Identifier of the shared memory
+ *
+ * The flags field should currently be zero as input. Updated by the call
+ * with actual flags as defined by TEE_IOCTL_SHM_* above.
+ * This structure is used as argument for TEE_IOC_SHM_ALLOC below.
+ */
+struct tee_ioctl_shm_alloc_data {
+ __u64 size;
+ __u32 flags;
+ __s32 id;
+};
+/**
+ * TEE_IOC_SHM_ALLOC - allocate shared memory
+ *
+ * Allocates shared memory between the user space process and secure OS.
+ *
+ * Returns a file descriptor on success or < 0 on failure
+ *
+ * The returned file descriptor is used to map the shared memory into user
+ * space. The shared memory is freed when the descriptor is closed and the
+ * memory is unmapped.
+ */
+#define TEE_IOC_SHM_ALLOC _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 1, \
+ struct tee_ioctl_shm_alloc_data)
+
+/**
+ * struct tee_ioctl_buf_data - Variable sized buffer
+ * @buf_ptr: [in] A __user pointer to a buffer
+ * @buf_len: [in] Length of the buffer above
+ *
+ * Used as argument for TEE_IOC_OPEN_SESSION, TEE_IOC_INVOKE,
+ * TEE_IOC_SUPPL_RECV, and TEE_IOC_SUPPL_SEND below.
+ */
+struct tee_ioctl_buf_data {
+ __u64 buf_ptr;
+ __u64 buf_len;
+};
+
+
+/*
+ * Attributes for struct tee_ioctl_param, selects field in the union
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_NONE 0 /* parameter not used */
+
+/*
+ * These define value parameters (struct tee_ioctl_param_value)
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT 1
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT 2
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT 3 /* input and output */
+
+/*
+ * These define shared memory reference parameters (struct
+ * tee_ioctl_param_memref)
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT 5
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT 6
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT 7 /* input and output */
+
+/*
+ * Mask for the type part of the attribute, leaves room for more types
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MASK 0xff
+
+/*
+ * Matches TEEC_LOGIN_* in GP TEE Client API
+ * Is only defined for GP compliant TEEs
+ */
+#define TEE_IOCTL_LOGIN_PUBLIC 0
+#define TEE_IOCTL_LOGIN_USER 1
+#define TEE_IOCTL_LOGIN_GROUP 2
+#define TEE_IOCTL_LOGIN_APPLICATION 4
+#define TEE_IOCTL_LOGIN_USER_APPLICATION 5
+#define TEE_IOCTL_LOGIN_GROUP_APPLICATION 6
+
+/**
+ * struct tee_ioctl_param_memref - memory reference
+ * @shm_offs: Offset into the shared memory object
+ * @size: Size of the buffer
+ * @shm_id: Shared memory identifier
+ *
+ * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an
+ * identifier representing the shared memory object. A memref can reference
+ * a part of a shared memory by specifying an offset (@shm_offs) and @size
+ * of the object. To supply the entire shared memory object set @shm_offs
+ * to 0 and @size to the previously returned size of the object.
+ */
+struct tee_ioctl_param_memref {
+ __u64 shm_offs;
+ __u64 size;
+ __s64 shm_id;
+};
+
+/**
+ * struct tee_ioctl_param_value - values
+ * @a: first value
+ * @b: second value
+ * @c: third value
+ */
+struct tee_ioctl_param_value {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+};
+
+/**
+ * struct tee_ioctl_param - parameter
+ * @attr: attributes
+ * @memref: a memory reference
+ * @value: a value
+ *
+ * @attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK indicates if memref or value is
+ * used in the union. TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_* indicates value and
+ * TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_* indicates memref.
+ * TEE_IOCTL_PARAM_ATTR_TYPE_NONE indicates that none of the members are
+ * used.
+ */
+struct tee_ioctl_param {
+ __u64 attr;
+ union {
+ struct tee_ioctl_param_memref memref;
+ struct tee_ioctl_param_value value;
+ } u;
+};
+
+#define TEE_IOCTL_UUID_LEN 16
+
+/**
+ * struct tee_ioctl_open_session_arg - Open session argument
+ * @uuid: [in] UUID of the Trusted Application
+ * @clnt_uuid: [in] UUID of client
+ * @clnt_login: [in] Login class of client, TEE_IOCTL_LOGIN_* above
+ * @cancel_id: [in] Cancellation id, a unique value to identify this request
+ * @session: [out] Session id
+ * @ret: [out] return value
+ * @ret_origin: [out] origin of the return value
+ * @num_params: [in] number of parameters following this struct
+ */
+struct tee_ioctl_open_session_arg {
+ __u8 uuid[TEE_IOCTL_UUID_LEN];
+ __u8 clnt_uuid[TEE_IOCTL_UUID_LEN];
+ __u32 clnt_login;
+ __u32 cancel_id;
+ __u32 session;
+ __u32 ret;
+ __u32 ret_origin;
+ __u32 num_params;
+ /*
+ * this struct is 8 byte aligned since the 'struct tee_ioctl_param'
+ * which follows requires 8 byte alignment.
+ *
+ * Commented-out element used to visualize the dynamic part of the
+ * struct layout. This field is not available at all if
+ * num_params == 0.
+ *
+ * struct tee_ioctl_param params[num_params];
+ */
+} __aligned(8);
+
+/**
+ * TEE_IOC_OPEN_SESSION - opens a session to a Trusted Application
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_ioctl_open_session_arg followed by an array of struct
+ * tee_ioctl_param
+ */
+#define TEE_IOC_OPEN_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 2, \
+ struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_ioctl_invoke_arg - Invokes a function in a Trusted Application
+ * @func: [in] Trusted Application function, specific to the TA
+ * @session: [in] Session id
+ * @cancel_id: [in] Cancellation id, a unique value to identify this request
+ * @ret: [out] return value
+ * @ret_origin: [out] origin of the return value
+ * @num_params: [in] number of parameters following this struct
+ */
+struct tee_ioctl_invoke_arg {
+ __u32 func;
+ __u32 session;
+ __u32 cancel_id;
+ __u32 ret;
+ __u32 ret_origin;
+ __u32 num_params;
+ /*
+ * this struct is 8 byte aligned since the 'struct tee_ioctl_param'
+ * which follows requires 8 byte alignment.
+ *
+ * Commented-out element used to visualize the dynamic part of the
+ * struct layout. This field is not available at all if
+ * num_params == 0.
+ *
+ * struct tee_ioctl_param params[num_params];
+ */
+} __aligned(8);
+
+/**
+ * TEE_IOC_INVOKE - Invokes a function in a Trusted Application
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_ioctl_invoke_arg followed by an array of struct tee_ioctl_param
+ */
+#define TEE_IOC_INVOKE _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 3, \
+ struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_ioctl_cancel_arg - Cancels an open session or invoke ioctl
+ * @cancel_id: [in] Cancellation id, a unique value to identify this request
+ * @session: [in] Session id, if the session is opened, else set to 0
+ */
+struct tee_ioctl_cancel_arg {
+ __u32 cancel_id;
+ __u32 session;
+};
+/**
+ * TEE_IOC_CANCEL - Cancels an open session or invoke
+ */
+#define TEE_IOC_CANCEL _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 4, \
+ struct tee_ioctl_cancel_arg)
+
+/**
+ * struct tee_ioctl_close_session_arg - Closes an open session
+ * @session: [in] Session id
+ */
+struct tee_ioctl_close_session_arg {
+ __u32 session;
+};
+/**
+ * TEE_IOC_CLOSE_SESSION - Closes a session
+ */
+#define TEE_IOC_CLOSE_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 5, \
+ struct tee_ioctl_close_session_arg)
+
+/**
+ * struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function
+ * @func: [in] supplicant function
+ * @num_params: [in/out] number of parameters following this struct
+ *
+ * On input, @num_params is the number of params tee-supplicant has room
+ * to receive; on output, it is the number of params tee-supplicant
+ * actually received.
+ */
+struct tee_iocl_supp_recv_arg {
+ __u32 func;
+ __u32 num_params;
+ /*
+ * this struct is 8 byte aligned since the 'struct tee_ioctl_param'
+ * which follows requires 8 byte alignment.
+ *
+ * Commented-out element used to visualize the dynamic part of the
+ * struct layout. This field is not available at all if
+ * num_params == 0.
+ *
+ * struct tee_ioctl_param params[num_params];
+ */
+} __aligned(8);
+/**
+ * TEE_IOC_SUPPL_RECV - Receive a request for a supplicant function
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_iocl_supp_recv_arg followed by an array of struct tee_ioctl_param
+ */
+#define TEE_IOC_SUPPL_RECV _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 6, \
+ struct tee_ioctl_buf_data)
+
+
+/**
+ * struct tee_iocl_supp_send_arg - Send a response to a received request
+ * @ret: [out] return value
+ * @num_params: [in] number of parameters following this struct
+ */
+struct tee_iocl_supp_send_arg {
+ __u32 ret;
+ __u32 num_params;
+ /*
+ * this struct is 8 byte aligned since the 'struct tee_ioctl_param'
+ * which follows requires 8 byte alignment.
+ *
+ * Commented-out element used to visualize the dynamic part of the
+ * struct layout. This field is not available at all if
+ * num_params == 0.
+ *
+ * struct tee_ioctl_param params[num_params];
+ */
+} __aligned(8);
+/**
+ * TEE_IOC_SUPPL_SEND - Send a response to a received request for a
+ * supplicant function
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_iocl_supp_send_arg followed by an array of struct tee_ioctl_param
+ */
+#define TEE_IOC_SUPPL_SEND _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 7, \
+ struct tee_ioctl_buf_data)
+
+
+/*
+ * Five syscalls are used when communicating with the TEE driver.
+ * open(): opens the device associated with the driver
+ * ioctl(): as described above, operating on the file descriptor from open()
+ * close(): two cases
+ * - closes the device file descriptor
+ * - closes a file descriptor connected to allocated shared memory
+ * mmap(): maps shared memory into user space using information from struct
+ * tee_ioctl_shm_alloc_data
+ * munmap(): unmaps previously shared memory
+ */
+
+#endif /*__TEE_H*/
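
A user-space sketch of the open()/ioctl()/mmap() flow described in the comment above. The device path /dev/tee0 is an assumption about how the char device is named, and error paths are abbreviated; the uapi header is assumed to be installed as <linux/tee.h>:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/tee.h>

int main(void)
{
	struct tee_ioctl_version_data vers;
	struct tee_ioctl_shm_alloc_data shm;
	void *p;
	int fd, shm_fd;

	fd = open("/dev/tee0", O_RDWR);		/* assumed device node */
	if (fd < 0)
		return 1;

	if (ioctl(fd, TEE_IOC_VERSION, &vers) == 0)
		printf("impl_id=%u gen_caps=%#x\n",
		       vers.impl_id, vers.gen_caps);

	memset(&shm, 0, sizeof(shm));
	shm.size = 4096;	/* flags must be zero on input */
	shm_fd = ioctl(fd, TEE_IOC_SHM_ALLOC, &shm);
	if (shm_fd >= 0) {
		/* Map the shared memory via the returned descriptor. */
		p = mmap(NULL, shm.size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, shm_fd, 0);
		if (p != MAP_FAILED)
			munmap(p, shm.size);
		close(shm_fd);	/* frees the shared memory */
	}

	close(fd);
	return 0;
}
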
diff --git a/init/Kconfig b/init/Kconfig
index 235c7a2c0d20..5d9097e2b805 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -999,6 +999,23 @@ config CGROUP_CPUACCT
Provides a simple Resource Controller for monitoring the
total CPU consumed by the tasks in a cgroup.
+config CGROUP_SCHEDTUNE
+ bool "CFS tasks boosting cgroup subsystem (EXPERIMENTAL)"
+ depends on SCHED_TUNE
+ help
+ This option provides the "schedtune" controller which improves the
+ flexibility of the task boosting mechanism by introducing support
+ for defining "per task" boost values.
+
+ This new controller:
+ 1. allows only a two-layer hierarchy, where the root defines the
+ system-wide boost value and its direct children each define a
+ different "class of tasks" to be boosted with a different value
+ 2. supports up to 16 different task classes, each of which can be
+ configured with a different boost value
+
+ Say N if unsure.
+
config PAGE_COUNTER
bool
@@ -1237,6 +1254,32 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.
+config SCHED_TUNE
+ bool "Boosting for CFS tasks (EXPERIMENTAL)"
+ help
+ This option enables system-wide support for task boosting.
+ When this support is enabled a new sysctl interface is exposed to
+ userspace via:
+ /proc/sys/kernel/sched_cfs_boost
+ which allows setting a system-wide boost value in the range [0..100].
+
+ The current boosting strategy is implemented in such a way that:
+ - a 0% boost value keeps the scheduler in "standard" mode,
+ scheduling all tasks at the minimum capacity required by their
+ workload demand
+ - a 100% boost value pushes task performance to the maximum,
+ regardless of the incurred energy consumption
+
+ A boost value in between these two boundaries is used to bias the
+ power/performance trade-off: the higher the boost value, the more the
+ scheduler is biased toward performance instead of energy
+ efficiency.
+
+ Since this support exposes a single system-wide knob, the specified
+ boost value is applied to all (CFS) tasks in the system.
+
+ If unsure, say N.
+
config SYSFS_DEPRECATED
bool "Enable deprecated sysfs features to support old userspace tools"
depends on SYSFS
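
A worked sketch of the boost semantics described in the SCHED_TUNE help above, assuming the SchedTune margin rule where a boost of B% adds B% of the spare capacity (on the 1024 capacity scale) on top of a task's estimated utilization. The authoritative definition is in kernel/sched/tune.c and Documentation/scheduler/sched-tune.txt from this series:

/*
 * margin(util, boost) = (1024 - util) * boost / 100
 *
 * boost =   0%  ->  margin = 0            (pure energy-efficient mode)
 * boost = 100%  ->  util + margin = 1024  (always request max capacity)
 */
static unsigned long schedtune_margin_sketch(unsigned long util,
					     unsigned int boost)
{
	return ((1024UL - util) * boost) / 100;
}

/* Example: util = 256, boost = 50 -> margin = 384, boosted util = 640. */
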
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 67687973ce80..c6a85f813dfd 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -12,10 +12,12 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
+obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o energy.o
obj-y += wait.o completion.o idle.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
+obj-$(CONFIG_SCHED_TUNE) += tune.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
+obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 20253dbc8610..12da25560c91 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2412,7 +2412,7 @@ void wake_up_new_task(struct task_struct *p)
#endif
rq = __task_rq_lock(p);
- activate_task(rq, p, 0);
+ activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
p->on_rq = TASK_ON_RQ_QUEUED;
trace_sched_wakeup_new(p);
check_preempt_curr(rq, p, WF_FORK);
@@ -2879,6 +2879,45 @@ unsigned long long task_sched_runtime(struct task_struct *p)
return ns;
}
+#ifdef CONFIG_CPU_FREQ_GOV_SCHED
+static unsigned long sum_capacity_reqs(unsigned long cfs_cap,
+ struct sched_capacity_reqs *scr)
+{
+ unsigned long total = cfs_cap + scr->rt;
+
+ total = total * capacity_margin;
+ total /= SCHED_CAPACITY_SCALE;
+ total += scr->dl;
+ return total;
+}
+
+static void sched_freq_tick(int cpu)
+{
+ struct sched_capacity_reqs *scr;
+ unsigned long capacity_orig, capacity_curr;
+
+ if (!sched_freq())
+ return;
+
+ capacity_orig = capacity_orig_of(cpu);
+ capacity_curr = capacity_curr_of(cpu);
+ if (capacity_curr == capacity_orig)
+ return;
+
+ /*
+ * To make room for a task that is building up its "real"
+ * utilization while harming its performance the least, request
+ * a jump to max OPP as soon as the margin of free capacity is
+ * impacted (specified by capacity_margin).
+ */
+ scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
+ if (capacity_curr < sum_capacity_reqs(cpu_util(cpu), scr))
+ set_cfs_cpu_capacity(cpu, true, capacity_max);
+}
+#else
+static inline void sched_freq_tick(int cpu) { }
+#endif
+
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
@@ -2896,6 +2935,7 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
update_cpu_load_active(rq);
calc_global_load_tick(rq);
+ sched_freq_tick(cpu);
raw_spin_unlock(&rq->lock);
perf_event_task_tick();
@@ -5401,9 +5441,60 @@ set_table_entry(struct ctl_table *entry,
}
static struct ctl_table *
+sd_alloc_ctl_energy_table(struct sched_group_energy *sge)
+{
+ struct ctl_table *table = sd_alloc_ctl_entry(5);
+
+ if (table == NULL)
+ return NULL;
+
+ set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states,
+ sizeof(int), 0644, proc_dointvec_minmax, false);
+ set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power,
+ sge->nr_idle_states*sizeof(struct idle_state), 0644,
+ proc_doulongvec_minmax, false);
+ set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states,
+ sizeof(int), 0644, proc_dointvec_minmax, false);
+ set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap,
+ sge->nr_cap_states*sizeof(struct capacity_state), 0644,
+ proc_doulongvec_minmax, false);
+
+ return table;
+}
+
+static struct ctl_table *
+sd_alloc_ctl_group_table(struct sched_group *sg)
+{
+ struct ctl_table *table = sd_alloc_ctl_entry(2);
+
+ if (table == NULL)
+ return NULL;
+
+ table->procname = kstrdup("energy", GFP_KERNEL);
+ table->mode = 0555;
+ table->child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge);
+
+ return table;
+}
+
+static struct ctl_table *
sd_alloc_ctl_domain_table(struct sched_domain *sd)
{
- struct ctl_table *table = sd_alloc_ctl_entry(14);
+ struct ctl_table *table;
+ unsigned int nr_entries = 14;
+
+ int i = 0;
+ struct sched_group *sg = sd->groups;
+
+ if (sg->sge) {
+ int nr_sgs = 0;
+
+ do {} while (nr_sgs++, sg = sg->next, sg != sd->groups);
+
+ nr_entries += nr_sgs;
+ }
+
+ table = sd_alloc_ctl_entry(nr_entries);
if (table == NULL)
return NULL;
@@ -5436,7 +5527,19 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
sizeof(long), 0644, proc_doulongvec_minmax, false);
set_table_entry(&table[12], "name", sd->name,
CORENAME_MAX_SIZE, 0444, proc_dostring, false);
- /* &table[13] is terminator */
+ sg = sd->groups;
+ if (sg->sge) {
+ char buf[32];
+ struct ctl_table *entry = &table[13];
+
+ do {
+ snprintf(buf, 32, "group%d", i);
+ entry->procname = kstrdup(buf, GFP_KERNEL);
+ entry->mode = 0555;
+ entry->child = sd_alloc_ctl_group_table(sg);
+ } while (entry++, i++, sg = sg->next, sg != sd->groups);
+ }
+ /* &table[nr_entries-1] is terminator */
return table;
}
@@ -5743,7 +5846,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
printk(KERN_CONT " %*pbl",
cpumask_pr_args(sched_group_cpus(group)));
if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
- printk(KERN_CONT " (cpu_capacity = %d)",
+ printk(KERN_CONT " (cpu_capacity = %lu)",
group->sgc->capacity);
}
@@ -5804,7 +5907,8 @@ static int sd_degenerate(struct sched_domain *sd)
SD_BALANCE_EXEC |
SD_SHARE_CPUCAPACITY |
SD_SHARE_PKG_RESOURCES |
- SD_SHARE_POWERDOMAIN)) {
+ SD_SHARE_POWERDOMAIN |
+ SD_SHARE_CAP_STATES)) {
if (sd->groups != sd->groups->next)
return 0;
}
@@ -5836,7 +5940,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
SD_SHARE_CPUCAPACITY |
SD_SHARE_PKG_RESOURCES |
SD_PREFER_SIBLING |
- SD_SHARE_POWERDOMAIN);
+ SD_SHARE_POWERDOMAIN |
+ SD_SHARE_CAP_STATES);
if (nr_node_ids == 1)
pflags &= ~SD_SERIALIZE;
}
@@ -5915,6 +6020,8 @@ static int init_rootdomain(struct root_domain *rd)
if (cpupri_init(&rd->cpupri) != 0)
goto free_rto_mask;
+
+ init_max_cpu_capacity(&rd->max_cpu_capacity);
return 0;
free_rto_mask:
@@ -6020,11 +6127,13 @@ DEFINE_PER_CPU(int, sd_llc_id);
DEFINE_PER_CPU(struct sched_domain *, sd_numa);
DEFINE_PER_CPU(struct sched_domain *, sd_busy);
DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+DEFINE_PER_CPU(struct sched_domain *, sd_ea);
+DEFINE_PER_CPU(struct sched_domain *, sd_scs);
static void update_top_cache_domain(int cpu)
{
struct sched_domain *sd;
- struct sched_domain *busy_sd = NULL;
+ struct sched_domain *busy_sd = NULL, *ea_sd = NULL;
int id = cpu;
int size = 1;
@@ -6045,6 +6154,17 @@ static void update_top_cache_domain(int cpu)
sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
+
+ for_each_domain(cpu, sd) {
+ if (sd->groups->sge)
+ ea_sd = sd;
+ else
+ break;
+ }
+ rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);
+
+ sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
+ rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
}
/*
@@ -6205,6 +6325,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
* die on a /0 trap.
*/
sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+ sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
/*
* Make sure the first group of this domain contains the
@@ -6334,6 +6455,66 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
}
/*
+ * Check that the per-cpu provided sd energy data is consistent for all cpus
+ * within the mask.
+ */
+static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn,
+ const struct cpumask *cpumask)
+{
+ const struct sched_group_energy * const sge = fn(cpu);
+ struct cpumask mask;
+ int i;
+
+ if (cpumask_weight(cpumask) <= 1)
+ return;
+
+ cpumask_xor(&mask, cpumask, get_cpu_mask(cpu));
+
+ for_each_cpu(i, &mask) {
+ const struct sched_group_energy * const e = fn(i);
+ int y;
+
+ BUG_ON(e->nr_idle_states != sge->nr_idle_states);
+
+ for (y = 0; y < (e->nr_idle_states); y++) {
+ BUG_ON(e->idle_states[y].power !=
+ sge->idle_states[y].power);
+ }
+
+ BUG_ON(e->nr_cap_states != sge->nr_cap_states);
+
+ for (y = 0; y < (e->nr_cap_states); y++) {
+ BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap);
+ BUG_ON(e->cap_states[y].power !=
+ sge->cap_states[y].power);
+ }
+ }
+}
+
+static void init_sched_energy(int cpu, struct sched_domain *sd,
+ sched_domain_energy_f fn)
+{
+ if (!(fn && fn(cpu)))
+ return;
+
+ if (cpu != group_balance_cpu(sd->groups))
+ return;
+
+ if (sd->child && !sd->child->groups->sge) {
+ pr_err("BUG: EAS setup broken for CPU%d\n", cpu);
+#ifdef CONFIG_SCHED_DEBUG
+ pr_err(" energy data on %s but not on %s domain\n",
+ sd->name, sd->child->name);
+#endif
+ return;
+ }
+
+ check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups));
+
+ sd->groups->sge = fn(cpu);
+}
+
+/*
* Initializers for schedule domains
* Non-inlined to reduce accumulated stack pressure in build_sched_domains()
*/
@@ -6441,6 +6622,7 @@ static int sched_domains_curr_level;
* SD_SHARE_PKG_RESOURCES - describes shared caches
* SD_NUMA - describes NUMA topologies
* SD_SHARE_POWERDOMAIN - describes shared power domain
+ * SD_SHARE_CAP_STATES - describes shared capacity states
*
* Odd one out:
* SD_ASYM_PACKING - describes SMT quirks
@@ -6450,7 +6632,8 @@ static int sched_domains_curr_level;
SD_SHARE_PKG_RESOURCES | \
SD_NUMA | \
SD_ASYM_PACKING | \
- SD_SHARE_POWERDOMAIN)
+ SD_SHARE_POWERDOMAIN | \
+ SD_SHARE_CAP_STATES)
static struct sched_domain *
sd_init(struct sched_domain_topology_level *tl, int cpu)
@@ -7000,6 +7183,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
enum s_alloc alloc_state;
struct sched_domain *sd;
struct s_data d;
+ struct rq *rq = NULL;
int i, ret = -ENOMEM;
alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -7038,10 +7222,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
/* Calculate CPU capacity for physical packages and nodes */
for (i = nr_cpumask_bits-1; i >= 0; i--) {
+ struct sched_domain_topology_level *tl = sched_domain_topology;
+
if (!cpumask_test_cpu(i, cpu_map))
continue;
- for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
+ init_sched_energy(i, sd, tl->energy);
claim_allocations(i, sd);
init_sched_groups_capacity(i, sd);
}
@@ -7050,6 +7237,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
/* Attach the domains */
rcu_read_lock();
for_each_cpu(i, cpu_map) {
+ rq = cpu_rq(i);
sd = *per_cpu_ptr(d.sd, i);
cpu_attach_domain(sd, d.rd, i);
}
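
A numerical sketch of the headroom test in sched_freq_tick() above, assuming capacity_margin = 1280, i.e. a 1.25x margin on the 1024 capacity scale (the actual constant is defined in kernel/sched/fair.c):

/*
 * sum_capacity_reqs() scales the cfs+rt request by the margin and
 * then adds the deadline request on top:
 *
 *   cfs = 600, rt = 100, dl = 50
 *   total = (600 + 100) * 1280 / 1024 + 50 = 875 + 50 = 925
 *
 * If capacity_curr < 925 the tick requests a jump to capacity_max,
 * leaving a ramping task headroom before it saturates the CPU.
 */
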
diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
new file mode 100644
index 000000000000..e1d208e101ed
--- /dev/null
+++ b/kernel/sched/cpufreq_sched.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright (C) 2015 Michael Turquette <mturquette@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/percpu.h>
+#include <linux/irq_work.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpufreq_sched.h>
+
+#include "sched.h"
+
+#define THROTTLE_NSEC 50000000 /* 50ms default */
+
+struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
+static bool __read_mostly cpufreq_driver_slow;
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
+static struct cpufreq_governor cpufreq_gov_sched;
+#endif
+
+static DEFINE_PER_CPU(unsigned long, enabled);
+DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
+
+/**
+ * gov_data - per-policy data internal to the governor
+ * @throttle: next throttling period expiry. Derived from throttle_nsec
+ * @throttle_nsec: throttle period length in nanoseconds
+ * @task: worker thread for dvfs transition that may block/sleep
+ * @irq_work: callback used to wake up worker thread
+ * @requested_freq: last frequency requested by the sched governor
+ *
+ * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
+ * per-policy instance of it is created when the cpufreq_sched governor receives
+ * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
+ * member of struct cpufreq_policy.
+ *
+ * Readers of this data must call down_read(policy->rwsem). Writers must
+ * call down_write(policy->rwsem).
+ */
+struct gov_data {
+ ktime_t throttle;
+ unsigned int throttle_nsec;
+ struct task_struct *task;
+ struct irq_work irq_work;
+ unsigned int requested_freq;
+};
+
+static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
+ unsigned int freq)
+{
+ struct gov_data *gd = policy->governor_data;
+
+ /* avoid race with cpufreq_sched_stop */
+ if (!down_write_trylock(&policy->rwsem))
+ return;
+
+ __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
+
+ gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec);
+ up_write(&policy->rwsem);
+}
+
+static bool finish_last_request(struct gov_data *gd)
+{
+ ktime_t now = ktime_get();
+
+ if (ktime_after(now, gd->throttle))
+ return false;
+
+ while (1) {
+ int usec_left = ktime_to_ns(ktime_sub(gd->throttle, now));
+
+ usec_left /= NSEC_PER_USEC;
+ trace_cpufreq_sched_throttled(usec_left);
+ usleep_range(usec_left, usec_left + 100);
+ now = ktime_get();
+ if (ktime_after(now, gd->throttle))
+ return true;
+ }
+}
+
+/*
+ * We pass in struct cpufreq_policy. This is safe because changing out the
+ * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
+ * which tears down all of the data structures and __cpufreq_governor(policy,
+ * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
+ * new policy pointer
+ */
+static int cpufreq_sched_thread(void *data)
+{
+ struct sched_param param;
+ struct cpufreq_policy *policy;
+ struct gov_data *gd;
+ unsigned int new_request = 0;
+ unsigned int last_request = 0;
+ int ret;
+
+ policy = (struct cpufreq_policy *) data;
+ gd = policy->governor_data;
+
+ param.sched_priority = 50;
+ ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
+ if (ret) {
+ pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
+ do_exit(-EINVAL);
+ } else {
+ pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
+ __func__, gd->task->pid);
+ }
+
+ do {
+ new_request = gd->requested_freq;
+ if (new_request == last_request) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ } else {
+ /*
+ * if the frequency thread sleeps while waiting to be
+ * unthrottled, start over to check for a newer request
+ */
+ if (finish_last_request(gd))
+ continue;
+ last_request = new_request;
+ cpufreq_sched_try_driver_target(policy, new_request);
+ }
+ } while (!kthread_should_stop());
+
+ return 0;
+}
+
+static void cpufreq_sched_irq_work(struct irq_work *irq_work)
+{
+ struct gov_data *gd;
+
+ gd = container_of(irq_work, struct gov_data, irq_work);
+ if (!gd)
+ return;
+
+ wake_up_process(gd->task);
+}
+
+static void update_fdomain_capacity_request(int cpu)
+{
+ unsigned int freq_new, index_new, cpu_tmp;
+ struct cpufreq_policy *policy;
+ struct gov_data *gd;
+ unsigned long capacity = 0;
+
+ /*
+ * Avoid grabbing the policy if possible. A test is still
+ * required after locking the CPU's policy to avoid racing
+ * with the governor changing.
+ */
+ if (!per_cpu(enabled, cpu))
+ return;
+
+ policy = cpufreq_cpu_get(cpu);
+ if (IS_ERR_OR_NULL(policy))
+ return;
+
+ if (policy->governor != &cpufreq_gov_sched ||
+ !policy->governor_data)
+ goto out;
+
+ gd = policy->governor_data;
+
+ /* find max capacity requested by cpus in this policy */
+ for_each_cpu(cpu_tmp, policy->cpus) {
+ struct sched_capacity_reqs *scr;
+
+ scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
+ capacity = max(capacity, scr->total);
+ }
+
+ /* Convert the new maximum capacity request into a cpu frequency */
+ freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
+ if (cpufreq_frequency_table_target(policy, policy->freq_table,
+ freq_new, CPUFREQ_RELATION_L,
+ &index_new))
+ goto out;
+ freq_new = policy->freq_table[index_new].frequency;
+
+ trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
+ gd->requested_freq);
+
+ if (freq_new == gd->requested_freq)
+ goto out;
+
+ gd->requested_freq = freq_new;
+
+ /*
+ * Throttling is not yet supported on platforms with fast cpufreq
+ * drivers.
+ */
+ if (cpufreq_driver_slow)
+ irq_work_queue_on(&gd->irq_work, cpu);
+ else
+ cpufreq_sched_try_driver_target(policy, freq_new);
+
+out:
+ cpufreq_cpu_put(policy);
+}
+
+void update_cpu_capacity_request(int cpu, bool request)
+{
+ unsigned long new_capacity;
+ struct sched_capacity_reqs *scr;
+
+ /* The rq lock serializes access to the CPU's sched_capacity_reqs. */
+ lockdep_assert_held(&cpu_rq(cpu)->lock);
+
+ scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
+
+ new_capacity = scr->cfs + scr->rt;
+ new_capacity = new_capacity * capacity_margin
+ / SCHED_CAPACITY_SCALE;
+ new_capacity += scr->dl;
+
+ if (new_capacity == scr->total)
+ return;
+
+ trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);
+
+ scr->total = new_capacity;
+ if (request)
+ update_fdomain_capacity_request(cpu);
+}
+
+static inline void set_sched_freq(void)
+{
+ static_key_slow_inc(&__sched_freq);
+}
+
+static inline void clear_sched_freq(void)
+{
+ static_key_slow_dec(&__sched_freq);
+}
+
+static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
+{
+ struct gov_data *gd;
+ int cpu;
+
+ for_each_cpu(cpu, policy->cpus)
+ memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
+ sizeof(struct sched_capacity_reqs));
+
+ gd = kzalloc(sizeof(*gd), GFP_KERNEL);
+ if (!gd)
+ return -ENOMEM;
+
+ gd->throttle_nsec = policy->cpuinfo.transition_latency ?
+ policy->cpuinfo.transition_latency :
+ THROTTLE_NSEC;
+ pr_debug("%s: throttle threshold = %u [ns]\n",
+ __func__, gd->throttle_nsec);
+
+ if (cpufreq_driver_is_slow()) {
+ cpufreq_driver_slow = true;
+ gd->task = kthread_create(cpufreq_sched_thread, policy,
+ "kschedfreq:%d",
+ cpumask_first(policy->related_cpus));
+ if (IS_ERR_OR_NULL(gd->task)) {
+ pr_err("%s: failed to create kschedfreq thread\n",
+ __func__);
+ goto err;
+ }
+ get_task_struct(gd->task);
+ kthread_bind_mask(gd->task, policy->related_cpus);
+ wake_up_process(gd->task);
+ init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
+ }
+
+ policy->governor_data = gd;
+ set_sched_freq();
+
+ return 0;
+
+err:
+ kfree(gd);
+ return -ENOMEM;
+}
+
+static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
+{
+ struct gov_data *gd = policy->governor_data;
+
+ clear_sched_freq();
+ if (cpufreq_driver_slow) {
+ kthread_stop(gd->task);
+ put_task_struct(gd->task);
+ }
+
+ policy->governor_data = NULL;
+
+ kfree(gd);
+ return 0;
+}
+
+static int cpufreq_sched_start(struct cpufreq_policy *policy)
+{
+ int cpu;
+
+ for_each_cpu(cpu, policy->cpus)
+ per_cpu(enabled, cpu) = 1;
+
+ return 0;
+}
+
+static int cpufreq_sched_stop(struct cpufreq_policy *policy)
+{
+ int cpu;
+
+ for_each_cpu(cpu, policy->cpus)
+ per_cpu(enabled, cpu) = 0;
+
+ return 0;
+}
+
+static int cpufreq_sched_setup(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ switch (event) {
+ case CPUFREQ_GOV_POLICY_INIT:
+ return cpufreq_sched_policy_init(policy);
+ case CPUFREQ_GOV_POLICY_EXIT:
+ return cpufreq_sched_policy_exit(policy);
+ case CPUFREQ_GOV_START:
+ return cpufreq_sched_start(policy);
+ case CPUFREQ_GOV_STOP:
+ return cpufreq_sched_stop(policy);
+ case CPUFREQ_GOV_LIMITS:
+ break;
+ }
+ return 0;
+}
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
+static
+#endif
+struct cpufreq_governor cpufreq_gov_sched = {
+ .name = "sched",
+ .governor = cpufreq_sched_setup,
+ .owner = THIS_MODULE,
+};
+
+static int __init cpufreq_sched_init(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu, cpu_possible_mask)
+ per_cpu(enabled, cpu) = 0;
+ return cpufreq_register_governor(&cpufreq_gov_sched);
+}
+
+/* Try to make this the default governor */
+fs_initcall(cpufreq_sched_init);
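
A worked example of the capacity-to-frequency conversion in update_fdomain_capacity_request() above, using SCHED_CAPACITY_SHIFT = 10 and an illustrative policy->max of 1200000 kHz:

/*
 *   capacity = 512 (half of the 1024 scale)
 *   freq_new = 512 * 1200000 >> 10 = 600000 kHz
 *
 * cpufreq_frequency_table_target(..., CPUFREQ_RELATION_L) then rounds
 * the request up to the lowest table frequency at or above 600 MHz
 * before it is handed to the driver.
 */
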
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e984f059e5fc..f10b1cb255b2 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -43,6 +43,24 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
return !RB_EMPTY_NODE(&dl_se->rb_node);
}
+static void add_average_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+ u64 se_bw = dl_se->dl_bw;
+
+ dl_rq->avg_bw += se_bw;
+}
+
+static void clear_average_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+ u64 se_bw = dl_se->dl_bw;
+
+ dl_rq->avg_bw -= se_bw;
+ if (dl_rq->avg_bw < 0) {
+ WARN_ON(1);
+ dl_rq->avg_bw = 0;
+ }
+}
+
static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
{
struct sched_dl_entity *dl_se = &p->dl;
@@ -494,6 +512,9 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
+ if (dl_se->dl_new)
+ add_average_bw(dl_se, dl_rq);
+
/*
* The arrival of a new instance needs special treatment, i.e.,
* the actual scheduling parameters have to be "renewed".
@@ -741,8 +762,6 @@ static void update_curr_dl(struct rq *rq)
curr->se.exec_start = rq_clock_task(rq);
cpuacct_charge(curr, delta_exec);
- sched_rt_avg_update(rq, delta_exec);
-
dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
if (dl_runtime_exceeded(dl_se)) {
dl_se->dl_throttled = 1;
@@ -1241,6 +1260,8 @@ static void task_fork_dl(struct task_struct *p)
static void task_dead_dl(struct task_struct *p)
{
struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+ struct dl_rq *dl_rq = dl_rq_of_se(&p->dl);
+ struct rq *rq = rq_of_dl_rq(dl_rq);
/*
* Since we are TASK_DEAD we won't slip out of the domain!
@@ -1249,6 +1270,8 @@ static void task_dead_dl(struct task_struct *p)
/* XXX we should retain the bw until 0-lag */
dl_b->total_bw -= p->dl.dl_bw;
raw_spin_unlock_irq(&dl_b->lock);
+
+ clear_average_bw(&p->dl, &rq->dl);
}
static void set_curr_task_dl(struct rq *rq)
@@ -1556,7 +1579,9 @@ retry:
}
deactivate_task(rq, next_task, 0);
+ clear_average_bw(&next_task->dl, &rq->dl);
set_task_cpu(next_task, later_rq->cpu);
+ add_average_bw(&next_task->dl, &later_rq->dl);
activate_task(later_rq, next_task, 0);
ret = 1;
@@ -1644,7 +1669,9 @@ static void pull_dl_task(struct rq *this_rq)
resched = true;
deactivate_task(src_rq, p, 0);
+ clear_average_bw(&p->dl, &src_rq->dl);
set_task_cpu(p, this_cpu);
+ add_average_bw(&p->dl, &this_rq->dl);
activate_task(this_rq, p, 0);
dmin = p->dl.deadline;
@@ -1750,6 +1777,8 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
if (!start_dl_timer(p))
__dl_clear_params(p);
+ clear_average_bw(&p->dl, &rq->dl);
+
/*
* Since this might be the only -deadline task on the rq,
* this is the right place to try to pull some other one
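
A numerical sketch of the avg_bw bookkeeping added above, assuming dl_bw holds the task's runtime/period ratio in the kernel's usual 20-bit fixed point (as produced by to_ratio()):

/*
 *   runtime = 10 ms, period = 100 ms
 *   dl_bw   = (10 << 20) / 100 ~= 104858   (~0.1 of one CPU)
 *
 * add_average_bw()/clear_average_bw() keep dl_rq->avg_bw equal to the
 * sum of these ratios for the entities on the rq, including across
 * push/pull migrations, so a per-CPU deadline capacity request can be
 * derived from it.
 */
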
diff --git a/kernel/sched/energy.c b/kernel/sched/energy.c
new file mode 100644
index 000000000000..b0656b7a93e3
--- /dev/null
+++ b/kernel/sched/energy.c
@@ -0,0 +1,124 @@
+/*
+ * Obtain energy cost data from DT and populate relevant scheduler data
+ * structures.
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#define pr_fmt(fmt) "sched-energy: " fmt
+
+#define DEBUG
+
+#include <linux/gfp.h>
+#include <linux/of.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/sched_energy.h>
+#include <linux/stddef.h>
+
+struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS];
+
+static void free_resources(void)
+{
+ int cpu, sd_level;
+ struct sched_group_energy *sge;
+
+ for_each_possible_cpu(cpu) {
+ for_each_possible_sd_level(sd_level) {
+ sge = sge_array[cpu][sd_level];
+ if (sge) {
+ kfree(sge->cap_states);
+ kfree(sge->idle_states);
+ kfree(sge);
+ }
+ }
+ }
+}
+
+void init_sched_energy_costs(void)
+{
+ struct device_node *cn, *cp;
+ struct capacity_state *cap_states;
+ struct idle_state *idle_states;
+ struct sched_group_energy *sge;
+ const struct property *prop;
+ int sd_level, i, nstates, cpu;
+ const __be32 *val;
+
+ for_each_possible_cpu(cpu) {
+ cn = of_get_cpu_node(cpu, NULL);
+ if (!cn) {
+ pr_warn("CPU device node missing for CPU %d\n", cpu);
+ return;
+ }
+
+ if (!of_find_property(cn, "sched-energy-costs", NULL)) {
+ pr_warn("CPU device node has no sched-energy-costs\n");
+ return;
+ }
+
+ for_each_possible_sd_level(sd_level) {
+ cp = of_parse_phandle(cn, "sched-energy-costs", sd_level);
+ if (!cp)
+ break;
+
+ prop = of_find_property(cp, "busy-cost-data", NULL);
+ if (!prop || !prop->value) {
+ pr_warn("No busy-cost data, skipping sched_energy init\n");
+ goto out;
+ }
+
+ sge = kcalloc(1, sizeof(struct sched_group_energy),
+ GFP_NOWAIT);
+
+ nstates = (prop->length / sizeof(u32)) / 2;
+ cap_states = kcalloc(nstates,
+ sizeof(struct capacity_state),
+ GFP_NOWAIT);
+
+ for (i = 0, val = prop->value; i < nstates; i++) {
+ cap_states[i].cap = be32_to_cpup(val++);
+ cap_states[i].power = be32_to_cpup(val++);
+ }
+
+ sge->nr_cap_states = nstates;
+ sge->cap_states = cap_states;
+
+ prop = of_find_property(cp, "idle-cost-data", NULL);
+ if (!prop || !prop->value) {
+ pr_warn("No idle-cost data, skipping sched_energy init\n");
+ goto out;
+ }
+
+ nstates = (prop->length / sizeof(u32));
+ idle_states = kcalloc(nstates,
+ sizeof(struct idle_state),
+ GFP_NOWAIT);
+
+ for (i = 0, val = prop->value; i < nstates; i++)
+ idle_states[i].power = be32_to_cpup(val++);
+
+ sge->nr_idle_states = nstates;
+ sge->idle_states = idle_states;
+
+ sge_array[cpu][sd_level] = sge;
+ }
+ }
+
+ pr_info("Sched-energy-costs installed from DT\n");
+ return;
+
+out:
+ free_resources();
+}
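/*
 * Editor's note: an illustrative sketch (not part of the patch) of how a
 * hypothetical "busy-cost-data" property decodes under the parsing above.
 * Each capacity state is a <capacity power> pair of u32 cells, so
 * nstates = (prop->length / sizeof(u32)) / 2.
 */
static const u32 example_busy_cost_data[] = {
	426, 77,	/* cap_states[0]: capacity 426 at power 77 */
	1024, 430,	/* cap_states[1]: capacity 1024 at power 430 */
};
/* prop->length = 4 * sizeof(u32), hence nstates = 4 / 2 = 2 capacity states */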
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 812069b66f47..ae6207c63da5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -34,6 +34,7 @@
#include <trace/events/sched.h>
#include "sched.h"
+#include "tune.h"
/*
* Targeted preemption latency for CPU-bound tasks:
@@ -2600,6 +2601,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
scale_freq = arch_scale_freq_capacity(NULL, cpu);
scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+ trace_sched_contrib_scale_f(cpu, scale_freq, scale_cpu);
/* delta_w is the amount already accumulated against our next period */
delta_w = sa->period_contrib;
@@ -2760,6 +2762,10 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
update_tg_load_avg(cfs_rq, 0);
+
+ if (entity_is_task(se))
+ trace_sched_load_avg_task(task_of(se), &se->avg);
+ trace_sched_load_avg_cpu(cpu, cfs_rq);
}
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -2840,27 +2846,45 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
}
-/*
- * Task first catches up with cfs_rq, and then subtract
- * itself from the cfs_rq (task must be off the queue now).
- */
-void remove_entity_load_avg(struct sched_entity *se)
-{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 last_update_time;
-
#ifndef CONFIG_64BIT
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
u64 last_update_time_copy;
+ u64 last_update_time;
do {
last_update_time_copy = cfs_rq->load_last_update_time_copy;
smp_rmb();
last_update_time = cfs_rq->avg.last_update_time;
} while (last_update_time != last_update_time_copy);
+
+ return last_update_time;
+}
#else
- last_update_time = cfs_rq->avg.last_update_time;
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->avg.last_update_time;
+}
#endif
+/*
+ * Task first catches up with cfs_rq, and then subtracts
+ * itself from the cfs_rq (task must be off the queue now).
+ */
+void remove_entity_load_avg(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ u64 last_update_time;
+
+ /*
+ * A newly created task or a never-used group entity should not be
+ * removed from its (source) cfs_rq.
+ */
+ if (se->avg.last_update_time == 0)
+ return;
+
+ last_update_time = cfs_rq_last_update_time(cfs_rq);
+
__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
@@ -4177,6 +4201,23 @@ static inline void hrtick_update(struct rq *rq)
}
#endif
+static inline unsigned long boosted_cpu_util(int cpu);
+
+static void update_capacity_of(int cpu)
+{
+ unsigned long req_cap;
+
+ if (!sched_freq())
+ return;
+
+ /* Convert scale-invariant utilization into a capacity request for this cpu. */
+ req_cap = boosted_cpu_util(cpu);
+ req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
+ set_cfs_cpu_capacity(cpu, true, req_cap);
+}
+
+static bool cpu_overutilized(int cpu);
+
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
@@ -4187,6 +4228,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ int task_new = flags & ENQUEUE_WAKEUP_NEW;
+ int task_wakeup = flags & ENQUEUE_WAKEUP;
for_each_sched_entity(se) {
if (se->on_rq)
@@ -4218,9 +4261,24 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
update_cfs_shares(cfs_rq);
}
- if (!se)
+ if (!se) {
add_nr_running(rq, 1);
+ if (!task_new && !rq->rd->overutilized &&
+ cpu_overutilized(rq->cpu))
+ rq->rd->overutilized = true;
+ schedtune_enqueue_task(p, cpu_of(rq));
+
+ /*
+ * We want to potentially trigger a freq switch
+ * request only for tasks that are waking up; this is
+ * because we get here also during load balancing, but
+ * in these cases it seems wise to trigger a single
+ * request after load balancing is done.
+ */
+ if (task_new || task_wakeup)
+ update_capacity_of(cpu_of(rq));
+ }
hrtick_update(rq);
}
@@ -4277,9 +4335,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
update_cfs_shares(cfs_rq);
}
- if (!se)
+ if (!se) {
sub_nr_running(rq, 1);
+ schedtune_dequeue_task(p, cpu_of(rq));
+ /*
+ * We want to potentially trigger a freq switch
+ * request only for tasks that are going to sleep;
+ * this is because we get here also during load
+ * balancing, but in these cases it seems wise to
+ * trigger a single request after load balancing is
+ * done.
+ */
+ if (task_sleep) {
+ if (rq->cfs.nr_running)
+ update_capacity_of(cpu_of(rq));
+ else if (sched_freq())
+ set_cfs_cpu_capacity(cpu_of(rq), false, 0);
+ }
+ }
hrtick_update(rq);
}
@@ -4506,15 +4580,6 @@ static unsigned long target_load(int cpu, int type)
return max(rq->cpu_load[type-1], total);
}
-static unsigned long capacity_of(int cpu)
-{
- return cpu_rq(cpu)->cpu_capacity;
-}
-
-static unsigned long capacity_orig_of(int cpu)
-{
- return cpu_rq(cpu)->cpu_capacity_orig;
-}
static unsigned long cpu_avg_load_per_task(int cpu)
{
@@ -4689,6 +4754,352 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
#endif
/*
+ * Returns the current capacity of cpu after applying both
+ * cpu and freq scaling.
+ */
+unsigned long capacity_curr_of(int cpu)
+{
+ return cpu_rq(cpu)->cpu_capacity_orig *
+ arch_scale_freq_capacity(NULL, cpu)
+ >> SCHED_CAPACITY_SHIFT;
+}
+
+static inline bool energy_aware(void)
+{
+ return sched_feat(ENERGY_AWARE);
+}
+
+struct energy_env {
+ struct sched_group *sg_top;
+ struct sched_group *sg_cap;
+ int cap_idx;
+ int util_delta;
+ int src_cpu;
+ int dst_cpu;
+ int energy;
+ int payoff;
+ struct task_struct *task;
+ struct {
+ int before;
+ int after;
+ int delta;
+ int diff;
+ } nrg;
+ struct {
+ int before;
+ int after;
+ int delta;
+ } cap;
+};
+
+/*
+ * __cpu_norm_util() returns the cpu util relative to a specific capacity,
+ * i.e. its busy ratio, in the range [0..SCHED_LOAD_SCALE], which is useful for
+ * energy calculations. Using the scale-invariant util returned by
+ * cpu_util() and approximating scale-invariant util by:
+ *
+ * util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time
+ *
+ * the normalized util can be found using the specific capacity.
+ *
+ * capacity = capacity_orig * curr_freq/max_freq
+ *
+ * norm_util = running_time/time ~ util/capacity
+ */
+static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta)
+{
+ int util = __cpu_util(cpu, delta);
+
+ if (util >= capacity)
+ return SCHED_CAPACITY_SCALE;
+
+ return (util << SCHED_CAPACITY_SHIFT)/capacity;
+}
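/*
 * Editor's note: a worked example of __cpu_norm_util() with assumed numbers.
 * For util = 300 and capacity = 600 (both in SCHED_CAPACITY_SCALE units):
 *
 *   norm_util = (300 << SCHED_CAPACITY_SHIFT) / 600 = 512
 *
 * i.e. a busy ratio of 512/1024 = 50% of the given capacity; any util at or
 * above the capacity clamps to SCHED_CAPACITY_SCALE (100% busy).
 */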
+
+static int calc_util_delta(struct energy_env *eenv, int cpu)
+{
+ if (cpu == eenv->src_cpu)
+ return -eenv->util_delta;
+ if (cpu == eenv->dst_cpu)
+ return eenv->util_delta;
+ return 0;
+}
+
+static
+unsigned long group_max_util(struct energy_env *eenv)
+{
+ int i, delta;
+ unsigned long max_util = 0;
+
+ for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) {
+ delta = calc_util_delta(eenv, i);
+ max_util = max(max_util, __cpu_util(i, delta));
+ }
+
+ return max_util;
+}
+
+/*
+ * group_norm_util() returns the approximated group util relative to its
+ * current capacity (busy ratio) in the range [0..SCHED_LOAD_SCALE] for use in
+ * energy calculations. Since task executions may or may not overlap in time
+ * in the group, the true normalized util lies between max(cpu_norm_util(i))
+ * and sum(cpu_norm_util(i)), iterating over all cpus i in the group. The
+ * latter is used as the estimate since it leads to a more pessimistic
+ * (busier) energy estimate.
+ */
+static unsigned
+long group_norm_util(struct energy_env *eenv, struct sched_group *sg)
+{
+ int i, delta;
+ unsigned long util_sum = 0;
+ unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
+
+ for_each_cpu(i, sched_group_cpus(sg)) {
+ delta = calc_util_delta(eenv, i);
+ util_sum += __cpu_norm_util(i, capacity, delta);
+ }
+
+ if (util_sum > SCHED_CAPACITY_SCALE)
+ return SCHED_CAPACITY_SCALE;
+ return util_sum;
+}
+
+static int find_new_capacity(struct energy_env *eenv,
+ const struct sched_group_energy *sge)
+{
+ int idx;
+ unsigned long util = group_max_util(eenv);
+
+ for (idx = 0; idx < sge->nr_cap_states; idx++) {
+ if (sge->cap_states[idx].cap >= util)
+ break;
+ }
+
+ eenv->cap_idx = idx;
+
+ return idx;
+}
+
+static int group_idle_state(struct sched_group *sg)
+{
+ int i, state = INT_MAX;
+
+ /* Find the shallowest idle state in the sched group. */
+ for_each_cpu(i, sched_group_cpus(sg))
+ state = min(state, idle_get_state_idx(cpu_rq(i)));
+
+ /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */
+ state++;
+
+ return state;
+}
+
+/*
+ * sched_group_energy(): Computes the absolute energy consumption of cpus
+ * belonging to the sched_group including shared resources shared only by
+ * members of the group. Iterates over all cpus in the hierarchy below the
+ * sched_group, starting from the bottom and working its way up, before going to
+ * the next cpu until all cpus are covered at all levels. The current
+ * implementation is likely to gather the same util statistics multiple times.
+ * This can probably be done in a faster but more complex way.
+ * Note: sched_group_energy() may fail when racing with sched_domain updates.
+ */
+static int sched_group_energy(struct energy_env *eenv)
+{
+ struct sched_domain *sd;
+ int cpu, total_energy = 0;
+ struct cpumask visit_cpus;
+ struct sched_group *sg;
+
+ WARN_ON(!eenv->sg_top->sge);
+
+ cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
+
+ while (!cpumask_empty(&visit_cpus)) {
+ struct sched_group *sg_shared_cap = NULL;
+
+ cpu = cpumask_first(&visit_cpus);
+
+ /*
+ * Is the group utilization affected by cpus outside this
+ * sched_group?
+ */
+ sd = rcu_dereference(per_cpu(sd_scs, cpu));
+
+ if (!sd)
+ /*
+ * We most probably raced with hotplug; returning a
+ * wrong energy estimation is better than entering an
+ * infinite loop.
+ */
+ return -EINVAL;
+
+ if (sd->parent)
+ sg_shared_cap = sd->parent->groups;
+
+ for_each_domain(cpu, sd) {
+ sg = sd->groups;
+
+ /* Has this sched_domain already been visited? */
+ if (sd->child && group_first_cpu(sg) != cpu)
+ break;
+
+ do {
+ unsigned long group_util;
+ int sg_busy_energy, sg_idle_energy;
+ int cap_idx, idle_idx;
+
+ if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
+ eenv->sg_cap = sg_shared_cap;
+ else
+ eenv->sg_cap = sg;
+
+ cap_idx = find_new_capacity(eenv, sg->sge);
+
+ if (sg->group_weight == 1) {
+ /* Remove capacity of src CPU (before task move) */
+ if (eenv->util_delta == 0 &&
+ cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) {
+ eenv->cap.before = sg->sge->cap_states[cap_idx].cap;
+ eenv->cap.delta -= eenv->cap.before;
+ }
+ /* Add capacity of dst CPU (after task move) */
+ if (eenv->util_delta != 0 &&
+ cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) {
+ eenv->cap.after = sg->sge->cap_states[cap_idx].cap;
+ eenv->cap.delta += eenv->cap.after;
+ }
+ }
+
+ idle_idx = group_idle_state(sg);
+ group_util = group_norm_util(eenv, sg);
+ sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
+ >> SCHED_CAPACITY_SHIFT;
+ sg_idle_energy = ((SCHED_LOAD_SCALE-group_util)
+ * sg->sge->idle_states[idle_idx].power)
+ >> SCHED_CAPACITY_SHIFT;
+
+ total_energy += sg_busy_energy + sg_idle_energy;
+
+ if (!sd->child)
+ cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
+
+ if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
+ goto next_cpu;
+
+ } while (sg = sg->next, sg != sd->groups);
+ }
+next_cpu:
+ continue;
+ }
+
+ eenv->energy = total_energy;
+ return 0;
+}
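/*
 * Editor's note: a worked example of the per-group energy terms above, using
 * assumed energy model numbers. For group_util = 512 (50% busy),
 * cap_states[cap_idx].power = 400 and idle_states[idle_idx].power = 10:
 *
 *   sg_busy_energy = (512 * 400) >> SCHED_CAPACITY_SHIFT = 200
 *   sg_idle_energy = ((1024 - 512) * 10) >> SCHED_CAPACITY_SHIFT = 5
 *
 * so this group contributes 205 units to total_energy.
 */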
+
+static inline bool cpu_in_sg(struct sched_group *sg, int cpu)
+{
+ return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg));
+}
+
+#ifdef CONFIG_SCHED_TUNE
+static int energy_diff_evaluate(struct energy_env *eenv)
+{
+ unsigned int boost;
+ int nrg_delta;
+
+ /* Return energy diff when boost margin is 0 */
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+ boost = schedtune_task_boost(eenv->task);
+#else
+ boost = get_sysctl_sched_cfs_boost();
+#endif
+ if (boost == 0)
+ return eenv->nrg.diff;
+
+ /* Compute normalized energy diff */
+ nrg_delta = schedtune_normalize_energy(eenv->nrg.diff);
+ eenv->nrg.delta = nrg_delta;
+
+ eenv->payoff = schedtune_accept_deltas(
+ eenv->nrg.delta,
+ eenv->cap.delta,
+ eenv->task);
+
+ /*
+ * When SchedTune is enabled, the energy_diff() function will return
+ * the computed energy payoff value. Since the energy_diff() return
+ * value is expected to be negative by its callers, this evaluation
+ * function returns a negative value each time the evaluation yields a
+ * positive payoff, which is the condition for accepting a scheduling
+ * decision.
+ */
+ return -eenv->payoff;
+}
+#else /* CONFIG_SCHED_TUNE */
+#define energy_diff_evaluate(eenv) ((eenv)->nrg.diff)
+#endif
+
+/*
+ * energy_diff(): Estimate the energy impact of changing the utilization
+ * distribution. eenv specifies the change: utilization amount, source, and
+ * destination cpu. Source or destination cpu may be -1 in which case the
+ * utilization is removed from or added to the system (e.g. task wake-up). If
+ * both are specified, the utilization is migrated.
+ */
+static int energy_diff(struct energy_env *eenv)
+{
+ struct sched_domain *sd;
+ struct sched_group *sg;
+ int sd_cpu = -1, energy_before = 0, energy_after = 0;
+
+ struct energy_env eenv_before = {
+ .util_delta = 0,
+ .src_cpu = eenv->src_cpu,
+ .dst_cpu = eenv->dst_cpu,
+ .nrg = { 0, 0, 0, 0},
+ .cap = { 0, 0, 0 },
+ };
+
+ if (eenv->src_cpu == eenv->dst_cpu)
+ return 0;
+
+ sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu;
+ sd = rcu_dereference(per_cpu(sd_ea, sd_cpu));
+
+ if (!sd)
+ return 0; /* Error */
+
+ sg = sd->groups;
+
+ do {
+ if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) {
+ eenv_before.sg_top = eenv->sg_top = sg;
+
+ if (sched_group_energy(&eenv_before))
+ return 0; /* Invalid result, abort */
+ energy_before += eenv_before.energy;
+
+ /* Keep track of SRC cpu (before) capacity */
+ eenv->cap.before = eenv_before.cap.before;
+ eenv->cap.delta = eenv_before.cap.delta;
+
+ if (sched_group_energy(eenv))
+ return 0; /* Invalid result, abort */
+ energy_after += eenv->energy;
+ }
+ } while (sg = sg->next, sg != sd->groups);
+
+ eenv->nrg.before = energy_before;
+ eenv->nrg.after = energy_after;
+ eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before;
+ eenv->payoff = 0;
+
+ return energy_diff_evaluate(eenv);
+}
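/*
 * Editor's note: an illustrative reading of energy_diff(), with assumed
 * numbers. Migrating util_delta = 200 from a big to a little cpu might give
 * nrg.before = 620 and nrg.after = 540, hence nrg.diff = -80: the move is
 * estimated to save energy, and the negative value lets callers accept it.
 */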
+
+/*
* Detect M:N waker/wakee relationships via a switching-frequency heuristic.
* A waker of many should wake a different task than the one last awakened
* at a frequency roughly N times higher than one of its wakees. In order
@@ -4779,6 +5190,157 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
return 1;
}
+static inline unsigned long task_util(struct task_struct *p)
+{
+ return p->se.avg.util_avg;
+}
+
+unsigned int capacity_margin = 1280; /* ~20% margin */
+
+static inline unsigned long boosted_task_util(struct task_struct *task);
+
+static inline bool __task_fits(struct task_struct *p, int cpu, int util)
+{
+ unsigned long capacity = capacity_of(cpu);
+
+ util += boosted_task_util(p);
+
+ return (capacity * 1024) > (util * capacity_margin);
+}
+
+static inline bool task_fits_max(struct task_struct *p, int cpu)
+{
+ unsigned long capacity = capacity_of(cpu);
+ unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val;
+
+ if (capacity == max_capacity)
+ return true;
+
+ if (capacity * capacity_margin > max_capacity * 1024)
+ return true;
+
+ return __task_fits(p, cpu, 0);
+}
+
+static inline bool task_fits_spare(struct task_struct *p, int cpu)
+{
+ return __task_fits(p, cpu, cpu_util(cpu));
+}
+
+static bool cpu_overutilized(int cpu)
+{
+ return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
+}
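/*
 * Editor's note: what the capacity_margin of 1280/1024 means in practice
 * (illustrative numbers). A cpu fits a task only when its capacity exceeds
 * the (boosted) utilization by ~25%:
 *
 *   capacity * 1024 > util * 1280  <=>  capacity > util * 1.25
 *
 * e.g. util = 400 fits a cpu of capacity 512 (524288 > 512000), while
 * util = 410 does not (524288 < 524800); likewise a cpu counts as
 * overutilized once its own util crosses that threshold.
 */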
+
+#ifdef CONFIG_SCHED_TUNE
+
+static unsigned long
+schedtune_margin(unsigned long signal, unsigned long boost)
+{
+ unsigned long long margin = 0;
+
+ /*
+ * Signal proportional compensation (SPC)
+ *
+ * The Boost (B) value is used to compute a Margin (M) which is
+ * proportional to the complement of the original Signal (S):
+ * M = B * (SCHED_LOAD_SCALE - S)
+ * The obtained M could be used by the caller to "boost" S.
+ */
+ margin = SCHED_LOAD_SCALE - signal;
+ margin *= boost;
+
+ /*
+ * Fast integer division by constant:
+ * Constant : (C) = 100
+ * Precision : 0.1% (P) = 0.1
+ * Reference : C * 100 / P (R) = 100000
+ *
+ * Thus:
+ * Shift bits : ceil(log(R,2)) (S) = 17
+ * Mult const : round(2^S/C) (M) = 1311
+ */
+ margin *= 1311;
+ margin >>= 17;
+
+ return margin;
+}
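/*
 * Editor's note: a worked example of the multiply/shift division above, with
 * assumed inputs signal = 200 and boost = 50:
 *
 *   margin = (1024 - 200) * 50 = 41200       (B * (SCHED_LOAD_SCALE - S))
 *   margin = (41200 * 1311) >> 17 = 412      (~= 41200 / 100)
 *
 * which matches the exact result 824 * 50 / 100 = 412 within the stated
 * 0.1% precision.
 */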
+
+static inline unsigned int
+schedtune_cpu_margin(unsigned long util, int cpu)
+{
+ unsigned int boost;
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+ boost = schedtune_cpu_boost(cpu);
+#else
+ boost = get_sysctl_sched_cfs_boost();
+#endif
+ if (boost == 0)
+ return 0;
+
+ return schedtune_margin(util, boost);
+}
+
+static inline unsigned long
+schedtune_task_margin(struct task_struct *task)
+{
+ unsigned int boost;
+ unsigned long util;
+ unsigned long margin;
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+ boost = schedtune_task_boost(task);
+#else
+ boost = get_sysctl_sched_cfs_boost();
+#endif
+ if (boost == 0)
+ return 0;
+
+ util = task_util(task);
+ margin = schedtune_margin(util, boost);
+
+ return margin;
+}
+
+#else /* CONFIG_SCHED_TUNE */
+
+static inline unsigned int
+schedtune_cpu_margin(unsigned long util, int cpu)
+{
+ return 0;
+}
+
+static inline unsigned int
+schedtune_task_margin(struct task_struct *task)
+{
+ return 0;
+}
+
+#endif /* CONFIG_SCHED_TUNE */
+
+static inline unsigned long
+boosted_cpu_util(int cpu)
+{
+ unsigned long util = cpu_util(cpu);
+ unsigned long margin = schedtune_cpu_margin(util, cpu);
+
+ trace_sched_boost_cpu(cpu, util, margin);
+
+ return util + margin;
+}
+
+static inline unsigned long
+boosted_task_util(struct task_struct *task)
+{
+ unsigned long util = task_util(task);
+ unsigned long margin = schedtune_task_margin(task);
+
+ return util + margin;
+}
+
/*
* find_idlest_group finds and returns the least busy CPU group within the
* domain.
@@ -4788,7 +5350,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
int this_cpu, int sd_flag)
{
struct sched_group *idlest = NULL, *group = sd->groups;
+ struct sched_group *fit_group = NULL, *spare_group = NULL;
unsigned long min_load = ULONG_MAX, this_load = 0;
+ unsigned long fit_capacity = ULONG_MAX;
+ unsigned long max_spare_capacity = capacity_margin - SCHED_LOAD_SCALE;
int load_idx = sd->forkexec_idx;
int imbalance = 100 + (sd->imbalance_pct-100)/2;
@@ -4796,7 +5361,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
load_idx = sd->wake_idx;
do {
- unsigned long load, avg_load;
+ unsigned long load, avg_load, spare_capacity;
int local_group;
int i;
@@ -4819,6 +5384,25 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
load = target_load(i, load_idx);
avg_load += load;
+
+ /*
+ * Look for the most energy-efficient group that can fit the task.
+ */
+ if (capacity_of(i) < fit_capacity && task_fits_spare(p, i)) {
+ fit_capacity = capacity_of(i);
+ fit_group = group;
+ }
+
+ /*
+ * Look for the group with the most spare capacity on a
+ * single cpu.
+ */
+ spare_capacity = capacity_of(i) - cpu_util(i);
+ if (spare_capacity > max_spare_capacity) {
+ max_spare_capacity = spare_capacity;
+ spare_group = group;
+ }
}
/* Adjust by relative CPU capacity of the group */
@@ -4832,6 +5416,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
}
} while (group = group->next, group != sd->groups);
+ if (fit_group)
+ return fit_group;
+
+ if (spare_group)
+ return spare_group;
+
if (!idlest || 100*this_load < imbalance*min_load)
return NULL;
return idlest;
@@ -4852,7 +5442,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
- if (idle_cpu(i)) {
+ if (task_fits_spare(p, i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
@@ -4864,7 +5454,8 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
min_exit_latency = idle->exit_latency;
latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
- } else if ((!idle || idle->exit_latency == min_exit_latency) &&
+ } else if (idle_cpu(i) &&
+ (!idle || idle->exit_latency == min_exit_latency) &&
rq->idle_stamp > latest_idle_timestamp) {
/*
* If equal or no active idle state, then
@@ -4873,6 +5464,13 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
*/
latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
+ } else if (shallowest_idle_cpu == -1) {
+ /*
+ * If we haven't found an idle CPU yet,
+ * pick a non-idle one that can fit the
+ * task as a fallback.
+ */
+ shallowest_idle_cpu = i;
}
} else if (shallowest_idle_cpu == -1) {
load = weighted_cpuload(i);
@@ -4931,38 +5529,85 @@ done:
return target;
}
-/*
- * cpu_util returns the amount of capacity of a CPU that is used by CFS
- * tasks. The unit of the return value must be the one of capacity so we can
- * compare the utilization with the capacity of the CPU that is available for
- * CFS task (ie cpu_capacity).
- *
- * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
- * recent utilization of currently non-runnable tasks on a CPU. It represents
- * the amount of utilization of a CPU in the range [0..capacity_orig] where
- * capacity_orig is the cpu_capacity available at the highest frequency
- * (arch_scale_freq_capacity()).
- * The utilization of a CPU converges towards a sum equal to or less than the
- * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
- * the running time on this CPU scaled by capacity_curr.
- *
- * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
- * higher than capacity_orig because of unfortunate rounding in
- * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
- * the average stabilizes with the new running time. We need to check that the
- * utilization stays within the range of [0..capacity_orig] and cap it if
- * necessary. Without utilization capping, a group could be seen as overloaded
- * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
- * available capacity. We allow utilization to overshoot capacity_curr (but not
- * capacity_orig) as it useful for predicting the capacity required after task
- * migrations (scheduler-driven DVFS).
- */
-static int cpu_util(int cpu)
+static int energy_aware_wake_cpu(struct task_struct *p, int target)
{
- unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
- unsigned long capacity = capacity_orig_of(cpu);
+ struct sched_domain *sd;
+ struct sched_group *sg, *sg_target;
+ int target_max_cap = INT_MAX;
+ int target_cpu = task_cpu(p);
+ int i;
+
+ sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
+
+ if (!sd)
+ return target;
+
+ sg = sd->groups;
+ sg_target = sg;
+
+ /*
+ * Find group with sufficient capacity. We only get here if no cpu is
+ * overutilized. We may end up overutilizing a cpu by adding the task,
+ * but that should not be any worse than select_idle_sibling().
+ * load_balance() should sort it out later as we get above the tipping
+ * point.
+ */
+ do {
+ /* Assume all cpus in the group have the same capacity */
+ int max_cap_cpu = group_first_cpu(sg);
+
+ /*
+ * Assume smaller max capacity means more energy-efficient.
+ * Ideally we should query the energy model for the right
+ * answer but it easily ends up in an exhaustive search.
+ */
+ if (capacity_of(max_cap_cpu) < target_max_cap &&
+ task_fits_max(p, max_cap_cpu)) {
+ sg_target = sg;
+ target_max_cap = capacity_of(max_cap_cpu);
+ }
+ } while (sg = sg->next, sg != sd->groups);
- return (util >= capacity) ? capacity : util;
+ /* Find cpu with sufficient capacity */
+ for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+ /*
+ * p's blocked utilization is still accounted for on prev_cpu
+ * so prev_cpu will receive a negative bias due to the double
+ * accounting. However, the blocked utilization may be zero.
+ */
+ int new_util = cpu_util(i) + boosted_task_util(p);
+
+ if (new_util > capacity_orig_of(i))
+ continue;
+
+ if (new_util < capacity_curr_of(i)) {
+ target_cpu = i;
+ if (cpu_rq(i)->nr_running)
+ break;
+ }
+
+ /* cpu has capacity at higher OPP, keep it as fallback */
+ if (target_cpu == task_cpu(p))
+ target_cpu = i;
+ }
+
+ if (target_cpu != task_cpu(p)) {
+ struct energy_env eenv = {
+ .util_delta = task_util(p),
+ .src_cpu = task_cpu(p),
+ .dst_cpu = target_cpu,
+ .task = p,
+ };
+
+ /* Not enough spare capacity on previous cpu */
+ if (cpu_overutilized(task_cpu(p)))
+ return target_cpu;
+
+ if (energy_diff(&eenv) >= 0)
+ return task_cpu(p);
+ }
+
+ return target_cpu;
}
/*
@@ -4987,7 +5632,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
int sync = wake_flags & WF_SYNC;
if (sd_flag & SD_BALANCE_WAKE)
- want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+ want_affine = (!wake_wide(p) && task_fits_max(p, cpu) &&
+ cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) ||
+ energy_aware();
rcu_read_lock();
for_each_domain(cpu, tmp) {
@@ -5017,7 +5664,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
}
if (!sd) {
- if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
+ if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
+ new_cpu = energy_aware_wake_cpu(p, prev_cpu);
+ else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
new_cpu = select_idle_sibling(p, new_cpu);
} else while (sd) {
@@ -5333,6 +5982,8 @@ again:
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p);
+ rq->misfit_task = !task_fits_max(p, rq->cpu);
+
return p;
simple:
cfs_rq = &rq->cfs;
@@ -5354,9 +6005,12 @@ simple:
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p);
+ rq->misfit_task = !task_fits_max(p, rq->cpu);
+
return p;
idle:
+ rq->misfit_task = 0;
/*
* This is OK, because current is on_cpu, which avoids it being picked
* for load-balance and preemption/IRQs are still disabled avoiding
@@ -5569,6 +6223,13 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
enum fbq_type { regular, remote, all };
+enum group_type {
+ group_other = 0,
+ group_misfit_task,
+ group_imbalanced,
+ group_overloaded,
+};
+
#define LBF_ALL_PINNED 0x01
#define LBF_NEED_BREAK 0x02
#define LBF_DST_PINNED 0x04
@@ -5587,6 +6248,7 @@ struct lb_env {
int new_dst_cpu;
enum cpu_idle_type idle;
long imbalance;
+ unsigned int src_grp_nr_running;
/* The set of CPUs under consideration for load-balancing */
struct cpumask *cpus;
@@ -5597,6 +6259,7 @@ struct lb_env {
unsigned int loop_max;
enum fbq_type fbq_type;
+ enum group_type busiest_group_type;
struct list_head tasks;
};
@@ -5923,6 +6586,10 @@ static void attach_one_task(struct rq *rq, struct task_struct *p)
{
raw_spin_lock(&rq->lock);
attach_task(rq, p);
+ /*
+ * We want to potentially raise target_cpu's OPP.
+ */
+ update_capacity_of(cpu_of(rq));
raw_spin_unlock(&rq->lock);
}
@@ -5944,6 +6611,11 @@ static void attach_tasks(struct lb_env *env)
attach_task(env->dst_rq, p);
}
+ /*
+ * We want to potentially raise env.dst_cpu's OPP.
+ */
+ update_capacity_of(env->dst_cpu);
+
raw_spin_unlock(&env->dst_rq->lock);
}
@@ -6039,12 +6711,6 @@ static unsigned long task_h_load(struct task_struct *p)
/********** Helpers for find_busiest_group ************************/
-enum group_type {
- group_other = 0,
- group_imbalanced,
- group_overloaded,
-};
-
/*
* sg_lb_stats - stats of a sched_group required for load_balancing
*/
@@ -6060,6 +6726,7 @@ struct sg_lb_stats {
unsigned int group_weight;
enum group_type group_type;
int group_no_capacity;
+ int group_misfit_task; /* A cpu has a task too big for its capacity */
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
unsigned int nr_preferred_running;
@@ -6151,19 +6818,57 @@ static unsigned long scale_rt_capacity(int cpu)
used = div_u64(avg, total);
+ /*
+ * deadline bandwidth is defined at system level so we must
+ * weight this bandwidth with the max capacity of the system.
+ * As a reminder, avg_bw is 20 bits wide and
+ * scale_cpu_capacity is 10 bits wide.
+ */
+ used += div_u64(rq->dl.avg_bw, arch_scale_cpu_capacity(NULL, cpu));
+
if (likely(used < SCHED_CAPACITY_SCALE))
return SCHED_CAPACITY_SCALE - used;
return 1;
}
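/*
 * Editor's note: a worked example of the deadline term above, with assumed
 * numbers. For avg_bw = 102400 (~10% of the 20-bit bandwidth scale) on a
 * cpu where arch_scale_cpu_capacity() = 1024:
 *
 *   used += 102400 / 1024 = 100
 *
 * i.e. roughly 10% of SCHED_CAPACITY_SCALE is set aside for deadline tasks.
 */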
+void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
+{
+ raw_spin_lock_init(&mcc->lock);
+ mcc->val = 0;
+ mcc->cpu = -1;
+}
+
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
struct sched_group *sdg = sd->groups;
+ struct max_cpu_capacity *mcc;
+ unsigned long max_capacity;
+ int max_cap_cpu;
+ unsigned long flags;
cpu_rq(cpu)->cpu_capacity_orig = capacity;
+ mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
+
+ raw_spin_lock_irqsave(&mcc->lock, flags);
+ max_capacity = mcc->val;
+ max_cap_cpu = mcc->cpu;
+
+ if ((max_capacity > capacity && max_cap_cpu == cpu) ||
+ (max_capacity < capacity)) {
+ mcc->val = capacity;
+ mcc->cpu = cpu;
+#ifdef CONFIG_SCHED_DEBUG
+ raw_spin_unlock_irqrestore(&mcc->lock, flags);
+ pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity);
+ goto skip_unlock;
+#endif
+ }
+ raw_spin_unlock_irqrestore(&mcc->lock, flags);
+
+skip_unlock: __attribute__ ((unused));
capacity *= scale_rt_capacity(cpu);
capacity >>= SCHED_CAPACITY_SHIFT;
@@ -6172,13 +6877,14 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
cpu_rq(cpu)->cpu_capacity = capacity;
sdg->sgc->capacity = capacity;
+ sdg->sgc->max_capacity = capacity;
}
void update_group_capacity(struct sched_domain *sd, int cpu)
{
struct sched_domain *child = sd->child;
struct sched_group *group, *sdg = sd->groups;
- unsigned long capacity;
+ unsigned long capacity, max_capacity;
unsigned long interval;
interval = msecs_to_jiffies(sd->balance_interval);
@@ -6191,6 +6897,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
}
capacity = 0;
+ max_capacity = 0;
if (child->flags & SD_OVERLAP) {
/*
@@ -6215,11 +6922,12 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
*/
if (unlikely(!rq->sd)) {
capacity += capacity_of(cpu);
- continue;
+ } else {
+ sgc = rq->sd->groups->sgc;
+ capacity += sgc->capacity;
}
- sgc = rq->sd->groups->sgc;
- capacity += sgc->capacity;
+ max_capacity = max(capacity, max_capacity);
}
} else {
/*
@@ -6229,12 +6937,16 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
group = child->groups;
do {
- capacity += group->sgc->capacity;
+ struct sched_group_capacity *sgc = group->sgc;
+
+ capacity += sgc->capacity;
+ max_capacity = max(sgc->max_capacity, max_capacity);
group = group->next;
} while (group != child->groups);
}
sdg->sgc->capacity = capacity;
+ sdg->sgc->max_capacity = max_capacity;
}
/*
@@ -6329,6 +7041,18 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
return false;
}
+
+/*
+ * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
+ * per-cpu capacity than sched_group ref.
+ */
+static inline bool
+group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+{
+ return sg->sgc->max_capacity + capacity_margin - SCHED_LOAD_SCALE <
+ ref->sgc->max_capacity;
+}
+
static inline enum
group_type group_classify(struct sched_group *group,
struct sg_lb_stats *sgs)
@@ -6339,6 +7063,9 @@ group_type group_classify(struct sched_group *group,
if (sg_imbalanced(group))
return group_imbalanced;
+ if (sgs->group_misfit_task)
+ return group_misfit_task;
+
return group_other;
}
@@ -6350,11 +7077,12 @@ group_type group_classify(struct sched_group *group,
* @local_group: Does group contain this_cpu.
* @sgs: variable to hold the statistics for this group.
* @overload: Indicate more than one runnable task for any CPU.
+ * @overutilized: Indicate overutilization for any CPU.
*/
static inline void update_sg_lb_stats(struct lb_env *env,
struct sched_group *group, int load_idx,
int local_group, struct sg_lb_stats *sgs,
- bool *overload)
+ bool *overload, bool *overutilized)
{
unsigned long load;
int i;
@@ -6384,6 +7112,12 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->sum_weighted_load += weighted_cpuload(i);
if (idle_cpu(i))
sgs->idle_cpus++;
+
+ if (cpu_overutilized(i)) {
+ *overutilized = true;
+ if (!sgs->group_misfit_task && rq->misfit_task)
+ sgs->group_misfit_task = capacity_of(i);
+ }
}
/* Adjust by relative CPU capacity of the group */
@@ -6425,9 +7159,25 @@ static bool update_sd_pick_busiest(struct lb_env *env,
if (sgs->group_type < busiest->group_type)
return false;
+ /*
+ * Candidate sg doesn't face any serious load-balance problems
+ * so don't pick it if the local sg is already filled up.
+ */
+ if (sgs->group_type == group_other &&
+ !group_has_capacity(env, &sds->local_stat))
+ return false;
+
if (sgs->avg_load <= busiest->avg_load)
return false;
+ /*
+ * Candidate sg has no more than one task per cpu and has higher
+ * per-cpu capacity. No reason to pull tasks to less capable cpus.
+ */
+ if (sgs->sum_nr_running <= sgs->group_weight &&
+ group_smaller_cpu_capacity(sds->local, sg))
+ return false;
+
/* This is the busiest node in its class. */
if (!(env->sd->flags & SD_ASYM_PACKING))
return true;
@@ -6489,7 +7239,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
struct sched_group *sg = env->sd->groups;
struct sg_lb_stats tmp_sgs;
int load_idx, prefer_sibling = 0;
- bool overload = false;
+ bool overload = false, overutilized = false;
if (child && child->flags & SD_PREFER_SIBLING)
prefer_sibling = 1;
@@ -6511,7 +7261,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
}
update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
- &overload);
+ &overload, &overutilized);
if (local_group)
goto next_group;
@@ -6533,6 +7283,15 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
sgs->group_type = group_classify(sg, sgs);
}
+ /*
+ * Ignore groups with misfit tasks if the local group has no
+ * capacity or if its per-cpu capacity isn't higher.
+ */
+ if (sgs->group_type == group_misfit_task &&
+ (!group_has_capacity(env, &sds->local_stat) ||
+ !group_smaller_cpu_capacity(sg, sds->local)))
+ sgs->group_type = group_other;
+
if (update_sd_pick_busiest(env, sds, sg, sgs)) {
sds->busiest = sg;
sds->busiest_stat = *sgs;
@@ -6549,12 +7308,20 @@ next_group:
if (env->sd->flags & SD_NUMA)
env->fbq_type = fbq_classify_group(&sds->busiest_stat);
+ env->src_grp_nr_running = sds->busiest_stat.sum_nr_running;
+
if (!env->sd->parent) {
/* update overload indicator if we are at root domain */
if (env->dst_rq->rd->overload != overload)
env->dst_rq->rd->overload = overload;
- }
+ /* Update over-utilization (tipping point, U >= 0) indicator */
+ if (env->dst_rq->rd->overutilized != overutilized)
+ env->dst_rq->rd->overutilized = overutilized;
+ } else {
+ if (!env->dst_rq->rd->overutilized && overutilized)
+ env->dst_rq->rd->overutilized = true;
+ }
}
/**
@@ -6701,6 +7468,22 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
*/
if (busiest->avg_load <= sds->avg_load ||
local->avg_load >= sds->avg_load) {
+ /* Misfitting tasks should be migrated in any case */
+ if (busiest->group_type == group_misfit_task) {
+ env->imbalance = busiest->group_misfit_task;
+ return;
+ }
+
+ /*
+ * Busiest group is overloaded, local is not, use the spare
+ * cycles to maximize throughput
+ */
+ if (busiest->group_type == group_overloaded &&
+ local->group_type <= group_misfit_task) {
+ env->imbalance = busiest->load_per_task;
+ return;
+ }
+
env->imbalance = 0;
return fix_small_imbalance(env, sds);
}
@@ -6734,6 +7517,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
(sds->avg_load - local->avg_load) * local->group_capacity
) / SCHED_CAPACITY_SCALE;
+ /* Boost imbalance to allow misfit task to be balanced. */
+ if (busiest->group_type == group_misfit_task)
+ env->imbalance = max_t(long, env->imbalance,
+ busiest->group_misfit_task);
+
/*
* if *imbalance is less than the average load per runnable task
* there is no guarantee that any tasks will be moved so we'll have
@@ -6775,6 +7563,10 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
* this level.
*/
update_sd_lb_stats(env, &sds);
+
+ if (energy_aware() && !env->dst_rq->rd->overutilized)
+ goto out_balanced;
+
local = &sds.local_stat;
busiest = &sds.busiest_stat;
@@ -6803,6 +7595,11 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
busiest->group_no_capacity)
goto force_balance;
+ /* Misfitting tasks should be dealt with regardless of the avg load */
+ if (busiest->group_type == group_misfit_task)
+ goto force_balance;
+
/*
* If the local group is busier than the selected busiest group
* don't try and pull any tasks.
@@ -6826,7 +7623,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
* might end up to just move the imbalance on another group
*/
if ((busiest->group_type != group_overloaded) &&
- (local->idle_cpus <= (busiest->idle_cpus + 1)))
+ (local->idle_cpus <= (busiest->idle_cpus + 1)) &&
+ !group_smaller_cpu_capacity(sds.busiest, sds.local))
goto out_balanced;
} else {
/*
@@ -6839,6 +7637,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
}
force_balance:
+ env->busiest_group_type = busiest->group_type;
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(env, &sds);
return sds.busiest;
@@ -6897,7 +7696,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
*/
if (rq->nr_running == 1 && wl > env->imbalance &&
- !check_cpu_capacity(rq, env->sd))
+ !check_cpu_capacity(rq, env->sd) &&
+ env->busiest_group_type != group_misfit_task)
continue;
/*
@@ -6958,6 +7758,13 @@ static int need_active_balance(struct lb_env *env)
return 1;
}
+ if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
+ env->src_rq->cfs.h_nr_running == 1 &&
+ cpu_overutilized(env->src_cpu) &&
+ !cpu_overutilized(env->dst_cpu)) {
+ return 1;
+ }
+
return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
}
@@ -7079,6 +7886,11 @@ more_balance:
* ld_moved - cumulative load moved across iterations
*/
cur_ld_moved = detach_tasks(&env);
+ /*
+ * We want to potentially lower env.src_cpu's OPP.
+ */
+ if (cur_ld_moved)
+ update_capacity_of(env.src_cpu);
/*
* We've detached some tasks from busiest_rq. Every
@@ -7170,7 +7982,8 @@ more_balance:
* excessive cache_hot migrations and active balances.
*/
if (idle != CPU_NEWLY_IDLE)
- sd->nr_balance_failed++;
+ if (env.src_grp_nr_running > 1)
+ sd->nr_balance_failed++;
if (need_active_balance(&env)) {
raw_spin_lock_irqsave(&busiest->lock, flags);
@@ -7311,8 +8124,9 @@ static int idle_balance(struct rq *this_rq)
*/
this_rq->idle_stamp = rq_clock(this_rq);
- if (this_rq->avg_idle < sysctl_sched_migration_cost ||
- !this_rq->rd->overload) {
+ if (!energy_aware() &&
+ (this_rq->avg_idle < sysctl_sched_migration_cost ||
+ !this_rq->rd->overload)) {
rcu_read_lock();
sd = rcu_dereference_check_sched_domain(this_rq->sd);
if (sd)
@@ -7447,8 +8261,13 @@ static int active_load_balance_cpu_stop(void *data)
schedstat_inc(sd, alb_count);
p = detach_one_task(&env);
- if (p)
+ if (p) {
schedstat_inc(sd, alb_pushed);
+ /*
+ * We want to potentially lower env.src_cpu's OPP.
+ */
+ update_capacity_of(env.src_cpu);
+ }
else
schedstat_inc(sd, alb_failed);
}
@@ -7828,12 +8647,13 @@ static inline bool nohz_kick_needed(struct rq *rq)
if (time_before(now, nohz.next_balance))
return false;
- if (rq->nr_running >= 2)
+ if (rq->nr_running >= 2 &&
+ (!energy_aware() || cpu_overutilized(cpu)))
return true;
rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_busy, cpu));
- if (sd) {
+ if (sd && !energy_aware()) {
sgc = sd->groups->sgc;
nr_busy = atomic_read(&sgc->nr_busy_cpus);
@@ -7939,6 +8759,11 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
+
+ if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
+ rq->rd->overutilized = true;
+
+ rq->misfit_task = !task_fits_max(curr, rq->cpu);
}
/*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 69631fa46c2f..b634151ce286 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -69,3 +69,8 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
SCHED_FEAT(LB_MIN, false)
SCHED_FEAT(ATTACH_AGE_LOAD, true)
+/*
+ * Energy-aware scheduling. Use the platform energy model to guide
+ * scheduling decisions, optimizing for energy efficiency.
+ */
+SCHED_FEAT(ENERGY_AWARE, false)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 4a2ef5a02fd3..cbc130efbc5b 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -19,9 +19,10 @@
* sched_idle_set_state - Record idle state for the current CPU.
* @idle_state: State to record.
*/
-void sched_idle_set_state(struct cpuidle_state *idle_state)
+void sched_idle_set_state(struct cpuidle_state *idle_state, int index)
{
idle_set_state(this_rq(), idle_state);
+ idle_set_state_idx(this_rq(), index);
}
static int __read_mostly cpu_idle_force_poll;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 78ae5c1d9412..ef4e734c6765 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1426,6 +1426,41 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
#endif
}
+#ifdef CONFIG_SMP
+static void sched_rt_update_capacity_req(struct rq *rq)
+{
+ u64 total, used, age_stamp, avg;
+ s64 delta;
+
+ if (!sched_freq())
+ return;
+
+ sched_avg_update(rq);
+ /*
+ * Since we're reading these variables without serialization, make sure
+ * we read them once before doing sanity checks on them.
+ */
+ age_stamp = READ_ONCE(rq->age_stamp);
+ avg = READ_ONCE(rq->rt_avg);
+ delta = rq_clock(rq) - age_stamp;
+
+ if (unlikely(delta < 0))
+ delta = 0;
+
+ total = sched_avg_period() + delta;
+
+ used = div_u64(avg, total);
+ if (unlikely(used > SCHED_CAPACITY_SCALE))
+ used = SCHED_CAPACITY_SCALE;
+
+ set_rt_cpu_capacity(rq->cpu, 1, (unsigned long)(used));
+}
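/*
 * Editor's note: an illustrative reading of the estimate above (assumed
 * numbers). If rt tasks ran for ~25% of the averaging period at full
 * capacity, then avg ~= 0.25 * total * 1024, so used = div_u64(avg, total)
 * ~= 256 and set_rt_cpu_capacity() requests ~25% of SCHED_CAPACITY_SCALE.
 */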
+#else
+static inline void sched_rt_update_capacity_req(struct rq *rq)
+{ }
+
+#endif
+
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
struct rt_rq *rt_rq)
{
@@ -1494,8 +1529,17 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
if (prev->sched_class == &rt_sched_class)
update_curr_rt(rq);
- if (!rt_rq->rt_queued)
+ if (!rt_rq->rt_queued) {
+ /*
+ * The next task to be picked on this rq will have a lower
+ * priority than rt tasks so we can spend some time updating
+ * the capacity used by rt tasks based on the last activity.
+ * This value will then be used as an estimate of the next
+ * activity.
+ */
+ sched_rt_update_capacity_req(rq);
return NULL;
+ }
put_prev_task(rq, prev);
@@ -2211,6 +2255,9 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
update_curr_rt(rq);
+ if (rq->rt.rt_nr_running)
+ sched_rt_update_capacity_req(rq);
+
watchdog(rq, p);
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4e5db65d1aab..709570f64054 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -506,10 +506,18 @@ struct dl_rq {
#else
struct dl_bw dl_bw;
#endif
+ /* Average bandwidth consumed by -deadline tasks on this runqueue */
+ s64 avg_bw;
};
#ifdef CONFIG_SMP
+struct max_cpu_capacity {
+ raw_spinlock_t lock;
+ unsigned long val;
+ int cpu;
+};
+
/*
* We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by
@@ -528,6 +536,9 @@ struct root_domain {
/* Indicate more than one runnable task for any CPU */
bool overload;
+ /* Indicate one or more cpus over-utilized (tipping point) */
+ bool overutilized;
+
/*
* The bit corresponding to a CPU gets set here if such CPU has more
* than one runnable -deadline task (as it is below for RT tasks).
@@ -543,6 +554,9 @@ struct root_domain {
*/
cpumask_var_t rto_mask;
struct cpupri cpupri;
+
+ /* Maximum cpu capacity in the system. */
+ struct max_cpu_capacity max_cpu_capacity;
};
extern struct root_domain def_root_domain;
@@ -572,6 +586,7 @@ struct rq {
#define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
unsigned long last_load_update_tick;
+ unsigned int misfit_task;
#ifdef CONFIG_NO_HZ_COMMON
u64 nohz_stamp;
unsigned long nohz_flags;
@@ -687,6 +702,7 @@ struct rq {
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
+ int idle_state_idx;
#endif
};
@@ -836,6 +852,8 @@ DECLARE_PER_CPU(int, sd_llc_id);
DECLARE_PER_CPU(struct sched_domain *, sd_numa);
DECLARE_PER_CPU(struct sched_domain *, sd_busy);
DECLARE_PER_CPU(struct sched_domain *, sd_asym);
+DECLARE_PER_CPU(struct sched_domain *, sd_ea);
+DECLARE_PER_CPU(struct sched_domain *, sd_scs);
struct sched_group_capacity {
atomic_t ref;
@@ -843,7 +861,8 @@ struct sched_group_capacity {
* CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
* for a single CPU.
*/
- unsigned int capacity;
+ unsigned long capacity;
+ unsigned long max_capacity; /* Max per-cpu capacity in group */
unsigned long next_update;
int imbalance; /* XXX unrelated to capacity but shared group state */
/*
@@ -860,6 +879,7 @@ struct sched_group {
unsigned int group_weight;
struct sched_group_capacity *sgc;
+ const struct sched_group_energy *sge;
/*
* The CPUs this group covers.
@@ -1163,6 +1183,7 @@ static const u32 prio_to_wmult[40] = {
#endif
#define ENQUEUE_REPLENISH 0x08
#define ENQUEUE_RESTORE 0x10
+#define ENQUEUE_WAKEUP_NEW 0x20
#define DEQUEUE_SLEEP 0x01
#define DEQUEUE_SAVE 0x02
@@ -1276,6 +1297,17 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
WARN_ON(!rcu_read_lock_held());
return rq->idle_state;
}
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+ rq->idle_state_idx = idle_state_idx;
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+ WARN_ON(!rcu_read_lock_held());
+ return rq->idle_state_idx;
+}
#else
static inline void idle_set_state(struct rq *rq,
struct cpuidle_state *idle_state)
@@ -1286,6 +1318,15 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
return NULL;
}
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+ return -1;
+}
#endif
extern void sysrq_sched_debug_show(void);
@@ -1310,6 +1351,8 @@ unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se);
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
+
static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;
@@ -1415,10 +1458,117 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
}
#endif
+#ifdef CONFIG_SMP
+static inline unsigned long capacity_of(int cpu)
+{
+ return cpu_rq(cpu)->cpu_capacity;
+}
+
+static inline unsigned long capacity_orig_of(int cpu)
+{
+ return cpu_rq(cpu)->cpu_capacity_orig;
+}
+
+/*
+ * cpu_util returns the amount of capacity of a CPU that is used by CFS
+ * tasks. The unit of the return value must be the one of capacity so we can
+ * compare the utilization with the capacity of the CPU that is available for
+ * CFS task (ie cpu_capacity).
+ *
+ * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
+ * recent utilization of currently non-runnable tasks on a CPU. It represents
+ * the amount of utilization of a CPU in the range [0..capacity_orig] where
+ * capacity_orig is the cpu_capacity available at the highest frequency
+ * (arch_scale_freq_capacity()).
+ * The utilization of a CPU converges towards a sum equal to or less than the
+ * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
+ * the running time on this CPU scaled by capacity_curr.
+ *
+ * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
+ * higher than capacity_orig because of unfortunate rounding in
+ * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
+ * the average stabilizes with the new running time. We need to check that the
+ * utilization stays within the range of [0..capacity_orig] and cap it if
+ * necessary. Without utilization capping, a group could be seen as overloaded
+ * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
+ * available capacity. We allow utilization to overshoot capacity_curr (but not
+ * capacity_orig) as it is useful for predicting the capacity required after task
+ * migrations (scheduler-driven DVFS).
+ */
+static inline unsigned long __cpu_util(int cpu, int delta)
+{
+ unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
+ unsigned long capacity = capacity_orig_of(cpu);
+
+ delta += util;
+ if (delta < 0)
+ return 0;
+
+ return (delta >= capacity) ? capacity : delta;
+}
+
+static inline unsigned long cpu_util(int cpu)
+{
+ return __cpu_util(cpu, 0);
+}
+
+#endif
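/*
 * Editor's note: illustrative values for the clamping in __cpu_util(), with
 * assumed cfs.avg.util_avg = 700 and capacity_orig = 1024:
 *
 *   __cpu_util(cpu, 0)    = 700     (plain cpu_util())
 *   __cpu_util(cpu, 400)  = 1024    (700 + 400 capped at capacity_orig)
 *   __cpu_util(cpu, -800) = 0       (negative sums clamp to zero)
 */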
+
+#ifdef CONFIG_CPU_FREQ_GOV_SCHED
+#define capacity_max SCHED_CAPACITY_SCALE
+extern unsigned int capacity_margin;
+extern struct static_key __sched_freq;
+
+static inline bool sched_freq(void)
+{
+ return static_key_false(&__sched_freq);
+}
+
+DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
+void update_cpu_capacity_request(int cpu, bool request);
+
+static inline void set_cfs_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{
+ if (per_cpu(cpu_sched_capacity_reqs, cpu).cfs != capacity) {
+ per_cpu(cpu_sched_capacity_reqs, cpu).cfs = capacity;
+ update_cpu_capacity_request(cpu, request);
+ }
+}
+
+static inline void set_rt_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{
+ if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) {
+ per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity;
+ update_cpu_capacity_request(cpu, request);
+ }
+}
+
+static inline void set_dl_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{
+ if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) {
+ per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity;
+ update_cpu_capacity_request(cpu, request);
+ }
+}
+#else
+static inline bool sched_freq(void) { return false; }
+static inline void set_cfs_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{ }
+static inline void set_rt_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{ }
+static inline void set_dl_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{ }
+#endif
+
static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
- sched_avg_update(rq);
}
#else
static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
new file mode 100644
index 000000000000..7a434f2394e7
--- /dev/null
+++ b/kernel/sched/tune.c
@@ -0,0 +1,747 @@
+#include <linux/cgroup.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/reciprocal_div.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+#include <trace/events/sched.h>
+
+#include "sched.h"
+
+unsigned int sysctl_sched_cfs_boost __read_mostly;
+
+/*
+ * System energy normalization constants
+ */
+static struct target_nrg {
+ unsigned long min_power;
+ unsigned long max_power;
+ struct reciprocal_value rdiv;
+} schedtune_target_nrg;
+
+/* Performance Boost region (B) threshold params */
+static int perf_boost_idx;
+
+/* Performance Constraint region (C) threshold params */
+static int perf_constrain_idx;
+
+/*
+ * Performance-Energy (P-E) space threshold constants
+ */
+struct threshold_params {
+ int nrg_gain;
+ int cap_gain;
+};
+
+/*
+ * System specific P-E space thresholds constants
+ */
+static struct threshold_params
+threshold_gains[] = {
+ { 0, 4 }, /* >= 0% */
+ { 0, 4 }, /* >= 10% */
+ { 1, 4 }, /* >= 20% */
+ { 2, 4 }, /* >= 30% */
+ { 3, 4 }, /* >= 40% */
+ { 4, 3 }, /* >= 50% */
+ { 4, 2 }, /* >= 60% */
+ { 4, 1 }, /* >= 70% */
+ { 4, 0 }, /* >= 80% */
+ { 4, 0 } /* >= 90% */
+};
+
+static int
+__schedtune_accept_deltas(int nrg_delta, int cap_delta,
+ int perf_boost_idx, int perf_constrain_idx)
+{
+ int payoff = -INT_MAX;
+
+ /* Performance Boost (B) region */
+ if (nrg_delta > 0 && cap_delta > 0) {
+ /*
+ * Evaluate "Performance Boost" vs "Energy Increase"
+ * payoff criteria:
+ * cap_delta / nrg_delta < cap_gain / nrg_gain
+ * which is:
+ * nrg_delta * cap_gain > cap_delta * nrg_gain
+ */
+ payoff = nrg_delta * threshold_gains[perf_boost_idx].cap_gain;
+ payoff -= cap_delta * threshold_gains[perf_boost_idx].nrg_gain;
+ return payoff;
+ }
+
+ /* Performance Constraint (C) region */
+ if (nrg_delta < 0 && cap_delta < 0) {
+ /*
+ * Evaluate "Performance Boost" vs "Energy Increase"
+ * payoff criteria:
+ * cap_delta / nrg_delta > cap_gain / nrg_gain
+ * which is:
+ * cap_delta * nrg_gain > nrg_delta * cap_gain
+ */
+ payoff = cap_delta * threshold_gains[perf_constrain_idx].nrg_gain;
+ payoff -= nrg_delta * threshold_gains[perf_constrain_idx].cap_gain;
+ return payoff;
+ }
+
+ /* Default: reject schedule candidate */
+ return payoff;
+}
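/*
 * Editor's note: a worked example of the boost-region payoff rule above,
 * with assumed deltas and threshold_gains[perf_boost_idx] = { 1, 4 }
 * (nrg_gain = 1, cap_gain = 4, i.e. the >= 20% row). For nrg_delta = 10
 * and cap_delta = 30:
 *
 *   payoff = 10 * 4 - 30 * 1 = 10 > 0
 *
 * so a 30-unit capacity gain justifies a 10-unit energy increase at this
 * threshold and the candidate is accepted.
 */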
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+
+/*
+ * EAS scheduler tunables for task groups.
+ */
+
+/* SchedTune tunables for a group of tasks */
+struct schedtune {
+ /* SchedTune CGroup subsystem */
+ struct cgroup_subsys_state css;
+
+ /* Boost group allocated ID */
+ int idx;
+
+ /* Boost value for tasks on that SchedTune CGroup */
+ int boost;
+
+ /* Performance Boost (B) region threshold params */
+ int perf_boost_idx;
+
+ /* Performance Constraint (C) region threshold params */
+ int perf_constrain_idx;
+};
+
+static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
+{
+ return css ? container_of(css, struct schedtune, css) : NULL;
+}
+
+static inline struct schedtune *task_schedtune(struct task_struct *tsk)
+{
+ return css_st(task_css(tsk, schedtune_cgrp_id));
+}
+
+static inline struct schedtune *parent_st(struct schedtune *st)
+{
+ return css_st(st->css.parent);
+}
+
+/*
+ * SchedTune root control group
+ * The root control group is used to define a system-wide boost value,
+ * which is applied to all tasks in the system.
+ * Task-specific boost tuning can be specified by creating and
+ * configuring a child control group under the root one.
+ * By default, system-wide boosting is disabled, i.e. no boosting is applied
+ * to tasks which are not in a child control group.
+ */
+static struct schedtune
+root_schedtune = {
+ .boost = 0,
+ .perf_boost_idx = 0,
+ .perf_constrain_idx = 0,
+};
+
+int
+schedtune_accept_deltas(int nrg_delta, int cap_delta,
+ struct task_struct *task)
+{
+ struct schedtune *ct;
+ int perf_boost_idx;
+ int perf_constrain_idx;
+
+ /* Optimal (O) region */
+ if (nrg_delta < 0 && cap_delta > 0)
+ return INT_MAX;
+
+ /* Suboptimal (S) region */
+ if (nrg_delta > 0 && cap_delta < 0)
+ return -INT_MAX;
+
+ /* Get task specific perf Boost/Constraints indexes */
+ rcu_read_lock();
+ ct = task_schedtune(task);
+ perf_boost_idx = ct->perf_boost_idx;
+ perf_constrain_idx = ct->perf_constrain_idx;
+ rcu_read_unlock();
+
+ return __schedtune_accept_deltas(nrg_delta, cap_delta,
+ perf_boost_idx, perf_constrain_idx);
+}
+
+/*
+ * Maximum number of boost groups to support
+ * When per-task boosting is used we still allow only a limited number of
+ * boost groups for two main reasons:
+ * 1. on a real system we usually have only a few classes of workloads which
+ * it makes sense to boost with different values (e.g. background vs foreground
+ * tasks, interactive vs low-priority tasks)
+ * 2. a limited number allows for a simpler and more memory/time efficient
+ * implementation especially for the computation of the per-CPU boost
+ * value
+ */
+#define BOOSTGROUPS_COUNT 4
+
+/* Array of configured boostgroups */
+static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
+ &root_schedtune,
+ NULL,
+};
+
+/* SchedTune boost groups
+ * Keep track of all the boost groups which impact a CPU, for example when a
+ * CPU has two RUNNABLE tasks belonging to two different boost groups and thus
+ * likely with different boost values.
+ * Since on each system we expect only a limited number of boost groups, here
+ * we use a simple array to keep track of the metrics required to compute the
+ * maximum per-CPU boosting value.
+ */
+struct boost_groups {
+ /* Maximum boost value for all RUNNABLE tasks on a CPU */
+ unsigned boost_max;
+ struct {
+ /* The boost for tasks on that boost group */
+ unsigned boost;
+ /* Count of RUNNABLE tasks on that boost group */
+ unsigned tasks;
+ } group[BOOSTGROUPS_COUNT];
+};
+
+/* Boost groups affecting each CPU in the system */
+DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
+
+static void
+schedtune_cpu_update(int cpu)
+{
+ struct boost_groups *bg;
+ unsigned boost_max;
+ int idx;
+
+ bg = &per_cpu(cpu_boost_groups, cpu);
+
+ /* The root boost group is always active */
+ boost_max = bg->group[0].boost;
+ for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
+ /*
+ * A boost group affects a CPU only if it has
+ * RUNNABLE tasks on that CPU
+ */
+ if (bg->group[idx].tasks == 0)
+ continue;
+ boost_max = max(boost_max, bg->group[idx].boost);
+ }
+
+ bg->boost_max = boost_max;
+}
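+
+/*
+ * Example: with the root group boosted to 10 and boost group 1 boosted
+ * to 50, a CPU with one RUNNABLE task in group 1 gets boost_max = 50;
+ * once that task is dequeued (group 1 task count drops to 0) boost_max
+ * falls back to the always-active root value of 10.
+ */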
+
+static int
+schedtune_boostgroup_update(int idx, int boost)
+{
+ struct boost_groups *bg;
+ int cur_boost_max;
+ int old_boost;
+ int cpu;
+
+ /* Update per CPU boost groups */
+ for_each_possible_cpu(cpu) {
+ bg = &per_cpu(cpu_boost_groups, cpu);
+
+ /*
+		 * Keep track of the current boost values so that the per-CPU
+		 * maximum is recomputed only when it is actually affected by
+		 * the updated boost group's new value
+ */
+ cur_boost_max = bg->boost_max;
+ old_boost = bg->group[idx].boost;
+
+ /* Update the boost value of this boost group */
+ bg->group[idx].boost = boost;
+
+		/* Check if this update increases the current max */
+ if (boost > cur_boost_max && bg->group[idx].tasks) {
+ bg->boost_max = boost;
+ trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max);
+ continue;
+ }
+
+		/* Check if this update has decreased the current max */
+ if (cur_boost_max == old_boost && old_boost > boost) {
+ schedtune_cpu_update(cpu);
+ trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max);
+ continue;
+ }
+
+ trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max);
+ }
+
+ return 0;
+}
+
+static inline void
+schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
+{
+ struct boost_groups *bg;
+ int tasks;
+
+ bg = &per_cpu(cpu_boost_groups, cpu);
+
+	/* Update the boosted tasks count, never letting it go negative */
+ if (task_count < 0 && bg->group[idx].tasks <= -task_count)
+ bg->group[idx].tasks = 0;
+ else
+ bg->group[idx].tasks += task_count;
+
+ /* Boost group activation or deactivation on that RQ */
+ tasks = bg->group[idx].tasks;
+ if (tasks == 1 || tasks == 0)
+ schedtune_cpu_update(cpu);
+
+ trace_sched_tune_tasks_update(p, cpu, tasks, idx,
+ bg->group[idx].boost, bg->boost_max);
+
+}
+
+/*
+ * NOTE: This function must be called while holding the lock on the CPU RQ
+ */
+void schedtune_enqueue_task(struct task_struct *p, int cpu)
+{
+ struct schedtune *st;
+ int idx;
+
+ /*
+ * When a task is marked PF_EXITING by do_exit() it's going to be
+ * dequeued and enqueued multiple times in the exit path.
+ * Thus we avoid any further update, since we do not want to change
+ * CPU boosting while the task is exiting.
+ */
+ if (p->flags & PF_EXITING)
+ return;
+
+ /* Get task boost group */
+ rcu_read_lock();
+ st = task_schedtune(p);
+ idx = st->idx;
+ rcu_read_unlock();
+
+ schedtune_tasks_update(p, cpu, idx, 1);
+}
+
+/*
+ * NOTE: This function must be called while holding the lock on the CPU RQ
+ */
+void schedtune_dequeue_task(struct task_struct *p, int cpu)
+{
+ struct schedtune *st;
+ int idx;
+
+ /*
+ * When a task is marked PF_EXITING by do_exit() it's going to be
+ * dequeued and enqueued multiple times in the exit path.
+ * Thus we avoid any further update, since we do not want to change
+ * CPU boosting while the task is exiting.
+ * The last dequeue will be done by cgroup exit() callback.
+ */
+ if (p->flags & PF_EXITING)
+ return;
+
+ /* Get task boost group */
+ rcu_read_lock();
+ st = task_schedtune(p);
+ idx = st->idx;
+ rcu_read_unlock();
+
+ schedtune_tasks_update(p, cpu, idx, -1);
+}
+
+int schedtune_cpu_boost(int cpu)
+{
+ struct boost_groups *bg;
+
+ bg = &per_cpu(cpu_boost_groups, cpu);
+ return bg->boost_max;
+}
+
+int schedtune_task_boost(struct task_struct *p)
+{
+ struct schedtune *st;
+ int task_boost;
+
+ /* Get task boost value */
+ rcu_read_lock();
+ st = task_schedtune(p);
+ task_boost = st->boost;
+ rcu_read_unlock();
+
+ return task_boost;
+}
+
+static u64
+boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ struct schedtune *st = css_st(css);
+
+ return st->boost;
+}
+
+static int
+boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 boost)
+{
+ struct schedtune *st = css_st(css);
+
+ if (boost < 0 || boost > 100)
+ return -EINVAL;
+
+ st->boost = boost;
+ if (css == &root_schedtune.css)
+ sysctl_sched_cfs_boost = boost;
+
+ /* Update CPU boost */
+ schedtune_boostgroup_update(st->idx, st->boost);
+
+ trace_sched_tune_config(st->boost);
+
+ return 0;
+}
+
+static struct cftype files[] = {
+ {
+ .name = "boost",
+ .read_u64 = boost_read,
+ .write_u64 = boost_write,
+ },
+ { } /* terminate */
+};
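+
+/*
+ * Minimal userspace usage sketch, assuming the controller is registered
+ * under the name "schedtune" (cgroup file names carry the subsystem
+ * prefix, hence "schedtune.boost"):
+ *
+ *   mount -t cgroup -o schedtune none /sys/fs/cgroup/stune
+ *   mkdir /sys/fs/cgroup/stune/foreground
+ *   echo 60 > /sys/fs/cgroup/stune/foreground/schedtune.boost
+ *
+ * Values outside [0..100] are rejected with -EINVAL by boost_write().
+ */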
+
+static int
+schedtune_boostgroup_init(struct schedtune *st)
+{
+ struct boost_groups *bg;
+ int cpu;
+
+ /* Keep track of allocated boost groups */
+ allocated_group[st->idx] = st;
+
+ /* Initialize the per CPU boost groups */
+ for_each_possible_cpu(cpu) {
+ bg = &per_cpu(cpu_boost_groups, cpu);
+ bg->group[st->idx].boost = 0;
+ bg->group[st->idx].tasks = 0;
+ }
+
+ return 0;
+}
+
+static int
+schedtune_init(void)
+{
+ struct boost_groups *bg;
+ int cpu;
+
+ /* Initialize the per CPU boost groups */
+ for_each_possible_cpu(cpu) {
+ bg = &per_cpu(cpu_boost_groups, cpu);
+ memset(bg, 0, sizeof(struct boost_groups));
+ }
+
+ pr_info(" schedtune configured to support %d boost groups\n",
+ BOOSTGROUPS_COUNT);
+ return 0;
+}
+
+static struct cgroup_subsys_state *
+schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+ struct schedtune *st;
+ int idx;
+
+ if (!parent_css) {
+ schedtune_init();
+ return &root_schedtune.css;
+ }
+
+	/* Allow only single-level hierarchies */
+ if (parent_css != &root_schedtune.css) {
+ pr_err("Nested SchedTune boosting groups not allowed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Allow only a limited number of boosting groups */
+ for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
+ if (!allocated_group[idx])
+ break;
+ if (idx == BOOSTGROUPS_COUNT) {
+ pr_err("Trying to create more than %d SchedTune boosting groups\n",
+ BOOSTGROUPS_COUNT);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ st = kzalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto out;
+
+	/* Initialize per-CPU boost group support */
+ st->idx = idx;
+ if (schedtune_boostgroup_init(st))
+ goto release;
+
+ return &st->css;
+
+release:
+ kfree(st);
+out:
+ return ERR_PTR(-ENOMEM);
+}
+
+static void
+schedtune_boostgroup_release(struct schedtune *st)
+{
+ /* Reset this boost group */
+ schedtune_boostgroup_update(st->idx, 0);
+
+ /* Keep track of allocated boost groups */
+ allocated_group[st->idx] = NULL;
+}
+
+static void
+schedtune_css_free(struct cgroup_subsys_state *css)
+{
+ struct schedtune *st = css_st(css);
+
+ schedtune_boostgroup_release(st);
+ kfree(st);
+}
+
+struct cgroup_subsys schedtune_cgrp_subsys = {
+ .css_alloc = schedtune_css_alloc,
+ .css_free = schedtune_css_free,
+ .legacy_cftypes = files,
+ .early_init = 1,
+};
+
+#else /* CONFIG_CGROUP_SCHEDTUNE */
+
+int
+schedtune_accept_deltas(int nrg_delta, int cap_delta,
+ struct task_struct *task)
+{
+ /* Optimal (O) region */
+ if (nrg_delta < 0 && cap_delta > 0)
+ return INT_MAX;
+
+ /* Suboptimal (S) region */
+ if (nrg_delta > 0 && cap_delta < 0)
+ return -INT_MAX;
+
+ return __schedtune_accept_deltas(nrg_delta, cap_delta,
+ perf_boost_idx, perf_constrain_idx);
+}
+
+#endif /* CONFIG_CGROUP_SCHEDTUNE */
+
+int
+sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (ret || !write)
+ return ret;
+
+ /* Performance Boost (B) region threshold params */
+ perf_boost_idx = sysctl_sched_cfs_boost;
+ perf_boost_idx /= 10;
+
+ /* Performance Constraint (C) region threshold params */
+ perf_constrain_idx = 100 - sysctl_sched_cfs_boost;
+ perf_constrain_idx /= 10;
+
+ return 0;
+}
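+
+/*
+ * Worked example: writing 25 to the sched_cfs_boost sysctl selects
+ * perf_boost_idx = 25 / 10 = 2 and perf_constrain_idx = (100 - 25) / 10 = 7.
+ * Both values are then used to index threshold_gains[] in
+ * __schedtune_accept_deltas().
+ */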
+
+/*
+ * System energy normalization
+ * Returns the normalized value, in the range [0..SCHED_LOAD_SCALE],
+ * corresponding to the specified energy variation.
+ */
+int
+schedtune_normalize_energy(int energy_diff)
+{
+ u32 normalized_nrg;
+ int max_delta;
+
+#ifdef CONFIG_SCHED_DEBUG
+ /* Check for boundaries */
+ max_delta = schedtune_target_nrg.max_power;
+ max_delta -= schedtune_target_nrg.min_power;
+ WARN_ON(abs(energy_diff) >= max_delta);
+#endif
+
+ /* Do scaling using positive numbers to increase the range */
+ normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff;
+
+ /* Scale by energy magnitude */
+ normalized_nrg <<= SCHED_LOAD_SHIFT;
+
+ /* Normalize on max energy for target platform */
+ normalized_nrg = reciprocal_divide(
+ normalized_nrg, schedtune_target_nrg.rdiv);
+
+ return (energy_diff < 0) ? -normalized_nrg : normalized_nrg;
+}
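+
+/*
+ * Worked example (assuming SCHED_LOAD_SHIFT = 10): with a platform energy
+ * range of max_power - min_power = 4096, an energy_diff of 512 normalizes
+ * to (512 << 10) / 4096 = 128. reciprocal_divide() performs that division
+ * with the constants precomputed by reciprocal_value() in
+ * schedtune_init_late() below.
+ */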
+
+#ifdef CONFIG_SCHED_DEBUG
+static void
+schedtune_test_nrg(unsigned long delta_pwr)
+{
+ unsigned long test_delta_pwr;
+ unsigned long test_norm_pwr;
+ int idx;
+
+ /*
+ * Check normalization constants using some constant system
+ * energy values
+ */
+ pr_info("schedtune: verify normalization constants...\n");
+ for (idx = 0; idx < 6; ++idx) {
+ test_delta_pwr = delta_pwr >> idx;
+
+ /* Normalize on max energy for target platform */
+ test_norm_pwr = reciprocal_divide(
+ test_delta_pwr << SCHED_LOAD_SHIFT,
+ schedtune_target_nrg.rdiv);
+
+ pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n",
+ idx, test_delta_pwr, test_norm_pwr);
+ }
+}
+#else
+#define schedtune_test_nrg(delta_pwr)
+#endif
+
+/*
+ * Compute the min/max power consumption of a cluster and all its CPUs
+ */
+static void
+schedtune_add_cluster_nrg(
+ struct sched_domain *sd,
+ struct sched_group *sg,
+ struct target_nrg *ste)
+{
+ struct sched_domain *sd2;
+ struct sched_group *sg2;
+
+ struct cpumask *cluster_cpus;
+ char str[32];
+
+ unsigned long min_pwr;
+ unsigned long max_pwr;
+ int cpu;
+
+ /* Get Cluster energy using EM data for the first CPU */
+ cluster_cpus = sched_group_cpus(sg);
+ snprintf(str, 32, "CLUSTER[%*pbl]",
+ cpumask_pr_args(cluster_cpus));
+
+ min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power;
+ max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power;
+ pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+ str, min_pwr, max_pwr);
+
+ /*
+ * Keep track of this cluster's energy in the computation of the
+ * overall system energy
+ */
+ ste->min_power += min_pwr;
+ ste->max_power += max_pwr;
+
+ /* Get CPU energy using EM data for each CPU in the group */
+ for_each_cpu(cpu, cluster_cpus) {
+ /* Get a SD view for the specific CPU */
+ for_each_domain(cpu, sd2) {
+ /* Get the CPU group */
+ sg2 = sd2->groups;
+ min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power;
+ max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power;
+
+ ste->min_power += min_pwr;
+ ste->max_power += max_pwr;
+
+ snprintf(str, 32, "CPU[%d]", cpu);
+ pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+ str, min_pwr, max_pwr);
+
+ /*
+ * Assume we have EM data only at the CPU and
+ * the upper CLUSTER level
+ */
+ BUG_ON(!cpumask_equal(
+ sched_group_cpus(sg),
+ sched_group_cpus(sd2->parent->groups)
+ ));
+ break;
+ }
+ }
+}
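+
+/*
+ * Illustrative (hypothetical EM figures): a cluster whose deepest idle
+ * state draws 10 and whose highest capacity state draws 500 adds 10/500
+ * to min_power/max_power; each CPU in it then adds its own idle/cap
+ * figures, so the system-wide range is the sum over all clusters and CPUs.
+ */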
+
+/*
+ * Initialize the constants required to compute normalized energy.
+ * The values of these constants depend on the EM data for the specific
+ * target system and topology.
+ * Thus, this function is expected to be called by the code
+ * that binds the EM to the topology information.
+ */
+static int
+schedtune_init_late(void)
+{
+ struct target_nrg *ste = &schedtune_target_nrg;
+ unsigned long delta_pwr = 0;
+ struct sched_domain *sd;
+ struct sched_group *sg;
+
+ pr_info("schedtune: init normalization constants...\n");
+ ste->max_power = 0;
+ ste->min_power = 0;
+
+ rcu_read_lock();
+
+ /*
+ * When EAS is in use, we always have a pointer to the highest SD
+ * which provides EM data.
+ */
+ sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask)));
+ if (!sd) {
+ pr_info("schedtune: no energy model data\n");
+ goto nodata;
+ }
+
+ sg = sd->groups;
+ do {
+ schedtune_add_cluster_nrg(sd, sg, ste);
+ } while (sg = sg->next, sg != sd->groups);
+
+ rcu_read_unlock();
+
+ pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+ "SYSTEM", ste->min_power, ste->max_power);
+
+ /* Compute normalization constants */
+ delta_pwr = ste->max_power - ste->min_power;
+ ste->rdiv = reciprocal_value(delta_pwr);
+ pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n",
+ ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2);
+
+ schedtune_test_nrg(delta_pwr);
+ return 0;
+
+nodata:
+ rcu_read_unlock();
+ return -EINVAL;
+}
+late_initcall(schedtune_init_late);
diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h
new file mode 100644
index 000000000000..f7273a5d994a
--- /dev/null
+++ b/kernel/sched/tune.h
@@ -0,0 +1,31 @@
+
+#ifdef CONFIG_SCHED_TUNE
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+
+int schedtune_cpu_boost(int cpu);
+int schedtune_task_boost(struct task_struct *tsk);
+
+void schedtune_enqueue_task(struct task_struct *p, int cpu);
+void schedtune_dequeue_task(struct task_struct *p, int cpu);
+
+#else /* CONFIG_CGROUP_SCHEDTUNE */
+
+#define schedtune_enqueue_task(task, cpu) do { } while (0)
+#define schedtune_dequeue_task(task, cpu) do { } while (0)
+
+#endif /* CONFIG_CGROUP_SCHEDTUNE */
+
+int schedtune_normalize_energy(int energy);
+int schedtune_accept_deltas(int nrg_delta, int cap_delta,
+ struct task_struct *task);
+
+#else /* CONFIG_SCHED_TUNE */
+
+#define schedtune_enqueue_task(task, cpu) do { } while (0)
+#define schedtune_dequeue_task(task, cpu) do { } while (0)
+
+#define schedtune_normalize_energy(energy) energy
+#define schedtune_accept_deltas(nrg_delta, cap_delta, task) nrg_delta
+
+#endif /* CONFIG_SCHED_TUNE */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 300d64162aff..0b27f5d63ca0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -435,6 +435,21 @@ static struct ctl_table kern_table[] = {
.extra1 = &one,
},
#endif
+#ifdef CONFIG_SCHED_TUNE
+ {
+ .procname = "sched_cfs_boost",
+ .data = &sysctl_sched_cfs_boost,
+ .maxlen = sizeof(sysctl_sched_cfs_boost),
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+ .mode = 0444,
+#else
+ .mode = 0644,
+#endif
+ .proc_handler = &sysctl_sched_cfs_boost_handler,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
+#endif
#ifdef CONFIG_PROVE_LOCKING
{
.procname = "prove_locking",
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 95d1a66ba03a..a4062d382724 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -27,7 +27,7 @@ menuconfig BT
L2CAP (Logical Link Control and Adaptation Protocol)
SMP (Security Manager Protocol) on LE (Low Energy) links
HCI Device drivers (Interface to the hardware)
- RFCOMM Module (RFCOMM Protocol)
+ RFCOMM Module (RFCOMM Protocol)
BNEP Module (Bluetooth Network Encapsulation Protocol)
CMTP Module (CAPI Message Transport Protocol)
HIDP Module (Human Interface Device Protocol)
@@ -45,6 +45,15 @@ config BT_BREDR
depends on BT
default y
+config BT_LEDS
+ bool "Enable LED triggers"
+ depends on BT
+ depends on LEDS_CLASS
+ select LEDS_TRIGGERS
+ ---help---
+	  This option enables LED triggers for Bluetooth
+ packet receive/transmit.
+
source "net/bluetooth/rfcomm/Kconfig"
source "net/bluetooth/bnep/Kconfig"
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 2b15ae8c1def..e80a64d9f4f1 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_BT_BNEP) += bnep/
obj-$(CONFIG_BT_CMTP) += cmtp/
obj-$(CONFIG_BT_HIDP) += hidp/
obj-$(CONFIG_BT_6LOWPAN) += bluetooth_6lowpan.o
-
bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
@@ -20,4 +19,6 @@ bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
+bluetooth-$(CONFIG_BT_LEDS) += led.o
+
subdir-ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 62edbf1b114e..208aab09d1bf 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -39,6 +39,7 @@
#include "hci_request.h"
#include "hci_debugfs.h"
+#include "led.h"
#include "smp.h"
static void hci_rx_work(struct work_struct *work);
@@ -3392,6 +3393,9 @@ int hci_register_dev(struct hci_dev *hdev)
if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
hci_dev_set_flag(hdev, HCI_RFKILLED);
+ bluetooth_led_names(hdev);
+ bluetooth_led_init(hdev);
+
hci_dev_set_flag(hdev, HCI_SETUP);
hci_dev_set_flag(hdev, HCI_AUTO_OFF);
@@ -3462,6 +3466,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_sock_dev_event(hdev, HCI_DEV_UNREG);
+ bluetooth_led_exit(hdev);
+
if (hdev->rfkill) {
rfkill_unregister(hdev->rfkill);
rfkill_destroy(hdev->rfkill);
@@ -3553,6 +3559,8 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
skb_queue_tail(&hdev->rx_q, skb);
queue_work(hdev->workqueue, &hdev->rx_work);
+ bluetooth_led_rx(hdev);
+
return 0;
}
EXPORT_SYMBOL(hci_recv_frame);
@@ -3629,6 +3637,8 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
BT_ERR("%s sending frame failed (%d)", hdev->name, err);
kfree_skb(skb);
}
+
+ bluetooth_led_tx(hdev);
}
/* Send HCI command */
diff --git a/net/bluetooth/led.c b/net/bluetooth/led.c
new file mode 100644
index 000000000000..1f53c1a5c195
--- /dev/null
+++ b/net/bluetooth/led.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2015, Guodong Xu <guodong.xu@linaro.org>
+ * Copyright 2006, Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include "led.h"
+
+#define BLUETOOTH_BLINK_DELAY 50 /* ms */
+
+void bluetooth_led_rx(struct hci_dev *hdev)
+{
+ unsigned long led_delay = BLUETOOTH_BLINK_DELAY;
+ if (unlikely(!hdev->rx_led))
+ return;
+ led_trigger_blink_oneshot(hdev->rx_led, &led_delay, &led_delay, 0);
+}
+EXPORT_SYMBOL_GPL(bluetooth_led_rx);
+
+void bluetooth_led_tx(struct hci_dev *hdev)
+{
+ unsigned long led_delay = BLUETOOTH_BLINK_DELAY;
+ if (unlikely(!hdev->tx_led))
+ return;
+ led_trigger_blink_oneshot(hdev->tx_led, &led_delay, &led_delay, 0);
+}
+EXPORT_SYMBOL_GPL(bluetooth_led_tx);
+
+void bluetooth_led_names(struct hci_dev *hdev)
+{
+ snprintf(hdev->rx_led_name, sizeof(hdev->rx_led_name),
+ "%srx", hdev->name);
+ snprintf(hdev->tx_led_name, sizeof(hdev->tx_led_name),
+ "%stx", hdev->name);
+}
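+
+/*
+ * The trigger names are derived from the device name, e.g. "hci0rx" and
+ * "hci0tx" for hci0. An LED can be bound to either trigger at run time:
+ *
+ *   echo hci0rx > /sys/class/leds/<led>/trigger
+ *
+ * or statically via the LED's "linux,default-trigger" device tree property.
+ */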
+
+void bluetooth_led_init(struct hci_dev *hdev)
+{
+ hdev->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
+ if (hdev->rx_led) {
+ hdev->rx_led->name = hdev->rx_led_name;
+ if (led_trigger_register(hdev->rx_led)) {
+ kfree(hdev->rx_led);
+ hdev->rx_led = NULL;
+ }
+ }
+
+ hdev->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
+ if (hdev->tx_led) {
+ hdev->tx_led->name = hdev->tx_led_name;
+ if (led_trigger_register(hdev->tx_led)) {
+ kfree(hdev->tx_led);
+ hdev->tx_led = NULL;
+ }
+ }
+}
+
+void bluetooth_led_exit(struct hci_dev *hdev)
+{
+ if (hdev->tx_led) {
+ led_trigger_unregister(hdev->tx_led);
+ kfree(hdev->tx_led);
+ }
+ if (hdev->rx_led) {
+ led_trigger_unregister(hdev->rx_led);
+ kfree(hdev->rx_led);
+ }
+}
diff --git a/net/bluetooth/led.h b/net/bluetooth/led.h
new file mode 100644
index 000000000000..766a211203ff
--- /dev/null
+++ b/net/bluetooth/led.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2015, Guodong Xu <guodong.xu@linaro.org>
+ * Copyright 2006, Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/leds.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#ifdef CONFIG_BT_LEDS
+void bluetooth_led_rx(struct hci_dev *hdev);
+void bluetooth_led_tx(struct hci_dev *hdev);
+void bluetooth_led_names(struct hci_dev *hdev);
+void bluetooth_led_init(struct hci_dev *hdev);
+void bluetooth_led_exit(struct hci_dev *hdev);
+#else
+static inline void bluetooth_led_rx(struct hci_dev *hdev)
+{
+}
+static inline void bluetooth_led_tx(struct hci_dev *hdev)
+{
+}
+static inline void bluetooth_led_names(struct hci_dev *hdev)
+{
+}
+static inline void bluetooth_led_init(struct hci_dev *hdev)
+{
+}
+static inline void bluetooth_led_exit(struct hci_dev *hdev)
+{
+}
+#endif
diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig
index 7ff7d88e46dd..e6175534d1c0 100644
--- a/sound/soc/Kconfig
+++ b/sound/soc/Kconfig
@@ -46,6 +46,7 @@ source "sound/soc/cirrus/Kconfig"
source "sound/soc/davinci/Kconfig"
source "sound/soc/dwc/Kconfig"
source "sound/soc/fsl/Kconfig"
+source "sound/soc/hisilicon/Kconfig"
source "sound/soc/jz4740/Kconfig"
source "sound/soc/nuc900/Kconfig"
source "sound/soc/omap/Kconfig"
diff --git a/sound/soc/Makefile b/sound/soc/Makefile
index 8eb06db32fa0..33e7ddf59d29 100644
--- a/sound/soc/Makefile
+++ b/sound/soc/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_SND_SOC) += cirrus/
obj-$(CONFIG_SND_SOC) += davinci/
obj-$(CONFIG_SND_SOC) += dwc/
obj-$(CONFIG_SND_SOC) += fsl/
+obj-$(CONFIG_SND_SOC) += hisilicon/
obj-$(CONFIG_SND_SOC) += jz4740/
obj-$(CONFIG_SND_SOC) += intel/
obj-$(CONFIG_SND_SOC) += mediatek/
diff --git a/sound/soc/hisilicon/Kconfig b/sound/soc/hisilicon/Kconfig
new file mode 100644
index 000000000000..4356d5a1d338
--- /dev/null
+++ b/sound/soc/hisilicon/Kconfig
@@ -0,0 +1,5 @@
+config SND_I2S_HI6210_I2S
+ tristate "Hisilicon I2S controller"
+ select SND_SOC_GENERIC_DMAENGINE_PCM
+ help
+	  Say Y or M here to add support for the Hisilicon I2S controller.
diff --git a/sound/soc/hisilicon/Makefile b/sound/soc/hisilicon/Makefile
new file mode 100644
index 000000000000..f30d9953526a
--- /dev/null
+++ b/sound/soc/hisilicon/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_SND_I2S_HI6210_I2S) += hi6210-i2s.o \
+ hi6210-hdmi-card.o
diff --git a/sound/soc/hisilicon/hi6210-hdmi-card.c b/sound/soc/hisilicon/hi6210-hdmi-card.c
new file mode 100644
index 000000000000..93fc121b4158
--- /dev/null
+++ b/sound/soc/hisilicon/hi6210-hdmi-card.c
@@ -0,0 +1,130 @@
+/*
+ * linux/sound/soc/hisilicon/hi6210-hdmi-card.c
+ *
+ * Copyright (C) 2015 Linaro, Ltd
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+
+static int hdmi_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params)
+{
+ struct snd_soc_pcm_runtime *rtd = substream->private_data;
+ struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
+ int ret;
+
+ ret = snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_I2S |
+ SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS);
+ if (ret)
+ return ret;
+
+ /* set i2s system clock */
+ ret = snd_soc_dai_set_sysclk(cpu_dai, 0, 24576000, SND_SOC_CLOCK_IN);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/* operations of sound device */
+static struct snd_soc_ops hdmi_ops = {
+ .hw_params = hdmi_hw_params,
+};
+
+static struct snd_soc_dai_link hi6210_hdmi_dai_link = {
+	.name = "hi6210-hdmi-dai-link",		/* DAI link name */
+ .stream_name = "hdmi", /* stream name */
+
+	.cpu_dai_name = "f7118000.hi6210_i2s",
+ .codec_name = "0.hi6210_hdmi_card",
+ .be_id = 0,
+ .ops = &hdmi_ops,
+ .codec_dai_name = "hi6210_hdmi_dai",
+ .platform_name = "f7118000.hi6210_i2s",
+};
+
+static struct snd_soc_card snd_soc_hi6210_hdmi = {
+ .name = "hi6210-hdmi",
+ .owner = THIS_MODULE,
+ .dai_link = &hi6210_hdmi_dai_link,
+ .num_links = 1,
+};
+
+static const struct snd_soc_dai_ops hi6210_hdmi_dai_ops = {
+};
+
+static struct snd_soc_dai_driver hi6210_hdmi_dai = {
+ .name = "hi6210_hdmi_dai",
+ .playback = {
+ .channels_min = 2,
+ .channels_max = 2,
+ .rates = SNDRV_PCM_RATE_48000,
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
+ },
+ .ops = &hi6210_hdmi_dai_ops,
+};
+
+static struct snd_soc_codec_driver hi6210_hdmi_codec;
+
+static int hi6210_hdmi_probe(struct platform_device *pdev)
+{
+ struct snd_soc_card *card = &snd_soc_hi6210_hdmi;
+ int ret;
+
+ ret = snd_soc_register_codec(&pdev->dev, &hi6210_hdmi_codec,
+ &hi6210_hdmi_dai, 1);
+ if (ret) {
+ dev_err(&pdev->dev, "snd_soc_register_codec failed (%d)\n", ret);
+ return ret;
+ }
+ card->dev = &pdev->dev;
+
+ ret = snd_soc_register_card(card);
+ if (ret) {
+ dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret);
+ snd_soc_unregister_codec(&pdev->dev);
+ card->dev = NULL;
+ return ret;
+ }
+ return 0;
+}
+
+static int hi6210_hdmi_remove(struct platform_device *pdev)
+{
+ struct snd_soc_card *card = platform_get_drvdata(pdev);
+
+ snd_soc_unregister_card(card);
+ snd_soc_unregister_codec(&pdev->dev);
+ card->dev = NULL;
+ return 0;
+}
+
+static const struct of_device_id hi6210_hdmi_dt_ids[] = {
+ { .compatible = "hisilicon,hi6210-hdmi-audio-card" },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, hi6210_hdmi_dt_ids);
+
+static struct platform_driver hi6210_hdmi_driver = {
+ .driver = {
+ .name = "hi6210-hdmi-audio",
+ .owner = THIS_MODULE,
+ .of_match_table = hi6210_hdmi_dt_ids,
+ },
+ .probe = hi6210_hdmi_probe,
+ .remove = hi6210_hdmi_remove,
+};
+
+module_platform_driver(hi6210_hdmi_driver);
+
+MODULE_AUTHOR("Andy Green <andy.green@linaro.org>");
+MODULE_DESCRIPTION("Hisilicon HDMI machine ASoC driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:hi6210-hdmi-audio");
diff --git a/sound/soc/hisilicon/hi6210-i2s.c b/sound/soc/hisilicon/hi6210-i2s.c
new file mode 100644
index 000000000000..8e44de6f938c
--- /dev/null
+++ b/sound/soc/hisilicon/hi6210-i2s.c
@@ -0,0 +1,636 @@
+/*
+ * linux/sound/soc/hisilicon/hi6210-i2s.c - I2S IP driver
+ *
+ * Copyright (C) 2015 Linaro, Ltd
+ * Author: Andy Green <andy.green@linaro.org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * This driver only deals with the S2 interface (BT)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/jiffies.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/dmaengine_pcm.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+#include <linux/interrupt.h>
+#include <linux/reset.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/reset-controller.h>
+
+#include "hi6210-i2s.h"
+
+struct hi6210_i2s {
+ struct device *dev;
+ struct reset_control *rc;
+ struct clk *clk[8];
+ int clocks;
+ struct snd_soc_dai_driver dai;
+ void __iomem *base;
+ void __iomem *base_syscon;
+ void __iomem *base_pmctrl;
+ phys_addr_t base_phys;
+ struct snd_dmaengine_dai_dma_data dma_data[2];
+ int clk_rate;
+ spinlock_t lock;
+ int rate;
+ int format;
+ u8 bits;
+ u8 channels;
+ u8 id;
+ u8 channel_length;
+ u8 use;
+ u32 master:1;
+ u32 status:1;
+};
+
+#define SC_PERIPH_CLKEN1 0x210
+#define SC_PERIPH_CLKDIS1 0x214
+
+#define SC_PERIPH_CLKEN3 0x230
+#define SC_PERIPH_CLKDIS3 0x234
+
+#define SC_PERIPH_CLKEN12 0x270
+#define SC_PERIPH_CLKDIS12 0x274
+
+#define SC_PERIPH_RSTEN1 0x310
+#define SC_PERIPH_RSTDIS1 0x314
+#define SC_PERIPH_RSTSTAT1 0x318
+
+#define SC_PERIPH_RSTEN2 0x320
+#define SC_PERIPH_RSTDIS2 0x324
+#define SC_PERIPH_RSTSTAT2 0x328
+
+#define SOC_PMCTRL_BBPPLLALIAS 0x48
+
+static void hi6210_bits(struct hi6210_i2s *i2s, u32 ofs, u32 reset, u32 set)
+{
+ u32 val = readl(i2s->base + ofs) & ~reset;
+
+ writel(val | set, i2s->base + ofs);
+}
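+
+/*
+ * hi6210_bits() is a read-modify-write helper: bits in @reset are cleared
+ * first, then bits in @set are ORed in. For example,
+ *
+ *   hi6210_bits(i2s, HII2S_I2S_CFG, HII2S_I2S_CFG__S2_IF_TX_EN, 0);
+ *
+ * clears only the S2 TX-enable bit and leaves the rest of the register
+ * untouched.
+ */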
+
+static int _hi6210_i2s_set_fmt(struct hi6210_i2s *i2s,
+ struct snd_pcm_substream *substream)
+{
+ u32 u;
+
+ hi6210_bits(i2s, HII2S_ST_DL_FIFO_TH_CFG,
+ (HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_MASK <<
+ HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_SHIFT) |
+ (HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_MASK <<
+ HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_SHIFT) |
+ (HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_MASK <<
+ HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_SHIFT) |
+ (HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_MASK <<
+ HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_SHIFT),
+ (16 << HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_SHIFT) |
+ (30 << HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_SHIFT) |
+ (16 << HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_SHIFT) |
+ (30 << HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_SHIFT));
+
+ hi6210_bits(i2s, HII2S_IF_CLK_EN_CFG, 0,
+ BIT(19) | BIT(18) | BIT(17) |
+ HII2S_IF_CLK_EN_CFG__S2_IF_CLK_EN |
+ HII2S_IF_CLK_EN_CFG__S2_OL_MIXER_EN |
+ HII2S_IF_CLK_EN_CFG__S2_OL_SRC_EN |
+ HII2S_IF_CLK_EN_CFG__ST_DL_R_EN |
+ HII2S_IF_CLK_EN_CFG__ST_DL_L_EN);
+
+ hi6210_bits(i2s, HII2S_DIG_FILTER_CLK_EN_CFG,
+ HII2S_DIG_FILTER_CLK_EN_CFG__DACR_SDM_EN |
+ HII2S_DIG_FILTER_CLK_EN_CFG__DACR_HBF2I_EN |
+ HII2S_DIG_FILTER_CLK_EN_CFG__DACR_AGC_EN |
+ HII2S_DIG_FILTER_CLK_EN_CFG__DACL_SDM_EN |
+ HII2S_DIG_FILTER_CLK_EN_CFG__DACL_HBF2I_EN |
+ HII2S_DIG_FILTER_CLK_EN_CFG__DACL_AGC_EN,
+ HII2S_DIG_FILTER_CLK_EN_CFG__DACR_MIXER_EN |
+ HII2S_DIG_FILTER_CLK_EN_CFG__DACL_MIXER_EN
+ );
+
+ hi6210_bits(i2s, HII2S_DIG_FILTER_MODULE_CFG,
+ HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN2_MUTE |
+ HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN2_MUTE,
+ 0
+ );
+ hi6210_bits(i2s, HII2S_MUX_TOP_MODULE_CFG,
+ HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_IN1_MUTE |
+ HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_IN2_MUTE |
+ HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_IN1_MUTE |
+ HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_IN2_MUTE,
+ 0
+ );
+
+ switch (i2s->format & SND_SOC_DAIFMT_MASTER_MASK) {
+ case SND_SOC_DAIFMT_CBM_CFM:
+ i2s->master = false;
+ hi6210_bits(i2s, HII2S_I2S_CFG, 0, HII2S_I2S_CFG__S2_MST_SLV);
+ break;
+ case SND_SOC_DAIFMT_CBS_CFS:
+ i2s->master = true;
+ hi6210_bits(i2s, HII2S_I2S_CFG, HII2S_I2S_CFG__S2_MST_SLV, 0);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (i2s->format & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+ u = HII2S_FORMAT_I2S;
+ break;
+ case SND_SOC_DAIFMT_LEFT_J:
+ u = HII2S_FORMAT_LEFT_JUST;
+ break;
+ case SND_SOC_DAIFMT_RIGHT_J:
+ u = HII2S_FORMAT_RIGHT_JUST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* set the i2s format */
+ hi6210_bits(i2s, HII2S_I2S_CFG,
+ (HII2S_I2S_CFG__S2_FUNC_MODE_MASK <<
+ HII2S_I2S_CFG__S2_FUNC_MODE_SHIFT),
+ u << HII2S_I2S_CFG__S2_FUNC_MODE_SHIFT);
+
+ /* misc control */
+ hi6210_bits(i2s, HII2S_CLK_SEL,
+ HII2S_CLK_SEL__I2S_BT_FM_SEL | /* BT gets the I2S */
+ HII2S_CLK_SEL__EXT_12_288MHZ_SEL, /* internal clock src */
+ 0);
+
+ return 0;
+}
+
+static int hi6210_i2s_startup(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *cpu_dai)
+{
+ struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev);
+ int ret, n;
+
+ /* deassert reset on ABB */
+ if (readl(i2s->base_syscon + SC_PERIPH_RSTSTAT2) & BIT(4))
+ writel(BIT(4), i2s->base_syscon + SC_PERIPH_RSTDIS2);
+
+ for (n = 0; n < i2s->clocks; n++) {
+ ret = clk_prepare_enable(i2s->clk[n]);
+ if (ret)
+ return ret;
+ }
+
+ ret = clk_set_rate(i2s->clk[1], 49152000);
+ if (ret) {
+ dev_err(i2s->dev, "%s: setting 49.152MHz base rate failed %d\n",
+ __func__, ret);
+ return ret;
+ }
+
+ /* enable clock before frequency division */
+ writel(BIT(9), i2s->base_syscon + SC_PERIPH_CLKEN12);
+
+ /* enable codec working clock / == "codec bus clock" */
+ writel(BIT(5), i2s->base_syscon + SC_PERIPH_CLKEN1);
+
+ /* deassert reset on codec / interface clock / working clock */
+ writel(BIT(5), i2s->base_syscon + SC_PERIPH_RSTEN1);
+ writel(BIT(5), i2s->base_syscon + SC_PERIPH_RSTDIS1);
+
+ /* not interested in i2s irqs */
+ hi6210_bits(i2s, HII2S_CODEC_IRQ_MASK, 0, 0x3f);
+
+ /* reset the stereo downlink fifo */
+ hi6210_bits(i2s, HII2S_APB_AFIFO_CFG_1, 0, BIT(5) | BIT(4));
+ hi6210_bits(i2s, HII2S_APB_AFIFO_CFG_1, BIT(5) | BIT(4), 0);
+
+ hi6210_bits(i2s, HII2S_SW_RST_N,
+ (HII2S_SW_RST_N__ST_DL_WORDLEN_MASK <<
+ HII2S_SW_RST_N__ST_DL_WORDLEN_SHIFT),
+ (HII2S_BITS_16 << HII2S_SW_RST_N__ST_DL_WORDLEN_SHIFT)
+ );
+
+ hi6210_bits(i2s, HII2S_MISC_CFG,
+ HII2S_MISC_CFG__ST_DL_TEST_SEL | /* mux 11/12 = APB not i2s */
+		HII2S_MISC_CFG__S2_DOUT_RIGHT_SEL | /* BT R ch 0 = mixer output of DACR ch */
+		HII2S_MISC_CFG__S2_DOUT_TEST_SEL,
+		HII2S_MISC_CFG__S2_DOUT_RIGHT_SEL |
+		HII2S_MISC_CFG__S2_DOUT_TEST_SEL /* BT L ch = 1 = mux 7 = "mixer output of DACL" */
+ );
+
+ /* disable the local i2s reset */
+ hi6210_bits(i2s, HII2S_SW_RST_N, 0, HII2S_SW_RST_N__SW_RST_N);
+
+ return 0;
+}
+
+static void hi6210_i2s_shutdown(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *cpu_dai)
+{
+ struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev);
+ int n;
+
+ for (n = 0; n < i2s->clocks; n++)
+ clk_disable_unprepare(i2s->clk[n]);
+
+ writel(BIT(5), i2s->base_syscon + SC_PERIPH_RSTEN1);
+}
+
+static void hi6210_i2s_txctrl(struct snd_soc_dai *cpu_dai, int on)
+{
+ struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev);
+
+ spin_lock(&i2s->lock);
+
+ if (on) {
+ /* enable S2 TX */
+ hi6210_bits(i2s, HII2S_I2S_CFG, 0, HII2S_I2S_CFG__S2_IF_TX_EN);
+	} else {
+		/* disable S2 TX */
+		hi6210_bits(i2s, HII2S_I2S_CFG, HII2S_I2S_CFG__S2_IF_TX_EN, 0);
+	}
+
+ spin_unlock(&i2s->lock);
+}
+
+static void hi6210_i2s_rxctrl(struct snd_soc_dai *cpu_dai, int on)
+{
+ struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev);
+
+ spin_lock(&i2s->lock);
+ if (on)
+ hi6210_bits(i2s, HII2S_I2S_CFG, 0, HII2S_I2S_CFG__S2_IF_RX_EN);
+ else
+ hi6210_bits(i2s, HII2S_I2S_CFG, HII2S_I2S_CFG__S2_IF_RX_EN, 0);
+
+ spin_unlock(&i2s->lock);
+}
+
+static int hi6210_i2s_set_sysclk(struct snd_soc_dai *cpu_dai,
+ int clk_id, unsigned int freq, int dir)
+{
+ return 0;
+}
+
+static int hi6210_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt)
+{
+ struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev);
+
+ i2s->format = fmt;
+ i2s->master = (i2s->format & SND_SOC_DAIFMT_MASTER_MASK) ==
+ SND_SOC_DAIFMT_CBS_CFS;
+
+ return 0;
+}
+
+static int hi6210_i2s_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *cpu_dai)
+{
+ struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev);
+ u32 u, signed_data = 0;
+ struct snd_dmaengine_dai_dma_data *dma_data;
+
+ dma_data = snd_soc_dai_get_dma_data(cpu_dai, substream);
+
+ _hi6210_i2s_set_fmt(i2s, substream);
+
+ dma_data->maxburst = 2;
+
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+ dma_data->addr = i2s->base_phys + HII2S_ST_DL_CHANNEL;
+ else
+ dma_data->addr = i2s->base_phys + HII2S_STEREO_UPLINK_CHANNEL;
+
+ i2s->channels = params_channels(params);
+ if (i2s->channels == 1)
+ hi6210_bits(i2s, HII2S_I2S_CFG, 0, HII2S_I2S_CFG__S2_FRAME_MODE);
+ else
+ hi6210_bits(i2s, HII2S_I2S_CFG, HII2S_I2S_CFG__S2_FRAME_MODE, 0);
+
+ switch (params_format(params)) {
+ case SNDRV_PCM_FORMAT_U16_LE:
+ signed_data = HII2S_I2S_CFG__S2_CODEC_DATA_FORMAT;
+ /* fallthru */
+ case SNDRV_PCM_FORMAT_S16_LE:
+ i2s->bits = 16;
+ dma_data->addr_width = 2;
+ u = HII2S_BITS_16;
+ break;
+
+ case SNDRV_PCM_FORMAT_U24_LE:
+ signed_data = HII2S_I2S_CFG__S2_CODEC_DATA_FORMAT;
+ /* fallthru */
+ case SNDRV_PCM_FORMAT_S24_LE:
+ i2s->bits = 32;
+ u = HII2S_BITS_24;
+ dma_data->addr_width = 3;
+ break;
+ default:
+ dev_err(cpu_dai->dev, "Bad format\n");
+ return -EINVAL;
+ }
+
+ /* clear loopback, set signed type and word length */
+ hi6210_bits(i2s, HII2S_I2S_CFG,
+ HII2S_I2S_CFG__S2_CODEC_DATA_FORMAT |
+ (HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_MASK <<
+ HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_SHIFT) |
+ (HII2S_I2S_CFG__S2_DIRECT_LOOP_MASK <<
+ HII2S_I2S_CFG__S2_DIRECT_LOOP_SHIFT),
+ signed_data |
+ (u << HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_SHIFT));
+
+ i2s->channel_length = i2s->channels * i2s->bits;
+ i2s->rate = params_rate(params);
+
+ switch (i2s->rate) {
+ case 8000:
+ u = HII2S_FS_RATE_8KHZ;
+ break;
+ case 16000:
+ u = HII2S_FS_RATE_16KHZ;
+ break;
+ case 32000:
+ u = HII2S_FS_RATE_32KHZ;
+ break;
+ case 48000:
+ u = HII2S_FS_RATE_48KHZ;
+ break;
+ case 96000:
+ u = HII2S_FS_RATE_96KHZ;
+ break;
+ case 192000:
+ u = HII2S_FS_RATE_192KHZ;
+ break;
+	default:
+		dev_err(cpu_dai->dev, "unsupported rate %d\n", i2s->rate);
+		return -EINVAL;
+	}
+
+ if (!i2s->rate || !i2s->channel_length) {
+ dev_err(cpu_dai->dev, "channels/rate/bits on i2s bad\n");
+ return -EINVAL;
+ }
+
+ if (!i2s->master)
+ return 0;
+
+ /* set DAC and related units to correct rate */
+ hi6210_bits(i2s, HII2S_FS_CFG,
+ (HII2S_FS_CFG__FS_S2_MASK <<
+ HII2S_FS_CFG__FS_S2_SHIFT) |
+ (HII2S_FS_CFG__FS_DACLR_MASK <<
+ HII2S_FS_CFG__FS_DACLR_SHIFT) |
+ (HII2S_FS_CFG__FS_ST_DL_R_MASK <<
+ HII2S_FS_CFG__FS_ST_DL_R_SHIFT) |
+ (HII2S_FS_CFG__FS_ST_DL_L_MASK <<
+ HII2S_FS_CFG__FS_ST_DL_L_SHIFT),
+ (u << HII2S_FS_CFG__FS_S2_SHIFT) |
+ (u << HII2S_FS_CFG__FS_DACLR_SHIFT) |
+ (u << HII2S_FS_CFG__FS_ST_DL_R_SHIFT) |
+ (u << HII2S_FS_CFG__FS_ST_DL_L_SHIFT)
+ );
+
+ return 0;
+}
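+
+/*
+ * Example: a stereo S16_LE stream at 48 kHz yields bits = 16,
+ * channels = 2, channel_length = 32 and u = HII2S_FS_RATE_48KHZ, so in
+ * master mode the S2 interface, the DACs and the stereo downlink are all
+ * clocked for 48 kHz.
+ */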
+
+static int hi6210_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+ struct snd_soc_dai *cpu_dai)
+{
+ pr_debug("%s\n", __func__);
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+ hi6210_i2s_rxctrl(cpu_dai, 1);
+ else
+ hi6210_i2s_txctrl(cpu_dai, 1);
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+ hi6210_i2s_rxctrl(cpu_dai, 0);
+ else
+ hi6210_i2s_txctrl(cpu_dai, 0);
+ break;
+ default:
+		dev_err(cpu_dai->dev, "unknown cmd\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int hi6210_i2s_dai_probe(struct snd_soc_dai *dai)
+{
+ struct hi6210_i2s *i2s = snd_soc_dai_get_drvdata(dai);
+
+ snd_soc_dai_init_dma_data(dai,
+ &i2s->dma_data[SNDRV_PCM_STREAM_PLAYBACK],
+ &i2s->dma_data[SNDRV_PCM_STREAM_CAPTURE]);
+
+ return 0;
+}
+
+static struct snd_soc_dai_ops hi6210_i2s_dai_ops = {
+ .trigger = hi6210_i2s_trigger,
+ .hw_params = hi6210_i2s_hw_params,
+ .set_fmt = hi6210_i2s_set_fmt,
+ .set_sysclk = hi6210_i2s_set_sysclk,
+ .startup = hi6210_i2s_startup,
+ .shutdown = hi6210_i2s_shutdown,
+};
+
+static struct snd_soc_dai_driver hi6210_i2s_dai_init = {
+ .name = "hi6210_i2s",
+ .probe = hi6210_i2s_dai_probe,
+ .playback = {
+ .channels_min = 2,
+ .channels_max = 2,
+ .formats = SNDRV_PCM_FMTBIT_S16_LE |
+ SNDRV_PCM_FMTBIT_U16_LE,
+ .rates = SNDRV_PCM_RATE_48000,
+ },
+ .capture = {
+ .channels_min = 2,
+ .channels_max = 2,
+ .formats = SNDRV_PCM_FMTBIT_S16_LE |
+ SNDRV_PCM_FMTBIT_U16_LE,
+ .rates = SNDRV_PCM_RATE_48000,
+ },
+ .ops = &hi6210_i2s_dai_ops,
+};
+
+static const struct snd_soc_component_driver hi6210_i2s_i2s_comp = {
+ .name = "hi6210_i2s-i2s",
+};
+
+static const struct snd_pcm_hardware snd_hi6210_hardware = {
+ .info = SNDRV_PCM_INFO_MMAP |
+ SNDRV_PCM_INFO_MMAP_VALID |
+ SNDRV_PCM_INFO_PAUSE |
+ SNDRV_PCM_INFO_RESUME |
+ SNDRV_PCM_INFO_INTERLEAVED |
+ SNDRV_PCM_INFO_HALF_DUPLEX,
+ .period_bytes_min = 4096,
+ .period_bytes_max = 4096,
+ .periods_min = 4,
+ .periods_max = UINT_MAX,
+ .buffer_bytes_max = SIZE_MAX,
+};
+
+static const struct snd_dmaengine_pcm_config hi6210_dmaengine_pcm_config = {
+ .pcm_hardware = &snd_hi6210_hardware,
+ .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config,
+ .prealloc_buffer_size = 64 * 1024,
+};
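+
+/*
+ * With fixed 4096-byte periods and at least 4 periods per buffer, the
+ * 64 KiB preallocated DMA buffer holds up to 16 periods.
+ */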
+
+static int hi6210_i2s_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct hi6210_i2s *i2s;
+ struct resource *res;
+ int ret;
+
+ i2s = kzalloc(sizeof(*i2s), GFP_KERNEL);
+ if (!i2s)
+ return -ENOMEM;
+
+ i2s->dev = dev;
+ spin_lock_init(&i2s->lock);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ ret = -ENODEV;
+ goto err2;
+ }
+ i2s->base_phys = (phys_addr_t)res->start;
+
+ i2s->dai = hi6210_i2s_dai_init;
+ dev_set_drvdata(&pdev->dev, i2s);
+
+ i2s->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(i2s->base)) {
+ dev_err(&pdev->dev, "ioremap failed\n");
+ ret = PTR_ERR(i2s->base);
+ goto err2;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res) {
+ ret = -ENODEV;
+ goto err2;
+ }
+ i2s->base_syscon = devm_ioremap(dev, res->start, resource_size(res));
+ if (IS_ERR(i2s->base_syscon)) {
+ dev_err(&pdev->dev, "ioremap failed\n");
+ ret = PTR_ERR(i2s->base_syscon);
+ goto err2;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+ if (!res) {
+ ret = -ENODEV;
+ goto err2;
+ }
+ i2s->base_pmctrl = devm_ioremap_resource(dev, res);
+ if (IS_ERR(i2s->base_pmctrl)) {
+ dev_err(&pdev->dev, "ioremap failed\n");
+ ret = PTR_ERR(i2s->base_pmctrl);
+ goto err2;
+ }
+
+ do {
+ i2s->clk[i2s->clocks] = of_clk_get(pdev->dev.of_node,
+ i2s->clocks);
+ if (IS_ERR_OR_NULL(i2s->clk[i2s->clocks]))
+ break;
+ i2s->clocks++;
+ } while (i2s->clocks < ARRAY_SIZE(i2s->clk));
+ if (!i2s->clocks) {
+ ret = PTR_ERR(i2s->clk[0]);
+ dev_err(&pdev->dev, "Failed to get clock\n");
+ goto err2;
+ }
+
+ ret = devm_snd_dmaengine_pcm_register(&pdev->dev,
+ &hi6210_dmaengine_pcm_config,
+ 0);
+ if (ret)
+ goto err3;
+
+ ret = snd_soc_register_component(&pdev->dev, &hi6210_i2s_i2s_comp,
+ &i2s->dai, 1);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register dai\n");
+ goto err3;
+ }
+
+ dev_info(&pdev->dev, "Registered as %s\n", i2s->dai.name);
+
+ return 0;
+
+err3:
+	while (i2s->clocks--)
+ clk_put(i2s->clk[i2s->clocks]);
+
+err2:
+ kfree(i2s);
+
+ return ret;
+}
+
+static int hi6210_i2s_remove(struct platform_device *pdev)
+{
+ struct hi6210_i2s *i2s = dev_get_drvdata(&pdev->dev);
+
+ snd_soc_unregister_component(&pdev->dev);
+ dev_set_drvdata(&pdev->dev, NULL);
+
+	while (i2s->clocks--)
+ clk_put(i2s->clk[i2s->clocks]);
+
+ kfree(i2s);
+
+ return 0;
+}
+
+static const struct of_device_id hi6210_i2s_dt_ids[] = {
+ { .compatible = "hisilicon,hi6210-i2s" },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, hi6210_i2s_dt_ids);
+
+static struct platform_driver hi6210_i2s_driver = {
+ .probe = hi6210_i2s_probe,
+ .remove = hi6210_i2s_remove,
+ .driver = {
+ .name = "hi6210_i2s",
+ .owner = THIS_MODULE,
+ .of_match_table = hi6210_i2s_dt_ids,
+ },
+};
+
+module_platform_driver(hi6210_i2s_driver);
+
+MODULE_DESCRIPTION("Hisilicon HI6210 I2S driver");
+MODULE_AUTHOR("Andy Green <andy.green@linaro.org>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/hisilicon/hi6210-i2s.h b/sound/soc/hisilicon/hi6210-i2s.h
new file mode 100644
index 000000000000..d4233778d2fd
--- /dev/null
+++ b/sound/soc/hisilicon/hi6210-i2s.h
@@ -0,0 +1,273 @@
+/*
+ * linux/sound/soc/hisilicon/hi6210-i2s.h
+ *
+ * Copyright (C) 2015 Linaro, Ltd
+ * Author: Andy Green <andy.green@linaro.org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note at least on 6220, S2 == BT, S1 == Digital FM Radio IF
+ */
+
+#ifndef _HI6210_I2S_H
+#define _HI6210_I2S_H
+
+#define HII2S_SW_RST_N 0
+ #define HII2S_SW_RST_N__STEREO_UPLINK_WORDLEN_SHIFT 28
+ #define HII2S_SW_RST_N__STEREO_UPLINK_WORDLEN_MASK 3
+ #define HII2S_SW_RST_N__THIRDMD_UPLINK_WORDLEN_SHIFT 26
+ #define HII2S_SW_RST_N__THIRDMD_UPLINK_WORDLEN_MASK 3
+ #define HII2S_SW_RST_N__VOICE_UPLINK_WORDLEN_SHIFT 24
+ #define HII2S_SW_RST_N__VOICE_UPLINK_WORDLEN_MASK 3
+ #define HII2S_SW_RST_N__ST_DL_WORDLEN_SHIFT 20
+ #define HII2S_SW_RST_N__ST_DL_WORDLEN_MASK 3
+ #define HII2S_SW_RST_N__THIRDMD_DLINK_WORDLEN_SHIFT 18
+ #define HII2S_SW_RST_N__THIRDMD_DLINK_WORDLEN_MASK 3
+ #define HII2S_SW_RST_N__VOICE_DLINK_WORDLEN_SHIFT 16
+ #define HII2S_SW_RST_N__VOICE_DLINK_WORDLEN_MASK 3
+
+ #define HII2S_SW_RST_N__SW_RST_N BIT(0)
+
+enum hi6210_bits {
+ HII2S_BITS_16,
+ HII2S_BITS_18,
+ HII2S_BITS_20,
+ HII2S_BITS_24,
+};
+
+
+#define HII2S_IF_CLK_EN_CFG 4
+
+ #define HII2S_IF_CLK_EN_CFG__THIRDMD_UPLINK_EN BIT(25)
+ #define HII2S_IF_CLK_EN_CFG__THIRDMD_DLINK_EN BIT(24)
+ #define HII2S_IF_CLK_EN_CFG__S3_IF_CLK_EN BIT(20)
+ #define HII2S_IF_CLK_EN_CFG__S2_IF_CLK_EN BIT(16)
+ #define HII2S_IF_CLK_EN_CFG__S2_OL_MIXER_EN BIT(15)
+ #define HII2S_IF_CLK_EN_CFG__S2_OL_SRC_EN BIT(14)
+ #define HII2S_IF_CLK_EN_CFG__S2_IR_PGA_EN BIT(13)
+ #define HII2S_IF_CLK_EN_CFG__S2_IL_PGA_EN BIT(12)
+ #define HII2S_IF_CLK_EN_CFG__S1_IR_PGA_EN BIT(10)
+ #define HII2S_IF_CLK_EN_CFG__S1_IL_PGA_EN BIT(9)
+ #define HII2S_IF_CLK_EN_CFG__S1_IF_CLK_EN BIT(8)
+ #define HII2S_IF_CLK_EN_CFG__VOICE_DLINK_SRC_EN BIT(7)
+ #define HII2S_IF_CLK_EN_CFG__VOICE_DLINK_EN BIT(6)
+ #define HII2S_IF_CLK_EN_CFG__ST_DL_R_EN BIT(5)
+ #define HII2S_IF_CLK_EN_CFG__ST_DL_L_EN BIT(4)
+ #define HII2S_IF_CLK_EN_CFG__VOICE_UPLINK_R_EN BIT(3)
+ #define HII2S_IF_CLK_EN_CFG__VOICE_UPLINK_L_EN BIT(2)
+ #define HII2S_IF_CLK_EN_CFG__STEREO_UPLINK_R_EN BIT(1)
+ #define HII2S_IF_CLK_EN_CFG__STEREO_UPLINK_L_EN BIT(0)
+
+#define HII2S_DIG_FILTER_CLK_EN_CFG 8
+ #define HII2S_DIG_FILTER_CLK_EN_CFG__DACR_SDM_EN BIT(30)
+ #define HII2S_DIG_FILTER_CLK_EN_CFG__DACR_HBF2I_EN BIT(28)
+ #define HII2S_DIG_FILTER_CLK_EN_CFG__DACR_MIXER_EN BIT(25)
+ #define HII2S_DIG_FILTER_CLK_EN_CFG__DACR_AGC_EN BIT(24)
+ #define HII2S_DIG_FILTER_CLK_EN_CFG__DACL_SDM_EN BIT(22)
+ #define HII2S_DIG_FILTER_CLK_EN_CFG__DACL_HBF2I_EN BIT(20)
+ #define HII2S_DIG_FILTER_CLK_EN_CFG__DACL_MIXER_EN BIT(17)
+ #define HII2S_DIG_FILTER_CLK_EN_CFG__DACL_AGC_EN BIT(16)
+
+#define HII2S_FS_CFG 0xc
+ #define HII2S_FS_CFG__FS_S2_SHIFT 28
+ #define HII2S_FS_CFG__FS_S2_MASK 7
+ #define HII2S_FS_CFG__FS_S1_SHIFT 24
+ #define HII2S_FS_CFG__FS_S1_MASK 7
+ #define HII2S_FS_CFG__FS_ADCLR_SHIFT 20
+ #define HII2S_FS_CFG__FS_ADCLR_MASK 7
+ #define HII2S_FS_CFG__FS_DACLR_SHIFT 16
+ #define HII2S_FS_CFG__FS_DACLR_MASK 7
+ #define HII2S_FS_CFG__FS_ST_DL_R_SHIFT 8
+ #define HII2S_FS_CFG__FS_ST_DL_R_MASK 7
+ #define HII2S_FS_CFG__FS_ST_DL_L_SHIFT 4
+ #define HII2S_FS_CFG__FS_ST_DL_L_MASK 7
+ #define HII2S_FS_CFG__FS_VOICE_DLINK_SHIFT 0
+ #define HII2S_FS_CFG__FS_VOICE_DLINK_MASK 7
+
+enum hi6210_i2s_rates {
+ HII2S_FS_RATE_8KHZ = 0,
+ HII2S_FS_RATE_16KHZ = 1,
+ HII2S_FS_RATE_32KHZ = 2,
+ HII2S_FS_RATE_48KHZ = 4,
+ HII2S_FS_RATE_96KHZ = 5,
+ HII2S_FS_RATE_192KHZ = 6,
+};
+
+#define HII2S_I2S_CFG 0x10
+
+ #define HII2S_I2S_CFG__S2_IF_TX_EN BIT(31)
+ #define HII2S_I2S_CFG__S2_IF_RX_EN BIT(30)
+ #define HII2S_I2S_CFG__S2_FRAME_MODE BIT(29)
+ #define HII2S_I2S_CFG__S2_MST_SLV BIT(28)
+ #define HII2S_I2S_CFG__S2_LRCK_MODE BIT(27)
+ #define HII2S_I2S_CFG__S2_CHNNL_MODE BIT(26)
+ #define HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_SHIFT 24
+ #define HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_MASK 3
+ #define HII2S_I2S_CFG__S2_DIRECT_LOOP_SHIFT 22
+ #define HII2S_I2S_CFG__S2_DIRECT_LOOP_MASK 3
+ #define HII2S_I2S_CFG__S2_TX_CLK_SEL BIT(21)
+ #define HII2S_I2S_CFG__S2_RX_CLK_SEL BIT(20)
+ #define HII2S_I2S_CFG__S2_CODEC_DATA_FORMAT BIT(19)
+ #define HII2S_I2S_CFG__S2_FUNC_MODE_SHIFT 16
+ #define HII2S_I2S_CFG__S2_FUNC_MODE_MASK 7
+ #define HII2S_I2S_CFG__S1_IF_TX_EN BIT(15)
+ #define HII2S_I2S_CFG__S1_IF_RX_EN BIT(14)
+ #define HII2S_I2S_CFG__S1_FRAME_MODE BIT(13)
+ #define HII2S_I2S_CFG__S1_MST_SLV BIT(12)
+ #define HII2S_I2S_CFG__S1_LRCK_MODE BIT(11)
+ #define HII2S_I2S_CFG__S1_CHNNL_MODE BIT(10)
+ #define HII2S_I2S_CFG__S1_CODEC_IO_WORDLENGTH_SHIFT 8
+ #define HII2S_I2S_CFG__S1_CODEC_IO_WORDLENGTH_MASK 3
+ #define HII2S_I2S_CFG__S1_DIRECT_LOOP_SHIFT 6
+ #define HII2S_I2S_CFG__S1_DIRECT_LOOP_MASK 3
+ #define HII2S_I2S_CFG__S1_TX_CLK_SEL BIT(5)
+ #define HII2S_I2S_CFG__S1_RX_CLK_SEL BIT(4)
+ #define HII2S_I2S_CFG__S1_CODEC_DATA_FORMAT BIT(3)
+ #define HII2S_I2S_CFG__S1_FUNC_MODE_SHIFT 0
+ #define HII2S_I2S_CFG__S1_FUNC_MODE_MASK 7
+
+enum hi6210_i2s_formats {
+ HII2S_FORMAT_I2S,
+ HII2S_FORMAT_PCM_STD,
+ HII2S_FORMAT_PCM_USER,
+ HII2S_FORMAT_LEFT_JUST,
+ HII2S_FORMAT_RIGHT_JUST,
+};
+
+#define HII2S_DIG_FILTER_MODULE_CFG 0x14
+
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_GAIN_SHIFT 28
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_GAIN_MASK 3
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN4_MUTE BIT(27)
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN3_MUTE BIT(26)
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN2_MUTE BIT(25)
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN1_MUTE BIT(24)
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_GAIN_SHIFT 20
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_GAIN_MASK 3
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN4_MUTE BIT(19)
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN3_MUTE BIT(18)
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN2_MUTE BIT(17)
+ #define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN1_MUTE BIT(16)
+ #define HII2S_DIG_FILTER_MODULE_CFG__SW_DACR_SDM_DITHER BIT(9)
+ #define HII2S_DIG_FILTER_MODULE_CFG__SW_DACL_SDM_DITHER BIT(8)
+ #define HII2S_DIG_FILTER_MODULE_CFG__LM_CODEC_DAC2ADC_SHIFT 4
+ #define HII2S_DIG_FILTER_MODULE_CFG__LM_CODEC_DAC2ADC_MASK 7
+ #define HII2S_DIG_FILTER_MODULE_CFG__RM_CODEC_DAC2ADC_SHIFT 0
+ #define HII2S_DIG_FILTER_MODULE_CFG__RM_CODEC_DAC2ADC_MASK 7
+
+enum hi6210_gains {
+ HII2S_GAIN_100PC,
+ HII2S_GAIN_50PC,
+ HII2S_GAIN_25PC,
+};
+
+#define HII2S_MUX_TOP_MODULE_CFG 0x18
+ #define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_GAIN_SHIFT 14
+ #define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_GAIN_MASK 3
+ #define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_IN2_MUTE BIT(13)
+ #define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_IN1_MUTE BIT(12)
+ #define HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_GAIN_SHIFT 10
+ #define HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_GAIN_MASK 3
+ #define HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_IN2_MUTE BIT(9)
+ #define HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_IN1_MUTE BIT(8)
+ #define HII2S_MUX_TOP_MODULE_CFG__S2_OL_SRC_RDY BIT(6)
+ #define HII2S_MUX_TOP_MODULE_CFG__S2_OL_SRC_MODE_SHIFT 4
+ #define HII2S_MUX_TOP_MODULE_CFG__S2_OL_SRC_MODE_MASK 3
+ #define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_SRC_RDY BIT(3)
+ #define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_SRC_MODE_SHIFT 0
+ #define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_SRC_MODE_MASK 7
+
+enum hi6210_s2_src_mode {
+ HII2S_S2_SRC_MODE_3,
+ HII2S_S2_SRC_MODE_12,
+ HII2S_S2_SRC_MODE_6,
+ HII2S_S2_SRC_MODE_2,
+};
+
+enum hi6210_voice_dlink_src_mode {
+ HII2S_VOICE_DL_SRC_MODE_12 = 1,
+ HII2S_VOICE_DL_SRC_MODE_6,
+ HII2S_VOICE_DL_SRC_MODE_2,
+ HII2S_VOICE_DL_SRC_MODE_3,
+};
+
+#define HII2S_ADC_PGA_CFG 0x1c
+#define HII2S_S1_INPUT_PGA_CFG 0x20
+#define HII2S_S2_INPUT_PGA_CFG 0x24
+#define HII2S_ST_DL_PGA_CFG 0x28
+#define HII2S_VOICE_SIDETONE_DLINK_PGA_CFG 0x2c
+#define HII2S_APB_AFIFO_CFG_1 0x30
+#define HII2S_APB_AFIFO_CFG_2 0x34
+#define HII2S_ST_DL_FIFO_TH_CFG 0x38
+
+ #define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_SHIFT 24
+ #define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_MASK 0x1f
+ #define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_SHIFT 16
+ #define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_MASK 0x1f
+ #define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_SHIFT 8
+ #define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_MASK 0x1f
+ #define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_SHIFT 0
+ #define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_MASK 0x1f
+
+#define HII2S_STEREO_UPLINK_FIFO_TH_CFG 0x3c
+#define HII2S_VOICE_UPLINK_FIFO_TH_CFG 0x40
+#define HII2S_CODEC_IRQ_MASK 0x44
+#define HII2S_CODEC_IRQ 0x48
+#define HII2S_DACL_AGC_CFG_1 0x4c
+#define HII2S_DACL_AGC_CFG_2 0x50
+#define HII2S_DACR_AGC_CFG_1 0x54
+#define HII2S_DACR_AGC_CFG_2 0x58
+#define HII2S_DMIC_SIF_CFG 0x5c
+#define HII2S_MISC_CFG 0x60
+
+ #define HII2S_MISC_CFG__THIRDMD_DLINK_TEST_SEL BIT(17)
+ #define HII2S_MISC_CFG__THIRDMD_DLINK_DIN_SEL BIT(16)
+ #define HII2S_MISC_CFG__S3_DOUT_RIGHT_SEL BIT(14)
+ #define HII2S_MISC_CFG__S3_DOUT_LEFT_SEL BIT(13)
+ #define HII2S_MISC_CFG__S3_DIN_TEST_SEL BIT(12)
+ #define HII2S_MISC_CFG__VOICE_DLINK_SRC_UP_DOUT_VLD_SEL BIT(8)
+ #define HII2S_MISC_CFG__VOICE_DLINK_TEST_SEL BIT(7)
+ #define HII2S_MISC_CFG__VOICE_DLINK_DIN_SEL BIT(6)
+ #define HII2S_MISC_CFG__ST_DL_TEST_SEL BIT(4)
+ #define HII2S_MISC_CFG__S2_DOUT_RIGHT_SEL BIT(3)
+ #define HII2S_MISC_CFG__S2_DOUT_TEST_SEL BIT(2)
+ #define HII2S_MISC_CFG__S1_DOUT_TEST_SEL BIT(1)
+ #define HII2S_MISC_CFG__S2_DOUT_LEFT_SEL BIT(0)
+
+#define HII2S_S2_SRC_CFG 0x64
+#define HII2S_MEM_CFG 0x68
+#define HII2S_THIRDMD_PCM_PGA_CFG 0x6c
+#define HII2S_THIRD_MODEM_FIFO_TH 0x70
+#define HII2S_S3_ANTI_FREQ_JITTER_TX_INC_CNT 0x74
+#define HII2S_S3_ANTI_FREQ_JITTER_TX_DEC_CNT 0x78
+#define HII2S_S3_ANTI_FREQ_JITTER_RX_INC_CNT 0x7c
+#define HII2S_S3_ANTI_FREQ_JITTER_RX_DEC_CNT 0x80
+#define HII2S_ANTI_FREQ_JITTER_EN 0x84
+#define HII2S_CLK_SEL 0x88
+
+ /* 0 = BT owns the i2s */
+ #define HII2S_CLK_SEL__I2S_BT_FM_SEL BIT(0)
+ /* 0 = internal source, 1 = ext */
+ #define HII2S_CLK_SEL__EXT_12_288MHZ_SEL BIT(1)
+
+
+#define HII2S_THIRDMD_DLINK_CHANNEL 0xe8
+#define HII2S_THIRDMD_ULINK_CHANNEL 0xec
+#define HII2S_VOICE_DLINK_CHANNEL 0xf0
+
+/* shovel data in here for playback */
+#define HII2S_ST_DL_CHANNEL 0xf4
+#define HII2S_STEREO_UPLINK_CHANNEL 0xf8
+#define HII2S_VOICE_UPLINK_CHANNEL 0xfc
+
+#endif /* _HI6210_I2S_H */