author    Guillaume Tucker <guillaume.tucker@arm.com>  2015-08-03 10:18:15 +0100
committer Jon Medhurst <tixy@linaro.org>               2015-09-02 16:38:15 +0100
commit    c72936f406fe79ca529be1ae9748c68b77b90dac (patch)
tree      9fb0a9ce121efe885f73d19b1c92d0d88e5e1e8e
parent    9dbcbf30a5a0b499a277f7f0a4c318f71ceb8672 (diff)

mali: add TX011-BU-00001-r6p0-02rel0

Import all the files from TX011-BU-00001-r6p0-02rel0.

Signed-off-by: Guillaume Tucker <guillaume.tucker@arm.com>
-rw-r--r-- Documentation/devicetree/bindings/arm/mali-midgard.txt | 60
-rw-r--r-- Documentation/dma-buf-test-exporter.txt | 40
-rw-r--r-- Documentation/kds.txt | 268
-rw-r--r-- drivers/base/dma_buf_lock/src/Kbuild | 18
-rw-r--r-- drivers/base/dma_buf_lock/src/Makefile | 32
-rw-r--r-- drivers/base/dma_buf_lock/src/dma_buf_lock.c | 481
-rw-r--r-- drivers/base/dma_buf_lock/src/dma_buf_lock.h | 42
-rw-r--r-- drivers/base/dma_buf_test_exporter/Kbuild | 18
-rw-r--r-- drivers/base/dma_buf_test_exporter/Kconfig | 20
-rw-r--r-- drivers/base/dma_buf_test_exporter/Makefile | 30
-rw-r--r-- drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c | 635
-rw-r--r-- drivers/base/kds/Kbuild | 18
-rw-r--r-- drivers/base/kds/Kconfig | 20
-rw-r--r-- drivers/base/kds/Makefile | 37
-rw-r--r-- drivers/base/kds/kds.c | 559
-rw-r--r-- drivers/base/ump/Kbuild | 18
-rw-r--r-- drivers/base/ump/Kconfig | 26
-rw-r--r-- drivers/base/ump/docs/Doxyfile | 125
-rw-r--r-- drivers/base/ump/example_kernel_api.c | 73
-rw-r--r-- drivers/base/ump/example_user_api.c | 153
-rw-r--r-- drivers/base/ump/src/Kbuild | 50
-rw-r--r-- drivers/base/ump/src/Makefile | 81
-rw-r--r-- drivers/base/ump/src/Makefile.common | 19
-rw-r--r-- drivers/base/ump/src/arch-arm/config.h | 27
-rw-r--r-- drivers/base/ump/src/arch-arm64/config.h | 27
-rw-r--r-- drivers/base/ump/src/common/ump_kernel_core.c | 756
-rw-r--r-- drivers/base/ump/src/common/ump_kernel_core.h | 228
-rw-r--r-- drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c | 162
-rw-r--r-- drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h | 94
-rw-r--r-- drivers/base/ump/src/common/ump_kernel_priv.h | 80
-rw-r--r-- drivers/base/ump/src/imports/ion/Makefile | 53
-rw-r--r-- drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c | 204
-rw-r--r-- drivers/base/ump/src/linux/ump_kernel_linux.c | 831
-rw-r--r-- drivers/base/ump/src/linux/ump_kernel_linux_mem.c | 250
-rw-r--r-- drivers/base/ump/src/linux/ump_kernel_linux_mem.h | 26
-rw-r--r-- drivers/base/ump/src/ump_arch.h | 42
-rw-r--r-- drivers/base/ump/ump_ref_drv.h | 33
-rw-r--r-- drivers/gpu/arm/Kbuild | 17
-rw-r--r-- drivers/gpu/arm/Kconfig | 19
-rw-r--r-- drivers/gpu/arm/midgard/Kbuild | 241
-rw-r--r-- drivers/gpu/arm/midgard/Kconfig | 209
-rw-r--r-- drivers/gpu/arm/midgard/Makefile | 37
-rw-r--r-- drivers/gpu/arm/midgard/Makefile.kbase | 17
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/Kbuild | 64
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h | 29
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c | 22
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h | 26
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c | 284
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h | 24
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c | 116
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h | 67
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c | 124
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c | 85
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c | 536
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h | 62
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h | 45
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h | 39
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c | 471
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c | 385
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h | 112
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c | 1477
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h | 185
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c | 1407
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h | 83
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c | 296
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h | 140
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c | 316
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h | 44
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c | 295
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h | 59
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c | 63
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h | 77
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c | 375
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c | 179
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h | 92
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c | 65
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h | 40
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c | 69
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h | 63
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h | 555
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c | 72
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h | 60
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c | 1264
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h | 557
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c | 532
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c | 39
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c | 895
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h | 227
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c | 107
-rw-r--r-- drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h | 57
-rw-r--r-- drivers/gpu/arm/midgard/docs/Doxyfile | 126
-rw-r--r-- drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot | 112
-rw-r--r-- drivers/gpu/arm/midgard/docs/policy_overview.dot | 63
-rw-r--r-- drivers/gpu/arm/midgard/mali_base_hwconfig_features.h | 163
-rw-r--r-- drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h | 786
-rw-r--r-- drivers/gpu/arm/midgard/mali_base_kernel.h | 1613
-rw-r--r-- drivers/gpu/arm/midgard/mali_base_kernel_sync.h | 47
-rw-r--r-- drivers/gpu/arm/midgard/mali_base_mem_priv.h | 52
-rw-r--r-- drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h | 24
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase.h | 499
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c | 209
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h | 23
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_cache_policy.c | 61
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_cache_policy.h | 45
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_config.c | 51
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_config.h | 327
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_config_defaults.h | 259
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_context.c | 274
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_core_linux.c | 3930
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_cpuprops.c | 125
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_cpuprops.h | 53
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_debug.c | 39
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_debug.h | 164
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c | 251
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h | 25
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_defs.h | 1126
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_device.c | 659
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c | 76
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_event.c | 195
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gator.h | 45
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gator_api.c | 322
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gator_api.h | 219
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h | 2159
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c | 98
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h | 38
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gpuprops.c | 296
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gpuprops.h | 54
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h | 91
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hw.c | 210
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hw.h | 52
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h | 54
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h | 37
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h | 35
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h | 116
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h | 283
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h | 206
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h | 53
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_instr.c | 157
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_instr.h | 85
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_jd.c | 1726
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c | 114
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h | 37
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_jm.c | 132
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_jm.h | 106
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_js.c | 3177
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_js.h | 1054
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c | 310
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h | 158
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_js_defs.h | 517
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_js_policy.h | 763
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c | 297
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h | 81
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_linux.h | 43
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem.c | 1363
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem.h | 774
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c | 292
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h | 32
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c | 402
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_linux.c | 1909
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_linux.h | 73
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h | 45
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c | 105
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h | 58
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h | 33
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mmu.c | 1653
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h | 121
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h | 44
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c | 188
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_platform_fake.c | 126
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_pm.c | 204
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_pm.h | 171
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h | 40
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_replay.c | 1134
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_security.c | 76
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_security.h | 52
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_smc.c | 77
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_smc.h | 58
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_softjobs.c | 442
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_sync.c | 182
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_sync.h | 91
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_sync_user.c | 157
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_tlstream.c | 1695
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_tlstream.h | 284
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_trace_defs.h | 264
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c | 232
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h | 356
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h | 134
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_uku.h | 561
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_utility.c | 33
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_utility.h | 37
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_vinstr.c | 485
-rw-r--r-- drivers/gpu/arm/midgard/mali_kbase_vinstr.h | 102
-rw-r--r-- drivers/gpu/arm/midgard/mali_linux_kbase_trace.h | 201
-rw-r--r-- drivers/gpu/arm/midgard/mali_linux_trace.h | 211
-rw-r--r-- drivers/gpu/arm/midgard/mali_malisw.h | 131
-rw-r--r-- drivers/gpu/arm/midgard/mali_midg_regmap.h | 542
-rw-r--r-- drivers/gpu/arm/midgard/mali_timeline.h | 396
-rw-r--r-- drivers/gpu/arm/midgard/mali_uk.h | 141
-rw-r--r-- drivers/gpu/arm/midgard/platform/Kbuild | 21
-rw-r--r-- drivers/gpu/arm/midgard/platform/Kconfig | 24
-rw-r--r-- drivers/gpu/arm/midgard/platform/devicetree/Kbuild | 16
-rw-r--r-- drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c | 23
-rw-r--r-- drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h | 86
-rw-r--r-- drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c | 107
-rw-r--r-- drivers/gpu/arm/midgard/platform/juno_soc/Kbuild | 19
-rw-r--r-- drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c | 84
-rw-r--r-- drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c | 172
-rw-r--r-- drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h | 94
-rw-r--r-- drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h | 26
-rw-r--r-- drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h | 38
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress/Kbuild | 18
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h | 97
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c | 85
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c | 210
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h | 48
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild | 16
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h | 86
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c | 79
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild | 18
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h | 88
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c | 83
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c | 71
-rw-r--r-- drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h | 28
-rw-r--r-- drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h | 53
-rw-r--r-- drivers/gpu/drm/pl111/Kbuild | 28
-rw-r--r-- drivers/gpu/drm/pl111/Kconfig | 23
-rw-r--r-- drivers/gpu/drm/pl111/Makefile | 32
-rw-r--r-- drivers/gpu/drm/pl111/pl111_clcd_ext.h | 95
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm.h | 270
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_connector.c | 170
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_crtc.c | 449
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_cursor.c | 331
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_device.c | 336
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_dma_buf.c | 625
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_encoder.c | 107
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_fb.c | 202
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_funcs.h | 130
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_gem.c | 476
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_pl111.c | 417
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_platform.c | 151
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_suspend.c | 43
-rw-r--r-- drivers/gpu/drm/pl111/pl111_drm_vma.c | 308
-rw-r--r-- include/linux/dma-buf-test-exporter.h | 78
-rw-r--r-- include/linux/kds.h | 173
-rw-r--r-- include/linux/ump-common.h | 252
-rw-r--r-- include/linux/ump-import.h | 99
-rw-r--r-- include/linux/ump-ioctl.h | 152
-rw-r--r-- include/linux/ump.h | 481
248 files changed, 65684 insertions, 0 deletions
diff --git a/Documentation/devicetree/bindings/arm/mali-midgard.txt b/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100644
index 00000000000..ff71e1582ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,60 @@
+#
+# (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be "arm,mali<chip>", with digits replaced by "x" from the
+back until "arm,malit<Major>xx", ending with "arm,mali-midgard"; the latter is not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices.
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB", "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- snoop_enable_smc : SMC function ID to enable CCI snooping on the GPU port(s).
+- snoop_disable_smc : SMC function ID to disable CCI snooping on the GPU port(s).
+
+Example for a Mali-T602:
+
+gpu@fc010000 {
+ compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+ reg = <0xfc010000 0x4000>;
+ interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+ interrupt-names = "JOB", "MMU", "GPU";
+
+ clocks = <&pclk_mali>;
+ clock-names = "clk_mali";
+ mali-supply = <&vdd_mali>;
+	operating-points = <
+		/* kHz    uV */
+		533000 1250000
+		450000 1150000
+		400000 1125000
+		350000 1075000
+		266000 1025000
+		160000  925000
+		100000  912500
+	>;
+};
diff --git a/Documentation/dma-buf-test-exporter.txt b/Documentation/dma-buf-test-exporter.txt
new file mode 100644
index 00000000000..4208010cc78
--- /dev/null
+++ b/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
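+
+A minimal sketch of the user-space flow (illustrative only: the device node
+name and the ioctl command macro below are assumptions, not the real
+definitions; see include/linux/dma-buf-test-exporter.h for the actual
+interface):
+
+ #include <linux/dma-buf-test-exporter.h>
+
+ int te = open("/dev/dma_buf_te", O_RDWR);              /* assumed node name */
+ struct dma_buf_te_ioctl_alloc req = { .size = 4 };     /* size is in pages */
+ int buf_fd = ioctl(te, DMA_BUF_TE_IOCTL_ALLOC, &req);  /* assumed macro name */
+ /* buf_fd is an ordinary dma_buf fd which supports attach, map and mmap */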
+
+
diff --git a/Documentation/kds.txt b/Documentation/kds.txt
new file mode 100644
index 00000000000..639288c106b
--- /dev/null
+++ b/Documentation/kds.txt
@@ -0,0 +1,268 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+==============================
+kds - Kernel Dependency System
+==============================
+
+Introduction
+------------
+kds provides a mechanism for clients to atomically lock down multiple abstract resources.
+This can be done either synchronously or asynchronously.
+Abstract resources are used so that a set of clients can use kds to control access to any
+kind of resource; an example is structured memory buffers.
+
+kds supports both exclusive locking of a buffer and shared access to buffers.
+
+kds can be built either as an integrated feature of the kernel or as a module.
+It supports being compiled as a module both in-tree and out-of-tree.
+
+
+Concepts
+--------
+A core concept in kds is abstract resources.
+A kds resource is just an abstraction for some client object; kds doesn't care what it is.
+Typically EGL will consider UMP buffers as being resources, so each UMP buffer has
+a kds resource used for synchronizing access to the buffer.
+
+kds allows a client to create and destroy the abstract resource objects.
+A new resource object is made available immediately (it is just a simple malloc with some initialization),
+while destroying one requires some external synchronization.
+
+The other core concept in kds is the consumer of resources.
+A client asks kds to let it consume a set of resources, and the client is notified when it can consume them.
+
+Exclusive access allows only one client to consume a resource.
+Shared access permits multiple consumers to access a resource concurrently.
+
+
+APIs
+----
+kds provides simple resource allocate and destroy functions.
+Clients use these to instantiate and control the lifetime of the resources kds manages.
+
+kds provides two ways to wait for resources:
+- Asynchronous wait: the client specifies a function pointer to be called when the wait is over.
+- Synchronous wait: the function blocks until access is gained.
+
+The synchronous API has a timeout for the wait.
+The call can return early if a signal is delivered.
+
+After a client is done consuming the resources, kds must be notified so it can release the resources and let some other client take ownership.
+This is done via the resource set release call.
+
+A Windows comparison:
+kds implements WaitForMultipleObjectsEx(..., bWaitAll = TRUE, ...) and additionally provides an asynchronous version.
+kds resources can be seen as equivalent to NT object manager resources.
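+
+A minimal sketch of the asynchronous flow, modelled on the calls made by the
+dma_buf_lock driver in this patch (error handling omitted; the meaning of the
+second kds_callback_init argument is inferred from that driver's usage):
+
+ struct kds_resource_set *rset;
+ struct kds_callback cb;
+ unsigned long exclusive = 0;                /* bitmap: all resources shared */
+
+ kds_callback_init( &cb, 1, my_callback );   /* my_callback( param1, param2 ) */
+ kds_async_waitall( &rset, &cb, param1, param2, count, &exclusive, resources );
+ /* ... my_callback runs once all the resources can be consumed ... */
+ kds_resource_set_release( &rset );          /* done: hand over ownership */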
+
+Internals
+---------
+kds guarantees atomicity when a set of resources is operated on.
+This is implemented via a global resource lock which is taken by kds when it updates resource objects.
+
+Internally a resource in kds is a linked list head with some flags.
+
+When a consumer requests access to a set of resources it is queued on each of the resources.
+The link from the consumer to each resource can then be triggered. Once all links are triggered
+the registered callback is called or the blocking function returns.
+A link is considered triggered if it is the first on the list of consumers of a resource,
+or if all the links ahead of it are marked as shared and it is itself of the shared type.
+
+When the client is done consuming, the consumer object is removed from the linked lists of
+the resources and a potential new consumer becomes the head of the resources.
+As we add and remove consumers atomically across all resources we can guarantee that
+we never introduce A->B + B->A loops/deadlocks.
+
+
+kbase/base implementation
+-------------------------
+A HW job needs access to a set of shared resources.
+EGL tracks this and encodes the set along with the atom in the ringbuffer.
+EGL allocates a (k)base dep object to represent the dependency to the set of resources and encodes that along with the list of resources.
+This dep object is used to create a dependency from a job chain (atom) to the resources it needs to run.
+When kbase decodes the atom in the ringbuffer it finds the set of resources and calls kds to request all the needed resources.
+As EGL needs to know when the kds request is delivered, a new base event object is needed: atom enqueued. This event is only delivered for atoms which use kds.
+The callback kbase registers triggers the dependency object described above, which in turn triggers the existing JD system to release the job chain.
+When the atom is done, kds resource set release is called to release the resources.
+
+EGL will typically use exclusive access to the render target, while all buffers used as input can be marked as shared.
+
+
+Buffer publish/vsync
+--------------------
+EGL will use a separate ioctl or DRM flip to request the flip.
+If the LCD driver is integrated with kds EGL can do these operations early.
+The LCD driver must then implement the ioctl or DRM flip asynchronously, using the kds async call.
+The LCD driver binds a kds resource to each virtual buffer (2 buffers in case of double-buffering).
+EGL will make a dependency to the target kds resource in the kbase atom.
+After EGL receives an atom enqueued event it can ask the LCD driver to pan to the target kds resource.
+When the atom is completed it'll release the resource and the LCD driver will get its callback.
+In the callback it'll load the target buffer into the DMA unit of the LCD hardware.
+The LCD driver will be the consumer of both buffers for a short period.
+The LCD driver will call kds resource set release on the previous on-screen buffer when the next vsync/dma read end is handled.
+
+===============================================
+Kernel driver kds client design considerations
+===============================================
+
+Number of resources
+--------------------
+
+The kds api allows a client to wait for ownership of a number of resources, whereby the client does not take ownership of any of the resources in the resource set
+until all of the resources in the set are released. Consideration must be made with respect to performance, as waiting on a large number of resources will incur
+greater overhead and may increase system latency. It may be worth considering how independent the resources are; for example, if the same set of resources
+is waited upon by each of the clients, then it may be possible to aggregate these into one resource that each client waits upon.
+
+Clients with shared access
+---------------------------
+
+The kds api allows a number of clients to gain shared access to a resource simultaneously. Consideration must be made with respect to performance: large numbers of clients
+wanting shared access can incur a performance penalty and may increase system latency, specifically when the clients are granted access. Having an excessively high
+number of clients with shared access should be avoided, and consideration should be given to the callback configuration being used. See Callbacks and Scenario 1 below.
+
+Callbacks
+----------
+
+Careful consideration must be made as to which callback type is most appropriate for kds clients. Direct callbacks are called immediately from the context in which
+ownership of the resource is passed to the next waiter in the list. With deferred callbacks, the callback is deferred and called from outside the context
+that is relinquishing ownership; while this reduces the latency in the releasing client's context, it does incur a cost, as there is more latency between a resource
+becoming free and the callback of the new owning client being executed.
+
+Obviously direct callbacks have a performance advantage, as the callback is immediate and does not have to wait for the kernel to context switch and schedule
+the execution of the callback.
+
+However, as the callback is immediate and runs within the context that is granting ownership, it is important that the callback performs the MINIMUM amount of work
+necessary; long callbacks could cause poor system latency. Special care and attention must be taken if direct callbacks can be called from IRQ handlers, such as when
+kds_resource_set_release is called from an IRQ handler; in this case you have to avoid any calls that may sleep.
+
+Deferred callbacks have the advantage that they are deferred until scheduled by the kernel; they are therefore allowed to call functions that may sleep,
+and, if triggered from IRQ context, they do not incur as much system latency as would be seen with direct callbacks from within the IRQ.
+
+Once the client's callback has been called, the client is considered to own the resource. Within the callback the client may only need to perform a small amount of work
+before it needs to give up ownership. The kds_resource_set_release function may be called from within the callback, but consideration must be made when using direct
+callbacks, with respect to both execution time and stack usage. Consider the example in Scenario 2 with direct callbacks:
+
+Scenario 1 - Shared client access - direct callbacks:
+
+Resources: X
+Clients: A(S), B(S), C(S), D(S), E(E)
+where: (S) = shared user, (E) = exclusive
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+}
+
+Where <client> is either A,B,C,D
+ Queue |Owner
+1. E Requests X exclusive |
+2. E Owns X exclusive |E
+3. A Requests X shared A|E
+4. B Requests X shared BA|E
+5. C Requests X shared CBA|E
+6. D Requests X shared DCBA|E
+7. E Releases X |DCBA
+8. A Owns X shared |DCBA
+9. B Owns X shared |DCBA
+10. C Owns X shared |DCBA
+11. D Owns X shared |DCBA
+
+At point 7 it is important to note that when E releases X, clients A, B, C and D become the new shared owners of X and the callback for each of the clients (A, B, C, D)
+is triggered, so consideration must be made as to whether a direct or deferred callback is suitable; using direct callbacks would result in the call graph below.
+
+Call graph when E releases X:
+ kds_resource_set_release( .. )
+ +->client_A_cb( .. )
+ +->client_B_cb( .. )
+ +->client_C_cb( .. )
+ +->client_D_cb( .. )
+
+
+Scenario 2 - Immediate resource release - direct callbacks:
+
+Resource: X
+Clients: A, B, C, D
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+ kds_resource_set_release( .. );
+}
+
+Where <client> is either A,B,C,D
+
+1. A Owns X exclusive
+2. B Requests X exclusive (direct callback)
+3. C Requests X exclusive (direct callback)
+4. D Requests X exclusive (direct callback)
+5. A Releases X
+
+Call graph when A releases X:
+ kds_resource_set_release( .. )
+ +->client_B_cb( .. )
+ +->kds_resource_set_release( .. )
+ +->client_C_cb( .. )
+ +->kds_resource_set_release( .. )
+ +->client_D_cb( .. )
+
+As can be seen, when a client releases the resource with direct callbacks it is possible to create nested calls.
+
+IRQ Considerations
+-------------------
+
+Usage of kds_resource_set_release in IRQ handlers should be carefully considered.
+
+Things to keep in mind:
+
+1.) Are you using direct or deferred callbacks?
+2.) How many resources are you releasing?
+3.) How many shared waiters are pending on the resource?
+
+Releasing ownership and wait cancellation
+------------------------------------------
+
+Client Wait Cancellation
+-------------------------
+
+It may be necessary in certain circumstances for the client to cancel the wait for kds resources, for error handling, process termination etc. Cancellation is
+performed using kds_resource_set_release or kds_resource_set_release_sync, using the rset that was received from kds_async_waitall; kds_resource_set_release_sync
+is used for waits which are using deferred callbacks.
+
+It is possible that while the request to cancel the wait is being issued by the client, the client is granted access to the resources. Normally, after the client
+has taken ownership of a resource and finished with it, it will release ownership to signal other pending waiters; this races with the cancellation.
+To prevent KDS from trying to remove a wait twice from the internal list and accessing memory that is potentially freed, it is very important that all releasers
+use the same rset pointer. Here is a simplified example of bad usage that must be avoided in any client implementation:
+
+Scenario 3 - Bad release from multiple contexts:
+
+ This scenario highlights bad usage of the kds API.
+
+ kds_resource_set * rset;
+ kds_resource_set * rset_copy;
+
+ kds_async_waitall( &rset, ... ... ... );
+
+ /* Don't do this */
+ rset_copy = rset;
+
+Context A:
+ kds_resource_set_release( &rset )
+
+Context B:
+ kds_resource_set_release( &rset_copy )
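+
+A safe pattern, by contrast, keeps a single owner of the rset pointer and
+releases through it exactly once, for example:
+
+Context A (sole releaser):
+ kds_resource_set_release( &rset )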
+
diff --git a/drivers/base/dma_buf_lock/src/Kbuild b/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100644
index 00000000000..68dad07ad60
--- /dev/null
+++ b/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/drivers/base/dma_buf_lock/src/Makefile b/drivers/base/dma_buf_lock/src/Makefile
new file mode 100644
index 00000000000..cf76132e6dd
--- /dev/null
+++ b/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100644
index 00000000000..9613ffcfd69
--- /dev/null
+++ b/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/dma-buf.h>
+#include <linux/kds.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+ .owner = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+ .unlocked_ioctl = dma_buf_lock_ioctl,
+#else
+ .ioctl = dma_buf_lock_ioctl,
+#endif
+ .compat_ioctl = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+ int *list_of_dma_buf_fds; /* List of buffers copied from userspace */
+ atomic_t locked; /* Status of lock */
+ struct dma_buf **dma_bufs;
+ struct kds_resource **kds_resources; /* List of KDS resources associated with buffers */
+ struct kds_resource_set *resource_set;
+ unsigned long exclusive; /* Exclusive access bitmap */
+ wait_queue_head_t wait;
+ struct kds_callback cb;
+ struct kref refcount;
+ struct list_head link;
+ int count;
+} dma_buf_lock_resource;
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+ dma_buf_lock_resource *resource;
+
+ if (!is_dma_buf_lock_file(file))
+ return -EINVAL;
+
+ resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_handle_release\n");
+#endif
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+
+ return 0;
+}
+
+static void dma_buf_lock_kds_callback(void *param1, void *param2)
+{
+ dma_buf_lock_resource *resource = param1;
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_kds_callback\n");
+#endif
+ atomic_set(&resource->locked, 1);
+
+ wake_up(&resource->wait);
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+ struct poll_table_struct *wait)
+{
+ dma_buf_lock_resource *resource;
+ unsigned int ret = 0;
+
+ if (!is_dma_buf_lock_file(file))
+ return POLLERR;
+
+ resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_handle_poll\n");
+#endif
+ if (1 == atomic_read(&resource->locked))
+ {
+ /* Resources have been locked */
+ ret = POLLIN | POLLRDNORM;
+ if (resource->exclusive)
+ {
+ ret |= POLLOUT | POLLWRNORM;
+ }
+ }
+ else
+ {
+ if (!poll_does_not_wait(wait))
+ {
+ poll_wait(file, &resource->wait, wait);
+ }
+ }
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+ return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+ .release = dma_buf_lock_handle_release,
+ .poll = dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+ return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related KDS resources, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+ dma_buf_lock_resource *resource;
+ int size;
+ int fd;
+ int i;
+ int ret;
+
+ if (NULL == request->list_of_dma_buf_fds)
+ {
+ return -EINVAL;
+ }
+ if (request->count <= 0)
+ {
+ return -EINVAL;
+ }
+ if (request->count > DMA_BUF_LOCK_BUF_MAX)
+ {
+ return -EINVAL;
+ }
+ if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+ request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+ {
+ return -EINVAL;
+ }
+
+ resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+ if (NULL == resource)
+ {
+ return -ENOMEM;
+ }
+
+ atomic_set(&resource->locked, 0);
+ kref_init(&resource->refcount);
+ INIT_LIST_HEAD(&resource->link);
+ resource->count = request->count;
+
+ /* Allocate space to store dma_buf_fds received from user space */
+ size = request->count * sizeof(int);
+ resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+ if (NULL == resource->list_of_dma_buf_fds)
+ {
+ kfree(resource);
+ return -ENOMEM;
+ }
+
+ /* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+ size = sizeof(struct dma_buf *) * request->count;
+ resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+ if (NULL == resource->dma_bufs)
+ {
+ kfree(resource->list_of_dma_buf_fds);
+ kfree(resource);
+ return -ENOMEM;
+ }
+ /* Allocate space to store kds_resources associated with dma_buf_fds */
+ size = sizeof(struct kds_resource *) * request->count;
+ resource->kds_resources = kmalloc(size, GFP_KERNEL);
+
+ if (NULL == resource->kds_resources)
+ {
+ kfree(resource->dma_bufs);
+ kfree(resource->list_of_dma_buf_fds);
+ kfree(resource);
+ return -ENOMEM;
+ }
+
+ /* Copy requested list of dma_buf_fds from user space */
+ size = request->count * sizeof(int);
+ if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+ {
+ kfree(resource->list_of_dma_buf_fds);
+ kfree(resource->dma_bufs);
+ kfree(resource->kds_resources);
+ kfree(resource);
+		return -EFAULT;
+ }
+#if DMA_BUF_LOCK_DEBUG
+ for (i = 0; i < request->count; i++)
+ {
+ printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+ }
+#endif
+
+ /* Add resource to global list */
+ mutex_lock(&dma_buf_lock_mutex);
+
+ list_add(&resource->link, &dma_buf_lock_resource_list);
+
+ mutex_unlock(&dma_buf_lock_mutex);
+
+ for (i = 0; i < request->count; i++)
+ {
+ /* Convert fd into dma_buf structure */
+ resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR(resource->dma_bufs[i]))
+ {
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+ return -EINVAL;
+ }
+
+		/* Get the kds_resource associated with the dma_buf */
+ resource->kds_resources[i] = get_dma_buf_kds_resource(resource->dma_bufs[i]);
+
+ if (NULL == resource->kds_resources[i])
+ {
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+ return -EINVAL;
+ }
+#if DMA_BUF_LOCK_DEBUG
+		printk("dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p kds_resource %p\n",
+		       resource->list_of_dma_buf_fds[i], resource->dma_bufs[i],
+		       resource->kds_resources[i]);
+#endif
+ }
+
+ kds_callback_init(&resource->cb, 1, dma_buf_lock_kds_callback);
+ init_waitqueue_head(&resource->wait);
+
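+	/* Extra reference for the fd created below; it is dropped by
+	 * dma_buf_lock_handle_release() when the fd is closed */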
+ kref_get(&resource->refcount);
+
+ /* Create file descriptor associated with lock request */
+ fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
+ (void *)resource, 0);
+ if (fd < 0)
+ {
+ mutex_lock(&dma_buf_lock_mutex);
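+		/* Drop both the initial reference and the one taken above for the failed fd */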
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+ return fd;
+ }
+
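+	/* request->exclusive is 0 or -1 (all bits set), so it acts directly as
+	 * the per-buffer exclusive-access bitmap passed to kds below */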
+ resource->exclusive = request->exclusive;
+
+ /* Start locking process */
+ ret = kds_async_waitall(&resource->resource_set,
+ &resource->cb, resource, NULL,
+ request->count, &resource->exclusive,
+ resource->kds_resources);
+
+ if (IS_ERR_VALUE(ret))
+ {
+ put_unused_fd(fd);
+
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+
+ return ret;
+ }
+
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_dolock : complete\n");
+#endif
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+
+ return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+ int i;
+ dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+ atomic_set(&resource->locked, 0);
+
+ kds_callback_term(&resource->cb);
+
+ kds_resource_set_release(&resource->resource_set);
+
+ list_del(&resource->link);
+
+ for (i = 0; i < resource->count; i++)
+ {
+ dma_buf_put(resource->dma_bufs[i]);
+ }
+
+ kfree(resource->kds_resources);
+ kfree(resource->dma_bufs);
+ kfree(resource->list_of_dma_buf_fds);
+ kfree(resource);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+ int err;
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_init\n");
+#endif
+ err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+ if (0 == err)
+ {
+ cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+ err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+ if (0 == err)
+ {
+ dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+ if (IS_ERR(dma_buf_lock_class))
+ {
+ err = PTR_ERR(dma_buf_lock_class);
+ }
+ else
+ {
+ struct device *mdev;
+ mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+ if (!IS_ERR(mdev))
+ {
+ return 0;
+ }
+
+ err = PTR_ERR(mdev);
+ class_destroy(dma_buf_lock_class);
+ }
+ cdev_del(&dma_buf_lock_cdev);
+ }
+
+ unregister_chrdev_region(dma_buf_lock_dev, 1);
+ }
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_init failed\n");
+#endif
+ return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_exit\n");
+#endif
+
+ /* Unlock all outstanding references */
+ while (1)
+ {
+ mutex_lock(&dma_buf_lock_mutex);
+ if (list_empty(&dma_buf_lock_resource_list))
+ {
+ mutex_unlock(&dma_buf_lock_mutex);
+ break;
+ }
+ else
+ {
+ dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next,
+ dma_buf_lock_resource, link);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+ }
+ }
+
+ device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+ class_destroy(dma_buf_lock_class);
+
+ cdev_del(&dma_buf_lock_cdev);
+
+ unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+ dma_buf_lock_k_request request;
+ int size = _IOC_SIZE(cmd);
+
+ if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+ {
+ return -ENOTTY;
+
+ }
+ if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+ {
+ return -ENOTTY;
+ }
+
+ switch (cmd)
+ {
+ case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+ if (size != sizeof(dma_buf_lock_k_request))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&request, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+#if DMA_BUF_LOCK_DEBUG
+ printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+ return dma_buf_lock_dolock(&request);
+ }
+
+ return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100644
index 00000000000..e1b9348b9cf
--- /dev/null
+++ b/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+ DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+ DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+ int count;
+ int *list_of_dma_buf_fds;
+ int timeout;
+ dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
+
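+/*
+ * Usage sketch (user space), illustrative only; the device node name is an
+ * assumption based on the "dma_buf_lock" device name registered by the driver:
+ *
+ *   int dev = open("/dev/dma_buf_lock", O_RDWR);
+ *   int fds[2] = { buf_a_fd, buf_b_fd };
+ *   dma_buf_lock_k_request req = {
+ *           .count = 2,
+ *           .list_of_dma_buf_fds = fds,
+ *           .exclusive = DMA_BUF_LOCK_NONEXCLUSIVE,
+ *   };
+ *   int lock_fd = ioctl(dev, DMA_BUF_LOCK_FUNC_LOCK_ASYNC, &req);
+ *
+ * poll() on the returned fd reports POLLIN once the buffers are locked, and
+ * closing the fd releases them.
+ */
+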
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/drivers/base/dma_buf_test_exporter/Kbuild b/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100644
index 00000000000..56b9f86d2d5
--- /dev/null
+++ b/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/drivers/base/dma_buf_test_exporter/Kconfig b/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100644
index 00000000000..974f0c23dbd
--- /dev/null
+++ b/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+ tristate "Test exporter for the dma-buf framework"
+ depends on DMA_SHARED_BUFFER
+ help
+	  This option enables the test exporter, used to help test importers.
diff --git a/drivers/base/dma_buf_test_exporter/Makefile b/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100644
index 00000000000..06e3d5c121a
--- /dev/null
+++ b/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100644
index 00000000000..852c55075eb
--- /dev/null
+++ b/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,635 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#include <linux/dma-mapping.h>
+#endif
+
+struct dma_buf_te_alloc {
+ /* the real alloc */
+ int nr_pages;
+ struct page **pages;
+
+ /* the debug usage tracking */
+ int nr_attached_devices;
+ int nr_device_mappings;
+ int nr_cpu_mappings;
+
+ /* failure simulation */
+ int fail_attach;
+ int fail_map;
+ int fail_mmap;
+
+ bool contiguous;
+ dma_addr_t contig_dma_addr;
+ void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+ struct dma_buf_te_alloc *alloc;
+ alloc = buf->priv;
+
+ if (alloc->fail_attach)
+ return -EFAULT;
+
+ /* dma_buf is externally locked during call */
+ alloc->nr_attached_devices++;
+ return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+ struct dma_buf_te_alloc *alloc;
+ alloc = buf->priv;
+ /* dma_buf is externally locked during call */
+
+ alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+ struct sg_table *sg;
+ struct scatterlist *iter;
+ struct dma_buf_te_alloc *alloc;
+ int i;
+ int ret;
+
+ alloc = attachment->dmabuf->priv;
+
+ if (alloc->fail_map)
+ return ERR_PTR(-ENOMEM);
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+ /* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+ if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+ return ERR_PTR(-EINVAL);
+#endif
+
+ sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!sg)
+ return ERR_PTR(-ENOMEM);
+
+ /* from here we access the allocation object, so lock the dmabuf pointing to it */
+ mutex_lock(&attachment->dmabuf->lock);
+
+ if (alloc->contiguous)
+ ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+ else
+ ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+ if (ret) {
+ mutex_unlock(&attachment->dmabuf->lock);
+ kfree(sg);
+ return ERR_PTR(ret);
+ }
+
+ if (alloc->contiguous) {
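+		/* A single sg entry describes the whole physically contiguous allocation */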
+ sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+ sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+ sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+ } else {
+ for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+ sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+ }
+
+ if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+ mutex_unlock(&attachment->dmabuf->lock);
+ sg_free_table(sg);
+ kfree(sg);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ alloc->nr_device_mappings++;
+ mutex_unlock(&attachment->dmabuf->lock);
+ return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+ struct sg_table *sg, enum dma_data_direction direction)
+{
+ struct dma_buf_te_alloc *alloc;
+
+ alloc = attachment->dmabuf->priv;
+
+ dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+ sg_free_table(sg);
+ kfree(sg);
+
+ mutex_lock(&attachment->dmabuf->lock);
+ alloc->nr_device_mappings--;
+ mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+ int i;
+ struct dma_buf_te_alloc *alloc;
+ alloc = buf->priv;
+ /* no need for locking */
+
+ if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+ dma_free_writecombine(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+ } else {
+ for (i = 0; i < alloc->nr_pages; i++)
+ __free_page(alloc->pages[i]);
+ }
+ kfree(alloc->pages);
+ kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+ struct dma_buf *dma_buf;
+ struct dma_buf_te_alloc *alloc;
+ dma_buf = vma->vm_private_data;
+ alloc = dma_buf->priv;
+
+ mutex_lock(&dma_buf->lock);
+ alloc->nr_cpu_mappings++;
+ mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+ struct dma_buf *dma_buf;
+ struct dma_buf_te_alloc *alloc;
+ dma_buf = vma->vm_private_data;
+ alloc = dma_buf->priv;
+
+ BUG_ON(alloc->nr_cpu_mappings <= 0);
+ mutex_lock(&dma_buf->lock);
+ alloc->nr_cpu_mappings--;
+ mutex_unlock(&dma_buf->lock);
+}
+
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct dma_buf_te_alloc *alloc;
+ struct dma_buf *dmabuf;
+ struct page *pageptr;
+
+ dmabuf = vma->vm_private_data;
+ alloc = dmabuf->priv;
+
+ if (vmf->pgoff >= alloc->nr_pages)
+ return VM_FAULT_SIGBUS;
+
+ pageptr = alloc->pages[vmf->pgoff];
+
+ BUG_ON(!pageptr);
+
+ get_page(pageptr);
+ vmf->page = pageptr;
+
+ return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+ .open = dma_buf_te_mmap_open,
+ .close = dma_buf_te_mmap_close,
+ .fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+ struct dma_buf_te_alloc *alloc;
+ alloc = dmabuf->priv;
+
+ if (alloc->fail_mmap)
+ return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+ vma->vm_ops = &dma_buf_te_vm_ops;
+ vma->vm_private_data = dmabuf;
+
+ /* we fault in the pages on access */
+
+ /* call open to do the ref-counting */
+ dma_buf_te_vm_ops.open(vma);
+
+ return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+ /* IGNORE */
+ return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+ /* IGNORE */
+ return NULL;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+ /* real handlers */
+ .attach = dma_buf_te_attach,
+ .detach = dma_buf_te_detach,
+ .map_dma_buf = dma_buf_te_map,
+ .unmap_dma_buf = dma_buf_te_unmap,
+ .release = dma_buf_te_release,
+ .mmap = dma_buf_te_mmap,
+
+ /* nop handlers for mandatory functions we ignore */
+ .kmap_atomic = dma_buf_te_kmap_atomic,
+ .kmap = dma_buf_te_kmap
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+ struct dma_buf_te_ioctl_version v;
+
+ if (copy_from_user(&v, buf, sizeof(v)))
+ return -EFAULT;
+
+ if (v.op != DMA_BUF_TE_ENQ)
+ return -EFAULT;
+
+ v.op = DMA_BUF_TE_ACK;
+ v.major = DMA_BUF_TE_VER_MAJOR;
+ v.minor = DMA_BUF_TE_VER_MINOR;
+
+ if (copy_to_user(buf, &v, sizeof(v)))
+ return -EFAULT;
+ else
+ return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+ struct dma_buf_te_ioctl_alloc alloc_req;
+ struct dma_buf_te_alloc *alloc;
+ struct dma_buf *dma_buf;
+ int i = 0;
+ int fd;
+
+ if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+ dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+ goto no_input;
+ }
+
+ if (!alloc_req.size) {
+ dev_err(te_device.this_device, "%s: no size specified", __func__);
+ goto invalid_size;
+ }
+
+#if !(defined(ARCH_HAS_SG_CHAIN) || defined(CONFIG_ARCH_HAS_SG_CHAIN))
+ /* Whilst it is possible to allocate a larger buffer, we won't be able to
+ * map it during actual usage (mmap() still succeeds). We fail here so
+ * userspace code can deal with it earlier rather than hitting a driver
+ * failure later on. */
+ if (alloc_req.size > SG_MAX_SINGLE_ALLOC) {
+ dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %lu pages",
+ __func__, alloc_req.size, SG_MAX_SINGLE_ALLOC);
+ goto invalid_size;
+ }
+#endif
+
+ alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+ if (NULL == alloc) {
+ dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+ goto no_alloc_object;
+ }
+
+ alloc->nr_pages = alloc_req.size;
+ alloc->contiguous = contiguous;
+
+ alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+ if (!alloc->pages) {
+ dev_err(te_device.this_device,
+ "%s: couldn't alloc %d page structures", __func__,
+ alloc->nr_pages);
+ goto free_alloc_object;
+ }
+
+ if (contiguous) {
+ dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ &alloc->contig_dma_addr,
+ GFP_KERNEL | __GFP_ZERO, &attrs);
+#else
+ alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ &alloc->contig_dma_addr,
+ GFP_KERNEL | __GFP_ZERO);
+#endif
+ if (!alloc->contig_cpu_addr) {
+ dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+ goto free_page_struct;
+ }
+ dma_aux = alloc->contig_dma_addr;
+ for (i = 0; i < alloc->nr_pages; i++) {
+ alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+ dma_aux += PAGE_SIZE;
+ }
+ } else {
+ for (i = 0; i < alloc->nr_pages; i++) {
+ alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (NULL == alloc->pages[i]) {
+ dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+ goto no_page;
+ }
+ }
+ }
+
+ /* alloc ready, let's export it */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+ dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+ alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
+#else
+ dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
+ alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+#endif
+
+ if (IS_ERR_OR_NULL(dma_buf)) {
+ dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+ goto no_export;
+ }
+
+ /* get fd for buf */
+ fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+ if (fd < 0) {
+ dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+ goto no_fd;
+ }
+
+ return fd;
+
+no_fd:
+ dma_buf_put(dma_buf);
+no_export:
+ /* i is still valid here */
+no_page:
+ if (contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+ dma_free_writecombine(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+ } else {
+ while (i-- > 0)
+ __free_page(alloc->pages[i]);
+ }
+free_page_struct:
+ kfree(alloc->pages);
+free_alloc_object:
+ kfree(alloc);
+no_alloc_object:
+invalid_size:
+no_input:
+ return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+ struct dma_buf_te_ioctl_status status;
+ struct dma_buf *dmabuf;
+ struct dma_buf_te_alloc *alloc;
+ int res = -EINVAL;
+
+ if (copy_from_user(&status, arg, sizeof(status)))
+ return -EFAULT;
+
+ dmabuf = dma_buf_get(status.fd);
+ if (IS_ERR_OR_NULL(dmabuf))
+ return -EINVAL;
+
+ /* verify it's one of ours */
+ if (dmabuf->ops != &dma_buf_te_ops)
+ goto err_have_dmabuf;
+
+ /* ours, get the current status */
+ alloc = dmabuf->priv;
+
+ /* lock while reading status to take a snapshot */
+ mutex_lock(&dmabuf->lock);
+ status.attached_devices = alloc->nr_attached_devices;
+ status.device_mappings = alloc->nr_device_mappings;
+ status.cpu_mappings = alloc->nr_cpu_mappings;
+ mutex_unlock(&dmabuf->lock);
+
+ if (copy_to_user(arg, &status, sizeof(status)))
+ goto err_have_dmabuf;
+
+ /* All OK */
+ res = 0;
+
+err_have_dmabuf:
+ dma_buf_put(dmabuf);
+ return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+ struct dma_buf *dmabuf;
+ struct dma_buf_te_ioctl_set_failing f;
+ struct dma_buf_te_alloc *alloc;
+ int res = -EINVAL;
+
+ if (copy_from_user(&f, arg, sizeof(f)))
+ return -EFAULT;
+
+ dmabuf = dma_buf_get(f.fd);
+ if (IS_ERR_OR_NULL(dmabuf))
+ return -EINVAL;
+
+ /* verify it's one of ours */
+ if (dmabuf->ops != &dma_buf_te_ops)
+ goto err_have_dmabuf;
+
+ /* ours, set the fail modes */
+ alloc = dmabuf->priv;
+ /* lock to set the fail modes atomically */
+ mutex_lock(&dmabuf->lock);
+ alloc->fail_attach = f.fail_attach;
+ alloc->fail_map = f.fail_map;
+ alloc->fail_mmap = f.fail_mmap;
+ mutex_unlock(&dmabuf->lock);
+
+ /* success */
+ res = 0;
+
+err_have_dmabuf:
+ dma_buf_put(dmabuf);
+ return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+ struct dma_buf_attachment *attachment;
+ struct sg_table *sgt;
+ struct scatterlist *sg;
+ unsigned int count;
+ unsigned int offset = 0;
+ int ret = 0;
+ int i;
+
+ attachment = dma_buf_attach(dma_buf, te_device.this_device);
+ if (IS_ERR_OR_NULL(attachment))
+ return -EBUSY;
+
+ sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+ if (IS_ERR_OR_NULL(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto no_import;
+ }
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ ret = dma_buf_begin_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+ if (ret)
+ goto no_cpu_access;
+ for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+ void *addr;
+
+ addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+ if (!addr) {
+ /* dma_buf_kmap is unimplemented in exynos and returns NULL */
+ dma_buf_end_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+ ret = -EPERM;
+ goto no_cpu_access;
+ }
+ memset(addr, value, PAGE_SIZE);
+ dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+ }
+ dma_buf_end_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+ offset += sg_dma_len(sg);
+ }
+
+no_cpu_access:
+ dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+ dma_buf_detach(dma_buf, attachment);
+ return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+ struct dma_buf *dmabuf;
+ struct dma_buf_te_ioctl_fill f;
+ int ret;
+
+ if (copy_from_user(&f, arg, sizeof(f)))
+ return -EFAULT;
+
+ dmabuf = dma_buf_get(f.fd);
+ if (IS_ERR_OR_NULL(dmabuf))
+ return -EINVAL;
+
+ ret = dma_te_buf_fill(dmabuf, f.value);
+ dma_buf_put(dmabuf);
+
+ return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case DMA_BUF_TE_VERSION:
+ return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+ case DMA_BUF_TE_ALLOC:
+ return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+ case DMA_BUF_TE_ALLOC_CONT:
+ return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+ case DMA_BUF_TE_QUERY:
+ return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+ case DMA_BUF_TE_SET_FAILING:
+ return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+ case DMA_BUF_TE_FILL:
+ return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct file_operations dma_buf_te_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = dma_buf_te_ioctl,
+ .compat_ioctl = dma_buf_te_ioctl,
+};
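+
+/*
+ * Illustrative userspace usage (a sketch only; it assumes the
+ * DMA_BUF_TE_* ioctl numbers and structures from the test exporter's
+ * UAPI header, and that the misc device node appears as
+ * /dev/dma_buf_te):
+ *
+ *   int dev = open("/dev/dma_buf_te", O_RDWR);
+ *   struct dma_buf_te_ioctl_alloc req = { .size = 4 };  // size in pages
+ *   int buf_fd = ioctl(dev, DMA_BUF_TE_ALLOC, &req);    // dma_buf fd
+ *   struct dma_buf_te_ioctl_fill fill = { .fd = buf_fd, .value = 0xAA };
+ *   ioctl(dev, DMA_BUF_TE_FILL, &fill);
+ *   struct dma_buf_te_ioctl_status st = { .fd = buf_fd };
+ *   ioctl(dev, DMA_BUF_TE_QUERY, &st);  // snapshot of mapping counters
+ */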
+
+static int __init dma_buf_te_init(void)
+{
+ int res;
+ te_device.minor = MISC_DYNAMIC_MINOR;
+ te_device.name = "dma_buf_te";
+ te_device.fops = &dma_buf_te_fops;
+
+ res = misc_register(&te_device);
+ if (res) {
+ printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+ return res;
+ }
+ te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+ dev_info(te_device.this_device, "dma_buf_te ready\n");
+ return 0;
+
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+ misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/base/kds/Kbuild b/drivers/base/kds/Kbuild
new file mode 100644
index 00000000000..a88acd8fdce
--- /dev/null
+++ b/drivers/base/kds/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_KDS) += kds.o
+obj-$(CONFIG_KDS_TEST) += kds_test.o
diff --git a/drivers/base/kds/Kconfig b/drivers/base/kds/Kconfig
new file mode 100644
index 00000000000..5f96165f67d
--- /dev/null
+++ b/drivers/base/kds/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+config KDS
+ tristate "Kernel dependency system"
+ help
+ This option enables the generic kernel dependency system
diff --git a/drivers/base/kds/Makefile b/drivers/base/kds/Makefile
new file mode 100644
index 00000000000..364d1515c1a
--- /dev/null
+++ b/drivers/base/kds/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+CONFIG_KDS_TEST ?= n
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all: kds
+
+kds:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS=m
+
+kds_test:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS_TEST=m
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
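+# Example invocation (assuming a build against the running host kernel;
+# point KDIR elsewhere for cross builds):
+#
+#   make KDIR=/lib/modules/$(uname -r)/build
+#   make KDIR=/lib/modules/$(uname -r)/build kds_test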
diff --git a/drivers/base/kds/kds.c b/drivers/base/kds/kds.c
new file mode 100644
index 00000000000..62612d414d0
--- /dev/null
+++ b/drivers/base/kds/kds.c
@@ -0,0 +1,559 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/kds.h>
+#include <linux/kref.h>
+
+#include <asm/atomic.h>
+
+#define KDS_LINK_TRIGGERED (1u << 0)
+#define KDS_LINK_EXCLUSIVE (1u << 1)
+
+#define KDS_INVALID (void *)-2
+#define KDS_RESOURCE (void *)-1
+
+struct kds_resource_set
+{
+ unsigned long num_resources;
+ unsigned long pending;
+ struct kds_callback *cb;
+ void *callback_parameter;
+ void *callback_extra_parameter;
+ struct list_head callback_link;
+ struct work_struct callback_work;
+ atomic_t cb_queued;
+ /* This resource set will be freed when there are no pending
+ * callbacks */
+ struct kref refcount;
+
+ /* This is only initialised when kds_waitall() is called. */
+ wait_queue_head_t wake;
+
+ struct kds_link resources[0];
+
+};
+
+static DEFINE_SPINLOCK(kds_lock);
+
+static void __resource_set_release(struct kref *ref)
+{
+ struct kds_resource_set *rset = container_of(ref,
+ struct kds_resource_set, refcount);
+
+ kfree(rset);
+}
+
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb)
+{
+ int ret = 0;
+
+ cb->direct = direct;
+ cb->user_cb = user_cb;
+
+ if (!direct)
+ {
+ cb->wq = alloc_workqueue("kds", WQ_UNBOUND | WQ_HIGHPRI, WQ_UNBOUND_MAX_ACTIVE);
+ if (!cb->wq)
+ ret = -ENOMEM;
+ }
+ else
+ {
+ cb->wq = NULL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(kds_callback_init);
+
+void kds_callback_term(struct kds_callback *cb)
+{
+ if (!cb->direct)
+ {
+ BUG_ON(!cb->wq);
+ destroy_workqueue(cb->wq);
+ }
+ else
+ {
+ BUG_ON(cb->wq);
+ }
+}
+
+EXPORT_SYMBOL(kds_callback_term);
+
+static void kds_do_user_callback(struct kds_resource_set *rset)
+{
+ rset->cb->user_cb(rset->callback_parameter, rset->callback_extra_parameter);
+}
+
+static void kds_queued_callback(struct work_struct *work)
+{
+ struct kds_resource_set *rset;
+ rset = container_of(work, struct kds_resource_set, callback_work);
+
+ atomic_dec(&rset->cb_queued);
+
+ kds_do_user_callback(rset);
+}
+
+static void kds_callback_perform(struct kds_resource_set *rset)
+{
+ if (rset->cb->direct)
+ kds_do_user_callback(rset);
+ else
+ {
+ int result;
+
+ atomic_inc(&rset->cb_queued);
+
+ result = queue_work(rset->cb->wq, &rset->callback_work);
+ /* a zero return means we've queued the same rset twice! */
+ WARN_ON(!result);
+ }
+}
+
+void kds_resource_init(struct kds_resource * const res)
+{
+ BUG_ON(!res);
+ INIT_LIST_HEAD(&res->waiters.link);
+ res->waiters.parent = KDS_RESOURCE;
+}
+EXPORT_SYMBOL(kds_resource_init);
+
+int kds_resource_term(struct kds_resource *res)
+{
+ unsigned long lflags;
+ BUG_ON(!res);
+ spin_lock_irqsave(&kds_lock, lflags);
+ if (!list_empty(&res->waiters.link))
+ {
+ spin_unlock_irqrestore(&kds_lock, lflags);
+ printk(KERN_ERR "ERROR: KDS resource is still in use\n");
+ return -EBUSY;
+ }
+ res->waiters.parent = KDS_INVALID;
+ spin_unlock_irqrestore(&kds_lock, lflags);
+ return 0;
+}
+EXPORT_SYMBOL(kds_resource_term);
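+
+/*
+ * Typical resource lifecycle (an illustrative sketch only):
+ *
+ *   struct kds_resource res;
+ *
+ *   kds_resource_init(&res);
+ *   ... clients wait on &res via kds_waitall()/kds_async_waitall() ...
+ *   if (kds_resource_term(&res))
+ *           ... still in use (-EBUSY), defer the teardown ...
+ */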
+
+int kds_async_waitall(
+ struct kds_resource_set ** const pprset,
+ struct kds_callback *cb,
+ void *callback_parameter,
+ void *callback_extra_parameter,
+ int number_resources,
+ unsigned long *exclusive_access_bitmap,
+ struct kds_resource **resource_list)
+{
+ struct kds_resource_set *rset = NULL;
+ unsigned long lflags;
+ int i;
+ int triggered;
+ int err = -EFAULT;
+
+ BUG_ON(!pprset);
+ BUG_ON(!resource_list);
+ BUG_ON(!cb);
+
+ WARN_ONCE(number_resources > 10, "Waiting on a high number of resources may increase latency, see documentation.");
+
+ rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+ if (!rset)
+ {
+ return -ENOMEM;
+ }
+
+ rset->num_resources = number_resources;
+ rset->pending = number_resources;
+ rset->cb = cb;
+ rset->callback_parameter = callback_parameter;
+ rset->callback_extra_parameter = callback_extra_parameter;
+ INIT_LIST_HEAD(&rset->callback_link);
+ INIT_WORK(&rset->callback_work, kds_queued_callback);
+ atomic_set(&rset->cb_queued, 0);
+ kref_init(&rset->refcount);
+
+ for (i = 0; i < number_resources; i++)
+ {
+ INIT_LIST_HEAD(&rset->resources[i].link);
+ rset->resources[i].parent = rset;
+ }
+
+ spin_lock_irqsave(&kds_lock, lflags);
+
+ for (i = 0; i < number_resources; i++)
+ {
+ unsigned long link_state = 0;
+
+ if (test_bit(i, exclusive_access_bitmap))
+ {
+ link_state |= KDS_LINK_EXCLUSIVE;
+ }
+
+ /* no-one else waiting? */
+ if (list_empty(&resource_list[i]->waiters.link))
+ {
+ link_state |= KDS_LINK_TRIGGERED;
+ rset->pending--;
+ }
+ /* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+ else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+ (((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+ {
+ link_state |= KDS_LINK_TRIGGERED;
+ rset->pending--;
+ }
+ rset->resources[i].state = link_state;
+
+ /* avoid double wait (hang) */
+ if (!list_empty(&resource_list[i]->waiters.link))
+ {
+ /* adding same rset again? */
+ if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+ {
+ goto roll_back;
+ }
+ }
+ list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+ }
+
+ triggered = (rset->pending == 0);
+
+ /* set the pointer before the callback is called so it sees it */
+ *pprset = rset;
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+
+ if (triggered)
+ {
+ /* all resources obtained, trigger callback */
+ kds_callback_perform(rset);
+ }
+
+ return 0;
+
+roll_back:
+ /* roll back */
+ while (i-- > 0)
+ {
+ list_del(&rset->resources[i].link);
+ }
+ err = -EINVAL;
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+ kfree(rset);
+ return err;
+}
+EXPORT_SYMBOL(kds_async_waitall);
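+
+/*
+ * Illustrative asynchronous usage (a sketch; my_cb, my_param and res
+ * are the caller's own names, not part of this API):
+ *
+ *   static void my_cb(void *param, void *extra) { ... }
+ *
+ *   struct kds_callback cb;
+ *   struct kds_resource_set *rset;
+ *   unsigned long excl = 0;                // all accesses shared
+ *   struct kds_resource *list[] = { &res };
+ *
+ *   kds_callback_init(&cb, 0, my_cb);      // deferred via workqueue
+ *   kds_async_waitall(&rset, &cb, my_param, NULL, 1, &excl, list);
+ *   ... my_cb runs once all resources are owned ...
+ *   kds_resource_set_release(&rset);       // give up ownership
+ *   kds_callback_term(&cb);
+ */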
+
+static void wake_up_sync_call(void *callback_parameter, void *callback_extra_parameter)
+{
+ wait_queue_head_t *wait = (wait_queue_head_t *)callback_parameter;
+ wake_up(wait);
+}
+
+static struct kds_callback sync_cb =
+{
+ wake_up_sync_call,
+ 1,
+ NULL,
+};
+
+struct kds_resource_set *kds_waitall(
+ int number_resources,
+ unsigned long *exclusive_access_bitmap,
+ struct kds_resource **resource_list,
+ unsigned long jiffies_timeout)
+{
+ struct kds_resource_set *rset;
+ unsigned long lflags;
+ int i;
+ int triggered = 0;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+ rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+ if (!rset)
+ return rset;
+
+ rset->num_resources = number_resources;
+ rset->pending = number_resources;
+ init_waitqueue_head(&rset->wake);
+ INIT_LIST_HEAD(&rset->callback_link);
+ INIT_WORK(&rset->callback_work, kds_queued_callback);
+ atomic_set(&rset->cb_queued, 0);
+ kref_init(&rset->refcount);
+
+ spin_lock_irqsave(&kds_lock, lflags);
+
+ for (i = 0; i < number_resources; i++)
+ {
+ unsigned long link_state = 0;
+
+ if (test_bit(i, exclusive_access_bitmap))
+ {
+ link_state |= KDS_LINK_EXCLUSIVE;
+ }
+
+ if (list_empty(&resource_list[i]->waiters.link))
+ {
+ link_state |= KDS_LINK_TRIGGERED;
+ rset->pending--;
+ }
+ /* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+ else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+ (((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+ {
+ link_state |= KDS_LINK_TRIGGERED;
+ rset->pending--;
+ }
+
+ INIT_LIST_HEAD(&rset->resources[i].link);
+ rset->resources[i].parent = rset;
+ rset->resources[i].state = link_state;
+
+ /* avoid double wait (hang) */
+ if (!list_empty(&resource_list[i]->waiters.link))
+ {
+ /* adding same rset again? */
+ if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+ {
+ goto roll_back;
+ }
+ }
+
+ list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+ }
+
+ if (rset->pending == 0)
+ triggered = 1;
+ else
+ {
+ rset->cb = &sync_cb;
+ rset->callback_parameter = &rset->wake;
+ rset->callback_extra_parameter = NULL;
+ }
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+
+ if (!triggered)
+ {
+ long wait_res = 0;
+ long timeout = (jiffies_timeout == KDS_WAIT_BLOCKING) ?
+ MAX_SCHEDULE_TIMEOUT : jiffies_timeout;
+
+ if (timeout)
+ {
+ wait_res = wait_event_interruptible_timeout(rset->wake,
+ rset->pending == 0, timeout);
+ }
+
+ if ((wait_res == -ERESTARTSYS) || (wait_res == 0))
+ {
+ /* use \a kds_resource_set_release to roll back */
+ kds_resource_set_release(&rset);
+ return ERR_PTR(wait_res);
+ }
+ }
+ return rset;
+
+roll_back:
+ /* roll back */
+ while (i-- > 0)
+ {
+ list_del(&rset->resources[i].link);
+ }
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+ kfree(rset);
+ return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL(kds_waitall);
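+
+/*
+ * Illustrative synchronous usage (a sketch; res is the caller's own):
+ *
+ *   unsigned long excl = 1;                // exclusive on resource 0
+ *   struct kds_resource *list[] = { &res };
+ *   struct kds_resource_set *rset;
+ *
+ *   rset = kds_waitall(1, &excl, list, msecs_to_jiffies(100));
+ *   if (!IS_ERR_OR_NULL(rset)) {
+ *           ... sole owner of res here ...
+ *           kds_resource_set_release(&rset);
+ *   }
+ */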
+
+static void trigger_new_rset_owner(struct kds_resource_set *rset,
+ struct list_head *triggered)
+{
+ if (0 == --rset->pending) {
+ /* new owner now triggered, track for callback later */
+ kref_get(&rset->refcount);
+ list_add(&rset->callback_link, triggered);
+ }
+}
+
+static void __kds_resource_set_release_common(struct kds_resource_set *rset)
+{
+ struct list_head triggered = LIST_HEAD_INIT(triggered);
+ struct kds_resource_set *it;
+ unsigned long lflags;
+ int i;
+
+ spin_lock_irqsave(&kds_lock, lflags);
+
+ for (i = 0; i < rset->num_resources; i++)
+ {
+ struct kds_resource *resource;
+ struct kds_link *it = NULL;
+
+ /* fetch the previous entry on the linked list */
+ it = list_entry(rset->resources[i].link.prev, struct kds_link, link);
+ /* unlink ourself */
+ list_del(&rset->resources[i].link);
+
+ /* any waiters? */
+ if (list_empty(&it->link))
+ continue;
+
+ /* were we the head of the list? (head if prev is a resource) */
+ if (it->parent != KDS_RESOURCE)
+ {
+ if ((it->state & KDS_LINK_TRIGGERED) && !(it->state & KDS_LINK_EXCLUSIVE))
+ {
+ /*
+ * previous was triggered and not exclusive, so we
+ * trigger non-exclusive until end-of-list or first
+ * exclusive
+ */
+
+ struct kds_link *it_waiting = it;
+
+ list_for_each_entry(it, &it_waiting->link, link)
+ {
+ /* exclusive found, stop triggering */
+ if (it->state & KDS_LINK_EXCLUSIVE)
+ break;
+
+ it->state |= KDS_LINK_TRIGGERED;
+ /* a parent to update? */
+ if (it->parent != KDS_RESOURCE)
+ trigger_new_rset_owner(
+ it->parent,
+ &triggered);
+ }
+ }
+ continue;
+ }
+
+ /* we were the head, find the kds_resource */
+ resource = container_of(it, struct kds_resource, waiters);
+
+ /* we know there is someone waiting from the any-waiters test above */
+
+ /* find the head of the waiting list */
+ it = list_first_entry(&resource->waiters.link, struct kds_link, link);
+
+ /* new exclusive owner? */
+ if (it->state & KDS_LINK_EXCLUSIVE)
+ {
+ /* link now triggered */
+ it->state |= KDS_LINK_TRIGGERED;
+ /* a parent to update? */
+ trigger_new_rset_owner(it->parent, &triggered);
+ }
+ /* exclusive releasing ? */
+ else if (rset->resources[i].state & KDS_LINK_EXCLUSIVE)
+ {
+ /* trigger non-exclusive until end-of-list or first exclusive */
+ list_for_each_entry(it, &resource->waiters.link, link)
+ {
+ /* exclusive found, stop triggering */
+ if (it->state & KDS_LINK_EXCLUSIVE)
+ break;
+
+ it->state |= KDS_LINK_TRIGGERED;
+ /* a parent to update? */
+ trigger_new_rset_owner(it->parent, &triggered);
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+
+ while (!list_empty(&triggered))
+ {
+ it = list_first_entry(&triggered, struct kds_resource_set, callback_link);
+ list_del(&it->callback_link);
+ kds_callback_perform(it);
+
+ /* Free the resource set if no callbacks pending */
+ kref_put(&it->refcount, &__resource_set_release);
+ }
+}
+
+void kds_resource_set_release(struct kds_resource_set **pprset)
+{
+ struct kds_resource_set *rset;
+ int queued;
+
+ rset = cmpxchg(pprset, *pprset, NULL);
+
+ if (!rset)
+ {
+ /* caught a race between a cancellation
+ * and a completion, nothing to do */
+ return;
+ }
+
+ __kds_resource_set_release_common(rset);
+
+ /*
+ * Caller is responsible for guaranteeing that callback work is not
+ * pending (i.e. it's running or has completed) prior to calling release.
+ */
+ queued = atomic_read(&rset->cb_queued);
+ BUG_ON(queued);
+
+ kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release);
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset)
+{
+ struct kds_resource_set *rset;
+
+ rset = cmpxchg(pprset, *pprset, NULL);
+ if (!rset)
+ {
+ /* caught a race between a cancellation
+ * and a completion, nothing to do */
+ return;
+ }
+
+ __kds_resource_set_release_common(rset);
+
+ /*
+ * In the case of a kds async wait cancellation ensure the deferred
+ * callback does not get scheduled if a trigger fired at the same time
+ * to release the wait.
+ */
+ cancel_work_sync(&rset->callback_work);
+
+ kref_put(&rset->refcount, &__resource_set_release);
+}
+EXPORT_SYMBOL(kds_resource_set_release_sync);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/drivers/base/ump/Kbuild b/drivers/base/ump/Kbuild
new file mode 100644
index 00000000000..2bbdba27d76
--- /dev/null
+++ b/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/drivers/base/ump/Kconfig b/drivers/base/ump/Kconfig
new file mode 100644
index 00000000000..f7451e67a5e
--- /dev/null
+++ b/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+config UMP
+ tristate "Enable Unified Memory Provider (UMP) support"
+ default n
+ help
+ Enable this option to build support for the ARM UMP module.
+ UMP can be used by the Mali T6xx module to improve performance
+ by reducing the copying of data by sharing memory.
+
+ To compile this driver as a module, choose M here:
+ this will generate one module, called ump.
diff --git a/drivers/base/ump/docs/Doxyfile b/drivers/base/ump/docs/Doxyfile
new file mode 100644
index 00000000000..fbec8eb40a0
--- /dev/null
+++ b/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS +=
diff --git a/drivers/base/ump/example_kernel_api.c b/drivers/base/ump/example_kernel_api.c
new file mode 100644
index 00000000000..858dd8b52f0
--- /dev/null
+++ b/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about a UMP allocation.
+ * The routine takes a secure_id which can come from a different kernel module
+ * or from a client application (e.g. via an ioctl).
+ * It creates a UMP handle from the secure id (which validates the secure id)
+ * and, if successful, dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the test succeeds then it returns 0.
+ */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+ const ump_dd_physical_block_64 * ump_blocks = NULL;
+ ump_dd_handle ump_mem;
+ uint64_t nr_blocks;
+ int i;
+ ump_alloc_flags flags;
+
+ /* get a handle from the secure id */
+ ump_mem = ump_dd_from_secure_id(secure_id);
+
+ if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+ {
+ /* invalid ID received */
+ return -1;
+ }
+
+ /* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+ ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+ flags = ump_dd_allocation_flags_get(ump_mem);
+
+ printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+ for(i=0; i<nr_blocks; ++i)
+ {
+ printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+ }
+
+ printf("and was allocated using the following bitflag combo: 0x%lX\n", flags);
+
+ ump_dd_release(ump_mem);
+
+ return 0;
+}
+
diff --git a/drivers/base/ump/example_user_api.c b/drivers/base/ump/example_user_api.c
new file mode 100644
index 00000000000..d143a640512
--- /dev/null
+++ b/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <string.h> /* memset() */
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP api.
+ * This routine initializes the UMP api and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo code for the device.
+ *
+ * If any error is detected correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+ /* This is the size we try to allocate*/
+ const size_t alloc_size = 4096;
+
+ ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+ ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+ ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+ void * mapping = NULL;
+
+ ump_result ump_api_res;
+ int result = -1;
+
+ ump_secure_id id;
+
+ size_t size_returned;
+
+ ump_api_res = ump_open();
+ if (UMP_OK != ump_api_res)
+ {
+ /* failed to open an ump session */
+ /* early out */
+ return -1;
+ }
+
+ h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+ /* the refcount is now 1 */
+ if (UMP_INVALID_MEMORY_HANDLE == h)
+ {
+ /* allocation failure */
+ goto cleanup;
+ }
+
+ /* this is how we could share this allocation with another process */
+
+ /* in process A: */
+ id = ump_secure_id_get(h);
+ /* still ref count 1 */
+ /* send the id to process B */
+
+ /* in process B: */
+ /* receive the id from A */
+ h_clone = ump_from_secure_id(id);
+ /* the ref count of the allocation is now 2 (one from each handle to it) */
+ /* do something ... */
+ /* release our clone */
+ ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+ h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+ /* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+ /*
+ * void assign_memory_to_job(h)
+ * {
+ */
+ h_copy = h;
+ ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+ /*
+ * }
+ *
+ * void job_completed(void)
+ * {
+ */
+ ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+ h_copy = UMP_INVALID_MEMORY_HANDLE;
+ /*
+ * }
+ */
+
+ /* we're now back at ref count 1, and only h is a valid handle */
+ /* enough handle duplication show-off, let's play with the contents instead */
+
+ mapping = ump_map(h, 0, alloc_size);
+ if (NULL == mapping)
+ {
+ /* mapping failure, either out of address space or some other error */
+ goto cleanup;
+ }
+
+ memset(mapping, 0, alloc_size);
+
+ /* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+ ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ /*
+ device cache invalidate
+
+ memory barrier
+
+ start device
+
+ memory barrier
+
+ wait for device
+
+ memory barrier
+
+ device cache clean
+
+ memory barrier
+ */
+ ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+ /* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+ /* unmap the buffer when we're done with it */
+ ump_unmap(h, mapping, alloc_size);
+
+ result = 0;
+
+cleanup:
+ ump_release(h);
+ h = UMP_INVALID_MEMORY_HANDLE;
+
+ ump_close();
+
+ return result;
+}
+
diff --git a/drivers/base/ump/src/Kbuild b/drivers/base/ump/src/Kbuild
new file mode 100644
index 00000000000..de6d30770d1
--- /dev/null
+++ b/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+ common/ump_kernel_core.c \
+ common/ump_kernel_descriptor_mapping.c \
+ linux/ump_kernel_linux.c \
+ linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+ MALI_DEBUG ?= 1
+
+ UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+ -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/drivers/base/ump/src/Makefile b/drivers/base/ump/src/Makefile
new file mode 100644
index 00000000000..45428adbdf7
--- /dev/null
+++ b/drivers/base/ump/src/Makefile
@@ -0,0 +1,81 @@
+#
+# (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+ EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based on CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+ KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+ EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+KDS_PATH=$(ROOT)/kernel/drivers/base/kds
+EXTRA_SYMBOLS+=$(KDS_PATH)/Module.symvers
+
+all:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+ $(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
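+
+# Example cross-build invocation (an assumption: KDIR-arm64 is defined
+# in a local KDIR_CONFIGURATION file, matching the arch-arm64/config.h
+# shipped alongside):
+#
+#   make CONFIG=arm64 CPU=arm64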
diff --git a/drivers/base/ump/src/Makefile.common b/drivers/base/ump/src/Makefile.common
new file mode 100644
index 00000000000..f29a4c1cffa
--- /dev/null
+++ b/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+ $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/drivers/base/ump/src/arch-arm/config.h b/drivers/base/ump/src/arch-arm/config.h
new file mode 100644
index 00000000000..152d98f38af
--- /dev/null
+++ b/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT 1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT (32UL * 1024UL * 1024UL)
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/drivers/base/ump/src/arch-arm64/config.h b/drivers/base/ump/src/arch-arm64/config.h
new file mode 100644
index 00000000000..cfb14ca5487
--- /dev/null
+++ b/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2009, 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT 1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT (32UL * 1024UL * 1024UL)
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/drivers/base/ump/src/common/ump_kernel_core.c b/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100644
index 00000000000..07aa07739f9
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,756 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+ mutex_init(&device.secure_id_map_lock);
+ device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+ if (NULL != device.secure_id_map)
+ {
+ if (UMP_OK == umpp_device_initialize())
+ {
+ return UMP_OK;
+ }
+ umpp_descriptor_mapping_destroy(device.secure_id_map);
+ }
+ mutex_destroy(&device.secure_id_map_lock);
+
+ return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+ umpp_device_terminate();
+ umpp_descriptor_mapping_destroy(device.secure_id_map);
+ mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+ umpp_session * session;
+
+ session = kzalloc(sizeof(*session), GFP_KERNEL);
+ if (NULL != session)
+ {
+ mutex_init(&session->session_lock);
+
+ INIT_LIST_HEAD(&session->memory_usage);
+
+ /* try to create import client session, not a failure if they fail to initialize */
+ umpp_import_handlers_init(session);
+ }
+
+ return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+ umpp_session_memory_usage * usage, *_usage;
+ UMP_ASSERT(session);
+
+ list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+ {
+ printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+ ump_dd_release(usage->mem);
+ kfree(usage);
+
+ }
+
+ /* we should now not hold any imported memory objects,
+ * detach all import handlers */
+ umpp_import_handlers_term(session);
+
+ mutex_destroy(&session->session_lock);
+ kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+ umpp_allocation * alloc;
+ int i;
+
+ UMP_ASSERT(size);
+
+ if (flags & (~UMP_FLAGS_RANGE))
+ {
+ printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+ ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+ {
+ printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ /* check that the matching permission flags are also set whenever hint flags are set */
+ for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+ {
+ if (flags & (UMP_HINT_DEVICE_RD<<i))
+ {
+ UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+ }
+ if (flags & (UMP_HINT_DEVICE_WR<<i))
+ {
+ UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+ }
+ }
+
+ alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+ if (NULL == alloc)
+ goto out1;
+
+ alloc->flags = flags;
+ alloc->filter_func = filter_func;
+ alloc->final_release_func = final_release_func;
+ alloc->callback_data = callback_data;
+ alloc->size = size;
+
+ mutex_init(&alloc->map_list_lock);
+ INIT_LIST_HEAD(&alloc->map_list);
+ atomic_set(&alloc->refcount, 1);
+
+#ifdef CONFIG_KDS
+ kds_resource_init(&alloc->kds_res);
+#endif
+
+ if (!(alloc->flags & UMP_PROT_SHAREABLE))
+ {
+ alloc->owner = get_current()->pid;
+ }
+
+ if (0 != umpp_phys_commit(alloc))
+ {
+ goto out2;
+ }
+
+ /* all set up, allocate an ID for it */
+
+ mutex_lock(&device.secure_id_map_lock);
+ alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+ mutex_unlock(&device.secure_id_map_lock);
+
+ if ((int)alloc->id == 0)
+ {
+ /* failed to allocate a secure_id */
+ goto out3;
+ }
+
+ return alloc;
+
+out3:
+ umpp_phys_free(alloc);
+out2:
+ kfree(alloc);
+out1:
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+}
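+
+/*
+ * Illustrative kernel-side allocation (a sketch; the flag combination
+ * is only an example):
+ *
+ *   ump_dd_handle h = ump_dd_allocate_64(4096,
+ *           UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_SHAREABLE,
+ *           NULL, NULL, NULL);
+ *   if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+ *           ump_dd_release(h);
+ */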
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+ UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+ return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ return alloc->id;
+}
+
+#ifdef CONFIG_KDS
+struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ return &alloc->kds_res;
+}
+#endif
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+ const umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+ alloc = (const umpp_allocation *)mem;
+
+ return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+ umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+ mutex_lock(&device.secure_id_map_lock);
+
+ if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+ {
+ if (NULL != alloc->filter_func)
+ {
+ if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+ {
+ alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+ }
+ }
+
+ /* check permission to access it */
+ if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+ {
+ if (alloc->owner != get_current()->pid)
+ {
+ alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+ }
+ }
+
+ if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+ {
+ if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+ {
+ alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+ }
+ }
+ mutex_unlock(&device.secure_id_map_lock);
+
+ return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+ return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ /* check for overflow */
+ while(1)
+ {
+ int refcnt = atomic_read(&alloc->refcount);
+ if (refcnt + 1 > 0)
+ {
+ if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ return -EBUSY;
+ }
+ }
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+ ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+ uint32_t new_cnt;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ /* secure the id for lookup while releasing */
+ mutex_lock(&device.secure_id_map_lock);
+
+ /* do the actual release */
+ new_cnt = atomic_sub_return(1, &alloc->refcount);
+ if (0 == new_cnt)
+ {
+ /* remove from the table as this was the last ref */
+ umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+ }
+
+ /* release the lock as early as possible */
+ mutex_unlock(&device.secure_id_map_lock);
+
+ if (0 != new_cnt)
+ {
+ /* exit if still have refs */
+ return;
+ }
+
+ UMP_ASSERT(list_empty(&alloc->map_list));
+
+#ifdef CONFIG_KDS
+ if (kds_resource_term(&alloc->kds_res))
+ {
+ printk(KERN_ERR "ump_dd_release: kds_resource_term failed,"
+ "unable to release UMP allocation\n");
+ return;
+ }
+#endif
+ /* cleanup */
+ if (NULL != alloc->final_release_func)
+ {
+ alloc->final_release_func(alloc, alloc->callback_data);
+ }
+
+ if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+ {
+ umpp_phys_free(alloc);
+ }
+ else
+ {
+ kfree(alloc->block_array);
+ }
+
+ mutex_destroy(&alloc->map_list_lock);
+
+ kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+ ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+ const umpp_allocation * alloc;
+ UMP_ASSERT(pCount);
+ UMP_ASSERT(pArray);
+ UMP_ASSERT(mem);
+ alloc = (const umpp_allocation *)mem;
+ *pCount = alloc->blocksCount;
+ *pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+ const umpp_allocation * alloc;
+ unsigned long i;
+ UMP_ASSERT(mem);
+ UMP_ASSERT(blocks);
+ UMP_ASSERT(num_blocks);
+
+ alloc = (const umpp_allocation *)mem;
+
+ UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+ if((uint64_t)num_blocks != alloc->blocksCount)
+ {
+ return UMP_DD_INVALID;
+ }
+
+ for( i = 0; i < num_blocks; i++)
+ {
+ UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+ UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+ blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+ blocks[i].size = (unsigned long)alloc->block_array[i].size;
+ }
+
+ return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+ const umpp_allocation * alloc;
+ UMP_ASSERT(mem);
+ UMP_ASSERT(block);
+ alloc = (const umpp_allocation *)mem;
+
+ UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+ UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+ UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+ block->addr = (unsigned long)alloc->block_array[index].addr;
+ block->size = (unsigned long)alloc->block_array[index].size;
+
+ return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+ const umpp_allocation * alloc;
+ UMP_ASSERT(mem);
+ alloc = (const umpp_allocation *)mem;
+
+ UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+ UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+ return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+ umpp_cpu_mapping *map;
+
+ void *target_first = uaddr;
+ void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+ if (target_last < target_first) /* wrapped */
+ {
+ return NULL;
+ }
+
+ mutex_lock(&alloc->map_list_lock);
+ list_for_each_entry(map, &alloc->map_list, link)
+ {
+ if ( map->vaddr_start <= target_first &&
+ (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+ {
+ goto out;
+ }
+ }
+ map = NULL;
+out:
+ mutex_unlock(&alloc->map_list_lock);
+
+ return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+ UMP_ASSERT(alloc);
+ UMP_ASSERT(map);
+ mutex_lock(&alloc->map_list_lock);
+ list_add(&map->link, &alloc->map_list);
+ mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+ umpp_cpu_mapping * map;
+
+ UMP_ASSERT(alloc);
+ UMP_ASSERT(target);
+
+ mutex_lock(&alloc->map_list_lock);
+ list_for_each_entry(map, &alloc->map_list, link)
+ {
+ if (map == target)
+ {
+ list_del(&target->link);
+ kfree(target);
+ mutex_unlock(&alloc->map_list_lock);
+ return;
+ }
+ }
+
+ /* not found, error */
+ UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset)
+{
+ uint64_t i;
+
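+	/* walk the blocks, consuming the offset as we go; e.g. for blocks
+	 * of 4K and 8K, offset 5K lands in block 1 at internal offset 1K */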
+ for (i = 0 ; i < alloc->blocksCount; i++)
+ {
+ if (offset < alloc->block_array[i].size)
+ {
+ /* found the block_array element containing this offset */
+ *block_index = i;
+ *block_internal_offset = offset;
+ return 0;
+ }
+ offset -= alloc->block_array[i].size;
+ }
+
+ return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+ umpp_allocation * alloc;
+ void *vaddr;
+ umpp_cpu_mapping * mapping;
+ uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+ uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+ uint64_t page_count; /* number of pages to sync */
+ uint64_t i;
+ uint64_t block_idx;
+ uint64_t block_offset;
+ uint64_t paddr;
+
+ UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+ alloc = (umpp_allocation*)mem;
+ vaddr = (void*)(uintptr_t)address;
+
+ if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+ {
+ /* mapping is not cached */
+ return;
+ }
+
+ mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+ if (NULL == mapping)
+ {
+ printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+ return; /* invalid pointer or size causes out-of-bounds */
+ }
+
+	/* we already know that address + size doesn't wrap around, as umpp_dd_find_enclosing_mapping didn't fail */
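+	/* pages spanned = (index of the last byte's page - index of the first byte's page) + 1 */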
+ page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+ virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+ phys_page_off = mapping->page_off;
+
+ if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+ {
+		/* should not fail as a valid mapping was found, so the phys mem must exist */
+ printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+ UMP_ASSERT(0);
+ return;
+ }
+
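+	/* physical address of the first byte to sync: block base, plus the
+	 * page-aligned offset of the page inside the block, plus the byte
+	 * offset within that page */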
+ paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
+ for (i = 0; i < page_count; i++)
+ {
+ size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+ size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+		/* check if we've overrun the current block, if so move to the next block */
+ if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+ {
+ block_idx++;
+ UMP_ASSERT(block_idx < alloc->blocksCount);
+ paddr = alloc->block_array[block_idx].addr;
+ }
+
+ if (UMP_MSYNC_CLEAN == op)
+ {
+ ump_sync_to_memory(paddr, vaddr, sz);
+ }
+ else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+ {
+ ump_sync_to_cpu(paddr, vaddr, sz);
+ }
+
+ /* advance to next page */
+ vaddr = (void*)((uintptr_t)vaddr + sz);
+ size -= sz;
+ paddr += sz;
+ }
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+ uint64_t size = 0;
+ uint64_t i;
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(blocks);
+ UMP_ASSERT(num_blocks);
+
+ for (i = 0; i < num_blocks; i++)
+ {
+ size += blocks[i].size;
+ }
+ UMP_ASSERT(size);
+
+ if (flags & (~UMP_FLAGS_RANGE))
+ {
+ printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+ | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+ {
+ printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+	/* check that the matching permission flags are set whenever hint flags are set */
+ for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+ {
+ if (flags & (UMP_HINT_DEVICE_RD<<i))
+ {
+ UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+ }
+ if (flags & (UMP_HINT_DEVICE_WR<<i))
+ {
+ UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+ }
+ }
+
+ alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+ if (NULL == alloc)
+ {
+ goto out1;
+ }
+
+ alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+ if (NULL == alloc->block_array)
+ {
+ goto out2;
+ }
+
+ memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+#ifdef CONFIG_KDS
+ kds_resource_init(&alloc->kds_res);
+#endif
+ alloc->size = size;
+ alloc->blocksCount = num_blocks;
+ alloc->flags = flags;
+ alloc->filter_func = filter_func;
+ alloc->final_release_func = final_release_func;
+ alloc->callback_data = callback_data;
+
+ if (!(alloc->flags & UMP_PROT_SHAREABLE))
+ {
+ alloc->owner = get_current()->pid;
+ }
+
+ mutex_init(&alloc->map_list_lock);
+ INIT_LIST_HEAD(&alloc->map_list);
+ atomic_set(&alloc->refcount, 1);
+
+ /* all set up, allocate an ID */
+
+ mutex_lock(&device.secure_id_map_lock);
+ alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+ mutex_unlock(&device.secure_id_map_lock);
+
+	if (0 == alloc->id)
+ {
+ /* failed to allocate a secure_id */
+ goto out3;
+ }
+
+ alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+ return alloc;
+
+out3:
+ kfree(alloc->block_array);
+out2:
+ kfree(alloc);
+out1:
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+ ump_dd_handle mem;
+ ump_dd_physical_block_64 *block_64_array;
+ ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+ unsigned long i;
+
+ UMP_ASSERT(blocks);
+ UMP_ASSERT(num_blocks);
+
+ block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+ if(block_64_array == NULL)
+ {
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ /* copy physical blocks */
+ for( i = 0; i < num_blocks; i++)
+ {
+ block_64_array[i].addr = blocks[i].addr;
+ block_64_array[i].size = blocks[i].size;
+ }
+
+ mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+ kfree(block_64_array);
+
+ return mem;
+
+}
diff --git a/drivers/base/ump/src/common/ump_kernel_core.h b/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100644
index 00000000000..8cb424d2893
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+#include <linux/ump-common.h>
+#include <ump/src/common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle that is of no use to user space
+ */
+typedef enum
+{
+ UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+ /* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+ struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+ umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking a CPU mapping of a UMP allocation.
+ * Tracks info about a mapping so we can verify cache maintenance
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+ struct list_head link; /**< link to list of mappings for an allocation */
+ void *vaddr_start; /**< CPU VA start of the mapping */
+ size_t nr_pages; /**< Size (in pages) of the mapping */
+ uint64_t page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+ ump_dd_handle handle; /**< Which handle this mapping is linked to */
+ struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking a UMP allocation.
+ * Represents a memory allocation with its ID.
+ * Tracks all needed meta-data about an allocation.
+ */
+typedef struct umpp_allocation
+{
+ ump_secure_id id; /**< Secure ID of the allocation */
+ atomic_t refcount; /**< Usage count */
+
+ ump_alloc_flags flags; /**< Flags for all supported devices */
+	uint32_t management_flags; /**< Management flags tracking */
+
+	pid_t owner; /**< The process ID owning the memory if not shareable */
+
+ ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+ ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+ void* callback_data; /**< Additional data given to release hook */
+
+ uint64_t size; /**< Size (in bytes) of the allocation */
+	uint64_t blocksCount; /**< Number of physical blocks the allocation is built up of */
+ ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+ struct mutex map_list_lock; /**< Lock protecting the map_list */
+ struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+#ifdef CONFIG_KDS
+ struct kds_resource kds_res; /**< The KDS resource controlling access to this allocation */
+#endif
+
+ void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects against non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+ ump_secure_id id; /**< ID being used. For quick look-up */
+ ump_dd_handle mem; /**< Handle being used. */
+
+ /**
+	 * Tracks how many times the process has retained this handle in the kernel.
+	 * This should usually just be 1 (allocated or resolved) or 2 (mapped),
+	 * but could be more if someone is playing with the low-level API.
+	 */
+ atomic_t process_usage_count;
+
+ struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+ struct mutex session_lock; /**< Lock for memory usage manipulation */
+	struct list_head memory_usage; /**< list of memory currently being used by this session */
+ void* import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by umpp_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range lies within a single mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find a block in the blockArray which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialize the import handlers for
+ */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to clean up the import handlers for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
diff --git a/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100644
index 00000000000..c5b0d744008
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
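+/* rounds up to a multiple of BITS_PER_LONG; e.g. with BITS_PER_LONG == 64,
+ * MALI_PAD_INT(1) == 64 and MALI_PAD_INT(65) == 128 */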
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+ umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+ init_entries = MALI_PAD_INT(init_entries);
+ max_entries = MALI_PAD_INT(max_entries);
+
+ if (NULL != map)
+ {
+ map->table = descriptor_table_alloc(init_entries);
+ if (NULL != map->table)
+ {
+			init_rwsem(&map->lock);
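+			/* mark descriptor 0 as used; it is reserved as the invalid ID */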
+ set_bit(0, map->table->usage);
+ map->max_nr_mappings_allowed = max_entries;
+ map->current_nr_mappings = init_entries;
+			return map;
+ }
+ kfree(map);
+ }
+ return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+ UMP_ASSERT(NULL != map);
+ descriptor_table_free(map->table);
+ kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ int descriptor = 0;
+ UMP_ASSERT(NULL != map);
+ down_write( &map->lock);
+ descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+ if (descriptor == map->current_nr_mappings)
+ {
+ /* no free descriptor, try to expand the table */
+ umpp_descriptor_table * new_table;
+ umpp_descriptor_table * old_table = map->table;
+ int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+ if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+ {
+ descriptor = 0;
+ goto unlock_and_exit;
+ }
+
+ new_table = descriptor_table_alloc(nr_mappings_new);
+ if (NULL == new_table)
+ {
+ descriptor = 0;
+ goto unlock_and_exit;
+ }
+
+ memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ map->table = new_table;
+ map->current_nr_mappings = nr_mappings_new;
+ descriptor_table_free(old_table);
+ }
+
+ /* we have found a valid descriptor, set the value and usage bit */
+ set_bit(descriptor, map->table->usage);
+ map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+ up_write(&map->lock);
+ return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+ int result = -EINVAL;
+ UMP_ASSERT(map);
+ UMP_ASSERT(target);
+ down_read(&map->lock);
+ if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ {
+ *target = map->table->mappings[descriptor];
+ result = 0;
+ }
+ /* keep target untouched if the descriptor was not found */
+ up_read(&map->lock);
+ return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+ UMP_ASSERT(map);
+ down_write(&map->lock);
+ if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ {
+ map->table->mappings[descriptor] = NULL;
+ clear_bit(descriptor, map->table->usage);
+ }
+ up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+ umpp_descriptor_table * table;
+
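+	/* a single allocation laid out as: the struct itself, then the
+	 * usage bitmap (count bits), then count mapping pointers */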
+ table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+ if (NULL != table)
+ {
+ table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+ table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+ }
+
+ return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ UMP_ASSERT(table);
+ kfree(table);
+}
+
diff --git a/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100644
index 00000000000..d06c1455371
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+	/* kept as unsigned longs to rely on the OS's bitops support */
+	unsigned long * usage; /**< Pointer to bit pattern indicating if a descriptor is valid/used (1) or not (0) */
+ void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+ struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+ unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+ unsigned int current_nr_mappings; /**< Current number of possible mappings */
+ umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on successful lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
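+
+/*
+ * Typical lifecycle, as an illustrative sketch only (my_object and
+ * use_object() are placeholders, not part of this interface):
+ *
+ *	umpp_descriptor_mapping *map;
+ *	unsigned int id;
+ *	void *obj;
+ *
+ *	map = umpp_descriptor_mapping_create(64, 32768);
+ *	id = umpp_descriptor_mapping_allocate(map, my_object);
+ *	if (id != 0 && 0 == umpp_descriptor_mapping_lookup(map, id, &obj))
+ *		use_object(obj);
+ *	umpp_descriptor_mapping_remove(map, id);
+ *	umpp_descriptor_mapping_destroy(map);
+ */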
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/drivers/base/ump/src/common/ump_kernel_priv.h b/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100644
index 00000000000..38b6f1b197f
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+ if (!(expr)) { \
+ printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+ #expr,__FILE__,__func__,__LINE__); \
+ BUG(); \
+ }
+
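+/* Write back the CPU's view of [vaddr, vaddr + sz) so the physical range
+ * starting at paddr holds the data before a non-coherent device reads it */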
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+ __cpuc_flush_dcache_area(vaddr, sz);
+ outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+ /*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+ flush_cache_all();
+#elif defined(CONFIG_X86)
+ struct scatterlist scl = {0, };
+ sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+ paddr & (PAGE_SIZE -1 ));
+ dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+ mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
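+/* Counterpart for device-to-CPU transfers: discard stale CPU cache contents
+ * for [vaddr, vaddr + sz) so reads observe device writes to the physical
+ * range starting at paddr */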
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+ __cpuc_flush_dcache_area(vaddr, sz);
+ outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+ /* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+ flush_cache_all();
+#elif defined(CONFIG_X86)
+ struct scatterlist scl = {0, };
+ sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+ paddr & (PAGE_SIZE -1 ));
+ dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/drivers/base/ump/src/imports/ion/Makefile b/drivers/base/ump/src/imports/ion/Makefile
new file mode 100644
index 00000000000..ef74b273f7a
--- /dev/null
+++ b/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100644
index 00000000000..12b2e325b45
--- /dev/null
+++ b/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+ struct ion_client * ion_client;
+ struct ion_handle * ion_handle;
+ int num_phys_blocks;
+ struct scatterlist * sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+ /* < Customer to provide implementation >
+ * Return a pointer to the global ion_device on the system
+ */
+ return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+ struct ion_client ** ion_client;
+
+ ion_client = (struct ion_client**)custom_session_data;
+
+ *ion_client = ion_client_create(ion_device_get(), "ump");
+
+ return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+ struct ion_client * ion_client;
+
+ ion_client = (struct ion_client*)custom_session_data;
+ BUG_ON(!ion_client);
+
+ ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+ struct ion_wrapping_info * ion_info;
+
+ BUG_ON(!info);
+
+ (void)handle;
+ ion_info = (struct ion_wrapping_info*)info;
+
+ dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+ ion_free(ion_info->ion_client, ion_info->ion_handle);
+ kfree(ion_info);
+ module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+ int fd;
+ ump_dd_handle ump_handle;
+ struct scatterlist * sg;
+ int num_dma_blocks;
+ ump_dd_physical_block_64 * phys_blocks;
+ unsigned long i;
+ struct sg_table * sgt;
+
+ struct ion_wrapping_info * ion_info;
+
+ BUG_ON(!custom_session_data);
+ BUG_ON(!pfd);
+
+	ion_info = kzalloc(sizeof(*ion_info), GFP_KERNEL);
+ if (NULL == ion_info)
+ {
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+ if (get_user(fd, (int*)pfd))
+ {
+ goto out;
+ }
+
+ ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+ if (IS_ERR_OR_NULL(ion_info->ion_handle))
+ {
+ goto out;
+ }
+
+ sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+ if (IS_ERR_OR_NULL(sgt))
+ {
+ goto ion_dma_map_failed;
+ }
+
+ ion_info->sglist = sgt->sgl;
+
+ sg = ion_info->sglist;
+ while (sg)
+ {
+ ion_info->num_phys_blocks++;
+ sg = sg_next(sg);
+ }
+
+ num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+ if (0 == num_dma_blocks)
+ {
+ goto linux_dma_map_failed;
+ }
+
+ phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+ if (NULL == phys_blocks)
+ {
+ goto vmalloc_failed;
+ }
+
+ for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+ {
+ phys_blocks[i].addr = sg_phys(sg);
+ phys_blocks[i].size = sg_dma_len(sg);
+ }
+
+ ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+ vfree(phys_blocks);
+
+ if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+ {
+ /*
+ * As we have a final release callback installed
+ * we must keep the module locked until
+ * the callback has been triggered
+		 */
+ __module_get(THIS_MODULE);
+ return ump_handle;
+ }
+
+	/* failed */
+vmalloc_failed:
+ dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+ ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+ kfree(ion_info);
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+ .linux_module = THIS_MODULE,
+ .session_begin = import_ion_client_create,
+ .session_end = import_ion_client_destroy,
+ .import = import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+ /* register with UMP */
+ return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+ /* unregister import handler */
+ ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/drivers/base/ump/src/linux/ump_kernel_linux.c b/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100644
index 00000000000..d6c3c535490
--- /dev/null
+++ b/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,831 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h> /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h> /* kernel module definitions */
+#include <linux/fs.h> /* file system operations */
+#include <linux/cdev.h> /* character device definitions */
+#include <linux/ioport.h> /* request_mem_region */
+#include <linux/device.h> /* class registration support */
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+ struct cdev cdev;
+ struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call require a non-const char pointer */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+ .owner = THIS_MODULE,
+ .open = umpp_linux_open,
+ .release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+ .unlocked_ioctl = umpp_linux_ioctl,
+#else
+ .ioctl = umpp_linux_ioctl,
+#endif
+ .compat_ioctl = umpp_linux_ioctl,
+ .mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler * import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <= ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
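+/* e.g. DBG_MSG(3, "mapped %d pages\n", nr) only prints when the
+ * ump_debug_level module parameter is at least 3 */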
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n %s()%4d\n", __FILE__, __func__ , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+ ump_result err;
+
+ err = umpp_core_constructor();
+ if (UMP_OK != err)
+ {
+ MSG_ERR("UMP device driver init failed\n");
+ return -ENOTTY;
+ }
+
+ MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+ return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+ DBG_MSG(2, "Unloading UMP device driver\n");
+ umpp_core_destructor();
+ DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+ int err;
+ dev_t dev = 0;
+
+ if (0 == ump_major)
+ {
+ /* auto select a major */
+ err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+ ump_major = MAJOR(dev);
+ }
+ else
+ {
+ /* use load time defined major number */
+ dev = MKDEV(ump_major, 0);
+ err = register_chrdev_region(dev, 1, ump_dev_name);
+ }
+
+ if (0 == err)
+ {
+ memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+ /* initialize our char dev data */
+ cdev_init(&ump_linux_device.cdev, &ump_fops);
+ ump_linux_device.cdev.owner = THIS_MODULE;
+ ump_linux_device.cdev.ops = &ump_fops;
+
+ /* register char dev with the kernel */
+ err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+ if (0 == err)
+ {
+
+ ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+ if (IS_ERR(ump_linux_device.ump_class))
+ {
+ err = PTR_ERR(ump_linux_device.ump_class);
+ }
+ else
+ {
+ struct device * mdev;
+ mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+ if (!IS_ERR(mdev))
+ {
+ return UMP_OK;
+ }
+
+ err = PTR_ERR(mdev);
+ class_destroy(ump_linux_device.ump_class);
+ }
+ cdev_del(&ump_linux_device.cdev);
+ }
+
+ unregister_chrdev_region(dev, 1);
+ }
+
+ return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+ dev_t dev = MKDEV(ump_major, 0);
+
+ device_destroy(ump_linux_device.ump_class, dev);
+ class_destroy(ump_linux_device.ump_class);
+
+ /* unregister char device */
+ cdev_del(&ump_linux_device.cdev);
+
+ /* free major */
+ unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+ umpp_session *session;
+
+ session = umpp_core_session_start();
+ if (NULL == session)
+ {
+ return -EFAULT;
+ }
+
+ filp->private_data = session;
+
+ return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+ umpp_session *session;
+
+ session = filp->private_data;
+
+ umpp_core_session_end(session);
+
+ filp->private_data = NULL;
+
+ return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+ ump_dd_handle new_allocation;
+ new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+ if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+ {
+ umpp_session_memory_usage * tracker;
+
+ tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+ if (NULL != tracker)
+ {
+ /* update the return struct with the new ID */
+ params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+ tracker->mem = new_allocation;
+ tracker->id = params->secure_id;
+ atomic_set(&tracker->process_usage_count, 1);
+
+ /* link it into the session in-use list */
+ mutex_lock(&session->session_lock);
+ list_add(&tracker->link, &session->memory_usage);
+ mutex_unlock(&session->session_lock);
+
+ return 0;
+ }
+ ump_dd_release(new_allocation);
+ }
+
+ printk(KERN_WARNING "UMP: Allocation FAILED\n");
+ return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+ umpp_session_memory_usage * it;
+
+ mutex_lock(&session->session_lock);
+
+ /* try to find it on the session usage list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found to already be in use */
+			/* check for overflow: same lock-free cmpxchg scheme as ump_dd_retain */
+ while(1)
+ {
+ int refcnt = atomic_read(&it->process_usage_count);
+ if (refcnt + 1 > 0)
+ {
+ /* add a process local ref */
+ if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+ {
+ mutex_unlock(&session->session_lock);
+ return 0;
+ }
+ }
+ else
+ {
+ /* maximum usage cap reached */
+ mutex_unlock(&session->session_lock);
+ return -EBUSY;
+ }
+ }
+ }
+ }
+ /* try to look it up globally */
+
+ it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+ if (NULL != it)
+ {
+ it->mem = ump_dd_from_secure_id(params->secure_id);
+ if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+ {
+ /* found, add it to the session usage list */
+ it->id = params->secure_id;
+ atomic_set(&it->process_usage_count, 1);
+ list_add(&it->link, &session->memory_usage);
+ }
+ else
+ {
+ /* not found */
+ kfree(it);
+ it = NULL;
+ }
+ }
+
+ mutex_unlock(&session->session_lock);
+
+ return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+ umpp_session_memory_usage * it;
+ int result = -ENODEV;
+
+ mutex_lock(&session->session_lock);
+
+ /* only do a release if found on the session list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found, a valid call */
+ result = 0;
+
+ if (0 == atomic_sub_return(1, &it->process_usage_count))
+ {
+				/* last ref in this process: remove it from the usage list and drop the underlying ref */
+ list_del(&it->link);
+ ump_dd_release(it->mem);
+ kfree(it);
+ }
+
+ break;
+ }
+ }
+ mutex_unlock(&session->session_lock);
+
+ return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+ umpp_session_memory_usage * it;
+ int result = -ENODEV;
+
+ mutex_lock(&session->session_lock);
+
+ /* only valid if found on the session list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found, a valid call */
+ params->size = ump_dd_size_get_64(it->mem);
+ result = 0;
+ break;
+ }
+
+ }
+ mutex_unlock(&session->session_lock);
+
+ return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+ umpp_session_memory_usage * it;
+ int result = -ENODEV;
+
+ mutex_lock(&session->session_lock);
+
+ /* only valid if found on the session list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found, a valid call */
+ params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+ result = 0;
+ break;
+ }
+
+ }
+ mutex_unlock(&session->session_lock);
+
+ return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+ umpp_session_memory_usage * it;
+ int result = -ENODEV;
+
+ mutex_lock(&session->session_lock);
+
+ /* only valid if found on the session list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found, do the cache op */
+#ifdef CONFIG_COMPAT
+ if (is_compat_task())
+ {
+ umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+ result = 0;
+ }
+ else
+ {
+#endif
+ umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+ result = 0;
+#ifdef CONFIG_COMPAT
+ }
+#endif
+ break;
+ }
+ }
+ mutex_unlock(&session->session_lock);
+
+ return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+ int i;
+ mutex_lock(&import_list_lock);
+ for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+ {
+ if (import_handlers[i])
+ {
+ import_handlers[i]->session_begin(&session->import_handler_data[i]);
+ /* It is OK if session_begin returned an error.
+ * We won't do any import calls if so */
+ }
+ }
+ mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+ int i;
+ mutex_lock(&import_list_lock);
+ for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+ {
+ /* only call if session_begin succeeded */
+ if (session->import_handler_data[i] != NULL)
+ {
+			/* if session_begin succeeded the handler
+			 * should not have unregistered with us */
+ BUG_ON(!import_handlers[i]);
+ import_handlers[i]->session_end(session->import_handler_data[i]);
+ session->import_handler_data[i] = NULL;
+ }
+ }
+ mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+ int res = -EEXIST;
+
+ /* validate input */
+ BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+ BUG_ON(!handler);
+ BUG_ON(!handler->linux_module);
+ BUG_ON(!handler->session_begin);
+ BUG_ON(!handler->session_end);
+ BUG_ON(!handler->import);
+
+ mutex_lock(&import_list_lock);
+
+ if (!import_handlers[type])
+ {
+ import_handlers[type] = handler;
+ res = 0;
+ }
+
+ mutex_unlock(&import_list_lock);
+
+ return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+ BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+ mutex_lock(&import_list_lock);
+ /* an error to call this if ump_import_module_register didn't succeed */
+ BUG_ON(!import_handlers[type]);
+ import_handlers[type] = NULL;
+ mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+ enum ump_external_memory_type type;
+ struct ump_import_handler * handler;
+
+ /* validate and convert input */
+ /* handle bad data here, not just BUG_ON */
+ if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+ return NULL;
+
+ type = (enum ump_external_memory_type)type_id;
+
+ /* find the handler */
+ mutex_lock(&import_list_lock);
+
+ handler = import_handlers[type];
+
+ if (handler)
+ {
+ if (!try_module_get(handler->linux_module))
+ {
+ handler = NULL;
+ }
+ }
+
+ mutex_unlock(&import_list_lock);
+
+ return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+ module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+ ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+ struct ump_import_handler * handler;
+
+ handler = import_handler_get(params->type);
+
+ if (handler)
+ {
+ /* try late binding if not already bound */
+ if (!session->import_handler_data[params->type])
+ {
+ handler->session_begin(&session->import_handler_data[params->type]);
+ }
+
+ /* do we have a bound session? */
+ if (session->import_handler_data[params->type])
+ {
+ new_allocation = handler->import( session->import_handler_data[params->type],
+ params->phandle.value,
+ params->alloc_flags);
+ }
+
+ /* done with the handler */
+ import_handler_put(handler);
+ }
+
+ /* did the import succeed? */
+ if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+ {
+ umpp_session_memory_usage * tracker;
+
+ tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+ if (NULL != tracker)
+ {
+ /* update the return struct with the new ID */
+ params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+ tracker->mem = new_allocation;
+ tracker->id = params->secure_id;
+ atomic_set(&tracker->process_usage_count, 1);
+
+ /* link it into the session in-use list */
+ mutex_lock(&session->session_lock);
+ list_add(&tracker->link, &session->memory_usage);
+ mutex_unlock(&session->session_lock);
+
+ return 0;
+ }
+ ump_dd_release(new_allocation);
+ }
+
+ return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+ int ret;
+ uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* alignment fixup */
+ uint32_t size = _IOC_SIZE(cmd);
+ struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+ (void)inode; /* unused arg */
+#endif
+
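+	/* every command below follows the same pattern: check the payload
+	 * size, copy the struct in from user space, dispatch to the matching
+	 * do_ump_dd_* helper and, where results are returned, copy it back */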
+ /*
+ * extract the type and number bitfields, and don't decode
+ * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+ */
+ if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+ {
+		return -ENOTTY;
+	}
+ if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+ {
+ return -ENOTTY;
+ }
+
+ switch(cmd)
+ {
+ case UMP_FUNC_ALLOCATE:
+ if (size != sizeof(ump_k_allocate))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ case UMP_FUNC_SIZEQUERY:
+ if (size != sizeof(ump_k_sizequery))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ case UMP_FUNC_MSYNC:
+ if (size != sizeof(ump_k_msync))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ case UMP_FUNC_IMPORT:
+ if (size != sizeof(ump_k_import))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user*)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ /* used only by v1 API */
+ case UMP_FUNC_ALLOCATION_FLAGS_GET:
+ if (size != sizeof(ump_k_allocation_flags))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ case UMP_FUNC_RETAIN:
+ if (size != sizeof(ump_k_retain))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ return 0;
+ case UMP_FUNC_RELEASE:
+ if (size != sizeof(ump_k_release))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ return 0;
+ default:
+ /* not ours */
+ return -ENOTTY;
+ }
+	/* unreachable: every case above returns */
+	return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+#ifdef CONFIG_KDS
+EXPORT_SYMBOL(ump_dd_kds_resource_get);
+#endif
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100644
index 00000000000..38db91e407a
--- /dev/null
+++ b/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h> /* kernel module definitions */
+#include <linux/fs.h> /* file system operations */
+#include <linux/cdev.h> /* character device definitions */
+#include <linux/ioport.h> /* request_mem_region */
+#include <linux/mm.h> /* memory manager definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+ umpp_cpu_mapping * mapping;
+ umpp_session * session;
+ ump_dd_handle handle;
+
+ mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+ UMP_ASSERT(mapping);
+
+ session = mapping->session;
+ handle = mapping->handle;
+
+ umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+ ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+ .close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+ uint64_t i;
+
+ /* round up to a page boundary */
+ alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE - 1);
+ /* calculate number of pages */
+ alloc->blocksCount = alloc->size >> PAGE_SHIFT;
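+ /* e.g. with PAGE_SIZE == 4096 (assumed for illustration), a 5000-byte
+ * request rounds up to 8192 bytes, giving blocksCount == 2 */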
+
+ if ((sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+ {
+ printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+ return -ENOMEM;
+ }
+
+ alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+ if (NULL == alloc->block_array)
+ {
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < alloc->blocksCount; i++)
+ {
+ void * mp;
+ struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+ if (NULL == page)
+ {
+ break;
+ }
+
+ alloc->block_array[i].addr = page_to_pfn(page) << PAGE_SHIFT;
+ alloc->block_array[i].size = PAGE_SIZE;
+
+ mp = kmap(page);
+ if (NULL == mp)
+ {
+ __free_page(page);
+ break;
+ }
+
+ memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+ ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+ kunmap(page);
+ }
+
+ if (i == alloc->blocksCount)
+ {
+ return 0;
+ }
+ else
+ {
+ uint64_t j;
+ for (j = 0; j < i; j++)
+ {
+ struct page * page;
+ page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+ __free_page(page);
+ }
+
+ kfree(alloc->block_array);
+
+ return -ENOMEM;
+ }
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+ uint64_t i;
+
+ for (i = 0; i < alloc->blocksCount; i++)
+ {
+ __free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+ }
+
+ kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+ ump_secure_id id;
+ ump_dd_handle h;
+ size_t offset;
+ int err = -EINVAL;
+ size_t length = vma->vm_end - vma->vm_start;
+
+ umpp_cpu_mapping * map = NULL;
+ umpp_session *session = filp->private_data;
+
+ if (0 == length)
+ {
+ return -EINVAL;
+ }
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (NULL == map)
+ {
+ WARN_ON(1);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+ if (is_compat_task())
+ {
+#endif
+ id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+ offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+ }
+ else
+ {
+ id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+ offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+ }
+#endif
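+
+ /*
+ * Sketch of the assumed user-space encoding (illustrative only): the
+ * secure ID is packed into the high bits of the mmap page offset and
+ * the in-allocation page offset into the low bits, e.g. for a native
+ * 64-bit task:
+ *
+ *   vm_pgoff = ((u64)id << UMP_LINUX_OFFSET_BITS_64) | page_offset;
+ *
+ * The shift/mask pairs above simply undo that packing.
+ */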
+
+ h = ump_dd_from_secure_id(id);
+ if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+ {
+ uint64_t i;
+ uint64_t block_idx;
+ uint64_t block_offset;
+ uint64_t paddr;
+ umpp_allocation * alloc;
+ uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+ vma->vm_ops = &umpp_vm_ops;
+ vma->vm_private_data = map;
+
+ alloc = (umpp_allocation*)h;
+
+ if ((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+ {
+ /* cache disabled flag set, disable caching for cpu mappings */
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ }
+
+ last_byte = length + (offset << PAGE_SHIFT) - 1;
+ if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+ {
+ goto err_out;
+ }
+
+ if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+ {
+ goto err_out;
+ }
+
+ paddr = alloc->block_array[block_idx].addr + block_offset;
+
+ for (i = 0; i < (length >> PAGE_SHIFT); i++)
+ {
+ /* check if we've overrun the current block; if so, move to the next block */
+ if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+ {
+ block_idx++;
+ UMP_ASSERT(block_idx < alloc->blocksCount);
+ paddr = alloc->block_array[block_idx].addr;
+ }
+
+ err = vm_insert_mixed(vma, vma->vm_start + (i << PAGE_SHIFT), paddr >> PAGE_SHIFT);
+ paddr += PAGE_SIZE;
+ }
+
+ map->vaddr_start = (void*)vma->vm_start;
+ map->nr_pages = length >> PAGE_SHIFT;
+ map->page_off = offset;
+ map->handle = h;
+ map->session = session;
+
+ umpp_dd_add_cpu_mapping(h, map);
+
+ return 0;
+
+ err_out:
+
+ ump_dd_release(h);
+ }
+
+ kfree(map);
+
+out:
+
+ return err;
+}
+
diff --git a/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100644
index 00000000000..4fbf3b2a911
--- /dev/null
+++ b/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/drivers/base/ump/src/ump_arch.h b/drivers/base/ump/src/ump_arch.h
new file mode 100644
index 00000000000..2303d56505a
--- /dev/null
+++ b/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2011, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to do host OS/device-specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo anything done by umpp_device_initialize.
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/drivers/base/ump/ump_ref_drv.h b/drivers/base/ump/ump_ref_drv.h
new file mode 100644
index 00000000000..9a265fe1258
--- /dev/null
+++ b/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file provides the link to the user-space part of the UMP API, for use by the MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild
new file mode 100644
index 00000000000..19c7e9a2659
--- /dev/null
+++ b/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig
new file mode 100644
index 00000000000..1f30eb541d6
--- /dev/null
+++ b/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
new file mode 100644
index 00000000000..f4384983140
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,241 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r6p0-02rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
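+
+# All of the above use "?=", i.e. they are set only if not already defined,
+# so a wrapper build system can predefine them, e.g. (illustrative):
+#   make ... MALI_UNIT_TEST=1 MALI_COVERAGE=1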
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+ -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+ -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+ -DMALI_COVERAGE=$(MALI_COVERAGE) \
+ -DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+ -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+ mali_kbase_device.c \
+ mali_kbase_cache_policy.c \
+ mali_kbase_mem.c \
+ mali_kbase_mmu.c \
+ mali_kbase_jd.c \
+ mali_kbase_jd_debugfs.c \
+ mali_kbase_jm.c \
+ mali_kbase_cpuprops.c \
+ mali_kbase_gpuprops.c \
+ mali_kbase_js.c \
+ mali_kbase_js_ctx_attr.c \
+ mali_kbase_event.c \
+ mali_kbase_context.c \
+ mali_kbase_pm.c \
+ mali_kbase_config.c \
+ mali_kbase_security.c \
+ mali_kbase_instr.c \
+ mali_kbase_vinstr.c \
+ mali_kbase_softjobs.c \
+ mali_kbase_10969_workaround.c \
+ mali_kbase_hw.c \
+ mali_kbase_utility.c \
+ mali_kbase_debug.c \
+ mali_kbase_trace_timeline.c \
+ mali_kbase_gpu_memory_debugfs.c \
+ mali_kbase_mem_linux.c \
+ mali_kbase_core_linux.c \
+ mali_kbase_sync.c \
+ mali_kbase_sync_user.c \
+ mali_kbase_replay.c \
+ mali_kbase_mem_profile_debugfs.c \
+ mali_kbase_mmu_mode_lpae.c \
+ mali_kbase_disjoint_events.c \
+ mali_kbase_gator_api.c \
+ mali_kbase_debug_mem_view.c \
+ mali_kbase_smc.c
+
+ifeq ($(CONFIG_MALI_MIPE_ENABLED),y)
+ SRC += mali_kbase_tlstream.c
+ ifeq ($(MALI_UNIT_TEST),1)
+ SRC += mali_kbase_tlstream_test.c
+ endif
+endif
+
+# Job Scheduler Policy: Completely Fair Scheduler
+SRC += mali_kbase_js_policy_cfs.c
+
+ifeq ($(CONFIG_MACH_MANTA),y)
+ SRC += mali_kbase_mem_alloc_carveout.c
+else
+ SRC += mali_kbase_mem_alloc.c
+endif
+
+ccflags-y += -I$(KBASE_PATH)
+
+# in-tree/out-of-tree logic needs to be slightly different to determine if a file is present
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+MALI_METRICS_PATH = $(srctree)/drivers/gpu/arm/midgard
+else
+# out-of-tree
+MALI_METRICS_PATH = $(KBUILD_EXTMOD)
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+ SRC += mali_kbase_platform_fake.c
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+ SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+ platform/vexpress/mali_kbase_cpu_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+ SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/rtsm_ve
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+ SRC += platform/juno/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/juno
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+ SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+ ccflags-y += -I$(src)/platform/juno_soc
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+ SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+ SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+ platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+ SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+ platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+ ccflags-y += -I$(src)/platform/a7_kipling
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+ # remove begin and end quotes from the Kconfig string type
+ platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+ MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ ifeq ($(CONFIG_MALI_MIDGARD),m)
+ include $(src)/platform/$(platform_name)/Kbuild
+ else ifeq ($(CONFIG_MALI_MIDGARD),y)
+ obj-$(CONFIG_MALI_MIDGARD) += platform/
+ endif
+ endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
+ SRC += platform/devicetree/mali_kbase_runtime_pm.c
+ SRC += platform/devicetree/mali_kbase_config_devicetree.c
+ ccflags-y += -I$(src)/platform/devicetree
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+ifneq ($(wildcard $(src)/internal/Kbuild),)
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+# This include may set MALI_BACKEND_PATH and CONFIG_MALI_BACKEND_REAL
+include $(src)/internal/Kbuild
+mali_kbase-y += $(INTERNAL:.c=.o)
+endif
+endif
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig
new file mode 100644
index 00000000000..15430438004
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,209 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+ tristate "Mali Midgard series support"
+ default n
+ help
+ Enable this option to build support for an ARM Mali Midgard GPU.
+
+ To compile this driver as a module, choose M here:
+ this will generate a single module, called mali_kbase.
+
+choice
+ prompt "Streamline support"
+ depends on MALI_MIDGARD
+ default MALI_TIMELINE_DISABLED
+ help
+ Select streamline support configuration.
+
+config MALI_TIMELINE_DISABLED
+ bool "Streamline support disabled"
+ help
+ Disable support for ARM Streamline Performance Analyzer.
+
+ Timeline support will not be included in the kernel code,
+ and no debug stream will be generated.
+
+config MALI_GATOR_SUPPORT
+ bool "Streamline support via Gator"
+ help
+ Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+ You will need the Gator device driver loaded before loading this driver
+ when Streamline debug support is enabled.
+
+config MALI_MIPE_ENABLED
+ bool "Streamline support via MIPE"
+ help
+ Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+
+ The stream will be transmitted directly to the Mali GPU library.
+ A compatible version of the library is required to read the debug stream generated by the kernel.
+
+endchoice
+
+config MALI_MIDGARD_DVFS
+ bool "Enable DVFS"
+ depends on MALI_MIDGARD
+ default n
+ help
+ Choose this option to enable DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_RT_PM
+ bool "Enable Runtime power management"
+ depends on MALI_MIDGARD
+ depends on PM_RUNTIME
+ default n
+ help
+ Choose this option to enable runtime power management in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+ bool "Enable kbase tracing"
+ depends on MALI_MIDGARD
+ default n
+ help
+ Enables tracing in kbase. The trace log is available through
+ the "mali_trace" debugfs file when CONFIG_DEBUG_FS is enabled.
+
+config MALI_MIDGARD_DEBUG_SYS
+ bool "Enable sysfs for the Mali Midgard DDK "
+ depends on MALI_MIDGARD && SYSFS
+ default n
+ help
+ Enables sysfs for the Mali Midgard DDK, used to set and monitor the DDK.
+
+config MALI_DEVFREQ
+ bool "devfreq support for Mali"
+ depends on MALI_MIDGARD && PM_DEVFREQ
+ help
+ Support devfreq for Mali.
+
+ Using the devfreq framework and, by default, the simpleondemand
+ governor, the frequency of Mali will be dynamically selected from the
+ available OPPs.
+
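+# The OPPs are typically provided via a device-tree OPP table, e.g. an
+# illustrative fragment (the values are made up):
+#   operating-points = <533000 1250000>,  /* kHz uV */
+#                      <266000 1150000>;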
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+ depends on MALI_MIDGARD
+ bool "Enable Expert Settings"
+ default n
+ help
+ Enabling this option and modifying the default settings may produce a driver with performance or
+ other limitations.
+
+config MALI_DEBUG_SHADER_SPLIT_FS
+ bool "Allow mapping of shader cores via sysfs"
+ depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT
+ default n
+ help
+ Select this option to provide a sysfs entry for runtime configuration of shader
+ core affinity masks.
+
+config MALI_PLATFORM_FAKE
+ bool "Enable fake platform device support"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ When you start to work with the Mali Midgard series device driver, the platform-specific code of
+ the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+ supports creating the platform device outside of the Linux platform-specific code.
+ Enable this option if you would like to use a platform device configuration from within the device driver.
+
+choice
+ prompt "Platform configuration"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default MALI_PLATFORM_VEXPRESS
+ help
+ Select the SoC platform that contains a Mali Midgard GPU.
+
+config MALI_PLATFORM_VEXPRESS
+ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+ bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+ bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+ depends on ARCH_GOLDFISH
+ bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+ depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+ bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+ bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+ depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+ string "Third party platform name"
+ help
+ Enter the name of a third party platform that is supported. The third party configuration
+ file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+ specified here.
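+
+# Example (illustrative): with CONFIG_MALI_PLATFORM_THIRDPARTY_NAME="myboard",
+# the midgard Kbuild includes platform/myboard/Kbuild when the driver is
+# built as a module.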
+
+config MALI_DEBUG
+ bool "Debug build"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Select this option for increased checking and reporting of errors.
+
+config MALI_NO_MALI
+ bool "No Mali"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ This can be used to test the driver in a simulated environment where
+ the hardware is not physically present. If the hardware is physically
+ present it will not be used. This can be used to test the majority of the
+ driver without needing actual hardware or for software benchmarking.
+ All calls to the simulated hardware will complete immediately as if the hardware
+ completed the task.
+
+config MALI_ERROR_INJECT
+ bool "Error injection"
+ depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+ default n
+ help
+ Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+ bool "Timeline tracing"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+ bool "Enable system event tracing support"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Choose this option to enable system trace events for each
+ kbase event. This is typically used for debugging but has
+ minimal overhead when not in use. Enable only if you know what
+ you are doing.
+
+config MALI_GPU_TRACEPOINTS
+ bool "Enable GPU tracepoints"
+ depends on MALI_MIDGARD && ANDROID
+ select GPU_TRACEPOINTS
+ help
+ Enables GPU tracepoints using Android trace event definitions.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile
new file mode 100644
index 00000000000..ded089243e6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+ EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+ EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/../../../../tests/kutf/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+ $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+ $(MAKE) -C $(KDIR) M=$(CURDIR) clean
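+
+# Illustrative out-of-tree invocation (the kernel build path is an assumption):
+#   make KDIR=/path/to/kernel all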
diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100644
index 00000000000..2bef9c25eae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100644
index 00000000000..05a7a826c5d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,64 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+ backend/gpu/mali_kbase_cache_policy_backend.c \
+ backend/gpu/mali_kbase_device_hw.c \
+ backend/gpu/mali_kbase_gpu.c \
+ backend/gpu/mali_kbase_gpuprops_backend.c \
+ backend/gpu/mali_kbase_irq_linux.c \
+ backend/gpu/mali_kbase_instr_backend.c \
+ backend/gpu/mali_kbase_jm_as.c \
+ backend/gpu/mali_kbase_jm_hw.c \
+ backend/gpu/mali_kbase_jm_rb.c \
+ backend/gpu/mali_kbase_js_affinity.c \
+ backend/gpu/mali_kbase_js_backend.c \
+ backend/gpu/mali_kbase_mmu_hw_direct.c \
+ backend/gpu/mali_kbase_pm_backend.c \
+ backend/gpu/mali_kbase_pm_driver.c \
+ backend/gpu/mali_kbase_pm_metrics.c \
+ backend/gpu/mali_kbase_pm_ca.c \
+ backend/gpu/mali_kbase_pm_ca_fixed.c \
+ backend/gpu/mali_kbase_pm_always_on.c \
+ backend/gpu/mali_kbase_pm_coarse_demand.c \
+ backend/gpu/mali_kbase_pm_demand.c \
+ backend/gpu/mali_kbase_pm_policy.c \
+ backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+ backend/gpu/mali_kbase_pm_ca_random.c \
+ backend/gpu/mali_kbase_pm_demand_always_powered.c \
+ backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+ # Dummy model
+ BACKEND += backend/gpu/mali_kbase_model_dummy.c
+ BACKEND += backend/gpu/mali_kbase_model_linux.c
+ # HW error simulation
+ BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(wildcard $(MALI_METRICS_PATH)/backend/gpu/mali_kbase_pm_metrics_linux.c),)
+ BACKEND += backend/gpu/mali_kbase_pm_metrics_dummy.c
+else
+ BACKEND += backend/gpu/mali_kbase_pm_metrics_linux.c
+endif
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100644
index 00000000000..c8ae87eb84a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 00000000000..92a14fa1bae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,22 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 00000000000..42069fc88a1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+
+#endif /* _KBASE_CACHE_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100644
index 00000000000..3791457f605
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,284 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
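+
+/*
+ * With the aliases above, the code below can use the new API names
+ * unconditionally; e.g. on a pre-3.13 kernel (illustrative) a call such as
+ *
+ *   opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ *
+ * resolves to opp_find_freq_ceil(dev, &freq).
+ */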
+
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+ struct dev_pm_opp *opp;
+ unsigned long freq = 0;
+ unsigned long voltage;
+ int err;
+
+ freq = *target_freq;
+
+ rcu_read_lock();
+ opp = devfreq_recommended_opp(dev, &freq, flags);
+ if (IS_ERR_OR_NULL(opp)) {
+ rcu_read_unlock();
+ dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+ return PTR_ERR(opp);
+ }
+ /* read the voltage under RCU, before the OPP can be freed */
+ voltage = dev_pm_opp_get_voltage(opp);
+ rcu_read_unlock();
+
+ /*
+ * Only update if there is a change of frequency
+ */
+ if (kbdev->current_freq == freq) {
+ *target_freq = freq;
+ return 0;
+ }
+
+#ifdef CONFIG_REGULATOR
+ if (kbdev->regulator && kbdev->current_voltage != voltage
+ && kbdev->current_freq < freq) {
+ err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+ if (err) {
+ dev_err(dev, "Failed to increase voltage (%d)\n", err);
+ return err;
+ }
+ }
+#endif
+
+ err = clk_set_rate(kbdev->clock, freq);
+ if (err) {
+ dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+ freq, *target_freq);
+ return err;
+ }
+
+#ifdef CONFIG_REGULATOR
+ if (kbdev->regulator && kbdev->current_voltage != voltage
+ && kbdev->current_freq > freq) {
+ err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+ if (err) {
+ dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+ return err;
+ }
+ }
+#endif
+
+ *target_freq = freq;
+ kbdev->current_voltage = voltage;
+ kbdev->current_freq = freq;
+
+ kbase_pm_reset_dvfs_utilisation(kbdev);
+
+ return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ *freq = kbdev->current_freq;
+
+ return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ stat->current_frequency = kbdev->current_freq;
+
+ kbase_pm_get_dvfs_utilisation(kbdev,
+ &stat->total_time, &stat->busy_time);
+
+ stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ memcpy(&kbdev->devfreq_cooling->last_status, stat, sizeof(*stat));
+#endif
+
+ return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+ struct devfreq_dev_profile *dp)
+{
+ int count;
+ int i = 0;
+ unsigned long freq = 0;
+ struct dev_pm_opp *opp;
+
+ rcu_read_lock();
+ count = dev_pm_opp_get_opp_count(kbdev->dev);
+ if (count < 0) {
+ rcu_read_unlock();
+ return count;
+ }
+ rcu_read_unlock();
+
+ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+ GFP_KERNEL);
+ if (!dp->freq_table)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ for (i = 0; i < count; i++, freq++) {
+ opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+ if (IS_ERR(opp))
+ break;
+
+ dp->freq_table[i] = freq;
+ }
+ rcu_read_unlock();
+
+ if (count != i)
+ dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+ count, i);
+
+ dp->max_state = i;
+
+ return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+ struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+ kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEVFREQ_THERMAL
+ struct devfreq_cooling_ops *callbacks = POWER_MODEL_CALLBACKS;
+#endif
+ struct devfreq_dev_profile *dp;
+ int err;
+
+ dev_dbg(kbdev->dev, "Init Mali devfreq\n");
+
+ if (!kbdev->clock)
+ return -ENODEV;
+
+ kbdev->current_freq = clk_get_rate(kbdev->clock);
+
+ dp = &kbdev->devfreq_profile;
+
+ dp->initial_freq = kbdev->current_freq;
+ dp->polling_ms = 100;
+ dp->target = kbase_devfreq_target;
+ dp->get_dev_status = kbase_devfreq_status;
+ dp->get_cur_freq = kbase_devfreq_cur_freq;
+ dp->exit = kbase_devfreq_exit;
+
+ if (kbase_devfreq_init_freq_table(kbdev, dp))
+ return -EFAULT;
+
+ kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+ "simple_ondemand", NULL);
+ if (IS_ERR(kbdev->devfreq)) {
+ kbase_devfreq_term_freq_table(kbdev);
+ return PTR_ERR(kbdev->devfreq);
+ }
+
+ err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+ if (err) {
+ dev_err(kbdev->dev,
+ "Failed to register OPP notifier (%d)\n", err);
+ goto opp_notifier_failed;
+ }
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ if (callbacks) {
+
+ kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+ kbdev->dev->of_node,
+ kbdev->devfreq,
+ callbacks);
+ if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+ err = PTR_ERR(kbdev->devfreq_cooling);
+ dev_err(kbdev->dev,
+ "Failed to register cooling device (%d)\n",
+ err);
+ goto cooling_failed;
+ }
+ }
+#endif
+
+ return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+ err = devfreq_remove_device(kbdev->devfreq);
+ if (err)
+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+ else
+ kbdev->devfreq = NULL;
+
+ return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+ int err;
+
+ dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+
+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+ err = devfreq_remove_device(kbdev->devfreq);
+ if (err)
+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+ else
+ kbdev->devfreq = NULL;
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100644
index 00000000000..c0bf8b15b3b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100644
index 00000000000..83d5ec9f7a9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+ struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+ writel(value, kbdev->reg + offset);
+ if (kctx && kctx->jctx.tb)
+ kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+ value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+ struct kbase_context *kctx)
+{
+ u32 val;
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ val = readl(kbdev->reg + offset);
+ dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+ if (kctx && kctx->jctx.tb)
+ kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+ return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev: Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ * was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+ u32 status;
+ u64 address;
+
+ status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+ address = (u64) kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+ address |= kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+ dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+ status & 0xFF,
+ kbase_exception_name(kbdev, status),
+ address);
+ if (multiple)
+ dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+ if (val & GPU_FAULT)
+ kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+ if (val & RESET_COMPLETED)
+ kbase_pm_reset_done(kbdev);
+
+ if (val & PRFCNT_SAMPLE_COMPLETED)
+ kbase_instr_hwcnt_sample_done(kbdev);
+
+ if (val & CLEAN_CACHES_COMPLETED)
+ kbase_clean_caches_done(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+ /* kbase_pm_check_transitions must be called after the IRQ has been
+ * cleared. This is because it might trigger further power transitions
+ * and we don't want to miss the interrupt raised to notify us that
+ * these further transitions have finished.
+ */
+ if (val & POWER_CHANGED_ALL)
+ kbase_pm_power_changed(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100644
index 00000000000..5b20445932f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev: Kbase device pointer
+ * @offset: Offset of register
+ * @value: Value to write
+ * @kctx: Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev: Kbase device pointer
+ * @offset: Offset of register
+ * @kctx: Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+ struct kbase_context *kctx);
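+
+/*
+ * Example (illustrative; mirrors the gpuprops backend): reading the GPU ID
+ * register with no context bound:
+ *
+ *   u32 gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+ */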
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val: The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100644
index 00000000000..72a98d0f795
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+ int err;
+
+ err = kbasep_platform_device_init(kbdev);
+ if (err)
+ return err;
+
+ /* Ensure we can access the GPU registers */
+ kbase_pm_register_access_enable(kbdev);
+
+ /* Find out GPU properties based on the GPU feature registers */
+ kbase_gpuprops_set(kbdev);
+
+ /* We're done accessing the GPU registers for now. */
+ kbase_pm_register_access_disable(kbdev);
+
+ err = kbase_hwaccess_pm_init(kbdev);
+ if (err)
+ goto fail_pm;
+
+ err = kbase_install_interrupts(kbdev);
+ if (err)
+ goto fail_interrupts;
+
+ return 0;
+
+fail_interrupts:
+ kbase_hwaccess_pm_term(kbdev);
+fail_pm:
+ kbasep_platform_device_term(kbdev);
+
+ return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+ kbase_release_interrupts(kbdev);
+ kbase_hwaccess_pm_term(kbdev);
+ kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+ int err;
+
+ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+ if (err)
+ return err;
+
+ err = kbase_backend_timer_init(kbdev);
+ if (err)
+ goto fail_timer;
+
+/* Currently disabled on the prototype */
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+ dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+ err = -EINVAL;
+ goto fail_interrupt_test;
+ }
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+ err = kbase_job_slot_init(kbdev);
+ if (err)
+ goto fail_job_slot;
+
+ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+ return 0;
+
+fail_job_slot:
+/* Currently disabled on the prototype */
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+ kbase_backend_timer_term(kbdev);
+fail_timer:
+ kbase_hwaccess_pm_halt(kbdev);
+
+ return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+ kbase_job_slot_halt(kbdev);
+ kbase_job_slot_term(kbdev);
+ kbase_backend_timer_term(kbdev);
+ kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100644
index 00000000000..bc7235f5833
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+ struct kbase_gpuprops_regdump *regdump)
+{
+ int i;
+
+ /* Fill regdump with the content of the relevant registers */
+ regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+ regdump->l2_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_FEATURES), NULL);
+ regdump->suspend_size = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+ regdump->tiler_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_FEATURES), NULL);
+ regdump->mem_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(MEM_FEATURES), NULL);
+ regdump->mmu_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(MMU_FEATURES), NULL);
+ regdump->as_present = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(AS_PRESENT), NULL);
+ regdump->js_present = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+ regdump->js_features[i] = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ regdump->texture_features[i] = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+ regdump->thread_max_threads = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+ regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+ NULL);
+ regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+ regdump->thread_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+ regdump->shader_present_lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+ regdump->shader_present_hi = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+ regdump->tiler_present_lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+ regdump->tiler_present_hi = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+ regdump->l2_present_lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+ regdump->l2_present_hi = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100644
index 00000000000..2c987071a77
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,536 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ unsigned long pm_flags;
+ u32 irq_mask;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Wait for any reset to complete */
+ while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+ kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_RESETTING);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+ /* Enable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* clean&invalidate the caches so we're sure the mmu tables for the dump
+ * buffer are valid */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup)
+{
+ unsigned long flags, pm_flags;
+ int err = -EINVAL;
+ struct kbasep_js_device_data *js_devdata;
+ u32 irq_mask;
+ int ret;
+ u64 shader_cores_needed;
+
+ KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx);
+
+ shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+
+ js_devdata = &kbdev->js_data;
+
+	/* Reject a missing or misaligned dump buffer - it must be 2048-byte
+	 * aligned, i.e. none of the low 11 bits may be set */
+ if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+ goto out_err;
+
+ /* Override core availability policy to ensure all cores are available
+ */
+ kbase_pm_ca_instr_enable(kbdev);
+
+ /* Request the cores early on synchronously - we'll release them on any
+ * errors (e.g. instrumentation already active) */
+ kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+ /* Instrumentation is already enabled */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out_unrequest_cores;
+ }
+
+ /* Enable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+ PRFCNT_SAMPLE_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* In use, this context is the owner */
+ kbdev->hwcnt.kctx = kctx;
+ /* Remember the dump address so we can reprogram it later */
+ kbdev->hwcnt.addr = setup->dump_buffer;
+ /* Remember all the settings for suspend/resume */
+ if (&kbdev->hwcnt.suspended_state != setup)
+ memcpy(&kbdev->hwcnt.suspended_state, setup,
+ sizeof(kbdev->hwcnt.suspended_state));
+
+ /* Request the clean */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+ kbdev->hwcnt.backend.triggered = 0;
+	/* Clean and invalidate the caches so we're sure the MMU tables for the
+	 * dump buffer are valid */
+ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+ &kbdev->hwcnt.backend.cache_clean_work);
+ KBASE_DEBUG_ASSERT(ret);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Wait for cacheclean to complete */
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_IDLE);
+
+ kbase_pm_request_l2_caches(kbdev);
+
+ /* Configure */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT)
+ | PRFCNT_CONFIG_MODE_OFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+ setup->dump_buffer & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+ setup->dump_buffer >> 32, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+ setup->jm_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+ setup->shader_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+ setup->mmu_l2_bm, kctx);
+ /* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+ * HW counter dump. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+ kctx);
+ else
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ setup->tiler_bm, kctx);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+ /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+ */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ setup->tiler_bm, kctx);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ err = 0;
+
+ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+ return err;
+ out_unrequest_cores:
+ kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+ out_err:
+ return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+ unsigned long flags, pm_flags;
+ int err = -EINVAL;
+ u32 irq_mask;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ while (1) {
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+ /* Instrumentation is not enabled */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out;
+ }
+
+ if (kbdev->hwcnt.kctx != kctx) {
+ /* Instrumentation has been setup for another context */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out;
+ }
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+ break;
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Ongoing dump/setup - wait for its completion */
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ }
+
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+ kbdev->hwcnt.backend.triggered = 0;
+
+ /* Disable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* Disable the counters */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+ kbdev->hwcnt.kctx = NULL;
+ kbdev->hwcnt.addr = 0ULL;
+
+ kbase_pm_ca_instr_disable(kbdev);
+
+ kbase_pm_unrequest_cores(kbdev, true,
+ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+
+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+ kctx);
+
+ err = 0;
+
+ out:
+ return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ int err = -EINVAL;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.kctx != kctx) {
+ /* The instrumentation has been setup for another context */
+ goto unlock;
+ }
+
+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+ /* HW counters are disabled or another dump is ongoing, or we're
+ * resetting */
+ goto unlock;
+ }
+
+ kbdev->hwcnt.backend.triggered = 0;
+
+ /* Mark that we're dumping - the PF handler can signal that we faulted
+ */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+ /* Reconfigure the dump address */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+ kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+ kbdev->hwcnt.addr >> 32, NULL);
+
+ /* Start dumping */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+ kbdev->hwcnt.addr, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dumping requested for context %p", kctx);
+
+ err = 0;
+
+ unlock:
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+ bool * const success)
+{
+ unsigned long flags;
+ bool complete = false;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+ *success = true;
+ complete = true;
+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+ *success = false;
+ complete = true;
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
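+
+/* Illustrative caller sequence for a manual dump using the functions above:
+ * kbase_instr_hwcnt_request_dump(kctx) starts a sample, after which the
+ * caller either blocks in kbase_instr_hwcnt_wait_for_dump(kctx) or polls
+ * kbase_instr_hwcnt_dump_complete(kctx, &success) until it returns true. */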
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+ struct kbase_device *kbdev;
+ unsigned long flags;
+
+ kbdev = container_of(data, struct kbase_device,
+ hwcnt.backend.cache_clean_work);
+
+ mutex_lock(&kbdev->cacheclean_lock);
+ kbasep_instr_hwcnt_cacheclean(kbdev);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Wait for our condition, and any reset to complete */
+ while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING ||
+ kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_CLEANING) {
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+ (kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_RESETTING &&
+ kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_CLEANING));
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_CLEANED);
+
+ /* All finished and idle */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+ int ret;
+ /* Always clean and invalidate the cache after a successful dump
+ */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+ &kbdev->hwcnt.backend.cache_clean_work);
+ KBASE_DEBUG_ASSERT(ret);
+ }
+	/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, we're in a reset,
+	 * and the instrumentation state hasn't been restored yet -
+	 * kbasep_reset_timeout_worker() will do the rest of the work */
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+ u32 irq_mask;
+
+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+ unsigned long flags;
+ unsigned long pm_flags;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Disable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* Wakeup... */
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+ /* Only wake if we weren't resetting */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+ }
+		/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, we're in a
+		 * reset, and the instrumentation state hasn't been restored yet
+		 * - kbasep_reset_timeout_worker() will do the rest of the work
+		 */
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ }
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ unsigned long flags;
+ int err;
+
+ /* Wait for dump & cacheclean to complete */
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+ err = -EINVAL;
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ } else {
+ /* Dump done */
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_IDLE);
+ err = 0;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ int err = -EINVAL;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ /* Check it's the context previously set up and we're not already
+ * dumping */
+ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_IDLE)
+ goto out;
+
+ /* Clear the counters */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+ err = 0;
+
+out:
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+ int ret = 0;
+
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+ init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+ init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+ INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+ kbasep_cache_clean_worker);
+ kbdev->hwcnt.backend.triggered = 0;
+
+ kbdev->hwcnt.backend.cache_clean_wq =
+ alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+ if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+ ret = -EINVAL;
+
+ return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+ destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100644
index 00000000000..23bd80a5a15
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+ /* State where instrumentation is not active */
+ KBASE_INSTR_STATE_DISABLED = 0,
+ /* State machine is active and ready for a command. */
+ KBASE_INSTR_STATE_IDLE,
+ /* Hardware is currently dumping a frame. */
+ KBASE_INSTR_STATE_DUMPING,
+ /* We've requested a clean to occur on a workqueue */
+ KBASE_INSTR_STATE_REQUEST_CLEAN,
+ /* Hardware is currently cleaning and invalidating caches. */
+ KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be set up. */
+ KBASE_INSTR_STATE_CLEANED,
+	/* kbasep_reset_timeout_worker() has started (but not completed) a
+ * reset. This generally indicates the current action should be aborted,
+ * and kbasep_reset_timeout_worker() will handle the cleanup */
+ KBASE_INSTR_STATE_RESETTING,
+	/* An error has occurred during DUMPING (page fault). */
+ KBASE_INSTR_STATE_FAULT
+};
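+
+/*
+ * Illustrative flow between these states, as driven by the functions in
+ * mali_kbase_instr_backend.c (a sketch, not an exhaustive transition list):
+ *
+ *   enable:     DISABLED -> REQUEST_CLEAN -> CLEANING -> CLEANED -> IDLE
+ *   dump:       IDLE -> DUMPING -> REQUEST_CLEAN -> ... -> IDLE
+ *   page fault: DUMPING -> FAULT -> IDLE
+ *   GPU reset:  any state -> RESETTING -> (restored by the reset worker)
+ *   disable:    IDLE -> DISABLED
+ */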
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+ wait_queue_head_t wait;
+ int triggered;
+
+ enum kbase_instr_state state;
+ wait_queue_head_t cache_clean_wait;
+ struct workqueue_struct *cache_clean_wq;
+ struct work_struct cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 00000000000..e96aeae786e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 00000000000..8781561e73d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ * execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+ struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100644
index 00000000000..49c72f90aac
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,471 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define JOB_IRQ_TAG 0
+#define MMU_IRQ_TAG 1
+#define GPU_IRQ_TAG 2
+
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+ return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+ return (void *)(((uintptr_t) ptr) & ~3);
+}
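+
+/* Illustrative note: struct kbase_device is more than 4-byte aligned, so the
+ * low two bits of its address can carry one of the IRQ tags above.
+ * kbase_tag(kbdev, tag) is used as the request_irq()/free_irq() cookie, which
+ * keeps the cookie distinct per interrupt line while every handler can still
+ * recover the device pointer with kbase_untag(data). */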
+
+
+
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.backend.driver_ready_for_irqs)
+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_job_done(kbdev, val);
+
+ return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ atomic_inc(&kbdev->faults_pending);
+
+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.backend.driver_ready_for_irqs)
+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val) {
+ atomic_dec(&kbdev->faults_pending);
+ return IRQ_NONE;
+ }
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_mmu_interrupt(kbdev, val);
+
+ atomic_dec(&kbdev->faults_pending);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.backend.driver_ready_for_irqs)
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_gpu_interrupt(kbdev, val);
+
+ return IRQ_HANDLED;
+}
+static irq_handler_t kbase_handler_table[] = {
+ [JOB_IRQ_TAG] = kbase_job_irq_handler,
+ [MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+ [GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
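+
+/* Sketch of how this table is consumed (see kbase_install_interrupts()
+ * below):
+ *   request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+ *               kbdev->irqs[i].flags | IRQF_SHARED,
+ *               dev_name(kbdev->dev), kbase_tag(kbdev, i));
+ * so the array index doubles as the IRQ tag. */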
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers the given interrupt handler for the requested interrupt type.
+ * If no custom handler is specified, the default handler from
+ * kbase_handler_table is registered instead.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+ irq_handler_t custom_handler,
+ int irq_type)
+{
+ int result = 0;
+ irq_handler_t requested_irq_handler = NULL;
+
+ KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+ (GPU_IRQ_HANDLER >= irq_type));
+
+ /* Release previous handler */
+ if (kbdev->irqs[irq_type].irq)
+ free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+ requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+ kbase_handler_table[irq_type];
+
+ if (0 != request_irq(kbdev->irqs[irq_type].irq,
+ requested_irq_handler,
+ kbdev->irqs[irq_type].flags | IRQF_SHARED,
+ dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+ result = -EINVAL;
+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+ kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+ }
+
+ return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* Test correct interrupt assignment and reception by the CPU */
+struct kbasep_irq_test {
+ struct hrtimer timer;
+ wait_queue_head_t wait;
+ int triggered;
+ u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT 500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbasep_irq_test_data.triggered = 1;
+ wake_up(&kbasep_irq_test_data.wait);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbasep_irq_test_data.triggered = 1;
+ wake_up(&kbasep_irq_test_data.wait);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+ return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+ struct kbasep_irq_test *test_data = container_of(timer,
+ struct kbasep_irq_test, timer);
+
+ test_data->timeout = 1;
+ test_data->triggered = 1;
+ wake_up(&test_data->wait);
+ return HRTIMER_NORESTART;
+}
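+
+/* Sketch of the per-line test below: save and clear the IRQ mask, install the
+ * matching *_irq_test_handler, unmask bit 0 and write 0x1 to the RAWSTAT
+ * register to raise the interrupt by hand, then wait on kbasep_irq_test_data
+ * with the IRQ_TEST_TIMEOUT (500 ms) hrtimer as a backstop; the timeout
+ * firing first means the interrupt never reached the CPU. */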
+
+static int kbasep_common_test_interrupt(
+ struct kbase_device * const kbdev, u32 tag)
+{
+ int err = 0;
+ irq_handler_t test_handler;
+
+ u32 old_mask_val;
+ u16 mask_offset;
+ u16 rawstat_offset;
+
+ switch (tag) {
+ case JOB_IRQ_TAG:
+ test_handler = kbase_job_irq_test_handler;
+ rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+ mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+ break;
+ case MMU_IRQ_TAG:
+ test_handler = kbase_mmu_irq_test_handler;
+ rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+ mask_offset = MMU_REG(MMU_IRQ_MASK);
+ break;
+ case GPU_IRQ_TAG:
+ /* already tested by pm_driver - bail out */
+ default:
+ return 0;
+ }
+
+ /* store old mask */
+ old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+ /* mask interrupts */
+ kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+ if (kbdev->irqs[tag].irq) {
+ /* release original handler and install test handler */
+ if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+ err = -EINVAL;
+ } else {
+ kbasep_irq_test_data.timeout = 0;
+ hrtimer_init(&kbasep_irq_test_data.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kbasep_irq_test_data.timer.function =
+ kbasep_test_interrupt_timeout;
+
+ /* trigger interrupt */
+ kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+ kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+ hrtimer_start(&kbasep_irq_test_data.timer,
+ HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ wait_event(kbasep_irq_test_data.wait,
+ kbasep_irq_test_data.triggered != 0);
+
+ if (kbasep_irq_test_data.timeout != 0) {
+ dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+ kbdev->irqs[tag].irq, tag);
+ err = -EINVAL;
+ } else {
+ dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+ kbdev->irqs[tag].irq, tag);
+ }
+
+ hrtimer_cancel(&kbasep_irq_test_data.timer);
+ kbasep_irq_test_data.triggered = 0;
+
+ /* mask interrupts */
+ kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+ /* release test handler */
+ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+ }
+
+ /* restore original interrupt */
+ if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+ kbdev->irqs[tag].flags | IRQF_SHARED,
+ dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+ dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+ kbdev->irqs[tag].irq, tag);
+ err = -EINVAL;
+ }
+ }
+ /* restore old mask */
+ kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+ return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+ struct kbase_device * const kbdev)
+{
+ int err;
+
+ init_waitqueue_head(&kbasep_irq_test_data.wait);
+ kbasep_irq_test_data.triggered = 0;
+
+ /* A suspend won't happen during startup/insmod */
+ kbase_pm_context_active(kbdev);
+
+ err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+ if (err) {
+ dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+ goto out;
+ }
+
+ err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+ if (err) {
+ dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+ goto out;
+ }
+
+ dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+ kbase_pm_context_idle(kbdev);
+
+ return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ int err;
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+ kbdev->irqs[i].flags | IRQF_SHARED,
+ dev_name(kbdev->dev),
+ kbase_tag(kbdev, i));
+ if (err) {
+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+ kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+ goto release;
+ }
+ }
+
+ return 0;
+
+ release:
+ while (i-- > 0)
+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+ return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ if (kbdev->irqs[i].irq)
+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+ }
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ if (kbdev->irqs[i].irq)
+ synchronize_irq(kbdev->irqs[i].irq);
+ }
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100644
index 00000000000..f2167887229
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,385 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes setting up the global runpool_irq structure and the context
+ * on the AS, activating the MMU on the AS, and allowing jobs to be submitted
+ * on the AS.
+ *
+ * Context:
+ * kbasep_js_kctx_info.jsctx_mutex held,
+ * kbasep_js_device_data.runpool_mutex held,
+ * AS transaction mutex held,
+ * Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_as *current_as)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ struct kbasep_js_per_as_data *js_per_as_data;
+ int as_nr = current_as->number;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&current_as->transaction_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ /* Attribute handling */
+ kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+ /* Assign addr space */
+ kctx->as_nr = as_nr;
+
+ /* If the GPU is currently powered, activate this address space on the
+ * MMU */
+ if (kbdev->pm.backend.gpu_powered)
+ kbase_mmu_update(kctx);
+ /* If the GPU was not powered then the MMU will be reprogrammed on the
+ * next pm_context_active() */
+
+ /* Allow it to run jobs */
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+ /* Book-keeping */
+ js_per_as_data->kctx = kctx;
+ js_per_as_data->as_busy_refcount = 0;
+
+ kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Release an address space
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ *
+ * Release an address space, making it available for being picked again.
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u16 as_bit = (1u << kctx_as_nr);
+
+ js_devdata = &kbdev->js_data;
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* The address space must not already be free */
+ KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+ js_devdata->as_free |= as_bit;
+
+ kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
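+
+/* Illustrative invariant: js_devdata->as_free is a bitmask with bit n set
+ * when address space n is free; kbase_backend_find_free_address_space()
+ * below picks the lowest free AS with ffs(js_devdata->as_free) - 1. */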
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ int i;
+
+ if (kbdev->hwaccess.active_kctx == kctx) {
+ /* Context is already active */
+ return true;
+ }
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbasep_js_per_as_data *js_per_as_data =
+ &kbdev->js_data.runpool_irq.per_as_data[i];
+
+ if (js_per_as_data->kctx == kctx) {
+ /* Context already has ASID - mark as active */
+ return true;
+ }
+ }
+
+ /* Context does not have address space assigned */
+ return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_per_as_data *js_per_as_data;
+ int as_nr = kctx->as_nr;
+
+ if (as_nr == KBASEP_AS_NR_INVALID) {
+ WARN(1, "Attempting to release context without ASID\n");
+ return;
+ }
+
+ lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
+ if (js_per_as_data->as_busy_refcount != 0) {
+ WARN(1, "Attempting to release active ASID\n");
+ return;
+ }
+
+ /* Release context from address space */
+ js_per_as_data->kctx = NULL;
+
+ kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+ /* If the GPU is currently powered, de-activate this address space on
+ * the MMU */
+ if (kbdev->pm.backend.gpu_powered)
+ kbase_mmu_disable(kctx);
+ /* If the GPU was not powered then the MMU will be reprogrammed on the
+ * next pm_context_active() */
+
+ release_addr_space(kbdev, as_nr, kctx);
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+}
+
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+ int as_nr)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ js_devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you must re-check it with this
+ * function, since the return value could change to false.
+ *
+ * Context:
+ * In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
+ * When kctx != NULL the caller must hold the
+ * kbasep_js_kctx_info.ctx.jsctx_mutex.
+ * When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
+ * it doesn't do any harm to do so).
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ bool is_runpool_full;
+
+ js_devdata = &kbdev->js_data;
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Regardless of whether a context is submitting or not, can't have more
+ * than there are HW address spaces */
+ is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
+ kbdev->nr_hw_address_spaces);
+
+ if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		/* Contexts that submit might use fewer of the available address
+		 * spaces due to HW workarounds, in which case the runpool is
+		 * also full when the number of submitting contexts reaches the
+		 * number of submittable address spaces.
+ *
+ * Both checks must be made: can have nr_user_address_spaces ==
+ * nr_hw_address spaces, and at the same time can have
+ * nr_user_contexts_running < nr_all_contexts_running. */
+ is_runpool_full |= (bool)
+ (js_devdata->nr_user_contexts_running >=
+ kbdev->nr_user_address_spaces);
+ }
+
+ return is_runpool_full;
+}
+
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ unsigned long flags;
+ int i;
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* First try to find a free address space */
+ if (check_is_runpool_full(kbdev, kctx))
+ i = -1;
+ else
+ i = ffs(js_devdata->as_free) - 1;
+
+ if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+ js_devdata->as_free &= ~(1 << i);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ return i;
+ }
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* No address space currently free, see if we can release one */
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+ struct kbasep_js_kctx_info *as_js_kctx_info;
+ struct kbase_context *as_kctx;
+
+ js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+ as_kctx = js_per_as_data->kctx;
+ as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+ /* Don't release privileged or active contexts, or contexts with
+ * jobs running */
+ if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED) &&
+ js_per_as_data->as_busy_refcount == 0) {
+ if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+ as_kctx)) {
+ WARN(1, "Failed to retain active context\n");
+
+ spin_unlock_irqrestore(
+ &js_devdata->runpool_irq.lock,
+ flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ return KBASEP_AS_NR_INVALID;
+ }
+
+ kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+ /* Drop and retake locks to take the jsctx_mutex on the
+ * context we're about to release without violating lock
+ * ordering
+ */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+ flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+ /* Release context from address space */
+ mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+ if (!as_js_kctx_info->ctx.is_scheduled) {
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+ as_kctx,
+ true);
+
+ js_devdata->as_free &= ~(1 << i);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+ return i;
+ }
+
+ /* Context was retained while locks were dropped,
+ * continue looking for free AS */
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int as_nr)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbase_as *new_address_space = NULL;
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ if (kbdev->hwaccess.active_kctx == kctx ||
+ kctx->as_nr != KBASEP_AS_NR_INVALID ||
+ as_nr == KBASEP_AS_NR_INVALID) {
+ WARN(1, "Invalid parameters to use_ctx()\n");
+ return false;
+ }
+
+ new_address_space = &kbdev->as[as_nr];
+
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&new_address_space->transaction_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) {
+ /* We need to retain it to keep the corresponding address space
+ */
+ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+ }
+
+ return true;
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100644
index 00000000000..1598fce4c28
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
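+/* SLOT_RB_SIZE must stay below 256 so positions fit the u8 read/write indices
+ * in struct slot_rb, and it must be a power of two so that SLOT_RB_MASK can
+ * wrap a free-running index into a ring position, e.g. (illustrative):
+ *   entries[write_idx++ & SLOT_RB_MASK]
+ */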
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom: Atom associated with this entry
+ */
+struct rb_entry {
+ struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries: Ringbuffer entries
+ * @last_context: The last context to submit a job on this slot
+ * @read_idx: Current read index of buffer
+ * @write_idx: Current write index of buffer
+ * @job_chain_flag: Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+ struct rb_entry entries[SLOT_RB_SIZE];
+
+ struct kbase_context *last_context;
+
+ u8 read_idx;
+ u8 write_idx;
+
+ u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb: Slot ringbuffers
+ * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines
+ * whether slots 0/1 or slot 2 are currently being
+ * pulled from
+ * @scheduling_timer: The timer tick used for rescheduling jobs
+ * @timer_running: Is the timer running? The runpool_mutex must be
+ * held whilst modifying this.
+ *
+ * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure
+ */
+struct kbase_backend_data {
+ struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+ bool rmu_workaround_flag;
+
+ struct hrtimer scheduling_timer;
+
+ bool timer_running;
+
+ /* Set when we're about to reset the GPU */
+ atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING 0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED 1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED 2
+/* The GPU reset process is currently occurring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING 3
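+/* Illustrative progression of the reset_gpu atomic through these values:
+ * NOT_PENDING -> PREPARED -> COMMITTED -> HAPPENING -> NOT_PENDING */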
+
+	/* Workqueue and work item used to perform the reset */
+ struct workqueue_struct *reset_workq;
+ struct work_struct reset_work;
+ wait_queue_head_t reset_wait;
+ struct hrtimer reset_timer;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100644
index 00000000000..508394ea025
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1477 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+ dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+#endif
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+ struct kbase_context *kctx)
+{
+ return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js)
+{
+ struct kbase_context *kctx;
+ u32 cfg;
+ u64 jc_head = katom->jc;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(katom);
+
+ kctx = katom->kctx;
+
+ /* Command register must be available */
+ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+ /* Affinity is not violating */
+ kbase_js_debug_log_current_affinities(kbdev);
+ KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+ katom->affinity));
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+ jc_head & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+ jc_head >> 32, kctx);
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ {
+ u64 mask;
+ u32 value;
+
+ if (0 == js)
+ mask = mali_js0_affinity_mask;
+ else if (1 == js)
+ mask = mali_js1_affinity_mask;
+ else
+ mask = mali_js2_affinity_mask;
+
+ value = katom->affinity & (mask & 0xFFFFFFFF);
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+ value, kctx);
+
+ value = (katom->affinity >> 32) & ((mask>>32) & 0xFFFFFFFF);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+ value, kctx);
+ }
+#else
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+ katom->affinity & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+ katom->affinity >> 32, kctx);
+#endif
+
+ /* start MMU, medium priority, cache clean/flush on end, clean/flush on
+ * start */
+ cfg = kctx->as_nr;
+
+ cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+ cfg |= JS_CONFIG_START_MMU;
+ cfg |= JS_CONFIG_THREAD_PRI(8);
+
+ if (kbase_hw_has_feature(kbdev,
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+ if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+ cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+ katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+ true;
+ } else {
+ katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+ false;
+ }
+ }
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+
+ /* Write an approximate start timestamp.
+ * It's approximate because there might be a job in the HEAD register.
+ * In such cases, we'll try to make a better approximation in the IRQ
+ * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+ katom->start_timestamp = ktime_get();
+
+ /* GO ! */
+ dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+ katom, kctx, js, jc_head, katom->affinity);
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+ (u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_job_slots_event(
+ GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+ kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_ret_atom_lpu(
+ katom,
+ &kbdev->gpu_props.props.raw_props.js_features[js]);
+#endif
+#ifdef CONFIG_GPU_TRACEPOINTS
+ if (kbase_backend_nr_atoms_submitted(kbdev, js) == 1) {
+ /* If this is the only job on the slot, trace it as starting */
+ char js_string[16];
+
+ trace_gpu_sched_switch(
+ kbasep_make_job_slot_string(js, js_string),
+ ktime_to_ns(katom->start_timestamp),
+ (u32)katom->kctx, 0, katom->work_id);
+ kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+ }
+#endif
+ kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+ JS_COMMAND_START, katom->kctx);
+}
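+
+/* Summary sketch of the submission sequence above: JS_HEAD_NEXT_LO/HI take
+ * the job chain address, JS_AFFINITY_NEXT_LO/HI the shader core mask,
+ * JS_CONFIG_NEXT the flush/MMU/priority configuration, and writing
+ * JS_COMMAND_START to JS_COMMAND_NEXT finally hands the job to the slot. */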
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate to the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+ struct kbase_device *kbdev,
+ int js,
+ ktime_t end_timestamp)
+{
+ if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+ struct kbase_jd_atom *katom;
+ ktime_t new_timestamp;
+ ktime_t timestamp_diff;
+ /* The atom in the HEAD */
+ katom = kbase_gpu_inspect(kbdev, js, 0);
+
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ /* Account for any IRQ Throttle time - makes an overestimate of
+ * the time spent by the job */
+ new_timestamp = ktime_sub_ns(end_timestamp,
+ KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+ timestamp_diff = ktime_sub(new_timestamp,
+ katom->start_timestamp);
+ if (ktime_to_ns(timestamp_diff) >= 0) {
+ /* Only update the timestamp if it's a better estimate
+ * than what's currently stored. This is because our
+ * estimate that accounts for the throttle time may be
+ * too much of an overestimate */
+ katom->start_timestamp = new_timestamp;
+ }
+ }
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+ unsigned long flags;
+ int i;
+ u32 count = 0;
+ ktime_t end_timestamp = ktime_get();
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+ memset(&kbdev->slot_submit_count_irq[0], 0,
+ sizeof(kbdev->slot_submit_count_irq));
+
+	/* Write the IRQ throttle register; this prevents IRQs from occurring
+	 * until the given number of GPU clock cycles have passed */
+ {
+ int irq_throttle_cycles =
+ atomic_read(&kbdev->irq_throttle_cycles);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE),
+ irq_throttle_cycles, NULL);
+ }
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ while (done) {
+ u32 failed = done >> 16;
+
+ /* treat failed slots as finished slots */
+ u32 finished = (done & 0xFFFF) | failed;
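+
+		/* Illustrative layout of the IRQ status word: bit i in the low
+		 * half means slot i completed, bit i + 16 means slot i failed,
+		 * so "finished" folds both halves into one per-slot mask. */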
+
+ /* Note: This is inherently unfair, as we always check
+ * for lower numbered interrupts before the higher
+ * numbered ones.*/
+ i = ffs(finished) - 1;
+ KBASE_DEBUG_ASSERT(i >= 0);
+
+ do {
+ int nr_done;
+ u32 active;
+ u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+ u64 job_tail = 0;
+
+ if (failed & (1u << i)) {
+ /* read out the job slot status code if the job
+ * slot reported failure */
+ completion_code = kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+ switch (completion_code) {
+ case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_job_slots_event(
+ GATOR_MAKE_EVENT(
+ GATOR_JOB_SLOT_SOFT_STOPPED, i),
+ NULL, 0);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_job_softstop(i);
+#endif
+ /* Soft-stopped job - read the value of
+ * JS<n>_TAIL so that the job chain can
+ * be resumed */
+ job_tail = (u64)kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i, JS_TAIL_LO),
+ NULL) |
+ ((u64)kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i, JS_TAIL_HI),
+ NULL) << 32);
+ break;
+ case BASE_JD_EVENT_NOT_STARTED:
+ /* PRLAM-10673 can cause a TERMINATED
+ * job to come back as NOT_STARTED, but
+ * the error interrupt helps us detect
+ * it */
+ completion_code =
+ BASE_JD_EVENT_TERMINATED;
+ /* fall through */
+ default:
+ dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+ i, completion_code,
+ kbase_exception_name
+ (kbdev,
+ completion_code));
+ }
+
+ kbase_gpu_irq_evict(kbdev, i);
+ }
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+ done & ((1 << i) | (1 << (i + 16))),
+ NULL);
+ active = kbase_reg_read(kbdev,
+ JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+ NULL);
+
+ if (((active >> i) & 1) == 0 &&
+ (((done >> (i + 16)) & 1) == 0)) {
+ /* There is a potential race we must work
+ * around:
+ *
+ * 1. A job slot has a job in both current and
+ * next registers
+ * 2. The job in current completes
+ * successfully, the IRQ handler reads
+ * RAWSTAT and calls this function with the
+ * relevant bit set in "done"
+ * 3. The job in the next registers becomes the
+ * current job on the GPU
+ * 4. Sometime before the JOB_IRQ_CLEAR line
+ * above the job on the GPU _fails_
+ * 5. The IRQ_CLEAR clears the done bit but not
+ * the failed bit. This atomically sets
+ * JOB_IRQ_JS_STATE. However since both jobs
+ * have now completed the relevant bits for
+ * the slot are set to 0.
+ *
+ * If we now did nothing then we'd incorrectly
+ * assume that _both_ jobs had completed
+ * successfully (since we haven't yet observed
+ * the fail bit being set in RAWSTAT).
+ *
+ * So at this point if there are no active jobs
+ * left we check to see if RAWSTAT has a failure
+ * bit set for the job slot. If it does we know
+ * that there has been a new failure that we
+ * didn't previously know about, so we make sure
+ * that we record this in active (but we wait
+ * for the next loop to deal with it).
+ *
+ * If we were handling a job failure (i.e. done
+ * has the relevant high bit set) then we know
+ * that the value read back from
+ * JOB_IRQ_JS_STATE is the correct number of
+ * remaining jobs because the failed job will
+ * have prevented any further jobs from starting
+ * execution.
+ */
+ u32 rawstat = kbase_reg_read(kbdev,
+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+ if ((rawstat >> (i + 16)) & 1) {
+ /* There is a failed job that we've
+ * missed - add it back to active */
+ active |= (1u << i);
+ }
+ }
+
+ dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+ completion_code);
+
+ nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+ nr_done -= (active >> i) & 1;
+ nr_done -= (active >> (i + 16)) & 1;
+
+ if (nr_done <= 0) {
+ dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+ i);
+
+ goto spurious;
+ }
+
+ count += nr_done;
+
+ while (nr_done) {
+ if (nr_done == 1) {
+ kbase_gpu_complete_hw(kbdev, i,
+ completion_code,
+ job_tail,
+ &end_timestamp);
+ kbase_jm_try_kick_all(kbdev);
+ } else {
+ /* More than one job has completed.
+ * Since this is not the last job being
+ * reported this time it must have
+ * passed. This is because the hardware
+ * will not allow further jobs in a job
+ * slot to complete until the failed job
+ * is cleared from the IRQ status.
+ */
+ kbase_gpu_complete_hw(kbdev, i,
+ BASE_JD_EVENT_DONE,
+ 0,
+ &end_timestamp);
+ }
+ nr_done--;
+ }
+ spurious:
+ done = kbase_reg_read(kbdev,
+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+ /* Workaround for missing interrupt caused by
+ * PRLAM-10883 */
+ if (((active >> i) & 1) && (0 ==
+ kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i,
+ JS_STATUS), NULL))) {
+ /* Force job slot to be processed again
+ */
+ done |= (1u << i);
+ }
+ }
+
+ failed = done >> 16;
+ finished = (done & 0xFFFF) | failed;
+ } while (finished & (1 << i));
+
+ kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+ end_timestamp);
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+#if KBASE_GPU_RESET_EN
+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_COMMITTED) {
+ /* If we're trying to reset the GPU then we might be able to do
+ * it early (without waiting for a timeout) because some jobs
+ * have completed
+ */
+ kbasep_try_reset_gpu_early(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+ u16 core_reqs)
+{
+ bool soft_stops_allowed = true;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+ if ((core_reqs & BASE_JD_REQ_T) != 0)
+ soft_stops_allowed = false;
+ }
+ return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+ u16 core_reqs)
+{
+ bool hard_stops_allowed = true;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+ if ((core_reqs & BASE_JD_REQ_T) != 0)
+ hard_stops_allowed = false;
+ }
+ return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+ int js,
+ u32 action,
+ u16 core_reqs,
+ struct kbase_jd_atom *target_katom)
+{
+ struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+ u32 status_reg_before;
+ u64 job_in_head_before;
+ u32 status_reg_after;
+
+ KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+ /* Check the head pointer */
+ job_in_head_before = ((u64) kbase_reg_read(kbdev,
+ JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+ | (((u64) kbase_reg_read(kbdev,
+ JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+ << 32);
+ status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+ NULL);
+#endif
+
+ if (action == JS_COMMAND_SOFT_STOP) {
+ bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+ core_reqs);
+
+ if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+ (unsigned int)core_reqs);
+#endif /* CONFIG_MALI_DEBUG */
+ return;
+ }
+
+ /* We are about to issue a soft stop, so mark the atom as having
+ * been soft stopped */
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+ }
+
+ if (action == JS_COMMAND_HARD_STOP) {
+ bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+ core_reqs);
+
+ if (!hard_stop_allowed) {
+ /* Jobs can be hard-stopped for the following reasons:
+ * * CFS decides the job has been running too long (and
+ * soft-stop has not occurred). In this case the GPU
+ * will be reset by CFS if the job remains on the
+ * GPU.
+ *
+ * * The context is destroyed, kbase_jd_zap_context
+ * will attempt to hard-stop the job. However it also
+ * has a watchdog which will cause the GPU to be
+ * reset if the job remains on the GPU.
+ *
+ * * An (unhandled) MMU fault occurred. As long as
+ * BASE_HW_ISSUE_8245 is defined then the GPU will be
+ * reset.
+ *
+ * All three cases result in the GPU being reset if the
+ * hard-stop fails, so it is safe to just return and
+ * ignore the hard-stop request.
+ */
+ dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+ (unsigned int)core_reqs);
+ return;
+ }
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+ }
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) &&
+ action == JS_COMMAND_SOFT_STOP) {
+ int i;
+
+ for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js);
+ i++) {
+ struct kbase_jd_atom *katom;
+
+ katom = kbase_gpu_inspect(kbdev, js, i);
+
+ KBASE_DEBUG_ASSERT(katom);
+
+ /* For HW_ISSUE_8316, only 'bad' jobs attacking the
+ * system can cause this issue: normally, all memory
+ * should be allocated in multiples of 4 pages, and
+ * growable memory should be resized in multiples
+ * of 4 pages.
+ *
+ * Whilst such 'bad' jobs can be cleared by a GPU reset,
+ * the locking up of a uTLB entry caused by the bad job
+ * could also stall other ASs, meaning that other ASs'
+ * jobs don't complete in the 'grace' period before the
+ * reset. We don't want to lose other ASs' jobs when
+ * they would normally complete fine, so we must 'poke'
+ * the MMU regularly to help other ASs complete */
+ kbase_as_poking_timer_retain_atom(kbdev, katom->kctx,
+ katom);
+ }
+ }
+
+ if (kbase_hw_has_feature(kbdev,
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+ if (action == JS_COMMAND_SOFT_STOP)
+ action = (target_katom->atom_flags &
+ KBASE_KATOM_FLAGS_JOBCHAIN) ?
+ JS_COMMAND_SOFT_STOP_1 :
+ JS_COMMAND_SOFT_STOP_0;
+ else
+ action = (target_katom->atom_flags &
+ KBASE_KATOM_FLAGS_JOBCHAIN) ?
+ JS_COMMAND_HARD_STOP_1 :
+ JS_COMMAND_HARD_STOP_0;
+ }
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+ status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+ NULL);
+ if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+ struct kbase_jd_atom *head;
+ struct kbase_context *head_kctx;
+
+ head = kbase_gpu_inspect(kbdev, js, 0);
+ head_kctx = head->kctx;
+
+ if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+ head, job_in_head_before, js);
+ else
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+ 0, js);
+
+ switch (action) {
+ case JS_COMMAND_SOFT_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+ head, head->jc, js);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ } else {
+ if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+ job_in_head_before, js);
+ else
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+ 0, js);
+
+ switch (action) {
+ case JS_COMMAND_SOFT_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+ js);
+ break;
+ case JS_COMMAND_SOFT_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+ 0, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+ 0, js);
+ break;
+ case JS_COMMAND_HARD_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+ js);
+ break;
+ case JS_COMMAND_HARD_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+ 0, js);
+ break;
+ case JS_COMMAND_HARD_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+ 0, js);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ }
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ /* Cancel any remaining running jobs for this kctx */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Invalidate all jobs in context, to prevent re-submitting */
+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+ if (!work_pending(&kctx->jctx.atoms[i].work))
+ kctx->jctx.atoms[i].event_code =
+ BASE_JD_EVENT_JOB_CANCELLED;
+ }
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ kbase_job_slot_hardstop(kctx, i, NULL);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+ struct kbase_jd_atom *target_katom)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ int js = target_katom->slot_nr;
+ int priority = target_katom->sched_priority;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
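+ /* Soft-stop any lower-priority atoms from the same context that are
+ * already on this slot (assuming, as elsewhere in the scheduler, that
+ * a larger sched_priority value denotes a lower priority). */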
+ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+ struct kbase_jd_atom *katom;
+
+ katom = kbase_gpu_inspect(kbdev, js, i);
+ if (!katom)
+ continue;
+
+ if (katom->kctx != kctx)
+ continue;
+
+ if (katom->sched_priority > priority)
+ kbase_job_slot_softstop(kbdev, js, katom);
+ }
+}
+
+struct zap_reset_data {
+ /* The stages are:
+ * 1. The timer has never been called
+ * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+ * will happen. The GPU has been reset when
+ * kbdev->hwaccess.backend.reset_waitq is signalled
+ *
+ * (-1 - The timer has been cancelled)
+ */
+ int stage;
+ struct kbase_device *kbdev;
+ struct hrtimer timer;
+ spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+ struct zap_reset_data *reset_data = container_of(timer,
+ struct zap_reset_data, timer);
+ struct kbase_device *kbdev = reset_data->kbdev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&reset_data->lock, flags);
+
+ if (reset_data->stage == -1)
+ goto out;
+
+#if KBASE_GPU_RESET_EN
+ if (kbase_prepare_to_reset_gpu(kbdev)) {
+ dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+ ZAP_TIMEOUT);
+ kbase_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ reset_data->stage = 2;
+
+ out:
+ spin_unlock_irqrestore(&reset_data->lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct zap_reset_data reset_data;
+ unsigned long flags;
+
+ hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ reset_data.timer.function = zap_timeout_callback;
+
+ spin_lock_init(&reset_data.lock);
+
+ reset_data.kbdev = kbdev;
+ reset_data.stage = 1;
+
+ hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ /* Wait for all jobs to finish, and for the context to be not-scheduled
+ * (due to kbase_job_zap_context(), we also guarantee it's not in the JS
+ * policy queue either) */
+ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+ kctx->jctx.sched_info.ctx.is_scheduled == false);
+
+ spin_lock_irqsave(&reset_data.lock, flags);
+ if (reset_data.stage == 1) {
+ /* The timer hasn't run yet - so cancel it */
+ reset_data.stage = -1;
+ }
+ spin_unlock_irqrestore(&reset_data.lock, flags);
+
+ hrtimer_cancel(&reset_data.timer);
+
+ if (reset_data.stage == 2) {
+ /* The reset has already started.
+ * Wait for the reset to complete
+ */
+ wait_event(kbdev->hwaccess.backend.reset_wait,
+ atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+ == KBASE_RESET_GPU_NOT_PENDING);
+ }
+ destroy_hrtimer_on_stack(&reset_data.timer);
+
+ dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+ /* Ensure that the signallers of the waitqs have finished */
+ mutex_lock(&kctx->jctx.lock);
+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+ kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+ "Mali reset workqueue", 0, 1);
+ if (NULL == kbdev->hwaccess.backend.reset_workq)
+ return -EINVAL;
+
+ KBASE_DEBUG_ASSERT(0 ==
+ object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+ INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+ kbasep_reset_timeout_worker);
+
+ hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ kbdev->hwaccess.backend.reset_timer.function =
+ kbasep_reset_timer_callback;
+#endif
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+ destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev: The kbase device
+ * @js: The job slot to soft-stop
+ * @target_katom: The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags: Flags to pass in about the soft-stop
+ *
+ * Context:
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+ KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+ kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+ JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev: The kbase device
+ * @js: The job slot to soft-stop
+ * @target_katom: The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom)
+{
+ kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx: The kbase context that contains the job(s) that should
+ * be hard-stopped
+ * @js: The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ * jobs from the context)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ bool stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+ target_katom,
+ JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+ if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+ kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+ (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_T76X_3542) &&
+ (target_katom == NULL || target_katom->core_req &
+ BASE_JD_REQ_FS_AFBC)))) {
+ /* MIDBASE-2916: if a fragment job with AFBC encoding is
+ * hard-stopped, also perform a soft reset in order to
+ * clear the GPU status.
+ * The workaround for HW issue 8401 itself has an issue, so
+ * after hard-stopping just reset the GPU. This will ensure
+ * that the jobs leave the GPU. */
+ if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+ dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+ kbase_reset_gpu_locked(kbdev);
+ }
+ }
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+ u16 core_reqs, struct kbase_jd_atom *target_katom)
+{
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ /* For hard-stop, don't enter if hard-stop not allowed */
+ if (hw_action == JS_COMMAND_HARD_STOP &&
+ !kbasep_hard_stop_allowed(kbdev, core_reqs))
+ return;
+
+ /* For soft-stop, don't enter if soft-stop not allowed, or isn't
+ * causing disjoint */
+ if (hw_action == JS_COMMAND_SOFT_STOP &&
+ !(kbasep_soft_stop_allowed(kbdev, core_reqs) &&
+ (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+ return;
+
+ /* Nothing to do if already logged disjoint state on this atom */
+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+ return;
+
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+ kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that
+ * previously entered disjoint state via kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom)
+{
+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+ target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+ kbase_disjoint_state_down(kbdev);
+ }
+}
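+
+/*
+ * Usage sketch (illustrative only): callers stopping an atom pair the two
+ * helpers around the stop action so the disjoint counter stays balanced:
+ *
+ *   kbase_job_check_enter_disjoint(kbdev, action, core_reqs, katom);
+ *   ... issue the soft/hard-stop and wait for the atom to leave the GPU ...
+ *   kbase_job_check_leave_disjoint(kbdev, katom);
+ */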
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+ int i;
+
+ dev_err(kbdev->dev, "Register state:");
+ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+ for (i = 0; i < 3; i++) {
+ dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x",
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+ NULL),
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+ NULL));
+ }
+ dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *hwcnt_setup)
+{
+ hwcnt_setup->dump_buffer =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) &
+ 0xffffffff;
+ hwcnt_setup->dump_buffer |= (u64)
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) <<
+ 32;
+ hwcnt_setup->jm_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx);
+ hwcnt_setup->shader_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx);
+ hwcnt_setup->tiler_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx);
+ hwcnt_setup->mmu_l2_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx);
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ int i;
+ ktime_t end_timestamp = ktime_get();
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} };
+ enum kbase_instr_state bckp_state;
+ bool try_schedule = false;
+ bool restore_hwc = false;
+
+ u32 mmu_irq_mask;
+
+ KBASE_DEBUG_ASSERT(data);
+
+ kbdev = container_of(data, struct kbase_device,
+ hwaccess.backend.reset_work);
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+ /* Make sure the timer has completed - cancelling it cannot be done
+ * from interrupt context, so it cannot be done within
+ * kbasep_try_reset_gpu_early. */
+ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+ if (kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ /* This would re-activate the GPU. Since it's already idle,
+ * there's no need to reset it */
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_NOT_PENDING);
+ kbase_disjoint_state_down(kbdev);
+ wake_up(&kbdev->hwaccess.backend.reset_wait);
+ return;
+ }
+
+ mutex_lock(&kbdev->pm.lock);
+ /* We hold the pm lock, so there ought to be a current policy */
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+ /* All slots have been soft-stopped and we've waited
+ * SOFT_STOP_RESET_TIMEOUT for the slots to clear. At this point we
+ * assume that anything still left on the GPU is stuck there and we'll
+ * kill it when we reset the GPU */
+
+ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+ RESET_TIMEOUT);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset - the same interrupt handler
+ * preempted itself */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ /* Save the HW counters setup */
+ if (kbdev->hwcnt.kctx != NULL) {
+ struct kbase_context *kctx = kbdev->hwcnt.kctx;
+
+ if (kctx->jctx.sched_info.ctx.is_scheduled) {
+ kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup);
+
+ restore_hwc = true;
+ }
+ }
+
+ /* Output the state of some interesting registers to help in the
+ * debugging of GPU resets */
+ kbase_debug_dump_registers(kbdev);
+
+ bckp_state = kbdev->hwcnt.backend.state;
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING;
+ kbdev->hwcnt.backend.triggered = 0;
+
+ mmu_irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ /* Disable IRQs to prevent IRQ handlers from kicking in after releasing
+ * the spinlock; this also clears any outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Ensure that any IRQ handlers have finished.
+ * Must be done without holding any locks that IRQ handlers will take */
+ kbase_synchronize_irqs(kbdev);
+
+ /* Reset the GPU */
+ kbase_pm_init_hw(kbdev, 0);
+ kbase_pm_enable_interrupts_mmu_mask(kbdev, mmu_irq_mask);
+ /* IRQs were re-enabled by kbase_pm_init_hw, and GPU is still powered */
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Restore the HW counters setup */
+ if (restore_hwc) {
+ struct kbase_context *kctx = kbdev->hwcnt.kctx;
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ PRFCNT_CONFIG_MODE_OFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+ hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+ hwcnt_setup.dump_buffer >> 32, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+ hwcnt_setup.jm_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+ hwcnt_setup.shader_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+ hwcnt_setup.mmu_l2_bm, kctx);
+
+ /* Due to PRLAM-8186 we need to disable the Tiler before we
+ * enable the HW counter dump. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ 0, kctx);
+ else
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ hwcnt_setup.tiler_bm, kctx);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+ /* If HW has PRLAM-8186 we can now re-enable the tiler HW
+ * counters dump */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ hwcnt_setup.tiler_bm, kctx);
+ }
+ kbdev->hwcnt.backend.state = bckp_state;
+ switch (kbdev->hwcnt.backend.state) {
+ /* Cases for waking the kbasep_cache_clean_worker */
+ case KBASE_INSTR_STATE_CLEANED:
+ /* Cache-clean IRQ occurred, but we reset:
+ * Wake up in case the waiter saw RESETTING */
+ case KBASE_INSTR_STATE_REQUEST_CLEAN:
+ /* After a clean was requested, but before the regs were
+ * written:
+ * Wake up in case the waiter saw RESETTING */
+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+ break;
+ case KBASE_INSTR_STATE_CLEANING:
+ /* Either:
+ * 1) We've not got the Cache-clean IRQ yet: it was lost, or:
+ * 2) We got it whilst resetting: it was voluntarily lost
+ *
+ * So, move to the next state and wake up: */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+ break;
+
+ /* Cases for waking anyone else */
+ case KBASE_INSTR_STATE_DUMPING:
+ /* If dumping, abort the dump, because we may have lost the IRQ */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ break;
+ case KBASE_INSTR_STATE_DISABLED:
+ case KBASE_INSTR_STATE_IDLE:
+ case KBASE_INSTR_STATE_FAULT:
+ /* Every other reason: wakeup in that state */
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ break;
+
+ /* Unhandled cases */
+ case KBASE_INSTR_STATE_RESETTING:
+ default:
+ BUG();
+ break;
+ }
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Complete any jobs that were still on the GPU */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbase_backend_reset(kbdev, &end_timestamp);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* Reprogram the GPU's MMU */
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbase_as *as = &kbdev->as[i];
+
+ mutex_lock(&as->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (js_devdata->runpool_irq.per_as_data[i].kctx)
+ kbase_mmu_update(
+ js_devdata->runpool_irq.per_as_data[i].kctx);
+ else
+ kbase_mmu_disable_as(kbdev, i);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&as->transaction_mutex);
+ }
+
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_NOT_PENDING);
+
+ kbase_disjoint_state_down(kbdev);
+
+ wake_up(&kbdev->hwaccess.backend.reset_wait);
+ dev_err(kbdev->dev, "Reset complete");
+
+ if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+ try_schedule = true;
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Find out what cores are required now */
+ kbase_pm_update_cores_state(kbdev);
+
+ /* Synchronously request and wait for those cores, because if
+ * instrumentation is enabled it would need them immediately. */
+ kbase_pm_check_transitions_sync(kbdev);
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Try submitting some jobs to restart processing */
+ if (try_schedule) {
+ KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+ 0);
+ kbase_js_sched_all(kbdev);
+ }
+
+ kbase_pm_context_idle(kbdev);
+ KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+ struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+ hwaccess.backend.reset_timer);
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Reset still pending? */
+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+ KBASE_RESET_GPU_COMMITTED)
+ queue_work(kbdev->hwaccess.backend.reset_workq,
+ &kbdev->hwaccess.backend.reset_work);
+
+ return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+ int i;
+ int pending_jobs = 0;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Count the number of jobs */
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+ if (pending_jobs > 0) {
+ /* There are still jobs on the GPU - wait */
+ return;
+ }
+
+ /* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+ * been called), and that no other thread beat this thread to starting
+ * the reset */
+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+ KBASE_RESET_GPU_COMMITTED) {
+ /* Reset has already occurred */
+ return;
+ }
+
+ queue_work(kbdev->hwaccess.backend.reset_workq,
+ &kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_try_reset_gpu_early_locked(kbdev);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
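+
+/*
+ * Reset lifecycle as implemented above (state names from the driver):
+ *
+ *   NOT_PENDING --kbase_prepare_to_reset_gpu[_locked]()--> PREPARED
+ *   PREPARED    --kbase_reset_gpu[_locked]()-------------> COMMITTED
+ *   COMMITTED   --timer callback or early path-----------> HAPPENING
+ *   HAPPENING   --kbasep_reset_timeout_worker()----------> NOT_PENDING
+ */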
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ * The function returns a boolean which should be interpreted as follows:
+ * true - Prepared for reset, kbase_reset_gpu should be called.
+ * false - Another thread is performing a reset, kbase_reset_gpu should
+ * not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+ int i;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_NOT_PENDING,
+ KBASE_RESET_GPU_PREPARED) !=
+ KBASE_RESET_GPU_NOT_PENDING) {
+ /* Some other thread is already resetting the GPU */
+ return false;
+ }
+
+ kbase_disjoint_state_up(kbdev);
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ kbase_job_slot_softstop(kbdev, i, NULL);
+
+ return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ bool ret;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Note this is an assert/atomic_set because it is a software issue for
+ * a race to be occurring here */
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_PREPARED);
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED);
+
+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+ kbdev->reset_timeout_ms);
+
+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+ HRTIMER_MODE_REL);
+
+ /* Try resetting early */
+ kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Note this is an assert/atomic_set because it is a software issue for
+ * a race to be occurring here */
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_PREPARED);
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED);
+
+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+ kbdev->reset_timeout_ms);
+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+ HRTIMER_MODE_REL);
+
+ /* Try resetting early */
+ kbasep_try_reset_gpu_early_locked(kbdev);
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 00000000000..3b959d5b447
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,185 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev: Device pointer
+ * @katom: Atom to submit
+ * @js: Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev: Device pointer
+ * @s: Job slot
+ * @completion_code: Completion code of job reported by GPU
+ * @job_tail: Job tail address reported by GPU
+ * @end_timestamp: Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+ u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+ sprintf(js_string, "job_slot_%i", js);
+ return js_string;
+}
+#endif
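+
+/*
+ * Illustrative use of the helper above (assumes a caller-supplied buffer
+ * large enough for the longest expansion, e.g. "job_slot_2"):
+ *
+ *   char js_string[16];
+ *   const char *name = kbasep_make_job_slot_string(js, js_string);
+ */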
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev: Device pointer
+ * @katom: Atom to submit
+ * @js: Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ * on the specified atom
+ * @kbdev: Device pointer
+ * @js: Job slot to stop on
+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or
+ * JSn_COMMAND_SOFT_STOP
+ * @core_reqs: Core requirements of atom to stop
+ * @target_katom: Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+ int js,
+ u32 action,
+ u16 core_reqs,
+ struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ * slot belonging to a given context.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer. May be NULL
+ * @js: Job slot to hard stop
+ * @katom: Specific atom to stop. May be NULL
+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or
+ * JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js,
+ struct kbase_jd_atom *katom,
+ u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ * not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+#endif
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100644
index 00000000000..1bf9c709752
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1407 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
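+/*
+ * Worked example of the wrap-safe index arithmetic (assuming u8 indices
+ * and a small power-of-two SLOT_RB_SIZE): with write_idx == 1 and
+ * read_idx == 255 the subtraction wraps modulo 256 and the (s8) cast
+ * yields 2, so SLOT_RB_ENTRIES(rb) == 2 even though the raw indices
+ * have wrapped.
+ */
+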
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+ WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+ rb->write_idx++;
+
+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev: Device pointer
+ * @js: Job slot to remove atom from
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+ int js)
+{
+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+ struct kbase_jd_atom *katom;
+
+ if (SLOT_RB_EMPTY(rb)) {
+ WARN(1, "GPU ringbuffer unexpectedly empty\n");
+ return NULL;
+ }
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+ kbase_gpu_release_atom(kbdev, katom);
+
+ rb->read_idx++;
+
+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+ kbase_js_debug_log_current_affinities(kbdev);
+
+ return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+ int idx)
+{
+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+ return NULL; /* idx out of range */
+
+ return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+ int js)
+{
+ return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+ int js)
+{
+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+ if (SLOT_RB_EMPTY(rb))
+ return NULL;
+
+ return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ * false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+ if (!katom)
+ return false;
+ if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+ katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev: Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+ int js;
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+ if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+ return true;
+ }
+ }
+ return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+ int nr = 0;
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+ if (katom && (katom->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED))
+ nr++;
+ }
+
+ return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+ int nr = 0;
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ if (kbase_gpu_inspect(kbdev, js, i))
+ nr++;
+ }
+
+ return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+ enum kbase_atom_gpu_rb_state min_rb_state)
+{
+ int nr = 0;
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+ if (katom && (katom->gpu_rb_state >= min_rb_state))
+ nr++;
+ }
+
+ return nr;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+ KBASE_RESET_GPU_NOT_PENDING) {
+ /* The GPU is being reset - so prevent submission */
+ return 0;
+ }
+
+ return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+ int js,
+ struct kbase_jd_atom *katom)
+{
+ /* The most recently checked affinity. Having this at this scope allows
+ * us to guarantee that we've checked the affinity in this function
+ * call.
+ */
+ u64 recently_chosen_affinity = 0;
+ bool chosen_affinity = false;
+ bool retry;
+
+ do {
+ retry = false;
+
+ /* NOTE: The following uses a number of FALLTHROUGHs to optimize
+ * the calls to this function. Ending of the function is
+ * indicated by BREAK OUT */
+ switch (katom->coreref_state) {
+ /* State when job is first attempted to be run */
+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+ KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+ /* Compute affinity */
+ if (false == kbase_js_choose_affinity(
+ &recently_chosen_affinity, kbdev, katom,
+ js)) {
+ /* No cores are currently available */
+ /* *** BREAK OUT: No state transition *** */
+ break;
+ }
+
+ chosen_affinity = true;
+
+ /* Request the cores */
+ kbase_pm_request_cores(kbdev,
+ katom->core_req & BASE_JD_REQ_T,
+ recently_chosen_affinity);
+
+ katom->affinity = recently_chosen_affinity;
+
+ /* Proceed to next state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+ {
+ enum kbase_pm_cores_ready cores_ready;
+
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+
+ cores_ready = kbase_pm_register_inuse_cores(
+ kbdev,
+ katom->core_req & BASE_JD_REQ_T,
+ katom->affinity);
+ if (cores_ready == KBASE_NEW_AFFINITY) {
+ /* Affinity no longer valid - return to
+ * previous state */
+ kbasep_js_job_check_deref_cores(kbdev,
+ katom);
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_INUSE_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) katom->affinity);
+ /* *** BREAK OUT: Return to previous
+ * state, retry *** */
+ retry = true;
+ break;
+ }
+ if (cores_ready == KBASE_CORES_NOT_READY) {
+ /* Stay in this state and return, to
+ * retry at this state later */
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_INUSE_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) katom->affinity);
+ /* *** BREAK OUT: No state transition
+ * *** */
+ break;
+ }
+ /* Proceed to next state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ }
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+
+ /* Optimize out choosing the affinity twice in the same
+ * function call */
+ if (chosen_affinity == false) {
+ /* See if the affinity changed since a previous
+ * call. */
+ if (false == kbase_js_choose_affinity(
+ &recently_chosen_affinity,
+ kbdev, katom, js)) {
+ /* No cores are currently available */
+ kbasep_js_job_check_deref_cores(kbdev,
+ katom);
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) recently_chosen_affinity);
+ /* *** BREAK OUT: Transition to lower
+ * state *** */
+ break;
+ }
+ chosen_affinity = true;
+ }
+
+ /* Now see if this requires a different set of cores */
+ if (recently_chosen_affinity != katom->affinity) {
+ enum kbase_pm_cores_ready cores_ready;
+
+ kbase_pm_request_cores(kbdev,
+ katom->core_req & BASE_JD_REQ_T,
+ recently_chosen_affinity);
+
+ /* Register new cores whilst we still hold the
+ * old ones, to minimize power transitions */
+ cores_ready =
+ kbase_pm_register_inuse_cores(kbdev,
+ katom->core_req & BASE_JD_REQ_T,
+ recently_chosen_affinity);
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+
+ /* Fix up the state that was reduced by
+ * deref_cores: */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ katom->affinity = recently_chosen_affinity;
+ if (cores_ready == KBASE_NEW_AFFINITY) {
+ /* Affinity no longer valid - return to
+ * previous state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+ kbasep_js_job_check_deref_cores(kbdev,
+ katom);
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_INUSE_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) katom->affinity);
+ /* *** BREAK OUT: Return to previous
+ * state, retry *** */
+ retry = true;
+ break;
+ }
+ /* Now might be waiting for powerup again, with
+ * a new affinity */
+ if (cores_ready == KBASE_CORES_NOT_READY) {
+ /* Return to previous state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) katom->affinity);
+ /* *** BREAK OUT: Transition to lower
+ * state *** */
+ break;
+ }
+ }
+ /* Proceed to next state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+ case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+ KBASE_DEBUG_ASSERT(katom->affinity ==
+ recently_chosen_affinity);
+
+ /* Note: this is where the caller must have taken the
+ * runpool_irq.lock */
+
+ /* Check for affinity violations - if there are any,
+ * then we just ask the caller to requeue and try again
+ * later */
+ if (kbase_js_affinity_would_violate(kbdev, js,
+ katom->affinity) != false) {
+ /* Return to previous state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ /* *** BREAK OUT: Transition to lower state ***
+ */
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+ katom->kctx, katom, katom->jc, js,
+ (u32) katom->affinity);
+ break;
+ }
+
+ /* No affinity violations would result, so the cores are
+ * ready */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+ /* *** BREAK OUT: Cores Ready *** */
+ break;
+
+ default:
+ KBASE_DEBUG_ASSERT_MSG(false,
+ "Unhandled kbase_atom_coreref_state %d",
+ katom->coreref_state);
+ break;
+ }
+ } while (retry != false);
+
+ return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
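+
+/*
+ * Coreref state progression implemented above (via the fallthroughs):
+ *   NO_CORES_REQUESTED -> WAITING_FOR_REQUESTED_CORES ->
+ *   RECHECK_AFFINITY -> CHECK_AFFINITY_VIOLATIONS -> READY
+ * with returns to earlier states on KBASE_NEW_AFFINITY,
+ * KBASE_CORES_NOT_READY or a would-be affinity violation.
+ */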
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ switch (katom->coreref_state) {
+ case KBASE_ATOM_COREREF_STATE_READY:
+ /* State where atom was submitted to the HW - just proceed to
+ * power-down */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+
+ /* *** FALLTHROUGH *** */
+
+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+ /* State where cores were registered */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+ kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+ katom->affinity);
+
+ break;
+
+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+ /* State where cores were requested, but not registered */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+ kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+ katom->affinity);
+ break;
+
+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+ /* Initial state - nothing required */
+ KBASE_DEBUG_ASSERT(katom->affinity == 0);
+ break;
+
+ default:
+ KBASE_DEBUG_ASSERT_MSG(false,
+ "Unhandled coreref_state: %d",
+ katom->coreref_state);
+ break;
+ }
+
+ katom->affinity = 0;
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ switch (katom->gpu_rb_state) {
+ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+ /* Should be impossible */
+ WARN(1, "Attempting to release atom not in ringbuffer\n");
+ break;
+
+ case KBASE_ATOM_GPU_RB_SUBMITTED:
+ /* Inform power management at start/finish of atom
+ * so it can update its GPU utilisation metrics. */
+ kbase_pm_metrics_release_atom(kbdev, katom);
+
+ if (katom->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_release_gpu_cycle_counter(kbdev);
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_READY:
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+ kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+ katom->affinity);
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+ break;
+
+ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+ break;
+ }
+
+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ kbase_gpu_release_atom(kbdev, katom);
+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
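+/*
+ * Workaround for BASE_HW_ISSUE_8987: atoms on slot 2 must not run
+ * concurrently with atoms on slots 0/1. The rmu_workaround_flag alternates
+ * between submission rounds so that neither class of slot starves the other:
+ * e.g. with atoms pending on both slot 0 and slot 2, one update pass lets
+ * slot 0 submit and the next lets slot 2 submit.
+ */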
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+ bool slot_busy[3];
+
+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ return true;
+ slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+ slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+ slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+ if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+ (js != 2 && !slot_busy[2]))
+ return true;
+
+ /* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+ if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+ kbase_gpu_atoms_submitted(kbdev, 1) ||
+ backend->rmu_workaround_flag))
+ return false;
+
+ /* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+ if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+ !backend->rmu_workaround_flag))
+ return false;
+
+ backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+ return true;
+}
+
+static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev)
+{
+ return kbdev->js_data.runpool_irq.secure_mode;
+}
+
+static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev)
+{
+ int err = -EINVAL;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ WARN_ONCE(!kbdev->secure_ops,
+ "Cannot enable secure mode: secure callbacks not specified.\n");
+
+ if (kbdev->secure_ops) {
+ /* Switch GPU to secure mode */
+ err = kbdev->secure_ops->secure_mode_enable(kbdev);
+
+ if (err)
+ dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err);
+ else
+ kbdev->js_data.runpool_irq.secure_mode = true;
+ }
+
+ return err;
+}
+
+static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev)
+{
+ int err = -EINVAL;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ WARN_ONCE(!kbdev->secure_ops,
+ "Cannot disable secure mode: secure callbacks not specified.\n");
+
+ if (kbdev->secure_ops) {
+ /* Switch GPU to non-secure mode */
+ err = kbdev->secure_ops->secure_mode_disable(kbdev);
+
+ if (err)
+ dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err);
+ else
+ kbdev->js_data.runpool_irq.secure_mode = false;
+ }
+
+ return err;
+}
+
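+/*
+ * Advance each atom in the slot ringbuffers through the submission state
+ * machine:
+ *
+ *   WAITING_BLOCKED -> WAITING_FOR_CORE_AVAILABLE -> WAITING_AFFINITY
+ *     -> WAITING_SECURE_MODE -> READY -> SUBMITTED
+ *
+ * An atom that cannot make progress at a given state simply remains there
+ * until the next update.
+ */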
+void kbase_gpu_slot_update(struct kbase_device *kbdev)
+{
+ int js;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ struct kbase_jd_atom *katom[2];
+ int idx;
+
+ katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+ katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+ WARN_ON(katom[1] && !katom[0]);
+
+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+ bool cores_ready;
+
+ if (!katom[idx])
+ continue;
+
+ switch (katom[idx]->gpu_rb_state) {
+ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+ /* Should be impossible */
+ WARN(1, "Attempting to update atom not in ringbuffer\n");
+ break;
+
+ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+ if (katom[idx]->atom_flags &
+ KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+ break;
+
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+ cores_ready =
+ kbasep_js_job_check_ref_cores(kbdev, js,
+ katom[idx]);
+
+ if (katom[idx]->event_code ==
+ BASE_JD_EVENT_PM_EVENT) {
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+ break;
+ }
+
+ if (!cores_ready)
+ break;
+
+ kbase_js_affinity_retain_slot_cores(kbdev, js,
+ katom[idx]->affinity);
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+ if (!kbase_gpu_rmu_workaround(kbdev, js))
+ break;
+
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+ if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) {
+ int err = 0;
+
+ /* Not in correct mode, take action */
+ if (kbase_gpu_atoms_submitted_any(kbdev)) {
+ /*
+ * We are not in the correct
+ * GPU mode for this job, and
+ * we can't switch now because
+ * there are jobs already
+ * running.
+ */
+ break;
+ }
+
+ /* No jobs running, so we can switch GPU mode right now */
+ if (kbase_jd_katom_is_secure(katom[idx])) {
+ err = kbase_gpu_secure_mode_enable(kbdev);
+ } else {
+ err = kbase_gpu_secure_mode_disable(kbdev);
+ }
+
+ if (err) {
+ /* Failed to switch secure mode, fail atom */
+ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+ break;
+ }
+ }
+
+ /* Secure mode sanity checks */
+ KBASE_DEBUG_ASSERT_MSG(
+ kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev),
+ "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)",
+ kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev));
+ KBASE_DEBUG_ASSERT_MSG(
+ (kbase_jd_katom_is_secure(katom[idx]) && js == 0) ||
+ !kbase_jd_katom_is_secure(katom[idx]),
+ "Secure atom on JS%d not supported", js);
+
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_READY;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_READY:
+ /* Only submit if head atom or previous atom
+ * already submitted */
+ if (idx == 1 &&
+ (katom[0]->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED &&
+ katom[0]->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+ break;
+
+ /* Check if this job needs the cycle counter
+ * enabled before submission */
+ if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(
+ kbdev);
+
+ /* Inform power management at start/finish of
+ * atom so it can update its GPU utilisation
+ * metrics. */
+ kbase_pm_metrics_run_atom(kbdev, katom[idx]);
+
+ kbase_job_hw_submit(kbdev, katom[idx], js);
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_SUBMITTED;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_SUBMITTED:
+ /* Atom submitted to HW, nothing else to do */
+ break;
+
+ case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+ /* Only return if head atom or previous atom
+ * already removed - as atoms must be returned
+ * in order */
+ if (idx == 0 || katom[0]->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+ kbase_gpu_dequeue_atom(kbdev, js);
+ kbase_jm_return_atom_to_js(kbdev,
+ katom[idx]);
+ }
+ break;
+ }
+ }
+ }
+
+ /* Warn if PRLAM-8987 affinity restrictions are violated */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+ kbase_gpu_atoms_submitted(kbdev, 1)) &&
+ kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ kbase_gpu_enqueue_atom(kbdev, katom);
+ kbase_gpu_slot_update(kbdev);
+}
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_jd_atom *next_katom;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ katom = kbase_gpu_inspect(kbdev, js, 0);
+ next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
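+	/* If the next atom is from the same context and still sitting in the
+	 * NEXT registers, evict it by writing a NOP to JS_COMMAND_NEXT and
+	 * roll its state back to READY so it is resubmitted in order. */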
+ if (next_katom && katom->kctx == next_katom->kctx &&
+ next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+ != 0 ||
+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+ != 0)) {
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+ JS_COMMAND_NOP, NULL);
+ next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+ return true;
+ }
+
+ return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+ u32 completion_code,
+ u64 job_tail,
+ ktime_t *end_timestamp)
+{
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+ struct kbase_context *kctx = katom->kctx;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
+ completion_code != BASE_JD_EVENT_DONE &&
+ !(completion_code & BASE_JD_SW_EVENT)) {
+ katom->need_cache_flush_cores_retained = katom->affinity;
+ kbase_pm_request_cores(kbdev, false, katom->affinity);
+ } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+ if (kbdev->gpu_props.num_core_groups > 1 &&
+ !(katom->affinity &
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask
+ ) &&
+ (katom->affinity &
+ kbdev->gpu_props.props.coherency_info.group[1].core_mask
+ )) {
+ dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+ katom->need_cache_flush_cores_retained =
+ katom->affinity;
+ kbase_pm_request_cores(kbdev, false,
+ katom->affinity);
+ }
+ }
+
+ katom = kbase_gpu_dequeue_atom(kbdev, js);
+
+ kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+
+ if (completion_code == BASE_JD_EVENT_STOPPED) {
+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+ 0);
+
+ /*
+ * Dequeue next atom from ringbuffers on same slot if required.
+ * This atom will already have been removed from the NEXT
+ * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+ * the atoms on this slot are returned in the correct order.
+ */
+ if (next_katom && katom->kctx == next_katom->kctx) {
+ kbase_gpu_dequeue_atom(kbdev, js);
+ kbase_jm_return_atom_to_js(kbdev, next_katom);
+ }
+ } else if (completion_code != BASE_JD_EVENT_DONE) {
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+ KBASE_TRACE_DUMP(kbdev);
+#endif
+ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+ /*
+ * Remove all atoms on the same context from ringbuffers. This
+ * will not remove atoms that are already on the GPU, as these
+ * are guaranteed not to have fail dependencies on the failed
+ * atom.
+ */
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+ struct kbase_jd_atom *katom_idx0 =
+ kbase_gpu_inspect(kbdev, i, 0);
+ struct kbase_jd_atom *katom_idx1 =
+ kbase_gpu_inspect(kbdev, i, 1);
+
+ if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Dequeue katom_idx0 from ringbuffer */
+ kbase_gpu_dequeue_atom(kbdev, i);
+
+ if (katom_idx1 &&
+ katom_idx1->kctx == katom->kctx &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Dequeue katom_idx1 from ringbuffer */
+ kbase_gpu_dequeue_atom(kbdev, i);
+
+ katom_idx1->event_code =
+ BASE_JD_EVENT_STOPPED;
+ kbase_jm_return_atom_to_js(kbdev,
+ katom_idx1);
+ }
+ katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+ kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+ } else if (katom_idx1 &&
+ katom_idx1->kctx == katom->kctx &&
+ katom_idx1->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Can not dequeue this atom yet - will be
+ * dequeued when atom at idx0 completes */
+ katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+ kbase_gpu_mark_atom_for_return(kbdev,
+ katom_idx1);
+ }
+ }
+ }
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+ js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+				katom, job_tail, js);
+	}
+
+ /* Only update the event code for jobs that weren't cancelled */
+ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+ katom->event_code = (base_jd_event_code)completion_code;
+
+ kbase_device_trace_register_access(kctx, REG_WRITE,
+ JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+ 1 << js);
+
+ /* Complete the job, and start new ones
+ *
+ * Also defer remaining work onto the workqueue:
+ * - Re-queue Soft-stopped jobs
+ * - For any other jobs, queue the job back into the dependency system
+ * - Schedule out the parent context if necessary, and schedule a new
+ * one in.
+ */
+#ifdef CONFIG_GPU_TRACEPOINTS
+ {
+ /* The atom in the HEAD */
+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+ 0);
+
+ if (next_katom && next_katom->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ char js_string[16];
+
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+ js_string),
+ ktime_to_ns(*end_timestamp),
+ (u32)next_katom->kctx, 0,
+ next_katom->work_id);
+ kbdev->hwaccess.backend.slot_rb[js].last_context =
+ next_katom->kctx;
+ } else {
+ char js_string[16];
+
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+ js_string),
+ ktime_to_ns(ktime_get()), 0, 0,
+ 0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = NULL;
+ }
+ }
+#endif
+
+ if (completion_code == BASE_JD_EVENT_STOPPED)
+ kbase_jm_return_atom_to_js(kbdev, katom);
+ else
+ kbase_jm_complete(kbdev, katom, end_timestamp);
+
+ /* Job completion may have unblocked other atoms. Try to update all job
+ * slots */
+ kbase_gpu_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+ int js;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ int idx;
+
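+		/* Always inspect index 0: dequeueing the head atom shifts the
+		 * following atom down into index 0 on the next iteration */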
+ for (idx = 0; idx < 2; idx++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+ js, 0);
+
+ if (katom) {
+ enum kbase_atom_gpu_rb_state gpu_rb_state =
+ katom->gpu_rb_state;
+
+ kbase_gpu_release_atom(kbdev, katom);
+ kbase_gpu_dequeue_atom(kbdev, js);
+
+ if (gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ katom->event_code =
+ BASE_JD_EVENT_JOB_CANCELLED;
+ kbase_jm_complete(kbdev, katom,
+ end_timestamp);
+ } else {
+ katom->event_code =
+ BASE_JD_EVENT_STOPPED;
+ kbase_jm_return_atom_to_js(kbdev,
+ katom);
+ }
+ }
+ }
+ }
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+ int js,
+ struct kbase_jd_atom *katom,
+ u32 action)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+ katom->core_req, katom);
+ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ u32 action,
+ bool disjoint)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+ kbase_gpu_mark_atom_for_return(kbdev, katom);
+ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+ if (disjoint)
+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+ katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+ if (katom->x_post_dep) {
+ struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+ if (dep_atom->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+ dep_atom->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+ return dep_atom->slot_nr;
+ }
+ return -1;
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js,
+ struct kbase_jd_atom *katom,
+ u32 action)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ struct kbase_jd_atom *katom_idx0;
+ struct kbase_jd_atom *katom_idx1;
+
+ bool katom_idx0_valid, katom_idx1_valid;
+
+ bool ret = false;
+
+ int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+ katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+ if (katom) {
+ katom_idx0_valid = (katom_idx0 == katom);
+ /* If idx0 is to be removed and idx1 is on the same context,
+ * then idx1 must also be removed otherwise the atoms might be
+ * returned out of order */
+ if (katom_idx1)
+ katom_idx1_valid = (katom_idx1 == katom) ||
+ (katom_idx0_valid &&
+ (katom_idx0->kctx ==
+ katom_idx1->kctx));
+ else
+ katom_idx1_valid = false;
+ } else {
+ katom_idx0_valid = (katom_idx0 &&
+ (!kctx || katom_idx0->kctx == kctx));
+ katom_idx1_valid = (katom_idx1 &&
+ (!kctx || katom_idx1->kctx == kctx));
+ }
+
+ if (katom_idx0_valid)
+ stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+ if (katom_idx1_valid)
+ stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+ if (katom_idx0_valid) {
+ if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Simple case - just dequeue and return */
+ kbase_gpu_dequeue_atom(kbdev, js);
+ if (katom_idx1_valid) {
+ kbase_gpu_dequeue_atom(kbdev, js);
+ katom_idx1->event_code =
+ BASE_JD_EVENT_REMOVED_FROM_NEXT;
+ kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+ kbasep_js_clear_submit_allowed(js_devdata,
+ katom_idx1->kctx);
+ }
+
+ katom_idx0->event_code =
+ BASE_JD_EVENT_REMOVED_FROM_NEXT;
+ kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+ kbasep_js_clear_submit_allowed(js_devdata,
+ katom_idx0->kctx);
+ } else {
+ /* katom_idx0 is on GPU */
+ if (katom_idx1 && katom_idx1->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* katom_idx0 and katom_idx1 are on GPU */
+
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+ JS_COMMAND_NEXT), NULL) == 0) {
+ /* idx0 has already completed - stop
+					 * idx1 if needed */
+ if (katom_idx1_valid) {
+ kbase_gpu_stop_atom(kbdev, js,
+ katom_idx1,
+ action);
+ ret = true;
+ }
+ } else {
+ /* idx1 is in NEXT registers - attempt
+ * to remove */
+ kbase_reg_write(kbdev,
+ JOB_SLOT_REG(js,
+ JS_COMMAND_NEXT),
+ JS_COMMAND_NOP, NULL);
+
+ if (kbase_reg_read(kbdev,
+ JOB_SLOT_REG(js,
+ JS_HEAD_NEXT_LO), NULL)
+ != 0 ||
+ kbase_reg_read(kbdev,
+ JOB_SLOT_REG(js,
+ JS_HEAD_NEXT_HI), NULL)
+ != 0) {
+ /* idx1 removed successfully,
+ * will be handled in IRQ */
+ kbase_gpu_remove_atom(kbdev,
+ katom_idx1,
+ action, true);
+ stop_x_dep_idx1 =
+ should_stop_x_dep_slot(katom_idx1);
+
+ /* stop idx0 if still on GPU */
+ kbase_gpu_stop_atom(kbdev, js,
+ katom_idx0,
+ action);
+ ret = true;
+ } else if (katom_idx1_valid) {
+ /* idx0 has already completed,
+ * stop idx1 if needed */
+ kbase_gpu_stop_atom(kbdev, js,
+ katom_idx1,
+ action);
+ ret = true;
+ }
+ }
+ } else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued */
+
+ /* idx1 will be handled in IRQ */
+ kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+ false);
+ /* stop idx0 */
+ /* This will be repeated for anything removed
+ * from the next registers, since their normal
+ * flow was also interrupted, and this function
+ * might not enter disjoint state e.g. if we
+ * don't actually do a hard stop on the head
+ * atom */
+ kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+ action);
+ ret = true;
+ } else {
+ /* no atom in idx1 */
+ /* just stop idx0 */
+ kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+ action);
+ ret = true;
+ }
+ }
+ } else if (katom_idx1_valid) {
+ if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Mark for return */
+ /* idx1 will be returned once idx0 completes */
+ kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+ false);
+ } else {
+ /* idx1 is on GPU */
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+ JS_COMMAND_NEXT), NULL) == 0) {
+ /* idx0 has already completed - stop idx1 */
+ kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+ action);
+ ret = true;
+ } else {
+ /* idx1 is in NEXT registers - attempt to
+ * remove */
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+ JS_COMMAND_NEXT),
+ JS_COMMAND_NOP, NULL);
+
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+ JS_HEAD_NEXT_LO), NULL) != 0 ||
+ kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+ JS_HEAD_NEXT_HI), NULL) != 0) {
+ /* idx1 removed successfully, will be
+ * handled in IRQ once idx0 completes */
+ kbase_gpu_remove_atom(kbdev, katom_idx1,
+ action,
+ false);
+ } else {
+ /* idx0 has already completed - stop
+ * idx1 */
+ kbase_gpu_stop_atom(kbdev, js,
+ katom_idx1,
+ action);
+ ret = true;
+ }
+ }
+ }
+ }
+
+
+ if (stop_x_dep_idx0 != -1)
+ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+ NULL, action);
+
+ if (stop_x_dep_idx1 != -1)
+ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+ NULL, action);
+
+ return ret;
+}
+
+static void kbasep_gpu_cacheclean(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ /* Limit the number of loops to avoid a hang if the interrupt is missed
+ */
+ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+ mutex_lock(&kbdev->cacheclean_lock);
+
+ /* use GPU_COMMAND completion solution */
+ /* clean & invalidate the caches */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+ /* wait for cache flush to complete before continuing */
+ while (--max_loops &&
+ (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+ CLEAN_CACHES_COMPLETED) == 0)
+ ;
+
+ /* clear the CLEAN_CACHES_COMPLETED irq */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+ CLEAN_CACHES_COMPLETED);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+ CLEAN_CACHES_COMPLETED, NULL);
+ KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_CLEANING,
+ "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+ mutex_unlock(&kbdev->cacheclean_lock);
+
+ kbase_pm_unrequest_cores(kbdev, false,
+ katom->need_cache_flush_cores_retained);
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ /*
+ * If cache flush required due to HW workaround then perform the flush
+ * now
+ */
+ if (katom->need_cache_flush_cores_retained) {
+ kbasep_gpu_cacheclean(kbdev, katom);
+ katom->need_cache_flush_cores_retained = 0;
+ }
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) &&
+ (katom->core_req & BASE_JD_REQ_FS) &&
+ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT &&
+ (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+ !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+ dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+ if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after it was
+			 * soft-stopped. Due to an HW issue we try to execute
+			 * the job again.
+ */
+ dev_dbg(kbdev->dev,
+ "Clamping has been executed, try to rerun the job\n"
+ );
+ katom->event_code = BASE_JD_EVENT_STOPPED;
+ katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+ }
+ }
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ unsigned long flags;
+ int js;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ int idx;
+
+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+ js,
+ idx);
+
+ if (katom)
+ dev_info(kbdev->dev,
+ " js%d idx%d : katom=%p gpu_rb_state=%d\n",
+ js, idx, katom, katom->gpu_rb_state);
+ else
+ dev_info(kbdev->dev, " js%d idx%d : empty\n",
+ js, idx);
+ }
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100644
index 00000000000..13d600c47b1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_jm_rb.h
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] js Job slot to evict from
+ * @return true if job evicted from NEXT registers
+ * false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * Complete an atom on job slot js
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] js Job slot that has completed
+ * @param[in] completion_code Completion code of the job that has completed
+ * @param[in] job_tail Job tail address, if the job was only partially
+ *                     completed
+ * @param[in] end_timestamp Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+ u32 completion_code,
+ u64 job_tail,
+ ktime_t *end_timestamp);
+
+/**
+ * Inspect the contents of the HW access ringbuffer
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] js Job slot to inspect
+ * @param[in] idx Index into ringbuffer. 0 is the job currently running on
+ * the slot, 1 is the job waiting, all other values are
+ * invalid.
+ * @return The atom at that position in the ringbuffer
+ * NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+ int idx);
+
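+/*
+ * A minimal usage sketch, assuming the caller holds runpool_irq.lock:
+ *
+ *   struct kbase_jd_atom *head = kbase_gpu_inspect(kbdev, js, 0);
+ *   struct kbase_jd_atom *next = kbase_gpu_inspect(kbdev, js, 1);
+ *
+ * head is the atom currently on the slot (NULL if idle) and next is the atom
+ * queued behind it, matching the two-entry slot ringbuffer used by the
+ * backend.
+ */
+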
+/**
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ *
+ * @param[in] kbdev Device pointer
+ */
+void kbase_gpu_slot_update(struct kbase_device *kbdev);
+
+/**
+ * Print the contents of the slot ringbuffers
+ *
+ * @param[in] kbdev Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100644
index 00000000000..1e29d5fb223
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_affinity.c
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+ int js)
+{
+ /*
+ * Here are the reasons for using job slot 2:
+ * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+ * - In absence of the above, then:
+ * - Atoms with BASE_JD_REQ_COHERENT_GROUP
+ * - But, only when there aren't contexts with
+ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+ * all cores on slot 1 could be blocked by those using a coherent group
+ * on slot 2
+ * - And, only when you actually have 2 or more coregroups - if you
+ * only have 1 coregroup, then having jobs for slot 2 implies they'd
+ * also be for slot 1, meaning you'll get interference from them. Jobs
+ * able to run on slot 2 could also block jobs that can only run on
+ * slot 1 (tiler jobs)
+ */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ return true;
+
+ if (js != 2)
+ return true;
+
+ /* Only deal with js==2 now: */
+ if (kbdev->gpu_props.num_core_groups > 1) {
+ /* Only use slot 2 in the 2+ coregroup case */
+ if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+ false) {
+ /* ...But only when we *don't* have atoms that run on
+ * all cores */
+
+ /* No specific check for BASE_JD_REQ_COHERENT_GROUP
+ * atoms - the policy will sort that out */
+ return true;
+ }
+ }
+
+	/* The above checks failed, so we shouldn't use slot 2 */
+ return false;
+}
+
+/*
+ * As long as a deeper modification of what the job scheduler, power
+ * manager and affinity manager will implement is still pending, this
+ * function is just an intermediate step that assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ * and high cores in a core split will be evenly distributed.
+ * - Odd combinations of core requirements have been filtered out
+ * and do not get to this function (e.g. CS+T+NSS is not
+ * supported here).
+ * - This function is frequently called and can be optimized
+ * (see notes in loops), but as the functionality will likely
+ * be modified, optimization has not been addressed.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+ struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom, int js)
+{
+ base_jd_core_req core_req = katom->core_req;
+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+ u64 core_availability_mask;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+ /*
+ * If no cores are currently available (core availability policy is
+ * transitioning) then fail.
+ */
+ if (0 == core_availability_mask) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ *affinity = 0;
+ return false;
+ }
+
+ KBASE_DEBUG_ASSERT(js >= 0);
+
+ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+ BASE_JD_REQ_T) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ /* Tiler only job, bit 0 needed to enable tiler but no shader
+ * cores required */
+ *affinity = 1;
+ return true;
+ }
+
+ if (1 == kbdev->gpu_props.num_cores) {
+ /* trivial case only one core, nothing to do */
+ *affinity = core_availability_mask;
+ } else {
+ if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+ if (js == 0 || num_core_groups == 1) {
+ /* js[0] and single-core-group systems just get
+ * the first core group */
+ *affinity =
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask
+ & core_availability_mask;
+ } else {
+ /* js[1], js[2] use core groups 0, 1 for
+ * dual-core-group systems */
+ u32 core_group_idx = ((u32) js) - 1;
+
+ KBASE_DEBUG_ASSERT(core_group_idx <
+ num_core_groups);
+ *affinity =
+ kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+ & core_availability_mask;
+
+ /* If the job is specifically targeting core
+ * group 1 and the core availability policy is
+ * keeping that core group off, then fail */
+ if (*affinity == 0 && core_group_idx == 1 &&
+ kbdev->pm.backend.cg1_disabled
+ == true)
+ katom->event_code =
+ BASE_JD_EVENT_PM_EVENT;
+ }
+ } else {
+ /* All cores are available when no core split is
+ * required */
+ *affinity = core_availability_mask;
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /*
+ * If no cores are currently available in the desired core group(s)
+ * (core availability policy is transitioning) then fail.
+ */
+ if (*affinity == 0)
+ return false;
+
+ /* Enable core 0 if tiler required */
+ if (core_req & BASE_JD_REQ_T)
+ *affinity = *affinity | 1;
+
+ return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+ struct kbase_device *kbdev,
+ u64 *affinities)
+{
+ /* This implementation checks whether the two slots involved in Generic
+ * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targeting cores used by
+ * slot B could prevent the job in slot B from making progress until the
+ * job in slot A has completed.
+ */
+ u64 affinity_set_left;
+ u64 affinity_set_right;
+ u64 intersection;
+
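+	/* e.g. slot 1 holding affinity 0b0011 and slot 2 holding 0b0110
+	 * intersect on bit 1, which counts as a violation */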
+ KBASE_DEBUG_ASSERT(affinities != NULL);
+
+ affinity_set_left = affinities[1];
+
+ affinity_set_right = affinities[2];
+
+ /* A violation occurs when any bit in the left_set is also in the
+ * right_set */
+ intersection = affinity_set_left & affinity_set_right;
+
+ return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+ u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+ sizeof(js_devdata->runpool_irq.slot_affinities));
+
+ new_affinities[js] |= affinity;
+
+ return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
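+/*
+ * Per-slot, per-core reference counting: e.g. retaining affinity 0b0110 on a
+ * slot increments the refcounts of cores 1 and 2; a slot's bit in
+ * slot_affinities is set on the 0 -> 1 transition of a core's refcount and
+ * cleared again by the release path on the 1 -> 0 transition.
+ */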
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+ u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 cores;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+ == false);
+
+ cores = affinity;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ s8 cnt;
+
+ cnt =
+ ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+ if (cnt == 1)
+ js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+ cores &= ~bit;
+ }
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+ u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 cores;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ cores = affinity;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ s8 cnt;
+
+ KBASE_DEBUG_ASSERT(
+ js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+ cnt =
+ --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+ if (0 == cnt)
+ js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+ cores &= ~bit;
+ }
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int slot_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+ NULL, 0u, slot_nr,
+ (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif /* KBASE_TRACE_ENABLE */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100644
index 00000000000..541e554983e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_affinity.h
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+/* Import the external affinity mask variables */
+extern u64 mali_js0_affinity_mask;
+extern u64 mali_js1_affinity_mask;
+extern u64 mali_js2_affinity_mask;
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_affinity Affinity Manager internal APIs.
+ * @{
+ *
+ */
+
+/**
+ * @brief Decide whether it is possible to submit a job to a particular job slot
+ * in the current state
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * state. For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param js Job slot number to check for allowance
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+ int js);
+
+/**
+ * @brief Compute affinity for a given job.
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ *
+ * @param[out] affinity Affinity bitmap computed
+ * @param kbdev The kbase device structure of the device
+ * @param katom Job chain of which affinity is going to be found
+ * @param js Slot the job chain is being submitted
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+ struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js);
+
+/**
+ * @brief Determine whether a proposed \a affinity on job slot \a js would
+ * cause a violation of affinity restrictions.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+ u64 affinity);
+
+/**
+ * @brief Affinity tracking: retain cores used by a slot
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+ u64 affinity);
+
+/**
+ * @brief Affinity tracking: release cores used by a slot
+ *
+ * Cores \b must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+ u64 affinity);
+
+/**
+ * @brief Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else /* KBASE_TRACE_ENABLE */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+ /** @} *//* end group kbase_js_affinity */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+
+#endif /* _KBASE_JS_AFFINITY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100644
index 00000000000..04bfa519039
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,316 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+ s8 nr_running_ctxs;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* nr_contexts_runnable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_runnable is up-to-date for reading */
+ nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (kbdev->js_data.softstop_always) {
+ /* Debug support for allowing soft-stop on a single context */
+ return true;
+ }
+#endif /* CONFIG_MALI_DEBUG */
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+ /* Timeouts would have to be 4x longer (due to micro-
+ * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there are either:
+		 * - 2 or more CL contexts, or
+		 * - 1 or more GLES contexts
+ *
+		 * NOTE: A context that has both Compute and Non-Compute jobs
+		 * will be treated as an OpenCL context (hence, we
+ * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+ */
+ {
+ s8 nr_compute_ctxs =
+ kbasep_js_ctx_attr_count_on_runpool(kbdev,
+ KBASEP_JS_CTX_ATTR_COMPUTE);
+ s8 nr_noncompute_ctxs = nr_running_ctxs -
+ nr_compute_ctxs;
+
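+			/* e.g. two CL contexts keep the timer running, as does
+			 * any GLES context; a single CL-only context does not
+			 */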
+ return (bool) (nr_compute_ctxs >= 2 ||
+ nr_noncompute_ctxs > 0);
+ }
+ } else {
+ /* Run the timer callback whenever you have at least 1 context
+ */
+ return (bool) (nr_running_ctxs > 0);
+ }
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_backend_data *backend;
+ int s;
+ bool reset_needed = false;
+
+ KBASE_DEBUG_ASSERT(timer != NULL);
+
+ backend = container_of(timer, struct kbase_backend_data,
+ scheduling_timer);
+ kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+ js_devdata = &kbdev->js_data;
+
+ /* Loop through the slots */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+ struct kbase_jd_atom *atom = NULL;
+
+ if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+ atom = kbase_gpu_inspect(kbdev, s, 0);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+ }
+
+ if (atom != NULL) {
+ /* The current version of the model doesn't support
+ * Soft-Stop */
+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+ u32 ticks = atom->sched_info.cfs.ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+ u32 soft_stop_ticks, hard_stop_ticks,
+ gpu_reset_ticks;
+ if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ soft_stop_ticks =
+ js_devdata->soft_stop_ticks_cl;
+ hard_stop_ticks =
+ js_devdata->hard_stop_ticks_cl;
+ gpu_reset_ticks =
+ js_devdata->gpu_reset_ticks_cl;
+ } else {
+ soft_stop_ticks =
+ js_devdata->soft_stop_ticks;
+ hard_stop_ticks =
+ js_devdata->hard_stop_ticks_ss;
+ gpu_reset_ticks =
+ js_devdata->gpu_reset_ticks_ss;
+ }
+
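+				/* The tick thresholds are assumed to be
+				 * ordered soft_stop < hard_stop < gpu_reset,
+				 * so an atom is soft-stopped first,
+				 * hard-stopped if it survives, and finally
+				 * triggers a GPU reset */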
+ /* Job is Soft-Stoppable */
+ if (ticks == soft_stop_ticks) {
+ int disjoint_threshold =
+ KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+ u32 softstop_flags = 0u;
+ /* Job has been scheduled for at least
+ * js_devdata->soft_stop_ticks ticks.
+ * Soft stop the slot so we can run
+ * other jobs.
+ */
+ dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+ /* nr_user_contexts_running is updated
+ * with the runpool_mutex, but we can't
+ * take that here.
+ *
+ * However, if it's about to be
+ * increased then the new context can't
+ * run any jobs until they take the
+ * runpool_irq lock, so it's OK to
+ * observe the older value.
+ *
+ * Similarly, if it's about to be
+ * decreased, the last job from another
+ * context has already finished, so it's
+ * not too bad that we observe the older
+ * value and register a disjoint event
+ * when we try soft-stopping */
+ if (js_devdata->nr_user_contexts_running
+ >= disjoint_threshold)
+ softstop_flags |=
+ JS_COMMAND_SW_CAUSES_DISJOINT;
+
+ kbase_job_slot_softstop_swflags(kbdev,
+ s, atom, softstop_flags);
+#endif
+ } else if (ticks == hard_stop_ticks) {
+ /* Job has been scheduled for at least
+ * js_devdata->hard_stop_ticks_ss ticks.
+ * It should have been soft-stopped by
+ * now. Hard stop the slot.
+ */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ int ms =
+ js_devdata->scheduling_period_ns
+ / 1000000u;
+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+ (unsigned long)ticks,
+ (unsigned long)ms);
+ kbase_job_slot_hardstop(atom->kctx, s,
+ atom);
+#endif
+ } else if (ticks == gpu_reset_ticks) {
+ /* Job has been scheduled for at least
+ * js_devdata->gpu_reset_ticks_ss ticks.
+ * It should have left the GPU by now.
+ * Signal that the GPU needs to be
+ * reset.
+ */
+ reset_needed = true;
+ }
+#else /* !CINSTR_DUMPING_ENABLED */
+ /* NOTE: During CINSTR_DUMPING_ENABLED, we use
+ * the alternate timeouts, which makes the hard-
+ * stop and GPU reset timeout much longer. We
+ * also ensure that we don't soft-stop at all.
+ */
+ if (ticks == js_devdata->soft_stop_ticks) {
+ /* Job has been scheduled for at least
+ * js_devdata->soft_stop_ticks. We do
+ * not soft-stop during
+ * CINSTR_DUMPING_ENABLED, however.
+ */
+ dev_dbg(kbdev->dev, "Soft-stop");
+ } else if (ticks ==
+ js_devdata->hard_stop_ticks_dumping) {
+ /* Job has been scheduled for at least
+ * js_devdata->hard_stop_ticks_dumping
+ * ticks. Hard stop the slot.
+ */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ int ms =
+ js_devdata->scheduling_period_ns
+ / 1000000u;
+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+ (unsigned long)ticks,
+ (unsigned long)ms);
+ kbase_job_slot_hardstop(atom->kctx, s,
+ atom);
+#endif
+ } else if (ticks ==
+ js_devdata->gpu_reset_ticks_dumping) {
+ /* Job has been scheduled for at least
+ * js_devdata->gpu_reset_ticks_dumping
+ * ticks. It should have left the GPU by
+ * now. Signal that the GPU needs to be
+ * reset.
+ */
+ reset_needed = true;
+ }
+#endif /* !CINSTR_DUMPING_ENABLED */
+ }
+ }
+ }
+#if KBASE_GPU_RESET_EN
+ if (reset_needed) {
+ dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+ if (kbase_prepare_to_reset_gpu_locked(kbdev))
+ kbase_reset_gpu_locked(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there are contexts in the run-pool */
+
+ if (backend->timer_running)
+ hrtimer_start(&backend->scheduling_timer,
+ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+ HRTIMER_MODE_REL);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+ unsigned long flags;
+
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ if (!timer_callback_should_run(kbdev)) {
+ /* Take spinlock to force synchronisation with timer */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ backend->timer_running = false;
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* From now on, return value of timer_callback_should_run() will
+ * also cause the timer to not requeue itself. Its return value
+ * cannot change, because it depends on variables updated with
+ * the runpool_mutex held, which the caller of this must also
+ * hold */
+ hrtimer_cancel(&backend->scheduling_timer);
+ }
+
+ if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+ /* Take spinlock to force synchronisation with timer */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ backend->timer_running = true;
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ hrtimer_start(&backend->scheduling_timer,
+ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+ HRTIMER_MODE_REL);
+
+ KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+ 0u);
+ }
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+ hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ backend->scheduling_timer.function = timer_callback;
+
+ backend->timer_running = false;
+
+ return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+ hrtimer_cancel(&backend->scheduling_timer);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100644
index 00000000000..3c101e4320d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100644
index 00000000000..e06dc58621f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,295 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <backend/gpu/mali_kbase_mmu_hw_direct.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+ u32 num_pages)
+{
+ u64 region;
+
+ /* can't lock a zero sized range */
+ KBASE_DEBUG_ASSERT(num_pages);
+
+ region = pfn << PAGE_SHIFT;
+ /*
+ * fls returns (given the ASSERT above):
+ * 1 .. 32
+ *
+ * 10 + fls(num_pages)
+ * results in the range (11 .. 42)
+ */
+
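+	/* Worked example: num_pages = 96 gives fls(96) = 7, so region_width =
+	 * 17; since 96 != (1ul << (17 - 11)) = 64 it is not a power of two,
+	 * and region_width rounds up to 18 */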
+	/* defensively handle num_pages == 0, since the assert above is
+	 * compiled out in release builds */
+	if (0 == num_pages) {
+ region |= 11;
+ } else {
+ u8 region_width;
+
+ region_width = 10 + fls(num_pages);
+ if (num_pages != (1ul << (region_width - 11))) {
+ /* not pow2, so must go up to the next pow2 */
+ region_width += 1;
+ }
+ KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+ KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+ region |= region_width;
+ }
+
+ return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+ unsigned int as_nr, struct kbase_context *kctx)
+{
+ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+
+ /* Wait for the MMU status to indicate there is no active command. */
+ while (--max_loops && kbase_reg_read(kbdev,
+ MMU_AS_REG(as_nr, AS_STATUS),
+ kctx) & AS_STATUS_AS_ACTIVE) {
+ ;
+ }
+
+ if (max_loops == 0) {
+ dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+ struct kbase_context *kctx)
+{
+ int status;
+
+ /* write AS_COMMAND when MMU is ready to accept another command */
+ status = wait_ready(kbdev, as_nr, kctx);
+ if (status == 0)
+ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+ kctx);
+
+ return status;
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+ const int num_as = 16;
+ const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+ const int pf_shift = 0;
+ const unsigned long as_bit_mask = (1UL << num_as) - 1;
+ unsigned long flags;
+ u32 new_mask;
+ u32 tmp;
+
+ /* bus faults */
+ u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+ /* page faults (note: Ignore ASes with both pf and bf) */
+ u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
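+	/* e.g. assuming MMU_PAGE_FAULT_FLAGS is 16, irq_stat = 0x00030001
+	 * yields bf_bits = 0x3 (bus faults on AS0 and AS1) and pf_bits = 0,
+	 * as AS0's page fault is masked by its bus fault */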
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ /* remember current mask */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ /* mask interrupts for now */
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+ while (bf_bits | pf_bits) {
+ struct kbase_as *as;
+ int as_no;
+ struct kbase_context *kctx;
+
+ /*
+ * the while logic ensures we have a bit set, no need to check
+ * for not-found here
+ */
+ as_no = ffs(bf_bits | pf_bits) - 1;
+ as = &kbdev->as[as_no];
+
+ /*
+ * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+ * Bus/Page faults _should_ only occur whilst jobs are running,
+ * and a job causing the Bus/Page fault shouldn't complete until
+ * the MMU is updated
+ */
+ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+ /* find faulting address */
+ as->fault_addr = kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no,
+ AS_FAULTADDRESS_HI),
+ kctx);
+ as->fault_addr <<= 32;
+ as->fault_addr |= kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no,
+ AS_FAULTADDRESS_LO),
+ kctx);
+
+ /* record the fault status */
+ as->fault_status = kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no,
+ AS_FAULTSTATUS),
+ kctx);
+
+ /* find the fault type */
+ as->fault_type = (bf_bits & (1 << as_no)) ?
+ KBASE_MMU_FAULT_TYPE_BUS :
+ KBASE_MMU_FAULT_TYPE_PAGE;
+
+
+ if (kbase_as_has_bus_fault(as)) {
+ /* Mark bus fault as handled.
+ * Note that a bus fault is processed first in case
+ * where both a bus fault and page fault occur.
+ */
+ bf_bits &= ~(1UL << as_no);
+
+ /* remove the queued BF (and PF) from the mask */
+ new_mask &= ~(MMU_BUS_ERROR(as_no) |
+ MMU_PAGE_FAULT(as_no));
+ } else {
+ /* Mark page fault as handled */
+ pf_bits &= ~(1UL << as_no);
+
+ /* remove the queued PF from the mask */
+ new_mask &= ~MMU_PAGE_FAULT(as_no);
+ }
+
+ /* Process the interrupt for this address space */
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ kbase_mmu_interrupt_process(kbdev, kctx, as);
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+ flags);
+ }
+
+ /* reenable interrupts */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ new_mask |= tmp;
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx)
+{
+ struct kbase_mmu_setup *current_setup = &as->current_setup;
+
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+ current_setup->transtab & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+ (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+ current_setup->memattr & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+ (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+ write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+ unsigned int handling_irq)
+{
+ int ret;
+
+ if (op == AS_COMMAND_UNLOCK) {
+ /* Unlock doesn't require a lock first */
+ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ } else {
+ u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+ /* Lock the region that needs to be updated */
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+ lock_addr & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+ (lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+ /* Run the MMU operation */
+ write_cmd(kbdev, as->number, op, kctx);
+
+ /* Wait for the flush to complete */
+ ret = wait_ready(kbdev, as->number, kctx);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+ /* Issue an UNLOCK command to ensure that valid page
+ * tables are re-read by the GPU after an update.
+ * The FLUSH command should perform all the necessary
+ * actions, but bus logs show that if multiple page
+ * faults occur within an 8-page region the MMU does
+ * not always re-read the updated page table entries
+ * for later faults, or reads them only partially, and
+ * subsequently raises the page fault IRQ again for
+ * the same addresses. The UNLOCK ensures that the MMU
+ * cache is flushed, so the updates can be re-read.
+ * Because the region is now unlocked, two UNLOCK
+ * commands must be issued to flush the MMU/uTLB;
+ * see PRLAM-8812.
+ */
+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ }
+ }
+
+ return ret;
+}
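+
+/*
+ * Illustrative caller sketch (not in the original file; AS_COMMAND_FLUSH_PT
+ * is assumed to be one of the AS_COMMAND_* operation codes in the register
+ * map): flushing the page tables for a 16-page range uses the
+ * lock/operate/wait sequence implemented above:
+ *
+ *   err = kbase_mmu_hw_do_operation(kbdev, as, kctx, vpfn, 16,
+ *                                   AS_COMMAND_FLUSH_PT, 0);
+ *   if (err)
+ *           dev_err(kbdev->dev, "MMU flush timed out\n");
+ */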
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+ u32 pf_bf_mask;
+
+ /* Clear the page (and bus fault IRQ as well in case one occurred) */
+ pf_bf_mask = MMU_PAGE_FAULT(as->number);
+ if (type == KBASE_MMU_FAULT_TYPE_BUS)
+ pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+ unsigned long flags;
+ u32 irq_mask;
+
+ /* Enable the page fault IRQ (and bus fault IRQ as well in case one
+ * occurred) */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+ irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+ MMU_PAGE_FAULT(as->number);
+
+ if (type == KBASE_MMU_FAULT_TYPE_BUS)
+ irq_mask |= MMU_BUS_ERROR(as->number);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
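+
+/*
+ * Illustrative fault-handling sequence (a sketch; the actual callers live
+ * in the MMU fault workers): once a page fault has been resolved, the
+ * handler clears the IRQ and then re-enables it using the two helpers
+ * above:
+ *
+ *   kbase_mmu_hw_clear_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
+ *   kbase_mmu_hw_enable_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
+ */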
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100644
index 00000000000..a8b3b76f47e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for the direct implementation of MMU hardware access
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_direct_page Direct MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_direct_intro_sec Introduction
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ * @ref mali_kbase_mmu_hw_page .
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * @addtogroup mali_kbase_mmu_hw
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw_direct Direct register access to MMU
+ * @{
+ */
+
+/** @brief Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the @ref kbase_device.
+ *
+ * @param[in] kbdev The kbase device the interrupt was reported on.
+ * @param[in] irq_stat Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** @} *//* end group mali_kbase_mmu_hw_direct */
+/** @} *//* end group mali_kbase_mmu_hw */
+
+#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100644
index 00000000000..d18ae86f8dd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+ return kbdev->shader_present_bitmap;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+ return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the "always on" power policy.
+ *
+ * This is the static structure that defines the "always on" power policy's
+ * callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+ "always_on", /* name */
+ always_on_init, /* init */
+ always_on_term, /* term */
+ always_on_get_core_mask, /* get_core_mask */
+ always_on_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100644
index 00000000000..f9d244b01bc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * All Shader Cores are powered up, regardless of whether or not they will
+ * be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores is needed to submit the
+ *   currently queued Job Chains:
+ *   All Shader Cores are kept powered, regardless of whether or not they will
+ *   be needed.
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ * The Shader Cores are kept powered, regardless of whether or not they will
+ * be needed. The GPU itself is also kept powered, even though it is not
+ * needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
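+
+/*
+ * Illustrative usage (an assumption, not guaranteed by this file: Midgard
+ * kbase builds typically expose the active policy through sysfs, though
+ * the exact path depends on the platform integration):
+ *
+ *   # cat /sys/class/misc/mali0/device/power_policy
+ *   [demand] coarse_demand always_on      <- example output
+ *   # echo always_on > /sys/class/misc/mali0/device/power_policy
+ *
+ * The GPU and all Shader Cores then stay powered until the policy is
+ * changed again or the system suspends.
+ */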
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+ int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100644
index 00000000000..bbf39b0ce03
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,375 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_pm_backend.c
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+ struct kbase_pm_callback_conf *callbacks;
+
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_enable(kbdev->dev);
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+ if (callbacks)
+ callbacks->power_on_callback(kbdev);
+
+ kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+ struct kbase_pm_callback_conf *callbacks;
+
+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+ if (callbacks)
+ callbacks->power_off_callback(kbdev);
+
+ kbdev->pm.backend.gpu_powered = false;
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_disable(kbdev->dev);
+#endif
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+ int ret = 0;
+ struct kbase_pm_callback_conf *callbacks;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_init(&kbdev->pm.lock);
+
+ kbdev->pm.backend.gpu_powered = false;
+ kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+ kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+ kbdev->pm.backend.gpu_in_desired_state = true;
+ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+ if (callbacks) {
+ kbdev->pm.backend.callback_power_on =
+ callbacks->power_on_callback;
+ kbdev->pm.backend.callback_power_off =
+ callbacks->power_off_callback;
+ kbdev->pm.backend.callback_power_suspend =
+ callbacks->power_suspend_callback;
+ kbdev->pm.backend.callback_power_resume =
+ callbacks->power_resume_callback;
+ kbdev->pm.callback_power_runtime_init =
+ callbacks->power_runtime_init_callback;
+ kbdev->pm.callback_power_runtime_term =
+ callbacks->power_runtime_term_callback;
+ kbdev->pm.backend.callback_power_runtime_on =
+ callbacks->power_runtime_on_callback;
+ kbdev->pm.backend.callback_power_runtime_off =
+ callbacks->power_runtime_off_callback;
+ } else {
+ kbdev->pm.backend.callback_power_on = NULL;
+ kbdev->pm.backend.callback_power_off = NULL;
+ kbdev->pm.backend.callback_power_suspend = NULL;
+ kbdev->pm.backend.callback_power_resume = NULL;
+ kbdev->pm.callback_power_runtime_init = NULL;
+ kbdev->pm.callback_power_runtime_term = NULL;
+ kbdev->pm.backend.callback_power_runtime_on = NULL;
+ kbdev->pm.backend.callback_power_runtime_off = NULL;
+ }
+
+ /* Initialise the metrics subsystem */
+ ret = kbasep_pm_metrics_init(kbdev);
+ if (ret)
+ return ret;
+
+ init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+ kbdev->pm.backend.l2_powered = 0;
+
+ init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+ kbdev->pm.backend.reset_done = false;
+
+ init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+ kbdev->pm.active_count = 0;
+
+ spin_lock_init(&kbdev->pm.power_change_lock);
+ spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+ spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+ if (kbase_pm_ca_init(kbdev) != 0)
+ goto workq_fail;
+
+ if (kbase_pm_policy_init(kbdev) != 0)
+ goto pm_policy_fail;
+
+ return 0;
+
+pm_policy_fail:
+ kbase_pm_ca_term(kbdev);
+workq_fail:
+ kbasep_pm_metrics_term(kbdev);
+ return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* Turn clocks and interrupts on - no-op if we haven't done a previous
+ * kbase_pm_clock_off() */
+ kbase_pm_clock_on(kbdev, is_resume);
+
+ /* Update core status as required by the policy */
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+ kbase_pm_update_cores_state(kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+ /* NOTE: We don't wait to reach the desired state, since running atoms
+ * will wait for that state to be reached anyway */
+}
+
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+ unsigned long flags;
+ bool cores_are_available;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ /* Force all cores off */
+ kbdev->pm.backend.desired_shader_state = 0;
+
+ /* Force all cores to be unavailable, in the situation where
+ * transitions are in progress for some cores but not others,
+ * and kbase_pm_check_transitions_nolock can not immediately
+ * power off the cores */
+ kbdev->shader_available_bitmap = 0;
+ kbdev->tiler_available_bitmap = 0;
+ kbdev->l2_available_bitmap = 0;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+ /* 'cores_are_available' is not needed here; only the clock-off result
+ * below is returned */
+ CSTD_UNUSED(cores_are_available);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* NOTE: We won't wait to reach the cores' desired state, even if we're
+ * powering off the GPU itself too. It's safe to cut the power whilst
+ * they're transitioning to off, because the cores should be idle and
+ * all cache flushes should already have occurred */
+
+ /* Consume any change-state events */
+ kbase_timeline_pm_check_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ /* Disable interrupts and turn the clock off */
+ return kbase_pm_clock_off(kbdev, is_suspend);
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+ unsigned int flags)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ unsigned long irq_flags;
+ int ret;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+
+ /* A suspend won't happen during startup/insmod */
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+ /* Power up the GPU, don't enable IRQs as we are not ready to receive
+ * them. */
+ ret = kbase_pm_init_hw(kbdev, flags);
+ if (ret) {
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ return ret;
+ }
+
+ kbasep_pm_read_present_cores(kbdev);
+
+ kbdev->pm.debug_core_mask = kbdev->shader_present_bitmap;
+
+ /* Pretend the GPU is active to prevent a power policy turning the GPU
+ * cores off */
+ kbdev->pm.active_count = 1;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ irq_flags);
+ /* Ensure cycle counter is off */
+ kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ irq_flags);
+
+ /* We are ready to receive IRQs now as the power policy is set up, so
+ * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+ kbdev->pm.backend.driver_ready_for_irqs = true;
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+ kbase_pm_enable_interrupts(kbdev);
+
+ /* Turn on the GPU and any cores needed by the policy */
+ kbase_pm_do_poweron(kbdev, false);
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ /* Idle the GPU and/or cores, if the policy wants it to */
+ kbase_pm_context_idle(kbdev);
+
+ return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_lock(&kbdev->pm.lock);
+ kbase_pm_cancel_deferred_poweroff(kbdev);
+ if (!kbase_pm_do_poweroff(kbdev, false)) {
+ /* Page/bus faults are pending, must drop pm.lock to process.
+ * Interrupts are disabled so no more faults should be
+ * generated at this point */
+ mutex_unlock(&kbdev->pm.lock);
+ kbase_flush_mmu_wqs(kbdev);
+ mutex_lock(&kbdev->pm.lock);
+ WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+ }
+ mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+ /* Free any resources the policy allocated */
+ kbase_pm_policy_term(kbdev);
+ kbase_pm_ca_term(kbdev);
+
+ /* Shut down the metrics subsystem */
+ kbasep_pm_metrics_term(kbdev);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+ bool cores_are_available;
+ unsigned long flags;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+ if (cores_are_available) {
+ /* Log timelining information that a change in state has
+ * completed */
+ kbase_timeline_pm_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ kbase_gpu_slot_update(kbdev);
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+ }
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
+{
+ kbdev->pm.debug_core_mask = new_core_mask;
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+ kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+ kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ /* Force power off the GPU and all cores (regardless of policy), only
+ * after the PM active count reaches zero (otherwise, we risk turning it
+ * off prematurely) */
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+ kbase_pm_cancel_deferred_poweroff(kbdev);
+ if (!kbase_pm_do_poweroff(kbdev, true)) {
+ /* Page/bus faults are pending, must drop pm.lock to process.
+ * Interrupts are disabled so no more faults should be
+ * generated at this point */
+ mutex_unlock(&kbdev->pm.lock);
+ kbase_flush_mmu_wqs(kbdev);
+ mutex_lock(&kbdev->pm.lock);
+ WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+ kbdev->pm.suspending = false;
+ kbase_pm_do_poweron(kbdev, true);
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 00000000000..521958aa174
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,179 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca.c
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+ &kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/** The number of core availability policies available in the system.
+ * This is derived from the number of policies listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+ kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+ return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+ kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+ if (!list)
+ return POLICY_COUNT;
+
+ *list = policy_list;
+
+ return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_ca_policy *new_policy)
+{
+ const struct kbase_pm_ca_policy *old_policy;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+ KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+ new_policy->id);
+
+ /* During a policy change we pretend the GPU is active */
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread */
+ kbase_pm_context_active(kbdev);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Remove the policy to prevent IRQ handlers from working on it */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ old_policy = kbdev->pm.backend.ca_current_policy;
+ kbdev->pm.backend.ca_current_policy = NULL;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (old_policy->term)
+ old_policy->term(kbdev);
+
+ if (new_policy->init)
+ new_policy->init(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.backend.ca_current_policy = new_policy;
+
+ /* If any core power state changes were previously attempted, but
+ * couldn't be made because the policy was changing (ca_current_policy was
+ * NULL), then re-try them here. */
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+ kbdev->shader_ready_bitmap,
+ kbdev->shader_transitioning_bitmap);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Now the policy change is finished, we release our fake context active
+ * reference */
+ kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
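+
+/*
+ * Illustrative caller sketch (kbdev is assumed to be in scope; must run in
+ * process context, e.g. a sysfs store handler, since
+ * kbase_pm_ca_set_policy() takes mutexes and may sleep):
+ *
+ *   const struct kbase_pm_ca_policy *const *policies;
+ *   int i, count = kbase_pm_ca_list_policies(&policies);
+ *
+ *   for (i = 0; i < count; i++) {
+ *           if (!strcmp(policies[i]->name, "fixed")) {
+ *                   kbase_pm_ca_set_policy(kbdev, policies[i]);
+ *                   break;
+ *           }
+ *   }
+ */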
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ /* All cores must be enabled when instrumentation is in use */
+ if (kbdev->pm.backend.instr_enabled)
+ return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask;
+
+ if (kbdev->pm.backend.ca_current_policy == NULL)
+ return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask;
+
+ return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+ kbdev->pm.debug_core_mask;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+ u64 cores_transitioning)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ if (kbdev->pm.backend.ca_current_policy != NULL)
+ kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+ cores_ready,
+ cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.backend.instr_enabled = true;
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.backend.instr_enabled = false;
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100644
index 00000000000..ee9e751f2d7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ * -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev: The kbase device structure for the device (must be
+ * a valid pointer)
+ * @cores_ready: The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ * state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+ u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100644
index 00000000000..f890a64e8bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca_fixed.c
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+ kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+ return kbdev->shader_present_bitmap;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+ u64 cores_ready,
+ u64 cores_transitioning)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(cores_ready);
+ CSTD_UNUSED(cores_transitioning);
+}
+
+/** The @ref struct kbase_pm_ca_policy structure for the fixed core
+ * availability policy.
+ *
+ * This is the static structure that defines the fixed policy's callbacks
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+ "fixed", /* name */
+ fixed_init, /* init */
+ fixed_term, /* term */
+ fixed_get_core_mask, /* get_core_mask */
+ fixed_update_core_status, /* update_core_status */
+ 0u, /* flags */
+ KBASE_PM_CA_POLICY_ID_FIXED, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 00000000000..a763155cb70
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+ int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 00000000000..3decd0312b6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_coarse_demand.c
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+ if (kbdev->pm.active_count == 0)
+ return 0;
+
+ return kbdev->shader_present_bitmap;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+ if (kbdev->pm.active_count == 0)
+ return false;
+
+ return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/** The @ref struct kbase_pm_policy structure for the coarse demand power
+ * policy.
+ *
+ * This is the static structure that defines the coarse demand power policy's
+ * callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+ "coarse_demand", /* name */
+ coarse_demand_init, /* init */
+ coarse_demand_term, /* term */
+ coarse_demand_get_core_mask, /* get_core_mask */
+ coarse_demand_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 00000000000..dd1e8f710b3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_coarse_demand.h
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * - All Shader Cores are powered up, regardless of whether or not they will
+ * be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ * currently queued Job Chains:
+ * - All Shader Cores are kept powered, regardless of whether or not they will
+ *   be needed.
+ * - When KBase indicates that the GPU need not be powered:
+ * - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_coarse_demand {
+ /** No state needed - just have a dummy variable here */
+ int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 00000000000..aad6c49b0c3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,555 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_defs.h
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/** The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to:
+ * - @ref kbase_pm_get_present_cores
+ * - @ref kbase_pm_get_active_cores
+ * - @ref kbase_pm_get_trans_cores
+ * - @ref kbase_pm_get_ready_cores.
+ *
+ * They specify which type of core should be acted on. These values are set in
+ * a manner that allows @ref core_type_to_reg function to be simpler and more
+ * efficient.
+ */
+enum kbase_pm_core_type {
+ KBASE_PM_CORE_L2 = L2_PRESENT_LO, /**< The L2 cache */
+ KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, /**< Shader cores */
+ KBASE_PM_CORE_TILER = TILER_PRESENT_LO /**< Tiler cores */
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power management framework.
+ *
+ * @vsync_hit: indicates if a framebuffer update occurred since the last vsync.
+ * A framebuffer driver is expected to provide this information by
+ * checking at each vsync if the framebuffer was updated and calling
+ * kbase_pm_vsync_callback() if there was a change of status.
+ * @utilisation: percentage indicating GPU load (0-100).
+ * The utilisation is the fraction of time the GPU was powered up
+ * and busy. This is based on the time_busy and time_idle metrics.
+ * @util_gl_share: percentage of GPU load related to OpenGL jobs (0-100).
+ * This is based on the busy_gl and time_busy metrics.
+ * @util_cl_share: percentage of GPU load related to OpenCL jobs (0-100).
+ * This is based on the busy_cl and time_busy metrics.
+ * @time_period_start: time at which busy/idle measurements started
+ * @time_busy: number of ns the GPU was busy executing jobs since the
+ * @time_period_start timestamp.
+ * @time_idle: number of ns the GPU was not executing jobs since the
+ * @time_period_start timestamp.
+ * @prev_busy: busy time in ns of previous time period.
+ * Updated when metrics are reset.
+ * @prev_idle: idle time in ns of previous time period
+ * Updated when metrics are reset.
+ * @gpu_active: true when the GPU is executing jobs, false when
+ * not. Updated when the job scheduler informs us that a job is
+ * submitted to or removed from a GPU slot.
+ * @busy_cl: number of ns the GPU was busy executing CL jobs, per core
+ * group. Note that if two CL jobs were active for 400ns, this value
+ * would be updated with 800.
+ * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ * if two GL jobs were active for 400ns, this value would be updated
+ * with 800.
+ * @active_cl_ctx: number of CL jobs active on the GPU, per core group. This
+ * is a portion of the @nr_in_slots value.
+ * @active_gl_ctx: number of GL jobs active on the GPU. This is a portion of
+ * the @nr_in_slots value.
+ * @nr_in_slots: Total number of jobs currently submitted to the GPU across
+ * all job slots. Maximum value would be 2*BASE_JM_MAX_NR_SLOTS
+ * (one in flight and one in the JSn_HEAD_NEXT register for each
+ * job slot).
+ * @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ * @timer: timer to regularly make DVFS decisions based on the power
+ * management metrics.
+ * @timer_active: boolean indicating @timer is running
+ * @platform_data: pointer to data controlled by platform specific code
+ * @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+ int vsync_hit;
+ int utilisation;
+ int util_gl_share;
+ int util_cl_share[2]; /* 2 is the maximum number of core groups we can have */
+ ktime_t time_period_start;
+ u32 time_busy;
+ u32 time_idle;
+ u32 prev_busy;
+ u32 prev_idle;
+ bool gpu_active;
+ u32 busy_cl[2];
+ u32 busy_gl;
+ u32 active_cl_ctx[2];
+ u32 active_gl_ctx;
+ u8 nr_in_slots;
+ spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ struct hrtimer timer;
+ bool timer_active;
+#endif
+
+ void *platform_data;
+ struct kbase_device *kbdev;
+};
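+
+/*
+ * Illustrative arithmetic (one plausible reading of the field descriptions
+ * above; the authoritative computation lives in the metrics code): over a
+ * 100ms window in which the GPU was busy for 60ms, all of it on GL jobs:
+ *
+ *   time_busy     = 60000000 ns, time_idle = 40000000 ns
+ *   utilisation   = 100 * time_busy / (time_busy + time_idle) = 60
+ *   util_gl_share = 100 * busy_gl / time_busy = 100
+ *   util_cl_share = 0 for both core groups
+ */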
+
+/** Actions for DVFS.
+ *
+ * kbase_pm_get_dvfs_action will return one of these enumerated values to
+ * describe the action that the DVFS system should take.
+ */
+enum kbase_pm_dvfs_action {
+ KBASE_PM_DVFS_NOP, /**< No change in clock frequency is
+ * requested */
+ KBASE_PM_DVFS_CLOCK_UP, /**< The clock frequency should be increased
+ * if possible */
+ KBASE_PM_DVFS_CLOCK_DOWN /**< The clock frequency should be decreased
+ * if possible */
+};
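+
+/*
+ * Illustrative platform DVFS hook (a sketch: kbase_pm_get_dvfs_action() is
+ * the query named above; the two platform_gpu_set_*() helpers are
+ * hypothetical placeholders for platform OPP code):
+ *
+ *   switch (kbase_pm_get_dvfs_action(kbdev)) {
+ *   case KBASE_PM_DVFS_CLOCK_UP:
+ *           platform_gpu_set_next_higher_opp();
+ *           break;
+ *   case KBASE_PM_DVFS_CLOCK_DOWN:
+ *           platform_gpu_set_next_lower_opp();
+ *           break;
+ *   case KBASE_PM_DVFS_NOP:
+ *   default:
+ *           break;
+ *   }
+ */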
+
+union kbase_pm_policy_data {
+ struct kbasep_pm_policy_always_on always_on;
+ struct kbasep_pm_policy_coarse_demand coarse_demand;
+ struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+ struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+ struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+ struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+ struct kbasep_pm_ca_policy_random random;
+#endif
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_backend_data {
+ /**
+ * The policy that is currently actively controlling core availability.
+ *
+ * @note: During an IRQ, this can be NULL when the policy is being
+ * changed with kbase_pm_ca_set_policy(). The change is protected under
+ * kbase_device::pm::power_change_lock. Direct access to this from IRQ
+ * context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() will re-issue the policy functions that
+ * would've been done under IRQ.
+ */
+ const struct kbase_pm_ca_policy *ca_current_policy;
+
+ /**
+ * The policy that is currently actively controlling the power state.
+ *
+ * @note: During an IRQ, this can be NULL when the policy is being
+ * changed with kbase_pm_set_policy(). The change is protected under
+ * kbase_device::pm::power_change_lock. Direct access to this from IRQ
+ * context must therefore check for NULL. If NULL, then
+ * kbase_pm_set_policy() will re-issue the policy functions that
+ * would've been done under IRQ.
+ */
+ const struct kbase_pm_policy *pm_current_policy;
+
+ /** Private data for current CA policy */
+ union kbase_pm_ca_policy_data ca_policy_data;
+
+ /** Private data for current PM policy */
+ union kbase_pm_policy_data pm_policy_data;
+
+ /**
+ * Flag indicating when core availability policy is transitioning cores.
+ * The core availability policy must set this when a change in core
+ * availability is occurring.
+ *
+ * power_change_lock must be held when accessing this. */
+ bool ca_in_transition;
+
+ /** Waiting for reset and a queue to wait for changes */
+ bool reset_done;
+ wait_queue_head_t reset_done_wait;
+
+ /** Wait queue for whether the l2 cache has been powered as requested */
+ wait_queue_head_t l2_powered_wait;
+ /** State indicating whether all the l2 caches are powered.
+ * Non-zero indicates they're *all* powered
+ * Zero indicates that some (or all) are not powered */
+ int l2_powered;
+
+ /** The reference count of active gpu cycle counter users */
+ int gpu_cycle_counter_requests;
+ /** Lock to protect gpu_cycle_counter_requests */
+ spinlock_t gpu_cycle_counter_requests_lock;
+
+ /**
+ * A bit mask identifying the shader cores that the power policy would
+ * like to be on. The current state of the cores may be different, but
+ * there should be transitions in progress that will eventually achieve
+ * this state (assuming that the policy doesn't change its mind in the
+ * mean time).
+ */
+ u64 desired_shader_state;
+ /**
+ * A bit mask indicating which shader cores are currently in a power-on
+ * transition
+ */
+ u64 powering_on_shader_state;
+ /**
+ * A bit mask identifying the tiler cores that the power policy would
+ * like to be on. @see kbase_pm_device_data:desired_shader_state
+ */
+ u64 desired_tiler_state;
+ /**
+ * A bit mask indicating which tiler core are currently in a power-on
+ * transition
+ */
+ u64 powering_on_tiler_state;
+
+ /**
+ * A bit mask indicating which l2-caches are currently in a power-on
+ * transition
+ */
+ u64 powering_on_l2_state;
+
+ /**
+ * This flag is set if the GPU is powered as requested by the
+ * desired_xxx_state variables
+ */
+ bool gpu_in_desired_state;
+ /** Wait queue set when gpu_in_desired_state != 0 */
+ wait_queue_head_t gpu_in_desired_state_wait;
+
+ /**
+ * Set to true when the GPU is powered and register accesses are
+ * possible, false otherwise
+ */
+ bool gpu_powered;
+
+ /** Set to true when instrumentation is enabled, false otherwise */
+ bool instr_enabled;
+
+ bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+ /**
+ * Debug state indicating whether sufficient initialization of the
+ * driver has occurred to handle IRQs
+ */
+ bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+ /**
+ * Spinlock that must be held when:
+ * - writing gpu_powered
+ * - accessing driver_ready_for_irqs (in CONFIG_MALI_DEBUG builds)
+ */
+ spinlock_t gpu_powered_lock;
+
+ /** Structure to hold metrics for the GPU */
+ struct kbasep_pm_metrics_data metrics;
+
+ /**
+ * Set to the number of poweroff timer ticks until the GPU is powered
+ * off
+ */
+ int gpu_poweroff_pending;
+
+ /**
+ * Set to the number of poweroff timer ticks until shaders are powered
+ * off
+ */
+ int shader_poweroff_pending_time;
+
+ /** Timer for powering off GPU */
+ struct hrtimer gpu_poweroff_timer;
+
+ struct workqueue_struct *gpu_poweroff_wq;
+
+ struct work_struct gpu_poweroff_work;
+
+ /** Bit mask of shaders to be powered off on next timer callback */
+ u64 shader_poweroff_pending;
+
+ /**
+ * Set to true if the poweroff timer is currently running,
+ * false otherwise
+ */
+ bool poweroff_timer_needed;
+
+ /**
+ * Callback when the GPU needs to be turned on. See
+ * @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ *
+ * @return 1 if GPU state was lost, 0 otherwise
+ */
+ int (*callback_power_on)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when the GPU may be turned off. See
+ * @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_off)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when a suspend occurs and the GPU needs to be turned off.
+ * See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_suspend)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when a resume occurs and the GPU needs to be turned on.
+ * See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_resume)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when the GPU needs to be turned on. See
+ * @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ *
+ * @return 1 if GPU state was lost, 0 otherwise
+ */
+ int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when the GPU may be turned off. See
+ * @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+
+};
+
+
+/** List of policy IDs */
+enum kbase_pm_policy_id {
+ KBASE_PM_POLICY_ID_DEMAND = 1,
+ KBASE_PM_POLICY_ID_ALWAYS_ON,
+ KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+ KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+ KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ */
+struct kbase_pm_policy {
+ /** The name of this policy */
+ char *name;
+
+ /**
+ * Function called when the policy is selected
+ *
+ * This should initialize the kbdev->pm.pm_policy_data structure. It
+ * should not attempt to make any changes to hardware state.
+ *
+ * It is undefined what state the cores are in when the function is
+ * called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ */
+ void (*init)(struct kbase_device *kbdev);
+
+ /**
+ * Function called when the policy is unselected.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ */
+ void (*term)(struct kbase_device *kbdev);
+
+ /**
+ * Function called to get the current shader core mask
+ *
+ * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+ * | kbdev->shader_inuse_bitmap).
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ *
+ * @return The mask of shader cores to be powered
+ */
+ u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+ /**
+ * Function called to get the current overall GPU power state
+ *
+ * This function should consider the state of kbdev->pm.active_count. If
+ * this count is greater than 0 then there is at least one active
+ * context on the device and the GPU should be powered. If it is equal
+ * to 0 then there are no active contexts and the GPU could be powered
+ * off if desired.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ *
+ * @return true if the GPU should be powered, false otherwise
+ */
+ bool (*get_core_active)(struct kbase_device *kbdev);
+
+ /** Field indicating flags for this policy */
+ kbase_pm_policy_flags flags;
+
+ /**
+ * Field indicating an ID for this policy. This is not necessarily the
+ * same as its index in the list returned by kbase_pm_list_policies().
+ * It is used purely for debugging.
+ */
+ enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+ KBASE_PM_CA_POLICY_ID_FIXED = 1,
+ KBASE_PM_CA_POLICY_ID_RANDOM
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ */
+struct kbase_pm_ca_policy {
+ /** The name of this policy */
+ char *name;
+
+ /**
+ * Function called when the policy is selected
+ *
+ * This should initialize the kbdev->pm.ca_policy_data structure. It
+ * should not attempt to make any changes to hardware state.
+ *
+ * It is undefined what state the cores are in when the function is
+ * called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ */
+ void (*init)(struct kbase_device *kbdev);
+
+ /**
+ * Function called when the policy is unselected.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ */
+ void (*term)(struct kbase_device *kbdev);
+
+ /**
+ * Function called to get the current shader core availability mask
+ *
+ * When a change in core availability is occurring, the policy must set
+ * kbdev->pm.ca_in_transition to true. This is to indicate that
+ * reporting changes in power state cannot be optimized out, even if
+ * kbdev->pm.desired_shader_state remains unchanged. This must be done
+ * by any functions internal to the Core Availability Policy that change
+ * the return value of kbase_pm_ca_policy::get_core_mask.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ *
+ * @return The current core availability mask
+ */
+ u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+ /**
+ * Function called to update the current core status
+ *
+ * If none of the cores in core group 0 are ready or transitioning, then
+ * the policy must ensure that the next call to get_core_mask does not
+ * return 0 for all cores in core group 0. It is an error to disable
+ * core group 0 through the core availability policy.
+ *
+ * When a change in core availability has finished, the policy must set
+ * kbdev->pm.ca_in_transition to false. This is to indicate that
+ * changes in power state can once again be optimized out when
+ * kbdev->pm.desired_shader_state is unchanged.
+ *
+ * @param kbdev The kbase device structure for the device
+ * (must be a valid pointer)
+ * @param cores_ready The mask of cores currently powered and
+ * ready to run jobs
+ * @param cores_transitioning The mask of cores currently transitioning
+ * power state
+ */
+ void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+ u64 cores_transitioning);
+
+ /** Field indicating flags for this policy */
+ kbase_pm_ca_policy_flags flags;
+
+ /**
+ * Field indicating an ID for this policy. This is not necessarily the
+ * same as its index in the list returned by kbase_pm_list_policies().
+ * It is used purely for debugging.
+ */
+ enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100644
index 00000000000..4e5b79c9c44
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_demand.c
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+ u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+ if (0 == kbdev->pm.active_count)
+ return 0;
+
+ return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+ if (0 == kbdev->pm.active_count)
+ return false;
+
+ return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/**
+ * The @ref struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's
+ * callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+ "demand", /* name */
+ demand_init, /* init */
+ demand_term, /* term */
+ demand_get_core_mask, /* get_core_mask */
+ demand_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_DEMAND, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100644
index 00000000000..8b7a17ab984
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_demand.h
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * - The Shader Cores are not powered up
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ * currently queued Job Chains:
+ * - Only those Shader Cores are powered up
+ * - When KBase indicates that the GPU need not be powered:
+ * - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
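+
+/*
+ * Illustrative trace (hypothetical values): while a context is active
+ * (pm.active_count > 0) and shader_needed_bitmap | shader_inuse_bitmap
+ * equals 0x5, the policy's get_core_mask callback returns 0x5, so only
+ * shader cores 0 and 2 are powered. Once pm.active_count drops to 0, the
+ * callback returns 0 and get_core_active returns false, allowing the whole
+ * GPU to be powered off.
+ */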
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_demand {
+ /** No state needed - just have a dummy variable here */
+ int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100644
index 00000000000..1d72fd60143
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1264 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_driver.c
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+#include <mali_kbase_pm.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif /* MALI_MOCK_TEST */
+
+/**
+ * Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are chosen so that the
+ * @ref core_type_to_reg function, which decodes this enumeration, can be
+ * simpler and more efficient.
+ */
+enum kbasep_pm_action {
+ ACTION_PRESENT = 0,
+ ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+ ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+ ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+ ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+ ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+static u64 kbase_pm_get_state(
+ struct kbase_device *kbdev,
+ enum kbase_pm_core_type core_type,
+ enum kbasep_pm_action action);
+
+/**
+ * Decode a core type and action to a register.
+ *
+ * Given a core type (defined by @ref kbase_pm_core_type) and an action (defined
+ * by @ref kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the \c _LO
+ * register and an offset must be applied to use the \c _HI register.
+ *
+ * @param core_type The type of core
+ * @param action The type of action
+ *
+ * @return The register offset of the \c _LO register that performs an action of
+ * type \c action on a core of type \c core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+ enum kbasep_pm_action action)
+{
+ return (u32)core_type + (u32)action;
+}
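+
+/*
+ * For example (a sketch; the concrete values come from mali_midg_regmap.h,
+ * where each enum kbase_pm_core_type value is the _PRESENT_LO register
+ * offset of that core type):
+ *
+ *	core_type_to_reg(KBASE_PM_CORE_SHADER, ACTION_PWRON)
+ *		== SHADER_PRESENT_LO + (SHADER_PWRON_LO - SHADER_PRESENT_LO)
+ *		== SHADER_PWRON_LO
+ *
+ * The matching _HI register is at this offset + 4, as used by
+ * kbase_pm_get_state() and kbase_pm_invoke() below.
+ */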
+
+
+/** Invokes an action on a core set
+ *
+ * This function performs the action given by \c action on a set of cores of a
+ * type given by \c core_type. It is a static function used by
+ * @ref kbase_pm_transition_core_type
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param core_type The type of core that the action should be performed on
+ * @param cores A bit mask of cores to perform the action on (low 32 bits)
+ * @param action The action to perform on the cores
+ */
+static void kbase_pm_invoke(struct kbase_device *kbdev,
+ enum kbase_pm_core_type core_type,
+ u64 cores,
+ enum kbasep_pm_action action)
+{
+ u32 reg;
+ u32 lo = cores & 0xFFFFFFFF;
+ u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ reg = core_type_to_reg(core_type, action);
+
+ KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ if (cores) {
+ if (action == ACTION_PWRON)
+ kbase_trace_mali_pm_power_on(core_type, cores);
+ else if (action == ACTION_PWROFF)
+ kbase_trace_mali_pm_power_off(core_type, cores);
+ }
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ if (cores) {
+ u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+ if (action == ACTION_PWRON)
+ state |= cores;
+ else if (action == ACTION_PWROFF)
+ state &= ~cores;
+ kbase_tlstream_aux_pm_state(core_type, state);
+ }
+#endif
+ /* Tracing */
+ if (cores) {
+ if (action == ACTION_PWRON)
+ switch (core_type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+ lo);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+ NULL, 0u, lo);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+ 0u, lo);
+ break;
+ default:
+ break;
+ }
+ else if (action == ACTION_PWROFF)
+ switch (core_type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+ 0u, lo);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+ NULL, 0u, lo);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+ 0u, lo);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (lo != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+ if (hi != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
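+
+/*
+ * Usage sketch: a request to power on shader cores 0-3 would be
+ *
+ *	kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, 0xF, ACTION_PWRON);
+ *
+ * which writes 0xF to SHADER_PWRON_LO and skips the _HI write because the
+ * upper 32 bits of the mask are zero. The caller must hold
+ * kbdev->pm.power_change_lock.
+ */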
+
+/**
+ * Get information about a core set
+ *
+ * This function gets information (chosen by \c action) about a set of cores of
+ * a type given by \c core_type. It is a static function used by @ref
+ * kbase_pm_get_present_cores, @ref kbase_pm_get_active_cores, @ref
+ * kbase_pm_get_trans_cores and @ref kbase_pm_get_ready_cores.
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param core_type The type of core that should be queried
+ * @param action The property of the cores to query
+ *
+ * @return A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+ enum kbase_pm_core_type core_type,
+ enum kbasep_pm_action action)
+{
+ u32 reg;
+ u32 lo, hi;
+
+ reg = core_type_to_reg(core_type, action);
+
+ KBASE_DEBUG_ASSERT(reg);
+
+ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+ return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+ kbdev->shader_present_bitmap =
+ kbase_pm_get_state(kbdev, KBASE_PM_CORE_SHADER, ACTION_PRESENT);
+ kbdev->tiler_present_bitmap =
+ kbase_pm_get_state(kbdev, KBASE_PM_CORE_TILER, ACTION_PRESENT);
+ kbdev->l2_present_bitmap =
+ kbase_pm_get_state(kbdev, KBASE_PM_CORE_L2, ACTION_PRESENT);
+ kbdev->shader_inuse_bitmap = 0;
+ kbdev->shader_needed_bitmap = 0;
+ kbdev->shader_available_bitmap = 0;
+ kbdev->tiler_available_bitmap = 0;
+ kbdev->l2_users_count = 0;
+ kbdev->l2_available_bitmap = 0;
+ kbdev->tiler_needed_cnt = 0;
+ kbdev->tiler_inuse_cnt = 0;
+
+ memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores);
+
+/**
+ * Get the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ switch (type) {
+ case KBASE_PM_CORE_L2:
+ return kbdev->l2_present_bitmap;
+ case KBASE_PM_CORE_SHADER:
+ return kbdev->shader_present_bitmap;
+ case KBASE_PM_CORE_TILER:
+ return kbdev->tiler_present_bitmap;
+ }
+ KBASE_DEBUG_ASSERT(0);
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * Get the cores that are "active" (busy processing work)
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * Get the cores that are transitioning between power states
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * Get the cores that are powered on
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ u64 result;
+
+ result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+ switch (type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ default:
+ break;
+ }
+
+ return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * Perform power transitions for a particular core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that core cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask;
+ * however, the return value will always be false in this case.
+ *
+ * @param kbdev The kbase device
+ * @param type The core type to perform transitions for
+ * @param desired_state A bit mask of the desired state of the cores
+ * @param in_use A bit mask of the cores that are currently running
+ * jobs. These cores have to be kept powered up because
+ * there are jobs running (or about to run) on them.
+ * @param[out] available Receives a bit mask of the cores that the job
+ * scheduler can use to submit jobs to. May be NULL if
+ * this is not needed.
+ * @param[in,out] powering_on Bit mask to update with cores that are
+ * transitioning to a power-on state.
+ *
+ * @return true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type,
+ u64 desired_state,
+ u64 in_use,
+ u64 * const available,
+ u64 *powering_on)
+{
+ u64 present;
+ u64 ready;
+ u64 trans;
+ u64 powerup;
+ u64 powerdown;
+ u64 powering_on_trans;
+ u64 desired_state_in_use;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ /* Get current state */
+ present = kbase_pm_get_present_cores(kbdev, type);
+ trans = kbase_pm_get_trans_cores(kbdev, type);
+ ready = kbase_pm_get_ready_cores(kbdev, type);
+ /* mask off ready from trans in case transitions finished between the
+ * register reads */
+ trans &= ~ready;
+
+ powering_on_trans = trans & *powering_on;
+ *powering_on = powering_on_trans;
+
+ if (available != NULL)
+ *available = (ready | powering_on_trans) & desired_state;
+
+ /* Update desired state to include the in-use cores. These have to be
+ * kept powered up because there are jobs running or about to run on
+ * these cores
+ */
+ desired_state_in_use = desired_state | in_use;
+
+ /* Update state of whether l2 caches are powered */
+ if (type == KBASE_PM_CORE_L2) {
+ if ((ready == present) && (desired_state_in_use == ready) &&
+ (trans == 0)) {
+ /* All are ready, none will be turned off, and none are
+ * transitioning */
+ kbdev->pm.backend.l2_powered = 1;
+ if (kbdev->l2_users_count > 0) {
+ /* Notify any registered l2 cache users
+ * (optimized out when no users waiting) */
+ wake_up(&kbdev->pm.backend.l2_powered_wait);
+ }
+ } else
+ kbdev->pm.backend.l2_powered = 0;
+ }
+
+ if (desired_state_in_use == ready && (trans == 0))
+ return true;
+
+ /* Restrict the cores to those that are actually present */
+ powerup = desired_state_in_use & present;
+ powerdown = (~desired_state_in_use) & present;
+
+ /* Restrict to cores that are not already in the desired state */
+ powerup &= ~ready;
+ powerdown &= ready;
+
+ /* Don't transition any cores that are already transitioning, except for
+ * Mali cores that support the following case:
+ *
+ * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+ * a core that is currently transitioning to power off, then this is
+ * remembered and the shader core is automatically powered up again once
+ * the original transition completes. Once the automatic power on is
+ * complete any job scheduled on the shader core should start.
+ */
+ powerdown &= ~trans;
+
+ if (kbase_hw_has_feature(kbdev,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+ if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+ trans = powering_on_trans; /* for exception cases, only
+ * mask off cores in power on
+ * transitions */
+
+ powerup &= ~trans;
+
+ /* Perform transitions if any */
+ kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+ kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+
+ /* Recalculate cores transitioning on, and re-evaluate our state */
+ powering_on_trans |= powerup;
+ *powering_on = powering_on_trans;
+ if (available != NULL)
+ *available = (ready | powering_on_trans) & desired_state;
+
+ return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
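+
+/*
+ * Worked example with hypothetical masks: given present = 0xF, ready = 0x3,
+ * trans = 0 and desired_state | in_use = 0xF, the function computes
+ *
+ *	powerup   = (0xF & 0xF) & ~0x3 = 0xC
+ *	powerdown = (~0xF & 0xF) & 0x3 = 0x0
+ *
+ * so cores 2 and 3 receive an ACTION_PWRON, *powering_on gains the bits in
+ * 0xC, and the function returns false because the desired state has not been
+ * reached yet.
+ */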
+
+/**
+ * Determine which caches should be on for a particular core state.
+ *
+ * This function takes a bit mask of the present caches, and a bit mask of the
+ * cores (or caches) attached to those caches that are to be powered. It then
+ * computes which caches should be turned on to allow the requested cores to be
+ * powered up.
+ *
+ * @param present The bit mask of present caches
+ * @param cores_powered A bit mask of cores (or L2 caches) that are desired to
+ * be powered
+ *
+ * @return A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered)
+{
+ u64 desired = 0;
+
+ while (present) {
+ /* Find out which is the highest set bit */
+ u64 bit = fls64(present) - 1;
+ u64 bit_mask = 1ull << bit;
+ /* Create a mask which has all bits from 'bit' upwards set */
+
+ u64 mask = ~(bit_mask - 1);
+
+ /* If there are any cores powered at this bit or above (that
+ * haven't previously been processed) then we need this core on
+ */
+ if (cores_powered & mask)
+ desired |= bit_mask;
+
+ /* Remove bits from cores_powered and present */
+ cores_powered &= ~mask;
+ present &= ~bit_mask;
+ }
+
+ return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
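+
+/*
+ * Worked example with a hypothetical layout: two L2 caches at bits 0 and 1
+ * (present = 0x3), powered cores at bits 2 and 3 (cores_powered = 0xC):
+ *
+ *	1st iteration: bit = 1, mask = ~0x1; cores_powered & mask != 0,
+ *	    so desired |= 0x2; then cores_powered -> 0x0, present -> 0x1
+ *	2nd iteration: bit = 0, mask = ~0x0; cores_powered & mask == 0,
+ *	    so cache 0 stays off
+ *
+ * The result desired = 0x2 turns on only the cache that feeds the powered
+ * cores.
+ */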
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+ bool cores_are_available = false;
+ bool in_desired_state = true;
+ u64 desired_l2_state;
+ u64 cores_powered;
+ u64 tiler_available_bitmap;
+ u64 shader_available_bitmap;
+ u64 shader_ready_bitmap;
+ u64 shader_transitioning_bitmap;
+ u64 l2_available_bitmap;
+ u64 prev_l2_available_bitmap;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+ if (kbdev->pm.backend.gpu_powered == false) {
+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+ if (kbdev->pm.backend.desired_shader_state == 0 &&
+ kbdev->pm.backend.desired_tiler_state == 0)
+ return true;
+ return false;
+ }
+
+ /* Trace that a change-state is being requested, and that it took
+ * (effectively) no time to start it. This is useful for counting how
+ * many state changes occurred, in a way that's backwards-compatible
+ * with processing the trace data */
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+ kbase_timeline_pm_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+	/* If any cores are already powered then we must keep the caches on */
+ cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+ cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+ /* If there are l2 cache users registered, keep all l2s powered even if
+ * all other cores are off. */
+ if (kbdev->l2_users_count > 0)
+ cores_powered |= kbdev->l2_present_bitmap;
+
+ desired_l2_state = get_desired_cache_status(kbdev->l2_present_bitmap,
+ cores_powered);
+
+ /* If any l2 cache is on, then enable l2 #0, for use by job manager */
+ if (0 != desired_l2_state) {
+ desired_l2_state |= 1;
+ /* Also enable tiler if l2 cache is powered */
+ kbdev->pm.backend.desired_tiler_state =
+ kbdev->tiler_present_bitmap;
+ } else {
+ kbdev->pm.backend.desired_tiler_state = 0;
+ }
+
+ prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_L2, desired_l2_state, 0,
+ &l2_available_bitmap,
+ &kbdev->pm.backend.powering_on_l2_state);
+
+ if (kbdev->l2_available_bitmap != l2_available_bitmap)
+ KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+ kbdev->l2_available_bitmap = l2_available_bitmap;
+
+ if (in_desired_state) {
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_TILER,
+ kbdev->pm.backend.desired_tiler_state,
+ 0, &tiler_available_bitmap,
+ &kbdev->pm.backend.powering_on_tiler_state);
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_SHADER,
+ kbdev->pm.backend.desired_shader_state,
+ kbdev->shader_inuse_bitmap,
+ &shader_available_bitmap,
+ &kbdev->pm.backend.powering_on_shader_state);
+
+ if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+ NULL, 0u,
+ (u32) shader_available_bitmap);
+ KBASE_TIMELINE_POWER_SHADER(kbdev,
+ shader_available_bitmap);
+ }
+
+ kbdev->shader_available_bitmap = shader_available_bitmap;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, NULL, 0u,
+ (u32) tiler_available_bitmap);
+ KBASE_TIMELINE_POWER_TILER(kbdev,
+ tiler_available_bitmap);
+ }
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+ } else if ((l2_available_bitmap & kbdev->tiler_present_bitmap) !=
+ kbdev->tiler_present_bitmap) {
+ tiler_available_bitmap = 0;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+ KBASE_TIMELINE_POWER_TILER(kbdev,
+ tiler_available_bitmap);
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+ }
+
+ /* State updated for slow-path waiters */
+ kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+ shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+
+ /* Determine whether the cores are now available (even if the set of
+ * available cores is empty). Note that they can be available even if
+ * we've not finished transitioning to the desired state */
+ if ((kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state)
+ == kbdev->pm.backend.desired_shader_state &&
+ (kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state)
+ == kbdev->pm.backend.desired_tiler_state) {
+ cores_are_available = true;
+
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+ (u32)(kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state));
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+ (u32)(kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up with immediate submission, either
+		 * because cores are already available or because of the PM
+		 * IRQ */
+ if (!in_desired_state)
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ }
+
+ if (in_desired_state) {
+ KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_L2));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_TILER));
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_pm_state(
+ KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_L2));
+ kbase_tlstream_aux_pm_state(
+ KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_SHADER));
+ kbase_tlstream_aux_pm_state(
+ KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(
+ kbdev,
+ KBASE_PM_CORE_TILER));
+#endif
+
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+ kbdev->pm.backend.gpu_in_desired_state,
+ (u32)kbdev->pm.backend.desired_shader_state);
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+ (u32)kbdev->pm.backend.desired_tiler_state);
+
+ /* Log timelining information for synchronous waiters */
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ /* Wake slow-path waiters. Job scheduler does not use this. */
+ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+ wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+ }
+
+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+ /* kbase_pm_ca_update_core_status can cause one-level recursion into
+ * this function, so it must only be called once all changes to kbdev
+ * have been committed, and after the gpu_powered_lock has been
+ * dropped. */
+ if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+ kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+ kbdev->shader_ready_bitmap = shader_ready_bitmap;
+ kbdev->shader_transitioning_bitmap =
+ shader_transitioning_bitmap;
+
+ kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+ shader_transitioning_bitmap);
+ }
+
+ /* The core availability policy is not allowed to keep core group 0
+ * turned off (unless it was changing the l2 power state) */
+ if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+ (prev_l2_available_bitmap == desired_l2_state) &&
+ !(kbase_pm_ca_get_core_mask(kbdev) &
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+ BUG();
+
+ /* The core availability policy is allowed to keep core group 1 off,
+ * but all jobs specifically targeting CG1 must fail */
+ if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+ kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+ !(kbase_pm_ca_get_core_mask(kbdev) &
+ kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+ kbdev->pm.backend.cg1_disabled = true;
+ else
+ kbdev->pm.backend.cg1_disabled = false;
+
+ return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ bool cores_are_available;
+ /* Force the transition to be checked and reported - the cores may be
+ * 'available' (for job submission) but not fully powered up. */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* Wait for cores */
+ wait_event(kbdev->pm.backend.gpu_in_desired_state_wait,
+ kbdev->pm.backend.gpu_in_desired_state);
+
+ /* Log timelining information that a change in state has completed */
+ kbase_timeline_pm_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ /*
+ * Clear all interrupts,
+ * and unmask them all.
+ */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+ NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+ NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+ NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ /*
+ * Clear all interrupts,
+ * and unmask them all.
+ */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+ NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+ NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+ NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, NULL);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ /*
+ * Mask all interrupts,
+ * and clear them all.
+ */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+ NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+ NULL);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+ bool reset_required = is_resume;
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ unsigned long flags;
+ int i;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ if (kbdev->pm.backend.gpu_powered) {
+ /* Already turned on */
+ if (kbdev->poweroff_pending)
+ kbase_pm_enable_interrupts(kbdev);
+ kbdev->poweroff_pending = false;
+ KBASE_DEBUG_ASSERT(!is_resume);
+ return;
+ }
+
+ kbdev->poweroff_pending = false;
+
+ KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+ if (is_resume && kbdev->pm.backend.callback_power_resume) {
+ kbdev->pm.backend.callback_power_resume(kbdev);
+ } else if (kbdev->pm.backend.callback_power_on) {
+ kbdev->pm.backend.callback_power_on(kbdev);
+ /* If your platform properly keeps the GPU state you may use the
+ * return value of the callback_power_on function to
+ * conditionally reset the GPU on power up. Currently we are
+ * conservative and always reset the GPU. */
+ reset_required = true;
+ }
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+ kbdev->pm.backend.gpu_powered = true;
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (reset_required) {
+ /* GPU state was lost, reset GPU to ensure it is in a
+ * consistent state */
+ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+ }
+
+ /* Reprogram the GPU's MMU */
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbase_as *as = &kbdev->as[i];
+
+ mutex_lock(&as->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (js_devdata->runpool_irq.per_as_data[i].kctx)
+ kbase_mmu_update(
+ js_devdata->runpool_irq.per_as_data[i].kctx);
+ else
+ kbase_mmu_disable_as(kbdev, i);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&as->transaction_mutex);
+ }
+
+ /* Lastly, enable the interrupts */
+ kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* ASSERT that the cores should now be unavailable. No lock needed. */
+ KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+ kbdev->poweroff_pending = true;
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* Already turned off */
+ if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+ kbdev->pm.backend.callback_power_suspend(kbdev);
+ return true;
+ }
+
+ KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+ /* Disable interrupts. This also clears any outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ /* Ensure that any IRQ handlers have finished */
+ kbase_synchronize_irqs(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (atomic_read(&kbdev->faults_pending)) {
+ /* Page/bus faults are still being processed. The GPU can not
+ * be powered off until they have completed */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return false;
+ }
+
+ /* The GPU power may be turned off from this point */
+ kbdev->pm.backend.gpu_powered = false;
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+ kbdev->pm.backend.callback_power_suspend(kbdev);
+ else if (kbdev->pm.backend.callback_power_off)
+ kbdev->pm.backend.callback_power_off(kbdev);
+ return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+ struct hrtimer timer;
+ bool timed_out;
+ struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ kbdev->pm.backend.reset_done = true;
+ wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * Wait for the RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ wait_event(kbdev->pm.backend.reset_done_wait,
+ (kbdev->pm.backend.reset_done));
+ kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+ struct kbasep_reset_timeout_data *rtdata =
+ container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+ rtdata->timed_out = 1;
+
+ /* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+ * hasn't completed */
+ kbase_pm_reset_done(rtdata->kbdev);
+
+ return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+ kbdev->hw_quirks_sc = 0;
+
+ /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+ * and needed due to MIDGLES-3539. See PRLAM-11035 */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+ kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+ /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+ */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+ kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+ /* Enable alternative hardware counter selection if configured. */
+ if (DEFAULT_ALTERNATIVE_HWC)
+ kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+
+ /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+ kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+ kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+ /* Set tiler clock gate override if required */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+ kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+ /* Limit the GPU bus bandwidth if the platform needs this. */
+ kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+ /* Limit read ID width for AXI */
+ kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+ kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+ L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+ /* Limit write ID width for AXI */
+ kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+ kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+ L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+}
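+
+/*
+ * Worked example of the ID-width masking above (hypothetical value): with
+ * DEFAULT_ARID_LIMIT == 0x3, the read-limit field is first cleared and then
+ * set to
+ *
+ *	(0x3 & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT
+ *
+ * leaving every other bit of the L2_MMU_CONFIG value read from the register
+ * untouched.
+ */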
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+ if (kbdev->hw_quirks_sc)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+ kbdev->hw_quirks_sc, NULL);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+ kbdev->hw_quirks_tiler, NULL);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+ kbdev->hw_quirks_mmu, NULL);
+}
+
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+ unsigned long irq_flags;
+ struct kbasep_reset_timeout_data rtdata;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* Ensure the clock is on before attempting to access the hardware */
+ if (!kbdev->pm.backend.gpu_powered) {
+ if (kbdev->pm.backend.callback_power_on)
+ kbdev->pm.backend.callback_power_on(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+ irq_flags);
+ kbdev->pm.backend.gpu_powered = true;
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ irq_flags);
+ }
+
+ /* Ensure interrupts are off to begin with, this also clears any
+ * outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ /* Prepare for the soft-reset */
+ kbdev->pm.backend.reset_done = false;
+
+ /* The cores should be made unavailable due to the reset */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
+ if (kbdev->shader_available_bitmap != 0u)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+ NULL, 0u, (u32)0u);
+ if (kbdev->tiler_available_bitmap != 0u)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, NULL, 0u, (u32)0u);
+ kbdev->shader_available_bitmap = 0u;
+ kbdev->tiler_available_bitmap = 0u;
+ kbdev->l2_available_bitmap = 0u;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
+
+ /* Soft reset the GPU */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_SOFT_RESET, NULL);
+
+ /* Unmask the reset complete interrupt only */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+ NULL);
+
+ /* Initialize a structure for tracking the status of the reset */
+ rtdata.kbdev = kbdev;
+ rtdata.timed_out = 0;
+
+ /* Create a timer to use as a timeout on the reset */
+ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rtdata.timer.function = kbasep_reset_timeout;
+
+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ /* Wait for the RESET_COMPLETED interrupt to be raised */
+ kbase_pm_wait_for_reset(kbdev);
+
+ if (rtdata.timed_out == 0) {
+ /* GPU has been reset */
+ hrtimer_cancel(&rtdata.timer);
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ /* No interrupt has been received - check if the RAWSTAT register says
+ * the reset has completed */
+ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+ RESET_COMPLETED) {
+ /* The interrupt is set in the RAWSTAT; this suggests that the
+ * interrupts are not getting to the CPU */
+ dev_warn(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+ /* If interrupts aren't working we can't continue. */
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ /* The GPU doesn't seem to be responding to the reset so try a hard
+ * reset */
+ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+ RESET_TIMEOUT);
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_HARD_RESET, NULL);
+
+ /* Restart the timer to wait for the hard reset to complete */
+ rtdata.timed_out = 0;
+
+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ /* Wait for the RESET_COMPLETED interrupt to be raised */
+ kbase_pm_wait_for_reset(kbdev);
+
+ if (rtdata.timed_out == 0) {
+ /* GPU has been reset */
+ hrtimer_cancel(&rtdata.timer);
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ destroy_hrtimer_on_stack(&rtdata.timer);
+
+ dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+ RESET_TIMEOUT);
+
+ /* The GPU still hasn't reset, give up */
+ return -EINVAL;
+
+out:
+
+ if (flags & PM_HW_ISSUES_DETECT)
+ kbase_pm_hw_issues_detect(kbdev);
+
+ kbase_pm_hw_issues_apply(kbdev);
+
+
+	/* If the cycle counter was in use, re-enable it; PM_ENABLE_IRQS will
+	 * only be absent from the flags when called from kbase_pm_powerup */
+ if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+ (flags & PM_ENABLE_IRQS)) {
+ /* enable interrupts as the L2 may have to be powered on */
+ kbase_pm_enable_interrupts(kbdev);
+ kbase_pm_request_l2_caches(kbdev);
+
+ /* Re-enable the counters if we need to */
+ spin_lock_irqsave(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ irq_flags);
+ if (kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_START, NULL);
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ irq_flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+ kbase_pm_disable_interrupts(kbdev);
+ }
+
+ if (flags & PM_ENABLE_IRQS)
+ kbase_pm_enable_interrupts(kbdev);
+
+ return 0;
+}
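+
+/*
+ * Usage sketch: a full reset that probes hardware quirks and leaves
+ * interrupts enabled could be requested as
+ *
+ *	err = kbase_pm_init_hw(kbdev, PM_HW_ISSUES_DETECT | PM_ENABLE_IRQS);
+ *
+ * with a non-zero err meaning the GPU failed to reset. The flag names are
+ * those tested in the body above; callers must hold kbdev->pm.lock.
+ */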
+
+/**
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * @ref kbase_pm_request_gpu_cycle_counter or
+ * @ref kbase_pm_request_gpu_cycle_counter_l2_is_on only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to @ref
+ * kbase_pm_request_l2_caches or @ref kbase_pm_request_l2_caches_l2_is_on.
+ *
+ * @param kbdev The kbase device structure of the device
+ *
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ ++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+ if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+ INT_MAX);
+
+ kbase_pm_request_l2_caches(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+ INT_MAX);
+
+ kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+ --kbdev->pm.backend.gpu_cycle_counter_requests;
+
+ if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
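+
+/*
+ * Typical pairing of this API (an illustrative sketch around a hypothetical
+ * read of the cycle count registers):
+ *
+ *	kbase_pm_request_gpu_cycle_counter(kbdev);
+ *	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ *	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
+ *	kbase_pm_release_gpu_cycle_counter(kbdev);
+ *
+ * The request keeps the L2 caches powered and the counters running until the
+ * matching release drops the last reference.
+ */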
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 00000000000..a4bd11a1c0f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,557 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_internal.h
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * Get details of the cores that are present in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * Get details of the cores that are currently active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * Get details of the cores that are currently transitioning between power
+ * states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * Get details of the cores that are currently powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * Turn the clock for the device on, and enable device interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_resume true if clock on due to resume after suspend,
+ * false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * Disable device interrupts, and turn the clock for the device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_suspend true if clock off due to suspend, false otherwise
+ *
+ * @return true if clock was turned off
+ * false if clock can not be turned off due to pending page/bus fault
+ * workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
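+
+/*
+ * Illustrative caller loop (a sketch, assuming the MMU workqueue flush
+ * helper kbase_flush_mmu_wqs() used elsewhere in the driver): because this
+ * function returns false while fault workers are pending, a caller retries
+ * after flushing them:
+ *
+ *	while (!kbase_pm_clock_off(kbdev, is_suspend))
+ *		kbase_flush_mmu_wqs(kbdev);
+ */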
+
+/**
+ * Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * Enable interrupts on the device, using the provided mask to set MMU_IRQ_MASK.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param mask The mask to use for MMU_IRQ_MASK
+ */
+void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask);
+
+/**
+ * Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until @ref kbase_pm_enable_interrupts or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * Check if there are any power transitions to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: when no cores are desired, none are <em>un</em>available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device::pm::power_change_lock
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @return true when all desired cores are available. That is,
+ * it's worthwhile for the caller to submit a job.
+ * @return false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
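+
+/*
+ * Minimal usage sketch, assuming the locking fields used by this backend
+ * (submit_pending_jobs() is a hypothetical helper):
+ *
+ *	unsigned long flags;
+ *	bool cores_are_available;
+ *
+ *	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ *	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ *	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ *	if (cores_are_available)
+ *		submit_pending_jobs(kbdev);
+ */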
+
+/**
+ * Synchronous and locking variant of kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * @note There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device::pm::power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * Variant of kbase_pm_update_cores_state() where the caller must hold
+ * kbase_device::pm::power_change_lock
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * Update the desired state of shader cores from the Power Policy, and begin
+ * any power transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware achieve the desired shader
+ * core state.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * Cancel any pending requests to power off the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever
+ * the information is required.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init) after calling this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * Function to be called by the frame buffer driver to update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of buffer_updated differs from a previous call.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param buffer_updated True if the buffer has been updated on this VSync,
+ * false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * Configure the frame buffer device to set the vsync callback.
+ *
+ * This function should do whatever is necessary for this integration to ensure
+ * that kbase_pm_report_vsync is called appropriately.
+ *
+ * This function will need porting as part of the integration for a device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_vsync_callback(struct kbase_device *kbdev);
+
+/**
+ * Free any resources that kbase_pm_register_vsync_callback allocated.
+ *
+ * This function should perform any cleanup required from the call to
+ * kbase_pm_register_vsync_callback. No call backs should occur after this
+ * function has returned.
+ *
+ * This function will need porting as part of the integration for a device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev);
+
+/**
+ * Determine whether the DVFS system should change the clock speed of the GPU.
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating. It will return one of three
+ * enumerated values of kbase_pm_dvfs_action:
+ *
+ * @param kbdev The kbase device structure for the device
+ * (must be a valid pointer)
+ * @retval KBASE_PM_DVFS_NOP The clock does not need changing
+ * @retval KBASE_PM_DVFS_CLOCK_UP The clock frequency should be increased if
+ * possible.
+ * @retval KBASE_PM_DVFS_CLOCK_DOWN The clock frequency should be decreased if
+ * possible.
+ */
+enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
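+
+/* Editorial example (not part of the original file): a minimal sketch of a
+ * platform DVFS poll built on this function. The example_set_gpu_freq_up()
+ * and example_set_gpu_freq_down() helpers are hypothetical platform hooks.
+ *
+ *   static void example_dvfs_poll(struct kbase_device *kbdev)
+ *   {
+ *           switch (kbase_pm_get_dvfs_action(kbdev)) {
+ *           case KBASE_PM_DVFS_CLOCK_UP:
+ *                   example_set_gpu_freq_up(kbdev);
+ *                   break;
+ *           case KBASE_PM_DVFS_CLOCK_DOWN:
+ *                   example_set_gpu_freq_down(kbdev);
+ *                   break;
+ *           case KBASE_PM_DVFS_NOP:
+ *           default:
+ *                   break;
+ *           }
+ *   }
+ */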
+
+/**
+ * Mark that the GPU cycle counter is needed. If the caller is the first
+ * caller, then the GPU cycle counters will be enabled along with the l2
+ * cache.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * @ref kbase_pm_context_active must have been called).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * This is a version of the above function
+ * (@ref kbase_pm_request_gpu_cycle_counter) suitable for being called when the
+ * l2 cache is known to be on and assured to remain on until the subsequent
+ * call to kbase_pm_release_gpu_cycle_counter(), such as when a job is
+ * submitted. It does not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * @ref kbase_pm_context_active must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * Mark that the GPU cycle counter is no longer in use. If the caller is the
+ * last caller, then the GPU cycle counters will be disabled. A request must
+ * have been made before a call to this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
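+
+/* Editorial example (not part of the original file): the expected pairing of
+ * the cycle counter request/release calls, bracketed by a power management
+ * context reference so the GPU stays powered. The register read step is
+ * elided.
+ *
+ *   kbase_pm_context_active(kbdev);
+ *   kbase_pm_request_gpu_cycle_counter(kbdev);
+ *   ... read the cycle count from the hardware ...
+ *   kbase_pm_release_gpu_cycle_counter(kbdev);
+ *   kbase_pm_context_idle(kbdev);
+ */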
+
+/**
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read/write() at this
+ * stage, not kbase_reg_read/write().
+ *
+ * This causes the power management callbacks provided in the driver
+ * configuration to be called to turn on power and/or clocks to the GPU. See
+ * @ref kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This causes the power management callbacks provided in the driver
+ * configuration to be called to turn off power and/or clocks to the GPU. See
+ * @ref kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
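+
+/* Editorial example (not part of the original file): early-probe register
+ * access bracketed by the enable/disable calls above. The GPU_CONTROL_REG()
+ * offset and the exact kbase_os_reg_read() signature are assumptions made
+ * for illustration.
+ *
+ *   kbase_pm_register_access_enable(kbdev);
+ *   gpu_id = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
+ *   kbase_pm_register_access_disable(kbdev);
+ */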
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * Check if the power management metrics collection is active.
+ *
+ * Note that this returns whether the power management metrics collection was
+ * active at the time of the call; the collection may have been enabled or
+ * disabled again by the time the caller acts on the result.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @return true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * Power on the GPU, and any cores that are requested.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_resume true if power on due to resume after suspend,
+ * false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * Power off the GPU, and any cores that have been requested.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_suspend true if power off due to suspend,
+ * false otherwise
+ * @return true if power was turned off, or
+ * false if power cannot be turned off due to pending page/bus
+ * fault workers. The caller must flush the MMU workqueues and retry.
+ */
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
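+
+/* Editorial example (not part of the original file): a caller honouring the
+ * retry contract above. kbase_flush_mmu_wqs() is assumed to be the helper
+ * that flushes the page/bus fault workqueues.
+ *
+ *   while (!kbase_pm_do_poweroff(kbdev, false))
+ *           kbase_flush_mmu_wqs(kbdev);
+ */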
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+ unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param utilisation The current calculated utilisation by the metrics
+ * system.
+ * @param util_gl_share The current calculated gl share of utilisation.
+ * @param util_cl_share The current calculated cl share of utilisation per core
+ * group.
+ * @return 0 on failure and non-zero on success.
+ */
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+ u32 util_gl_share, u32 util_cl_share[2]);
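+
+/* Editorial example (not part of the original file): a minimal sketch of the
+ * platform hook declared above, stepping a hypothetical OPP table on total
+ * utilisation and ignoring the GL/CL split. The thresholds and the
+ * example_opp_up()/example_opp_down() helpers are assumptions.
+ *
+ *   int kbase_platform_dvfs_event(struct kbase_device *kbdev,
+ *                                 u32 utilisation, u32 util_gl_share,
+ *                                 u32 util_cl_share[2])
+ *   {
+ *           if (utilisation > 90)
+ *                   example_opp_up(kbdev);
+ *           else if (utilisation < 30)
+ *                   example_opp_down(kbdev);
+ *           return 1;   (non-zero reports success, per the contract above)
+ *   }
+ */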
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * Inform the metrics system that an atom is about to be run.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param katom The atom that is about to be run
+ */
+void kbase_pm_metrics_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * Inform the metrics system that an atom has been run and is being released.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param katom The atom that is about to be released
+ */
+void kbase_pm_metrics_release_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100644
index 00000000000..b209c505bbd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,532 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_metrics.c
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION 70
+#define KBASE_PM_VSYNC_MAX_UTILISATION 90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns.
+ * The divide-by-100 below allows for the '100 * time_busy' multiplication in
+ * the utilisation calculation, giving a maximum period between samples of
+ * 2^(32+8)/100 ns = slightly under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT 8
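+
+/* Editorial worked example: a 1ms sample interval, ktime_to_ns(diff) =
+ * 1000000 ns, is stored as 1000000 >> KBASE_PM_TIME_SHIFT = 3906 units;
+ * 2^32/100 such units correspond to (2^32/100) << KBASE_PM_TIME_SHIFT ns,
+ * i.e. just under 11s of real time. */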
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. This is compared against time_busy + time_idle, which are stored
+ * in units of (1 << KBASE_PM_TIME_SHIFT) ns, so 100000 units is roughly
+ * 25.6ms of real time. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+ unsigned long flags;
+ enum kbase_pm_dvfs_action action;
+ struct kbasep_pm_metrics_data *metrics;
+
+ KBASE_DEBUG_ASSERT(timer != NULL);
+
+ metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+ action = kbase_pm_get_dvfs_action(metrics->kbdev);
+
+ spin_lock_irqsave(&metrics->lock, flags);
+
+ if (metrics->timer_active)
+ hrtimer_start(timer,
+ HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+ HRTIMER_MODE_REL);
+
+ spin_unlock_irqrestore(&metrics->lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ kbdev->pm.backend.metrics.kbdev = kbdev;
+ kbdev->pm.backend.metrics.vsync_hit = 0;
+ kbdev->pm.backend.metrics.utilisation = 0;
+ kbdev->pm.backend.metrics.util_cl_share[0] = 0;
+ kbdev->pm.backend.metrics.util_cl_share[1] = 0;
+ kbdev->pm.backend.metrics.util_gl_share = 0;
+
+ kbdev->pm.backend.metrics.time_period_start = ktime_get();
+ kbdev->pm.backend.metrics.time_busy = 0;
+ kbdev->pm.backend.metrics.time_idle = 0;
+ kbdev->pm.backend.metrics.prev_busy = 0;
+ kbdev->pm.backend.metrics.prev_idle = 0;
+ kbdev->pm.backend.metrics.gpu_active = false;
+ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+ kbdev->pm.backend.metrics.active_gl_ctx = 0;
+ kbdev->pm.backend.metrics.busy_cl[0] = 0;
+ kbdev->pm.backend.metrics.busy_cl[1] = 0;
+ kbdev->pm.backend.metrics.busy_gl = 0;
+ kbdev->pm.backend.metrics.nr_in_slots = 0;
+
+ spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ kbdev->pm.backend.metrics.timer_active = true;
+ hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+ hrtimer_start(&kbdev->pm.backend.metrics.timer,
+ HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+ HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+ kbase_pm_register_vsync_callback(kbdev);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbdev->pm.backend.metrics.timer_active = false;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+ kbase_pm_unregister_vsync_callback(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/**
+ * kbasep_pm_record_gpu_active - update metrics tracking GPU active time
+ *
+ * This function updates the time the GPU was busy executing jobs in
+ * general and specifically for CL and GL jobs. Call this function when
+ * a job is submitted to or removed from the GPU (job issue) slots.
+ *
+ * The busy time recorded is the time passed since the last time the
+ * busy/idle metrics were updated (e.g. by this function,
+ * kbasep_pm_record_gpu_idle or others).
+ *
+ * Note that the time we record towards CL and GL jobs accounts for
+ * the total number of CL and GL jobs active at that time. If 20ms
+ * has passed and 3 GL jobs were active, we account 3*20 ms towards
+ * the GL busy time. The number of CL/GL jobs active is tracked by
+ * kbase_pm_metrics_run_atom() / kbase_pm_metrics_release_atom().
+ *
+ * The kbdev->pm.backend.metrics.lock needs to be held when calling
+ * this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+static void kbasep_pm_record_gpu_active(struct kbase_device *kbdev)
+{
+ ktime_t now = ktime_get();
+ ktime_t diff;
+ u32 ns_time;
+
+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+ /* Record active time */
+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+ ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+ kbdev->pm.backend.metrics.time_busy += ns_time;
+ kbdev->pm.backend.metrics.busy_gl += ns_time *
+ kbdev->pm.backend.metrics.active_gl_ctx;
+ kbdev->pm.backend.metrics.busy_cl[0] += ns_time *
+ kbdev->pm.backend.metrics.active_cl_ctx[0];
+ kbdev->pm.backend.metrics.busy_cl[1] += ns_time *
+ kbdev->pm.backend.metrics.active_cl_ctx[1];
+ /* Reset time period */
+ kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+/**
+ * kbasep_pm_record_gpu_idle - update metrics tracking GPU idle time
+ *
+ * This function updates the time the GPU was idle (not executing any
+ * jobs) based on the time that has passed since kbasep_pm_record_gpu_active()
+ * was last called, i.e. since the last job on the GPU finished.
+ *
+ * Call this function when no jobs are in the job slots of the GPU and
+ * a job is about to be submitted to a job slot.
+ *
+ * The kbdev->pm.backend.metrics.lock needs to be held when calling
+ * this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+static void kbasep_pm_record_gpu_idle(struct kbase_device *kbdev)
+{
+ ktime_t now = ktime_get();
+ ktime_t diff;
+ u32 ns_time;
+
+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+ /* Record idle time */
+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+ ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+ kbdev->pm.backend.metrics.time_idle += ns_time;
+ /* Reset time period */
+ kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbdev->pm.backend.metrics.vsync_hit = buffer_updated;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_report_vsync);
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+ ktime_t now)
+{
+ ktime_t diff;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+
+ if (kbdev->pm.backend.metrics.gpu_active) {
+ u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+ kbdev->pm.backend.metrics.time_busy += ns_time;
+ kbdev->pm.backend.metrics.busy_cl[0] += ns_time *
+ kbdev->pm.backend.metrics.active_cl_ctx[0];
+ kbdev->pm.backend.metrics.busy_cl[1] += ns_time *
+ kbdev->pm.backend.metrics.active_cl_ctx[1];
+ kbdev->pm.backend.metrics.busy_gl += ns_time *
+ kbdev->pm.backend.metrics.active_gl_ctx;
+ } else {
+ kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+ >> KBASE_PM_TIME_SHIFT);
+ }
+}
+
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+ ktime_t now)
+{
+ /* Store previous value */
+ kbdev->pm.backend.metrics.prev_idle =
+ kbdev->pm.backend.metrics.time_idle;
+ kbdev->pm.backend.metrics.prev_busy =
+ kbdev->pm.backend.metrics.time_busy;
+
+ /* Reset current values */
+ kbdev->pm.backend.metrics.time_period_start = now;
+ kbdev->pm.backend.metrics.time_idle = 0;
+ kbdev->pm.backend.metrics.time_busy = 0;
+ kbdev->pm.backend.metrics.busy_cl[0] = 0;
+ kbdev->pm.backend.metrics.busy_cl[1] = 0;
+ kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+ unsigned long *total_out, unsigned long *busy_out)
+{
+ ktime_t now = ktime_get();
+ unsigned long flags, busy, total;
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+ busy = kbdev->pm.backend.metrics.time_busy;
+ total = busy + kbdev->pm.backend.metrics.time_idle;
+
+ /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (roughly
+ * 25.6ms with the default value) */
+ if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+ } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+ total += kbdev->pm.backend.metrics.prev_idle +
+ kbdev->pm.backend.metrics.prev_busy;
+ busy += kbdev->pm.backend.metrics.prev_busy;
+ }
+
+ *total_out = total;
+ *busy_out = busy;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
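+
+/* Editorial example (not part of the original file): a sketch of a devfreq
+ * get_dev_status() callback built on kbase_pm_get_dvfs_utilisation(). Only
+ * the busy/total ratio matters to the devfreq governor, so no conversion back
+ * to real time units is attempted; dev_get_drvdata() holding the kbase_device
+ * is an assumption of this sketch.
+ *
+ *   static int example_get_dev_status(struct device *dev,
+ *                                     struct devfreq_dev_status *stat)
+ *   {
+ *           struct kbase_device *kbdev = dev_get_drvdata(dev);
+ *
+ *           kbase_pm_get_dvfs_utilisation(kbdev, &stat->total_time,
+ *                                         &stat->busy_time);
+ *           return 0;
+ *   }
+ */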
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+ int *util_gl_share,
+ int util_cl_share[2])
+{
+ int utilisation;
+ int busy;
+ ktime_t now = ktime_get();
+
+ kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+ if (kbdev->pm.backend.metrics.time_idle +
+ kbdev->pm.backend.metrics.time_busy == 0) {
+ /* No data - so we return NOP */
+ utilisation = -1;
+ if (util_gl_share)
+ *util_gl_share = -1;
+ if (util_cl_share) {
+ util_cl_share[0] = -1;
+ util_cl_share[1] = -1;
+ }
+ goto out;
+ }
+
+ utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+ (kbdev->pm.backend.metrics.time_idle +
+ kbdev->pm.backend.metrics.time_busy);
+
+ busy = kbdev->pm.backend.metrics.busy_gl +
+ kbdev->pm.backend.metrics.busy_cl[0] +
+ kbdev->pm.backend.metrics.busy_cl[1];
+
+ if (busy != 0) {
+ if (util_gl_share)
+ *util_gl_share =
+ (100 * kbdev->pm.backend.metrics.busy_gl) /
+ busy;
+ if (util_cl_share) {
+ util_cl_share[0] =
+ (100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+ busy;
+ util_cl_share[1] =
+ (100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+ busy;
+ }
+ } else {
+ if (util_gl_share)
+ *util_gl_share = -1;
+ if (util_cl_share) {
+ util_cl_share[0] = -1;
+ util_cl_share[1] = -1;
+ }
+ }
+
+out:
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+ return utilisation;
+}
+
+enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ int utilisation, util_gl_share;
+ int util_cl_share[2];
+ enum kbase_pm_dvfs_action action;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+ utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+ util_cl_share);
+
+ if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+ util_cl_share[1] < 0) {
+ action = KBASE_PM_DVFS_NOP;
+ utilisation = 0;
+ util_gl_share = 0;
+ util_cl_share[0] = 0;
+ util_cl_share[1] = 0;
+ goto out;
+ }
+
+ if (kbdev->pm.backend.metrics.vsync_hit) {
+ /* VSync is being met */
+ if (utilisation < KBASE_PM_VSYNC_MIN_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_DOWN;
+ else if (utilisation > KBASE_PM_VSYNC_MAX_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_UP;
+ else
+ action = KBASE_PM_DVFS_NOP;
+ } else {
+ /* VSync is being missed */
+ if (utilisation < KBASE_PM_NO_VSYNC_MIN_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_DOWN;
+ else if (utilisation > KBASE_PM_NO_VSYNC_MAX_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_UP;
+ else
+ action = KBASE_PM_DVFS_NOP;
+ }
+
+ kbdev->pm.backend.metrics.utilisation = utilisation;
+ kbdev->pm.backend.metrics.util_cl_share[0] = util_cl_share[0];
+ kbdev->pm.backend.metrics.util_cl_share[1] = util_cl_share[1];
+ kbdev->pm.backend.metrics.util_gl_share = util_gl_share;
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+ util_cl_share);
+#endif /*CONFIG_MALI_MIDGARD_DVFS */
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ return action;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_action);
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+ bool isactive;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ isactive = kbdev->pm.backend.metrics.timer_active;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/* called when job is submitted to a GPU slot */
+void kbase_pm_metrics_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+ /* We may have been idle before */
+ if (kbdev->pm.backend.metrics.nr_in_slots == 0) {
+ WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[0] != 0);
+ WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[1] != 0);
+ WARN_ON(kbdev->pm.backend.metrics.active_gl_ctx != 0);
+
+ /* Record idle time */
+ kbasep_pm_record_gpu_idle(kbdev);
+
+ /* We are now active */
+ WARN_ON(kbdev->pm.backend.metrics.gpu_active);
+ kbdev->pm.backend.metrics.gpu_active = true;
+
+ } else {
+ /* Record active time */
+ kbasep_pm_record_gpu_active(kbdev);
+ }
+
+ /* Track number of jobs in GPU slots */
+ WARN_ON(kbdev->pm.backend.metrics.nr_in_slots == U8_MAX);
+ kbdev->pm.backend.metrics.nr_in_slots++;
+
+ /* Track whether it was a CL or GL job that was submitted to a GPU slot */
+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ int device_nr = (katom->core_req &
+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+ ? katom->device_nr : 0;
+ KBASE_DEBUG_ASSERT(device_nr < 2);
+ kbdev->pm.backend.metrics.active_cl_ctx[device_nr]++;
+ } else {
+ kbdev->pm.backend.metrics.active_gl_ctx++;
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+/* called when job is removed from a GPU slot */
+void kbase_pm_metrics_release_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+ /* Track how long CL and/or GL jobs have been busy for */
+ kbasep_pm_record_gpu_active(kbdev);
+
+ /* Track number of jobs in GPU slots */
+ WARN_ON(kbdev->pm.backend.metrics.nr_in_slots == 0);
+ kbdev->pm.backend.metrics.nr_in_slots--;
+
+ /* We may become idle */
+ if (kbdev->pm.backend.metrics.nr_in_slots == 0) {
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.metrics.gpu_active);
+ kbdev->pm.backend.metrics.gpu_active = false;
+ }
+
+ /* Track which of the active GPU jobs are CL and which are GL */
+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ int device_nr = (katom->core_req &
+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+ ? katom->device_nr : 0;
+ KBASE_DEBUG_ASSERT(device_nr < 2);
+
+ WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[device_nr]
+ == 0);
+ kbdev->pm.backend.metrics.active_cl_ctx[device_nr]--;
+ } else {
+ WARN_ON(kbdev->pm.backend.metrics.active_gl_ctx == 0);
+ kbdev->pm.backend.metrics.active_gl_ctx--;
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c
new file mode 100644
index 00000000000..4ee35bbc427
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_metrics_dummy.c
+ * Dummy Metrics for power management.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+void kbase_pm_register_vsync_callback(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ /* no VSync metrics will be available */
+ kbdev->pm.backend.metrics.platform_data = NULL;
+}
+
+void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100644
index 00000000000..ba5c23928b8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,895 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_policy.c
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+ &kbase_pm_always_on_policy_ops,
+ &kbase_pm_demand_policy_ops,
+ &kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_demand_always_powered_policy_ops,
+ &kbase_pm_fast_start_policy_ops,
+#endif
+#else /* CONFIG_MALI_NO_MALI */
+ &kbase_pm_demand_policy_ops,
+ &kbase_pm_always_on_policy_ops,
+ &kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_demand_always_powered_policy_ops,
+ &kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/** The number of policies available in the system.
+ * This is derived from the number of entries in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+ KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+ KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+ /* Note: kbase_pm_unrequest_cores() is on the slow path and is rarely hit
+ * in practice. We can detect whether more instrumentation is needed from
+ * a difference between PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+ /* Must be the last */
+ KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+ KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+ KBASE_PM_CHANGE_STATE_TILER = (1u << 1),
+
+ /* These two must be last */
+ KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+ KBASE_PM_CHANGE_STATE_SHADER),
+ KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+ [KBASE_PM_CHANGE_STATE_COUNT] = {
+ /* kbase_pm_request_cores */
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+ KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+ KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+ /* kbase_pm_release_cores */
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+ KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+ KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+ enum kbase_pm_func_id func_id,
+ kbase_pm_change_state state)
+{
+ int trace_code;
+
+ KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+ KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+ state);
+
+ trace_code = kbase_pm_change_state_trace_code[func_id][state];
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+ enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+ struct kbase_device *kbdev;
+
+ kbdev = container_of(timer, struct kbase_device,
+ pm.backend.gpu_poweroff_timer);
+
+ /* It is safe for this call to do nothing if the work item is already
+ * queued. The worker function will read the most up-to-date state of
+ * kbdev->pm.backend.gpu_poweroff_pending under lock.
+ *
+ * If a state change occurs while the worker function is processing,
+ * this call will succeed as a work item can be requeued once it has
+ * started processing.
+ */
+ if (kbdev->pm.backend.gpu_poweroff_pending)
+ queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+ &kbdev->pm.backend.gpu_poweroff_work);
+
+ if (kbdev->pm.backend.shader_poweroff_pending) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ if (kbdev->pm.backend.shader_poweroff_pending) {
+ kbdev->pm.backend.shader_poweroff_pending_time--;
+
+ KBASE_DEBUG_ASSERT(
+ kbdev->pm.backend.shader_poweroff_pending_time
+ >= 0);
+
+ if (!kbdev->pm.backend.shader_poweroff_pending_time) {
+ u64 prev_shader_state =
+ kbdev->pm.backend.desired_shader_state;
+
+ kbdev->pm.backend.desired_shader_state &=
+ ~kbdev->pm.backend.shader_poweroff_pending;
+
+ kbdev->pm.backend.shader_poweroff_pending = 0;
+
+ if (prev_shader_state !=
+ kbdev->pm.backend.desired_shader_state
+ || kbdev->pm.backend.ca_in_transition) {
+ bool cores_are_available;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+ cores_are_available =
+ kbase_pm_check_transitions_nolock(
+ kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+ /* Don't need 'cores_are_available',
+ * because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ }
+
+ if (kbdev->pm.backend.poweroff_timer_needed) {
+ hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+ return HRTIMER_RESTART;
+ }
+
+ return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ bool do_poweroff = false;
+
+ kbdev = container_of(data, struct kbase_device,
+ pm.backend.gpu_poweroff_work);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+ mutex_unlock(&kbdev->pm.lock);
+ return;
+ }
+
+ kbdev->pm.backend.gpu_poweroff_pending--;
+
+ if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+ mutex_unlock(&kbdev->pm.lock);
+ return;
+ }
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ /* Only power off the GPU if a request is still pending */
+ if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+ do_poweroff = true;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (do_poweroff) {
+ kbdev->pm.backend.poweroff_timer_needed = false;
+ hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+ /* Power off the GPU */
+ if (!kbase_pm_do_poweroff(kbdev, false)) {
+ /* GPU can not be powered off at present */
+ kbdev->pm.backend.poweroff_timer_needed = true;
+ hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+ kbdev->pm.gpu_poweroff_time,
+ HRTIMER_MODE_REL);
+ }
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+ struct workqueue_struct *wq;
+
+ wq = alloc_workqueue("kbase_pm_do_poweroff",
+ WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (!wq)
+ return -ENOMEM;
+
+ kbdev->pm.backend.gpu_poweroff_wq = wq;
+ INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+ kbasep_pm_do_gpu_poweroff_wq);
+ hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kbdev->pm.backend.gpu_poweroff_timer.function =
+ kbasep_pm_do_gpu_poweroff_callback;
+ kbdev->pm.backend.pm_current_policy = policy_list[0];
+ kbdev->pm.backend.pm_current_policy->init(kbdev);
+ kbdev->pm.gpu_poweroff_time =
+ HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+ kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+ kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+ return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+ kbdev->pm.backend.pm_current_policy->term(kbdev);
+ destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ kbdev->pm.backend.poweroff_timer_needed = false;
+ hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+
+ /* If wq is already running but is held off by pm.lock, make sure it has
+ * no effect */
+ kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbdev->pm.backend.shader_poweroff_pending = 0;
+ kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ bool active;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* pm_current_policy will never be NULL while pm.lock is held */
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ active = kbdev->pm.backend.pm_current_policy->get_core_active(kbdev);
+
+ if (active) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (kbdev->pm.backend.gpu_poweroff_pending) {
+ /* Cancel any pending power off request */
+ kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+ /* If a request was pending then the GPU was still
+ * powered, so no need to continue */
+ if (!kbdev->poweroff_pending)
+ return;
+ }
+
+ if (!kbdev->pm.backend.poweroff_timer_needed &&
+ !kbdev->pm.backend.gpu_powered) {
+ kbdev->pm.backend.poweroff_timer_needed = true;
+ hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+ kbdev->pm.gpu_poweroff_time,
+ HRTIMER_MODE_REL);
+ }
+
+ /* Power on the GPU and any cores requested by the policy */
+ kbase_pm_do_poweron(kbdev, false);
+ } else {
+ /* It is an error for the power policy to power off the GPU
+ * when there are contexts active */
+ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+
+ if (kbdev->pm.backend.shader_poweroff_pending) {
+ kbdev->pm.backend.shader_poweroff_pending = 0;
+ kbdev->pm.backend.shader_poweroff_pending_time = 0;
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+
+ /* Request power off */
+ if (kbdev->pm.backend.gpu_powered) {
+ kbdev->pm.backend.gpu_poweroff_pending =
+ kbdev->pm.poweroff_gpu_ticks;
+ if (!kbdev->pm.backend.poweroff_timer_needed) {
+ /* Start timer if not running (eg if power
+ * policy has been changed from always_on to
+ * something else). This will ensure the GPU is
+ * actually powered off */
+ kbdev->pm.backend.poweroff_timer_needed = true;
+ hrtimer_start(
+ &kbdev->pm.backend.gpu_poweroff_timer,
+ kbdev->pm.gpu_poweroff_time,
+ HRTIMER_MODE_REL);
+ }
+ }
+ }
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+ u64 desired_bitmap;
+ bool cores_are_available;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ if (kbdev->pm.backend.pm_current_policy == NULL)
+ return;
+
+ desired_bitmap =
+ kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+ desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+ /* Enable core 0 if tiler required, regardless of core availability */
+ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+ desired_bitmap |= 1;
+
+ if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+ (u32)desired_bitmap);
+
+ /* Are any cores being powered on? */
+ if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+ kbdev->pm.backend.ca_in_transition) {
+ /* Check if we are powering off any cores before updating shader
+ * state */
+ if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+ /* Start timer to power off cores */
+ kbdev->pm.backend.shader_poweroff_pending |=
+ (kbdev->pm.backend.desired_shader_state &
+ ~desired_bitmap);
+ kbdev->pm.backend.shader_poweroff_pending_time =
+ kbdev->pm.poweroff_shader_ticks;
+ }
+
+ kbdev->pm.backend.desired_shader_state = desired_bitmap;
+
+ /* If any cores are being powered on, transition immediately */
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+ /* Start timer to power off cores */
+ kbdev->pm.backend.shader_poweroff_pending |=
+ (kbdev->pm.backend.desired_shader_state &
+ ~desired_bitmap);
+ kbdev->pm.backend.shader_poweroff_pending_time =
+ kbdev->pm.poweroff_shader_ticks;
+ } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+ kbdev->pm.backend.poweroff_timer_needed) {
+ /* If power policy is keeping cores on despite there being no
+ * active contexts then disable poweroff timer as it isn't
+ * required.
+ * Only reset poweroff_timer_needed if we're not in the middle
+ * of the power off callback */
+ kbdev->pm.backend.poweroff_timer_needed = false;
+ hrtimer_try_to_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+ }
+
+ /* Ensure timer does not power off wanted cores and make sure to power
+ * off unwanted cores */
+ if (kbdev->pm.backend.shader_poweroff_pending != 0) {
+ kbdev->pm.backend.shader_poweroff_pending &=
+ ~(kbdev->pm.backend.desired_shader_state &
+ desired_bitmap);
+ if (kbdev->pm.backend.shader_poweroff_pending == 0)
+ kbdev->pm.backend.shader_poweroff_pending_time = 0;
+ }
+
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+ if (!list)
+ return POLICY_COUNT;
+
+ *list = policy_list;
+
+ return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
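+
+/* Editorial example (not part of the original file): enumerating the
+ * available policies, e.g. to build a sysfs listing. This assumes the policy
+ * structures expose a human-readable 'name' field.
+ *
+ *   const struct kbase_pm_policy *const *policies;
+ *   int i, count = kbase_pm_list_policies(&policies);
+ *
+ *   for (i = 0; i < count; i++)
+ *           dev_info(kbdev->dev, "policy: %s\n", policies[i]->name);
+ */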
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_policy *new_policy)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ const struct kbase_pm_policy *old_policy;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+ KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+ /* During a policy change we pretend the GPU is active */
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread */
+ kbase_pm_context_active(kbdev);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Remove the policy to prevent IRQ handlers from working on it */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ old_policy = kbdev->pm.backend.pm_current_policy;
+ kbdev->pm.backend.pm_current_policy = NULL;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+ old_policy->id);
+ if (old_policy->term)
+ old_policy->term(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+ new_policy->id);
+ if (new_policy->init)
+ new_policy->init(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.backend.pm_current_policy = new_policy;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* If any core power state changes were previously attempted, but
+ * couldn't be made because the policy was changing (current_policy was
+ * NULL), then re-try them here. */
+ kbase_pm_update_active(kbdev);
+ kbase_pm_update_cores_state(kbdev);
+
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ /* Now the policy change is finished, we release our fake context active
+ * reference */
+ kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
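+
+/* Editorial example (not part of the original file): switching policy by
+ * name, as a sysfs store handler might. The "coarse_demand" name and the
+ * 'name' field are assumptions of this sketch.
+ *
+ *   const struct kbase_pm_policy *const *policies;
+ *   int i, count = kbase_pm_list_policies(&policies);
+ *
+ *   for (i = 0; i < count; i++) {
+ *           if (!strcmp(policies[i]->name, "coarse_demand")) {
+ *                   kbase_pm_set_policy(kbdev, policies[i]);
+ *                   break;
+ *           }
+ *   }
+ */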
+
+/** Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+ if ((kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state)
+ == kbdev->pm.backend.desired_shader_state &&
+ (kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state)
+ == kbdev->pm.backend.desired_tiler_state)
+ kbase_timeline_pm_check_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ u64 cores;
+
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ cores = shader_cores;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+
+ /* It should be almost impossible for this to overflow. It would
+ * require 2^32 atoms to request a particular core, which would
+ * require 2^24 contexts to submit. This would require an amount
+ * of memory that is impossible on a 32-bit system and extremely
+ * unlikely on a 64-bit system. */
+ int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+ if (1 == cnt) {
+ kbdev->shader_needed_bitmap |= bit;
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ cores &= ~bit;
+ }
+
+ if (tiler_required) {
+ int cnt = ++kbdev->tiler_needed_cnt;
+
+ if (1 == cnt)
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+ NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ kbase_timeline_pm_cores_func(kbdev,
+ KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+ change_gpu_state);
+ kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_timeline_pm_cores_func(kbdev,
+ KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+ change_gpu_state);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_needed_cnt[bitnum];
+
+ if (0 == cnt) {
+ kbdev->shader_needed_bitmap &= ~bit;
+
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required) {
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+ cnt = --kbdev->tiler_needed_cnt;
+
+ if (0 == cnt)
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+ NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ /* Trace that any state change effectively completes immediately
+ * - no-one will wait on the state change */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ u64 prev_shader_needed; /* Just for tracing */
+ u64 prev_shader_inuse; /* Just for tracing */
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ prev_shader_needed = kbdev->shader_needed_bitmap;
+ prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+ /* If desired_shader_state does not contain the requested cores, then
+ * power management is not attempting to power those cores (most
+ * likely due to core availability policy) and a new job affinity must
+ * be chosen */
+ if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+ shader_cores) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ return KBASE_NEW_AFFINITY;
+ }
+
+ if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+ (tiler_required && !kbdev->tiler_available_bitmap)) {
+ /* Trace ongoing core transition */
+ kbase_timeline_pm_l2_transition_start(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ return KBASE_CORES_NOT_READY;
+ }
+
+ /* If we started to trace a state change, then trace it as having
+ * finished by now, at the very latest */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ /* Trace core transition done */
+ kbase_timeline_pm_l2_transition_done(kbdev);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_needed_cnt[bitnum];
+
+ if (0 == cnt)
+ kbdev->shader_needed_bitmap &= ~bit;
+
+ /* shader_inuse_cnt should not overflow because there can only
+ * be a very limited number of jobs on the h/w at one time */
+
+ kbdev->shader_inuse_cnt[bitnum]++;
+ kbdev->shader_inuse_bitmap |= bit;
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required) {
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+ --kbdev->tiler_needed_cnt;
+
+ kbdev->tiler_inuse_cnt++;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+ }
+
+ if (prev_shader_needed != kbdev->shader_needed_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+ NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+ NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+ if (0 == cnt) {
+ kbdev->shader_inuse_bitmap &= ~bit;
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required) {
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+ cnt = --kbdev->tiler_inuse_cnt;
+
+ if (0 == cnt)
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+ NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+ kbase_timeline_pm_cores_func(kbdev,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+ change_gpu_state);
+ kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_timeline_pm_cores_func(kbdev,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+ change_gpu_state);
+
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+ bool tiler_required,
+ u64 shader_cores)
+{
+ kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+
+ kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 prior_l2_users_count;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ prior_l2_users_count = kbdev->l2_users_count++;
+
+ KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+ /* if the GPU is reset while the l2 is on, l2 will be off but
+ * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+ * set to 0 though by kbase_pm_init_hw */
+ if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+ kbase_pm_check_transitions_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ wait_event(kbdev->pm.backend.l2_powered_wait,
+ kbdev->pm.backend.l2_powered == 1);
+
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbdev->l2_users_count++;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+ --kbdev->l2_users_count;
+
+ if (!kbdev->l2_users_count) {
+ kbase_pm_check_transitions_nolock(kbdev);
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100644
index 00000000000..fabb3cc970e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ * initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+ KBASE_CORES_NOT_READY = 0,
+ KBASE_NEW_AFFINITY = 1,
+ KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is a safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ * for jobs to be submitted
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ * jobs to be submitted.
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores (as given to
+ * kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores (as given to
+ * kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ * %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ * %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+ struct kbase_device *kbdev,
+ bool tiler_required,
+ u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores (as given to
+ * kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores);
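+
+/*
+ * Usage sketch (illustrative only, not part of this patch): the typical
+ * lifecycle of the core request API above, as a job scheduler might use it.
+ * The kbase_device pointer and shader core mask are assumed to come from the
+ * caller, and error handling is reduced to the essentials:
+ *
+ *	static int example_submit(struct kbase_device *kbdev, u64 cores)
+ *	{
+ *		enum kbase_pm_cores_ready ready;
+ *
+ *		kbase_pm_request_cores(kbdev, true, cores);
+ *		ready = kbase_pm_register_inuse_cores(kbdev, true, cores);
+ *		if (ready != KBASE_CORES_READY) {
+ *			kbase_pm_unrequest_cores(kbdev, true, cores);
+ *			return -EAGAIN;
+ *		}
+ *		... run the job, then release the cores when it completes:
+ *		kbase_pm_release_cores(kbdev, true, cores);
+ *		return 0;
+ *	}
+ */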
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the caller's responsibility to ensure that the l2 is already powered
+ * up and to eventually call kbase_pm_release_l2_caches().
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
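+
+/*
+ * Usage sketch (illustrative only, not part of this patch): request and
+ * release calls for the l2 caches must be balanced, e.g. around an operation
+ * that needs the caches powered regardless of shader core state:
+ *
+ *	kbase_pm_request_l2_caches(kbdev);
+ *	... access that requires the l2 caches to stay powered ...
+ *	kbase_pm_release_l2_caches(kbdev);
+ */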
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 00000000000..be7c3663e2b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Backend time-related functions: GPU cycle counter and timestamp access.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+ u64 *system_time, struct timespec *ts)
+{
+ u32 hi1, hi2;
+
+ kbase_pm_request_gpu_cycle_counter(kbdev);
+
+ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+ * correctly */
+ do {
+ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+ NULL);
+ *cycle_counter = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+ NULL);
+ *cycle_counter |= (((u64) hi1) << 32);
+ } while (hi1 != hi2);
+
+ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+ * correctly */
+ do {
+ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+ NULL);
+ *system_time = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+ NULL);
+ *system_time |= (((u64) hi1) << 32);
+ } while (hi1 != hi2);
+
+ /* Record the CPU's idea of current time */
+ getrawmonotonic(ts);
+
+ kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note: If a GPU reset occurs then the counters are reset to zero, so the
+ * delay may not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+ u32 base_count = 0;
+
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread */
+
+ kbase_pm_context_active(kctx->kbdev);
+ kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+ while (true) {
+ u32 new_count;
+
+ new_count = kbase_reg_read(kctx->kbdev,
+ GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ /* First time around, just store the count. */
+ if (base_count == 0) {
+ base_count = new_count;
+ continue;
+ }
+
+ /* No need to handle wrapping, unsigned maths works for this. */
+ if ((new_count - base_count) > 1000)
+ break;
+ }
+
+ kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+ kbase_pm_context_idle(kctx->kbdev);
+}
+#endif /* CONFIG_MALI_NO_MALI */
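+
+/*
+ * Note on the unsigned arithmetic in kbase_wait_write_flush() (worked example,
+ * not part of this patch): because new_count and base_count are u32, the
+ * subtraction still yields the elapsed count when CYCLE_COUNT_LO wraps. For
+ * example, with base_count = 0xFFFFFFF0 and new_count = 0x00000400,
+ * new_count - base_count evaluates to 0x410 (1040), which correctly exceeds
+ * the 1000-cycle target.
+ */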
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 00000000000..4ae1b0c304c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Backend-specific time interface.
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev: Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time: Pointer to u64 to store system time in
+ * @ts: Pointer to struct timespec to store current monotonic
+ * time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+ u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If a GPU reset occurs then the counters are reset to zero, so the delay may
+ * not be as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100644
index 00000000000..35ff2f1ce4a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections, etc., use the module
+# name as a prefix, e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT += ../../kernel/drivers/gpu/arm/midgard/
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that act
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES +=
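+
+# Example (illustrative only, not part of this configuration): following the
+# module-prefix convention noted at the top of this file, a module alias
+# could be defined as:
+#
+# ALIASES += midgard_sideeffect="\par Side Effects:\n"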
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100644
index 00000000000..7ae05c2f8de
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+ rankdir=LR;
+ size="12,8";
+ compound=true;
+
+ node [ shape = box ];
+
+ subgraph cluster_policy_queues {
+ low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+ queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+ rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+ label = "Policy's Queue(s)";
+ }
+
+ call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+ {
+ rank=same;
+ ordering=out;
+ call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+ call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+ call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+ }
+
+ subgraph cluster_runpool {
+
+ as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+ as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+ as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+ as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+ label = "Policy's Run Pool";
+ }
+
+ {
+ rank=same;
+ call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+ sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+ }
+
+ {
+ rank=same;
+ ordering=out;
+ sstop [ shape=ellipse label="SS-Timer expires" ]
+ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+ irq [ label="IRQ" shape=ellipse ];
+
+ job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+ }
+
+ hstop [ shape=ellipse label="HS-Timer expires" ]
+
+ /*
+ * Edges
+ */
+
+ call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+ low_queue:qr -> call_dequeue:w;
+ rt_queue:qr -> call_dequeue:w;
+
+ call_dequeue -> as1 [lhead=cluster_runpool];
+
+ as1->call_jdequeue [ltail=cluster_runpool];
+ call_jdequeue->jobslots:0;
+ call_jdequeue->sstop_dotfixup [ arrowhead=none];
+ sstop_dotfixup->sstop [label="Spawn SS-Timer"];
+ sstop->jobslots [label="SoftStop"];
+ sstop->hstop [label="Spawn HS-Timer"];
+ hstop->jobslots:ne [label="HardStop"];
+
+
+ as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+ call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+ call_ctxfinish->call_ctxdone [constraint=false];
+
+ call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+ {
+ jobslots->irq [constraint=false];
+
+ irq->job_finish [constraint=false];
+ }
+
+ irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/drivers/gpu/arm/midgard/docs/policy_overview.dot b/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100644
index 00000000000..159b993b7d6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+ rankdir=LR
+ size="6,6"
+ compound=true;
+
+ node [ shape = box ];
+
+ call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+ policy_queue [ label="Policy's Queue" ];
+
+ {
+ rank=same;
+ runpool [ label="Policy's Run Pool" ];
+
+ ctx_finish [ label="ctx finished" ];
+ }
+
+ {
+ rank=same;
+ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+ job_finish [ label="Job finished" ];
+ }
+
+
+
+ /*
+ * Edges
+ */
+
+ call_enqueue -> policy_queue;
+
+ policy_queue->runpool [label="dequeue ctx" weight=0.1];
+ runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+ runpool->ctx_finish [ style=dotted ];
+
+ runpool->jobslots [label="dequeue job" weight=0.1];
+ jobslots->runpool [label="requeue job" weight=0.1];
+
+ jobslots->job_finish [ style=dotted ];
+}
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100644
index 00000000000..bec9a5acf5c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,163 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+ BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_FLUSH_REDUCTION,
+ BASE_HW_FEATURE_V4,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_V4,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_V4,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_V4,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100644
index 00000000000..7bd30dd7278
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,786 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6398,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7144,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8073,
+ BASE_HW_ISSUE_8186,
+ BASE_HW_ISSUE_8215,
+ BASE_HW_ISSUE_8245,
+ BASE_HW_ISSUE_8250,
+ BASE_HW_ISSUE_8260,
+ BASE_HW_ISSUE_8280,
+ BASE_HW_ISSUE_8316,
+ BASE_HW_ISSUE_8381,
+ BASE_HW_ISSUE_8394,
+ BASE_HW_ISSUE_8401,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8443,
+ BASE_HW_ISSUE_8456,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8634,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8791,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8833,
+ BASE_HW_ISSUE_8879,
+ BASE_HW_ISSUE_8896,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_8986,
+ BASE_HW_ISSUE_8987,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_9566,
+ BASE_HW_ISSUE_9630,
+ BASE_HW_ISSUE_10127,
+ BASE_HW_ISSUE_10327,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10817,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_10984,
+ BASE_HW_ISSUE_10995,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+ BASE_HW_ISSUE_END
+};
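+
+/*
+ * Usage sketch (illustrative only, not part of this patch): each issue list
+ * below is terminated by BASE_HW_ISSUE_END, so a caller can test for a
+ * workaround by scanning to the sentinel. The helper name here is
+ * hypothetical:
+ *
+ *	static bool example_has_issue(const enum base_hw_issue *issues,
+ *				      enum base_hw_issue issue)
+ *	{
+ *		while (*issues != BASE_HW_ISSUE_END)
+ *			if (*issues++ == issue)
+ *				return true;
+ *		return false;
+ *	}
+ */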
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6398,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7144,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8073,
+ BASE_HW_ISSUE_8186,
+ BASE_HW_ISSUE_8215,
+ BASE_HW_ISSUE_8245,
+ BASE_HW_ISSUE_8250,
+ BASE_HW_ISSUE_8260,
+ BASE_HW_ISSUE_8280,
+ BASE_HW_ISSUE_8316,
+ BASE_HW_ISSUE_8381,
+ BASE_HW_ISSUE_8394,
+ BASE_HW_ISSUE_8401,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8443,
+ BASE_HW_ISSUE_8456,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8634,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8791,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8833,
+ BASE_HW_ISSUE_8896,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_8986,
+ BASE_HW_ISSUE_8987,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_9566,
+ BASE_HW_ISSUE_9630,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_10984,
+ BASE_HW_ISSUE_10995,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10127,
+ BASE_HW_ISSUE_10327,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10817,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100644
index 00000000000..21fe319b202
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1613 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+typedef u64 base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT 256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2 5
+#define BASEP_JD_SEM_PER_WORD (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x) ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
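+
+/*
+ * Worked example (illustrative, not part of this patch): for semaphore index
+ * 37, BASEP_JD_SEM_WORD_NR(37) = 37 >> 5 = 1 selects the second
+ * 32-semaphore word, and BASEP_JD_SEM_MASK_IN_WORD(37) = 1 << (37 & 31)
+ * = 1 << 5 selects bit 5 within that word. With BASE_JD_ATOM_COUNT = 256,
+ * the array therefore holds BASEP_JD_SEM_ARRAY_SIZE = 256 >> 5 = 8 words.
+ */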
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+ void __user *value; /**< client should store their pointers here */
+ u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+ u64 sizer; /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
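+
+/*
+ * Usage sketch (illustrative only, not part of this patch): a 64-bit kernel
+ * servicing a 32-bit client fetches the pointer through compat_value,
+ * otherwise through value. The is_compat flag is an assumed input:
+ *
+ *	static void __user *example_get_ptr(const kbase_pointer *p,
+ *					    bool is_compat)
+ *	{
+ *		if (is_compat)
+ *			return (void __user *)(uintptr_t)p->compat_value;
+ *		return p->value;
+ *	}
+ */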
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD,
+ * which would define a @a write-only region on the CPU side that is
+ * nevertheless heavily read by the CPU).
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+ BASE_MEM_PROT_CPU_RD = (1U << 0), /**< Read access CPU side */
+ BASE_MEM_PROT_CPU_WR = (1U << 1), /**< Write access CPU side */
+ BASE_MEM_PROT_GPU_RD = (1U << 2), /**< Read access GPU side */
+ BASE_MEM_PROT_GPU_WR = (1U << 3), /**< Write access GPU side */
+ BASE_MEM_PROT_GPU_EX = (1U << 4), /**< Execute allowed on the GPU
+ side */
+
+ /* BASE_MEM_HINT flags have been removed, but their values are reserved
+ * for backwards compatibility with older user-space drivers. The values
+ * can be re-used once support for r5p0 user-space drivers is removed,
+ * presumably in r7p0.
+ *
+ * RESERVED: (1U << 5)
+ * RESERVED: (1U << 6)
+ * RESERVED: (1U << 7)
+ * RESERVED: (1U << 8)
+ */
+
+ BASE_MEM_GROW_ON_GPF = (1U << 9), /**< Grow backing store on GPU
+ Page Fault */
+
+ BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+ shareable, if available */
+ BASE_MEM_COHERENT_LOCAL = (1U << 11), /**< Page coherence Inner
+ shareable */
+ BASE_MEM_CACHED_CPU = (1U << 12), /**< Should be cached on the
+ CPU */
+
+/* IN/OUT */
+ BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+ and the CPU */
+/* OUT */
+ BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to acquire a GPU
+ address for the alloc */
+
+/* IN */
+ BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
+ Outer shareable, required. */
+ BASE_MEM_SECURE = (1U << 16) /**< Secure memory */
+
+};
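+
+/*
+ * Example (illustrative only, not part of this patch): a buffer the CPU
+ * fills and the GPU then reads could combine the flags as:
+ *
+ *	base_mem_alloc_flags flags = BASE_MEM_PROT_CPU_WR |
+ *				     BASE_MEM_PROT_GPU_RD;
+ */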
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 17
+
+/**
+ * A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+ * A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+
+/**
+ * @brief Memory types supported by @a base_mem_import
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here, ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM,
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+ BASE_MEM_IMPORT_TYPE_INVALID = 0,
+ /** UMP import. Handle type is ump_secure_id. */
+ BASE_MEM_IMPORT_TYPE_UMP = 1,
+ /** UMM import. Handle type is a file descriptor (int) */
+ BASE_MEM_IMPORT_TYPE_UMM = 2
+} base_mem_import_type;
+
+/* legacy API wrappers */
+#define base_tmem_import_type base_mem_import_type
+#define BASE_TMEM_IMPORT_TYPE_INVALID BASE_MEM_IMPORT_TYPE_INVALID
+#define BASE_TMEM_IMPORT_TYPE_UMP BASE_MEM_IMPORT_TYPE_UMP
+#define BASE_TMEM_IMPORT_TYPE_UMM BASE_MEM_IMPORT_TYPE_UMM
+
+/**
+ * @brief Invalid memory handle type.
+ * Return value from functions returning @a base_mem_handle on error.
+ */
+#define BASE_MEM_INVALID_HANDLE (0ull << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+/* reserved handles up to 64 << PAGE_SHIFT for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
+ BASE_MEM_COOKIE_BASE)
+
+/* Bit mask of cookies used for memory allocation setup */
+#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+ BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */
+ BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1, /**< Not a growable tmem object */
+ BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */
+ BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3, /**< Resize attempted on buffer while it was mapped, which is not permitted */
+ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Deferred Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief A basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+ struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * A simple opaque handle to imported memory; it can't be used with
+ * anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+ struct {
+ u64 handle;
+ } basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+ struct {
+ int fd;
+ } basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+ struct {
+ int fd;
+ int stream_fd;
+ } basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as a callback
+ * function pointer or data needed to handle job completion. It is
+ * guaranteed to be untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+ u64 blob[2]; /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+ base_mem_handle handle;
+ u64 offset;
+ u64 length;
+};
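+
+/*
+ * Example (illustrative only, not part of this patch): aliasing 16 pages of
+ * an existing handle starting 4 pages in; the handle value is assumed to
+ * come from a prior allocation:
+ *
+ *	struct base_mem_aliasing_info ai = {
+ *		.handle = existing_handle,
+ *		.offset = 4,
+ *		.length = 16,
+ *	};
+ */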
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure, it should be possible to add it
+ * without changing the structure size).
+ * When the flag is set for a particular dependency, to signal that it is an ordering-only dependency,
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly. Failing to specify the
+ * correct settings can (and typically will) cause early job termination.
+ * Multiple values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u16 base_jd_core_req;
+
+/* Requirements that come from the HW */
+#define BASE_JD_REQ_DEP 0 /**< No requirement, dependency only */
+#define BASE_JD_REQ_FS (1U << 0) /**< Requires fragment shaders */
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS (1U << 1)
+#define BASE_JD_REQ_T (1U << 2) /**< Requires tiling */
+#define BASE_JD_REQ_CF (1U << 3) /**< Requires cache flushes */
+#define BASE_JD_REQ_V (1U << 4) /**< Requires value writeback */
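+
+/*
+ * A minimal sketch of combining requirements, as described above; the
+ * variable name and the particular combination are illustrative only.
+ *
+ * @code
+ * base_jd_core_req req = BASE_JD_REQ_FS | BASE_JD_REQ_CF;
+ * @endcode
+ */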
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC (1U << 13)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP (1U << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON (1U << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced, no syncsets can be bundled with the atom;
+ * they should instead be part of NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resources to use;
+ * the second pre_dep object can be used to create other dependencies.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB (1U << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs make the following assumptions:
+ *
+ * - No external resources. Any required external resources will be held by the
+ * replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ *
+ * @note This is a more flexible variant of the @ref BASE_CONTEXT_HINT_ONLY_COMPUTE flag,
+ * allowing specific jobs to be marked as 'Only Compute' instead of the entire context
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE (1U << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE (1U << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER (1U << 14)
+
+/**
+* These requirement bits are currently unused in base_jd_core_req (currently a u16)
+*/
+
+#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5)
+#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15)
+
+/**
+* Mask of all the currently unused requirement bits in base_jd_core_req.
+*/
+
+#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \
+ BASEP_JD_REQ_RESERVED_BIT15)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
+ BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER))
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+ /** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+ KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+ /** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+ /** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+ /** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+ /** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+ KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms do not affect non-fragment atoms with lower priorities, and
+ * vice versa. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ * resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ * allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ * the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ * respect to each other. That is, there is no ordering constraint between
+ * atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+ /** Atom is not used */
+ KBASE_JD_ATOM_STATE_UNUSED,
+ /** Atom is queued in JD */
+ KBASE_JD_ATOM_STATE_QUEUED,
+ /** Atom has been given to JS (is runnable/running) */
+ KBASE_JD_ATOM_STATE_IN_JS,
+ /** Atom has been completed, but not yet handed back to job dispatcher
+ * for dependency resolution */
+ KBASE_JD_ATOM_STATE_HW_COMPLETED,
+ /** Atom has been completed, but not yet handed back to userspace */
+ KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+ base_atom_id atom_id; /**< An atom number */
+ base_jd_dep_type dependency_type; /**< Dependency type */
+};
+
+typedef struct base_jd_atom_v2 {
+ u64 jc; /**< job-chain GPU address */
+ struct base_jd_udata udata; /**< user data */
+ kbase_pointer extres_list; /**< list of external resources */
+ u16 nr_extres; /**< nr of external resources */
+ base_jd_core_req core_req; /**< core requirements */
+	const struct base_dependency pre_dep[2]; /**< pre-dependencies; use the setter function to assign this field,
+	which reduces the possibility of improper assignment of a dependency field */
+ base_atom_id atom_number; /**< unique number to identify the atom */
+ base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */
+ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+ u8 padding[5];
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+ u64 jc; /**< job-chain GPU address */
+ struct base_jd_udata udata; /**< user data */
+ kbase_pointer extres_list; /**< list of external resources */
+ u16 nr_extres; /**< nr of external resources */
+ base_jd_core_req core_req; /**< core requirements */
+ base_atom_id pre_dep[2]; /**< pre-dependencies */
+ base_atom_id atom_number; /**< unique number to identify the atom */
+ base_jd_prio prio; /**< priority - smaller is higher priority */
+ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+ u8 padding[7];
+};
+#endif
+
+typedef enum base_external_resource_access {
+ BASE_EXT_RES_ACCESS_SHARED,
+ BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+ u64 ext_resource;
+} base_external_resource;
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] const_dep The kbase jd atom dependency to be initialized.
+ * @param id The atom_id to be assigned.
+ * @param dep_type The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(const struct base_dependency *const_dep, base_atom_id id, base_jd_dep_type dep_type)
+{
+ struct base_dependency *dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+	/* make sure we don't set disallowed combinations of atom_id/dependency_type */
+ LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+ (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+ dep = (struct base_dependency *)const_dep;
+
+ dep->atom_id = id;
+ dep->dependency_type = dep_type;
+}
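+
+/*
+ * A minimal usage sketch for the setter above; the atom variable and the
+ * dependency on atom number 5 are hypothetical.
+ *
+ * @code
+ * struct base_jd_atom_v2 atom;
+ *
+ * base_jd_atom_dep_set(&atom.pre_dep[0], 5, BASE_JD_DEP_TYPE_DATA);
+ * base_jd_atom_dep_set(&atom.pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
+ * @endcode
+ */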
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] const_dep The kbase jd atom dependency to be written.
+ * @param[in] from The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(const struct base_dependency *const_dep, const struct base_dependency *from)
+{
+ LOCAL_ASSERT(const_dep != NULL);
+
+ base_jd_atom_dep_set(const_dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with an uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+ LOCAL_ASSERT(atom);
+ LOCAL_ASSERT(fence);
+ LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+ LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+ atom->jc = (uintptr_t) fence;
+ atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with an uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+ LOCAL_ASSERT(atom);
+ LOCAL_ASSERT(fence);
+ LOCAL_ASSERT(fence->basep.fd >= 0);
+ atom->jc = (uintptr_t) fence;
+ atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
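+
+/*
+ * A minimal sketch of using the two helpers above together; the fence and
+ * atom variables are hypothetical and assumed to have been prepared with
+ * base_fence_init()/base_fence_import() beforehand.
+ *
+ * @code
+ * struct base_jd_atom_v2 trigger_atom, wait_atom;
+ * struct base_fence out_fence, in_fence; // prepared earlier
+ *
+ * base_jd_fence_trigger_setup_v2(&trigger_atom, &out_fence);
+ * base_jd_fence_wait_setup_v2(&wait_atom, &in_fence);
+ * @endcode
+ */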
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res The resource object to initialize
+ * @param handle The handle to the imported memory object
+ * @param access The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+ u64 address;
+
+ address = handle.basep.handle;
+
+ LOCAL_ASSERT(res != NULL);
+ LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+ LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+ res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
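+
+/*
+ * A minimal usage sketch for the initializer above, assuming 'handle' was
+ * returned by a prior memory import; the variable names are hypothetical.
+ *
+ * @code
+ * struct base_external_resource res;
+ *
+ * base_external_resource_init(&res, handle, BASE_EXT_RES_ACCESS_SHARED);
+ * @endcode
+ */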
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+ BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+ BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event indicates success (SW events only) */
+ BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+ BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+ BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+ BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */
+ BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0 - subtype
+ * @li 12:11 - type
+ * @li 13 - SW success (only valid if the SW bit is set)
+ * @li 14 - SW event (HW event if not set)
+ * @li 15 - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+ /* HW defined exceptions */
+
+ /** Start of HW Non-fault status codes
+ *
+ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+ * because the job was hard-stopped
+ */
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+ /* non-fatal exceptions */
+ BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+ BASE_JD_EVENT_DONE = 0x01,
+ BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+ BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */
+ BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+ /** End of HW Non-fault status codes
+ *
+ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+ * because the job was hard-stopped
+ */
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+ /** Start of HW fault and SW Error status codes */
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+ /* job exceptions */
+ BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+ BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+ BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+ BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+ BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+ BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+ BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+ BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+ BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+ BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+ BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+ BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+ BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+ BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+ BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+ BASE_JD_EVENT_STATE_FAULT = 0x5A,
+ BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+ BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+ /* GPU exceptions */
+ BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+ BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+ /* MMU exceptions */
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+ BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+ BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+ /* SW defined exceptions */
+ BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+ BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+ BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+ BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+ BASE_JD_EVENT_FORCE_REPLAY = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+ BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+ /** End of HW fault and SW Error status codes */
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ /** Start of SW Success status codes */
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+ BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+ BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+ /** End of SW Success status codes */
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ /** Start of Kernel-only status codes. Such codes are never returned to user-space */
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+ BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+ /** End of Kernel-only status codes. */
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
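+
+/*
+ * Minimal sketches of the range test and type extraction described above;
+ * the helper names are hypothetical.
+ *
+ * @code
+ * static inline int code_is_hw_fault_or_sw_error(base_jd_event_code code)
+ * {
+ *	return code >= BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START &&
+ *	       code < BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END;
+ * }
+ *
+ * static inline unsigned int sw_event_type(base_jd_event_code code)
+ * {
+ *	return code & BASE_JD_SW_EVENT_TYPE_MASK;
+ * }
+ * @endcode
+ */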
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. These can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (i.e. all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+ base_jd_event_code event_code; /**< event code */
+ base_atom_id atom_number; /**< the atom number that has completed */
+ struct base_jd_udata udata; /**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption. Here we set the structure size to be 64 bytes,
+ * which is the cache line size of ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+ u64 system_time;
+ u64 cycle_counter;
+ u64 sec;
+ u32 usec;
+ u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
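+
+/*
+ * The padding arithmetic above can be verified at build time: 3 * 8 bytes
+ * of u64 fields + 4 bytes of u32 + 36 bytes of padding = 64 bytes. A
+ * kernel-side sketch, placed inside any function:
+ *
+ * @code
+ * BUILD_BUG_ON(sizeof(struct base_dump_cpu_gpu_counters) != 64);
+ * @endcode
+ */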
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#ifdef __KERNEL__
+/*
+ * The following typedefs should be removed when a midg types header is added.
+ * See MIDCOM-1657 for details.
+ */
+typedef u32 gpu_product_id;
+typedef u32 gpu_cache_features;
+typedef u32 gpu_tiler_features;
+typedef u32 gpu_mem_features;
+typedef u32 gpu_mmu_features;
+typedef u32 gpu_js_features;
+typedef u32 gpu_as_present;
+typedef u32 gpu_js_present;
+
+#define GPU_MAX_JOB_SLOTS 16
+
+#else
+#include <gpu/mali_gpu_registers.h>
+#include <gpu/mali_gpu_props.h>
+#endif
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ * - Part of a critical inner-loop
+ * - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ * - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ * so they won't accidentally pick another file with 'mali_t600' in its name
+ * - When the build doesn't work, the System Integrator may think the DDK
+ * doesn't work, and attempt to fix it themselves:
+ * - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ * error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ * you.
+ * - For a \#include mechanism, checks must still be made elsewhere, which the
+ * System Integrator may try working around by setting \#defines (such as
+ * VA_BITS) themselves in their plat_config.h. In the worst case, they may
+ * set the prevention-mechanism \#define of
+ * "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ * - In this case, they would believe they are on the right track, because
+ * the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configuration files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+ digraph plat_config_mechanism {
+ rankdir=BT
+ size="6,6"
+
+ "mali_base.h";
+ "gpu/mali_gpu.h";
+
+ node [ shape=box ];
+ {
+ rank = same; ordering = out;
+
+ "gpu/mali_gpu_props.h";
+ "base/midg_gpus/mali_t600.h";
+ "base/midg_gpus/other_midg_gpu.h";
+ }
+ { rank = same; "plat/plat_config.h"; }
+ {
+ rank = same;
+ "gpu/mali_gpu.h" [ shape=box ];
+ gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+ select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+ }
+ node [ shape=box ];
+ { rank = same; "plat/plat_config.h"; }
+ { rank = same; "mali_base.h"; }
+
+ "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+ "mali_base.h" -> "plat/plat_config.h" ;
+ "mali_base.h" -> select_gpu ;
+
+ "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+ gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+ select_gpu -> "base/midg_gpus/mali_t600.h" ;
+ select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+ }
+ @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill in the entire processed @ref base_gpu_props
+ * structure provided, because this information is required on both the
+ * user and kernel sides; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify an 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is determined by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper is already
+ * required for the above), or by simply assuming that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
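+
+/*
+ * A minimal sketch of the population counts described above, using the
+ * kernel's hweight64(); the 'raw' pointer to a gpu_raw_gpu_props is
+ * hypothetical.
+ *
+ * @code
+ * u32 num_core_groups = hweight64(raw->l2_present);   // one group per L2
+ * u32 num_cores = hweight64(raw->shader_present);     // mask may be sparse
+ * @endcode
+ */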
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+ /**
+ * Product specific value.
+ */
+ gpu_product_id product_id;
+
+ /**
+ * Status of the GPU release.
+ * No defined values, but starts at 0 and increases by one for each release
+ * status (alpha, beta, EAC, etc.).
+ * 4 bit values (0-15).
+ */
+ u16 version_status;
+
+ /**
+ * Minor release number of the GPU. "P" part of an "RnPn" release number.
+ * 8 bit values (0-255).
+ */
+ u16 minor_revision;
+
+ /**
+ * Major release number of the GPU. "R" part of an "RnPn" release number.
+ * 4 bit values (0-15).
+ */
+ u16 major_revision;
+
+ u16 padding;
+
+ /**
+ * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+ * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ */
+ u32 gpu_speed_mhz;
+
+ /**
+ * @usecase GPU clock max/min speed is required for computing best/worst case
+	 * in tasks such as job scheduling and irq_throttling. (It is not specified in the
+ * Midgard Architecture).
+ */
+ u32 gpu_freq_khz_max;
+ u32 gpu_freq_khz_min;
+
+ /**
+ * Size of the shader program counter, in bits.
+ */
+ u32 log2_program_counter_size;
+
+ /**
+ * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+ * bitpattern where a set bit indicates that the format is supported.
+ *
+ * Before using a texture format, it is recommended that the corresponding
+ * bit be checked.
+ */
+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+ /**
+ * Theoretical maximum memory available to the GPU. It is unlikely that a
+ * client will be able to allocate all of this memory for their own
+ * purposes, but this at least provides an upper bound on the memory
+ * available to the GPU.
+ *
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ * client will not be expecting to allocate anywhere near this value.
+ */
+ u64 gpu_available_memory_size;
+};
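+
+/*
+ * A minimal sketch of the recommended texture-format check above; the
+ * helper name is hypothetical, and 'fmt' indexes the 96 bits spread across
+ * the three TEXTURE_FEATURES registers.
+ *
+ * @code
+ * static inline int texture_format_supported(
+ *		const struct mali_base_gpu_core_props *props, unsigned int fmt)
+ * {
+ *	return (props->texture_features[fmt / 32] >> (fmt % 32)) & 1u;
+ * }
+ * @endcode
+ */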
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level APIs.
+ */
+struct mali_base_gpu_l2_cache_props {
+ u8 log2_line_size;
+ u8 log2_cache_size;
+ u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+ u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+ u32 bin_size_bytes; /* Max is 4*2^15 */
+ u32 max_active_levels; /* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+ u32 max_threads; /* Max. number of threads per core */
+ u32 max_workgroup_size; /* Max. number of threads per workgroup */
+ u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */
+ u16 max_registers; /* Total size [1..65535] of the register file available per core. */
+ u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+ u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+ u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+ u8 padding[7];
+};
+
+/**
+ * @brief Descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note If u64s must be 8-byte aligned, then this structure has 32 bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+ u64 core_mask; /**< Core restriction mask required for the group */
+ u16 num_cores; /**< Number of cores in the group */
+ u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+ u32 num_groups;
+
+ /**
+ * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+ *
+ * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+ * whether the core groups are coherent or not. Hence this member is needed
+ * to calculate how much memory is required for dumping.
+ *
+ * @note Do not use it to work out how many valid elements are in the
+ * group[] member. Use num_groups instead.
+ */
+ u32 num_core_groups;
+
+ /**
+ * Coherency features of the memory, accessed by @ref gpu_mem_features
+ * methods
+ */
+ gpu_mem_features coherency;
+
+ u32 padding;
+
+ /**
+ * Descriptors of coherent groups
+ */
+ struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
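+
+/*
+ * A minimal sketch of the dump-size calculation that num_core_groups
+ * exists for, as described above; 2048 bytes are written per core group.
+ *
+ * @code
+ * size_t dump_size = info->num_core_groups * 2048;
+ * @endcode
+ */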
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+ u64 shader_present;
+ u64 tiler_present;
+ u64 l2_present;
+ u64 unused_1; /* keep for backward compatibility */
+
+ gpu_cache_features l2_features;
+ u32 suspend_size; /* API 8.2+ */
+ gpu_mem_features mem_features;
+ gpu_mmu_features mmu_features;
+
+ gpu_as_present as_present;
+
+ u32 js_present;
+ gpu_js_features js_features[GPU_MAX_JOB_SLOTS];
+ gpu_tiler_features tiler_features;
+ u32 texture_features[3];
+
+ u32 gpu_id;
+
+ u32 thread_max_threads;
+ u32 thread_max_workgroup_size;
+ u32 thread_max_barrier_size;
+ u32 thread_features;
+
+ u32 coherency_features;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+ struct mali_base_gpu_core_props core_props;
+ struct mali_base_gpu_l2_cache_props l2_props;
+ u64 unused_1; /* keep for backwards compatibility */
+ struct mali_base_gpu_tiler_props tiler_props;
+ struct mali_base_gpu_thread_props thread_props;
+
+ /** This member is large, likely to be 128 bytes */
+ struct gpu_raw_gpu_props raw_props;
+
+ /** This must be last member of the structure */
+ struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as @ref basep_context_private_flags, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+ /** No flags set */
+ BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+ /** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+ BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+ /** Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled. */
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1),
+
+ /** Base context flag indicating a 'hint' that this context uses Compute
+ * Jobs only.
+ *
+	 * Specifically, this means that it only sends atoms that <b>do not</b>
+ * contain the following @ref base_jd_core_req :
+ * - BASE_JD_REQ_FS
+ * - BASE_JD_REQ_T
+ *
+ * Violation of these requirements will cause the Job-Chains to be rejected.
+ *
+ * In addition, it is inadvisable for the atom's Job-Chains to contain Jobs
+ * of the following @ref gpu_job_type (whilst it may work now, it may not
+	 * work in future):
+ * - @ref GPU_JOB_VERTEX
+ * - @ref GPU_JOB_GEOMETRY
+ *
+ * @note An alternative to using this is to specify the BASE_JD_REQ_ONLY_COMPUTE
+ * requirement in atoms.
+ */
+ BASE_CONTEXT_HINT_ONLY_COMPUTE = (1u << 2)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
+ ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+ (((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
+ ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
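+
+/*
+ * A minimal sketch of validating user-supplied creation flags against the
+ * mask above; the helper name is hypothetical.
+ *
+ * @code
+ * static inline int base_context_create_flags_valid(u32 flags)
+ * {
+ *	return (flags & ~BASE_CONTEXT_CREATE_ALLOWED_FLAGS) == 0;
+ * }
+ * @endcode
+ */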
+
+/**
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+enum basep_context_private_flags {
+ /** Private flag tracking whether job descriptor dumping is disabled */
+ BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31)
+};
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C pre-processor macros relating to Platform Config are exposed
+ * here.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revision.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+ /**
+ * Pointer to the first entry in the base_jd_replay_jc list. These
+ * will be replayed in @b reverse order (so that extra ones can be added
+ * to the head in future soft jobs without affecting this soft job)
+ */
+ u64 tiler_jc_list;
+
+ /**
+ * Pointer to the fragment job chain.
+ */
+ u64 fragment_jc;
+
+ /**
+ * Pointer to the tiler heap free FBD field to be modified.
+ */
+ u64 tiler_heap_free;
+
+ /**
+ * Hierarchy mask for the replayed fragment jobs. May be zero.
+ */
+ u16 fragment_hierarchy_mask;
+
+ /**
+ * Hierarchy mask for the replayed tiler jobs. May be zero.
+ */
+ u16 tiler_hierarchy_mask;
+
+ /**
+ * Default weight to be used for hierarchy levels not in the original
+ * mask.
+ */
+ u32 hierarchy_default_weight;
+
+ /**
+ * Core requirements for the tiler job chain
+ */
+ base_jd_core_req tiler_core_req;
+
+ /**
+ * Core requirements for the fragment job chain
+ */
+ base_jd_core_req fragment_core_req;
+
+ u8 padding[4];
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ * be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+ /**
+ * Pointer to next entry in the list. A setting of NULL indicates the
+ * end of the list.
+ */
+ u64 next;
+
+ /**
+ * Pointer to the job chain.
+ */
+ u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+ u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+#endif /* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
new file mode 100644
index 00000000000..a24791f4af7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-process sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * The fence can be waited on via a base fence wait soft-job
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ * >=0 fd
+ * <0 error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
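+
+/*
+ * A minimal user-space usage sketch for the ioctl above, assuming
+ * 'stream_fd' is an open stream file descriptor (hypothetical variable;
+ * requires <sys/ioctl.h> and <unistd.h>).
+ *
+ * @code
+ * int fence_fd = ioctl(stream_fd, STREAM_IOC_FENCE_INSERT);
+ * if (fence_fd < 0)
+ *	return fence_fd;	// error code
+ * // ... use the fence, then close(fence_fd) when no longer needed
+ * @endcode
+ */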
+
+#endif /* _BASE_KERNEL_SYNC_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 00000000000..4a98a72cc37
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC (1U << 0)
+#define BASE_SYNCSET_OP_CSYNC (1U << 1)
+
+/*
+ * This structure describes a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ * - type = ::BASE_SYNCSET_OP_MSYNC
+ * - mem_handle = a handle to the memory object on which the operation
+ * is taking place
+ * - user_addr = the address of the range to be synced
+ * - size = the amount of data to be synced, in bytes
+ * - offset is ignored.
+ * @li a sync from Memory to CPU:
+ * - type = ::BASE_SYNCSET_OP_CSYNC
+ * - mem_handle = a handle to the memory object on which the operation
+ * is taking place
+ * - user_addr = the address of the range to be synced
+ * - size = the amount of data to be synced, in bytes.
+ * - offset is ignored.
+ */
+struct basep_syncset {
+ base_mem_handle mem_handle;
+ u64 user_addr;
+ u64 size;
+ u8 type;
+ u8 padding[7];
+};
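+
+/*
+ * A minimal sketch of filling in a CPU-to-memory sync operation as
+ * described above; the handle, pointer and length values are hypothetical.
+ *
+ * @code
+ * struct basep_syncset sset = {
+ *	.mem_handle = handle,               // memory object being synced
+ *	.user_addr = (u64)(uintptr_t)ptr,   // start of the range
+ *	.size = len,                        // bytes to sync
+ *	.type = BASE_SYNCSET_OP_MSYNC,      // CPU -> Memory
+ * };
+ * @endcode
+ */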
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100644
index 00000000000..be454a216a3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100644
index 00000000000..be3527820fa
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,499 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+#include "mali_kbase_pm.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_security.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_cpuprops.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: the configuration attributes member of kbdev needs to have
+* been set up before calling kbase_device_init
+*/
+
+/*
+* API to acquire the device list semaphore and return a pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data,
+ int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data);
+#endif
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+ kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+ const struct base_jd_atom_v2 *user_atom,
+ struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ * and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The js_data.runpool_irq.lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+ u16 core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+/* API used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* API to be ported per OS; only needs to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+ u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress or has already completed
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value read is up to date).
+ * However, without the lock the value could change immediately afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+ return kbdev->pm.suspending;
+}
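+
+/*
+ * Illustrative sketch (editor's example, not part of the imported driver):
+ * a typical caller treats the unlocked read as advisory only.
+ */
+static inline bool kbase_example_may_submit(struct kbase_device *kbdev)
+{
+	/* Racy by design: without pm.active_count_lock the answer may be
+	 * stale by the time the caller acts on it */
+	return !kbase_pm_is_suspending(kbdev);
+}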
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int result;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+ result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result < BASE_JD_ATOM_COUNT);
+ return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id: ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+ struct kbase_context *kctx, int id)
+{
+ return &kctx->jctx.atoms[id];
+}
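+
+/*
+ * Illustrative sketch (editor's example, not part of the imported driver):
+ * the two helpers above are inverses, since the atom ID is simply the
+ * index into kctx->jctx.atoms[].
+ */
+static inline bool kbase_example_atom_id_roundtrip(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	int id = kbase_jd_atom_id(kctx, katom);
+
+	return kbase_jd_atom_from_id(kctx, id) == katom;
+}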
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just incrementing
+ * the counter during disjoint events, we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also increments the disjoint event count (calls @ref kbase_disjoint_event).
+ * Once the disjoint state has completed, @ref kbase_disjoint_state_down
+ * should be called.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also increments the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
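+
+/*
+ * Illustrative sketch (editor's example, not part of the imported driver):
+ * the expected call pattern around a period of disjoint behaviour, such
+ * as a GPU reset.
+ */
+static inline void kbase_example_disjoint_section(struct kbase_device *kbdev)
+{
+	kbase_disjoint_state_up(kbdev);
+
+	/* ... disjoint work happens here; any atom submitted or completed
+	 * in this window is counted via kbase_disjoint_event_potential() ... */
+
+	kbase_disjoint_state_down(kbdev);
+}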
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
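+
+/*
+ * Illustrative sketch (editor's example, not part of the imported driver):
+ * a parameter-supplying helper that satisfies the note above, since it is
+ * static inline and only returns a value.
+ */
+static inline u32 kbase_example_trace_param(void)
+{
+	return 0;	/* no side effects, so safe if the macro compiles out */
+}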
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ 0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+ kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+ kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+ trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+ trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+ trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+ trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+ trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(jobslot);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(jobslot);\
+ CSTD_UNUSED(info_val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(refcount);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_UNUSED(info_val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(subcode);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100644
index 00000000000..933aa285469
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to work around a HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinate words in the descriptors */
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+ struct device *dev = katom->kctx->kbdev->dev;
+ u32 clamped = 0;
+ struct kbase_va_region *region;
+ phys_addr_t *page_array;
+ u64 page_index;
+ u32 offset = katom->jc & (~PAGE_MASK);
+ u32 *page_1 = NULL;
+ u32 *page_2 = NULL;
+ u32 job_header[JOB_HEADER_SIZE_IN_WORDS];
+ void *dst = job_header;
+ u32 minX, minY, maxX, maxY;
+ u32 restartX, restartY;
+ struct page *p;
+ u32 copy_size;
+
+ dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+ if (!(katom->core_req & BASE_JD_REQ_FS))
+ return 0;
+
+ kbase_gpu_vm_lock(katom->kctx);
+ region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+ katom->jc);
+ if (!region || (region->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ page_array = kbase_get_cpu_phy_pages(region);
+ if (!page_array)
+ goto out_unlock;
+
+ page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+ p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+	/* We need the first 10 words of the fragment shader job descriptor.
+	 * We must check that offset + 10 words is less than the page
+	 * size, otherwise we need to load the next page.
+	 * copy_size equals JOB_HEADER_SIZE when the whole descriptor is
+	 * within the page, and is smaller otherwise.
+	 */
+ copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+ page_1 = kmap_atomic(p);
+
+ /* page_1 is a u32 pointer, offset is expressed in bytes */
+ page_1 += offset>>2;
+
+ kbase_sync_single_for_cpu(katom->kctx->kbdev,
+ kbase_dma_addr(p) + offset,
+ copy_size, DMA_BIDIRECTIONAL);
+
+ memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows the page boundary, so we
+	 * need to map the subsequent page */
+ if (copy_size < JOB_HEADER_SIZE) {
+ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+ page_2 = kmap_atomic(p);
+
+ kbase_sync_single_for_cpu(katom->kctx->kbdev,
+ kbase_dma_addr(p),
+ JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+ memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+ }
+
+ /* We managed to correctly map one or two pages (in case of overflow) */
+ /* Get Bounding Box data and restart index from fault address low word */
+ minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+ minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+ maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+ maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+ restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+ restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+ dev_warn(dev, "Before Clamping:\n"
+ "Jobstatus: %08x\n"
+ "restartIdx: %08x\n"
+ "Fault_addr_low: %08x\n"
+ "minCoordsX: %08x minCoordsY: %08x\n"
+ "maxCoordsX: %08x maxCoordsY: %08x\n",
+ job_header[JOB_DESC_STATUS_WORD],
+ job_header[JOB_DESC_RESTART_INDEX_WORD],
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+ minX, minY,
+ maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault */
+ job_header[JOB_DESC_RESTART_INDEX_WORD] =
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+ if (restartX < minX) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+ dev_warn(dev,
+ "Clamping restart X index to minimum. %08x clamped to %08x\n",
+ restartX, minX);
+ clamped = 1;
+ }
+ if (restartY < minY) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+ dev_warn(dev,
+ "Clamping restart Y index to minimum. %08x clamped to %08x\n",
+ restartY, minY);
+ clamped = 1;
+ }
+ if (restartX > maxX) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+ dev_warn(dev,
+ "Clamping restart X index to maximum. %08x clamped to %08x\n",
+ restartX, maxX);
+ clamped = 1;
+ }
+ if (restartY > maxY) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+ dev_warn(dev,
+ "Clamping restart Y index to maximum. %08x clamped to %08x\n",
+ restartY, maxY);
+ clamped = 1;
+ }
+
+ if (clamped) {
+ /* Reset the fault address low word
+ * and set the job status to STOPPED */
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+ job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+ dev_warn(dev, "After Clamping:\n"
+ "Jobstatus: %08x\n"
+ "restartIdx: %08x\n"
+ "Fault_addr_low: %08x\n"
+ "minCoordsX: %08x minCoordsY: %08x\n"
+ "maxCoordsX: %08x maxCoordsY: %08x\n",
+ job_header[JOB_DESC_STATUS_WORD],
+ job_header[JOB_DESC_RESTART_INDEX_WORD],
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+ minX, minY,
+ maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads */
+ memcpy(page_1, dst, copy_size);
+ p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+ kbase_sync_single_for_device(katom->kctx->kbdev,
+ kbase_dma_addr(p) + offset,
+ copy_size, DMA_TO_DEVICE);
+
+ if (copy_size < JOB_HEADER_SIZE) {
+ memcpy(page_2, dst + copy_size,
+ JOB_HEADER_SIZE - copy_size);
+ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+ kbase_sync_single_for_device(katom->kctx->kbdev,
+ kbase_dma_addr(p),
+ JOB_HEADER_SIZE - copy_size,
+ DMA_TO_DEVICE);
+ }
+ }
+ if (copy_size < JOB_HEADER_SIZE)
+ kunmap_atomic(page_2);
+
+ kunmap_atomic(page_1);
+
+out_unlock:
+ kbase_gpu_vm_unlock(katom->kctx);
+ return clamped;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100644
index 00000000000..099a2986167
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100644
index 00000000000..51f934404e5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,61 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+ u32 cache_flags = 0;
+
+ CSTD_UNUSED(nr_pages);
+
+#ifdef CONFIG_MALI_CACHE_COHERENT
+ /* Cache is completely coherent at hardware level. So always allocate
+ * cached memory.
+ */
+ cache_flags |= KBASE_REG_CPU_CACHED;
+#else
+ if (flags & BASE_MEM_CACHED_CPU)
+ cache_flags |= KBASE_REG_CPU_CACHED;
+#endif /* (CONFIG_MALI_CACHE_COHERENT) */
+
+ return cache_flags;
+}
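+
+/*
+ * Illustrative sketch (editor's example, not part of the imported driver):
+ * a request carrying BASE_MEM_CACHED_CPU always comes back with
+ * KBASE_REG_CPU_CACHED set, whether or not the build is coherent.
+ */
+static inline void kbase_example_cache_policy_check(void)
+{
+	WARN_ON(!(kbase_cache_enabled(BASE_MEM_CACHED_CPU, 1) &
+			KBASE_REG_CPU_CACHED));
+}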
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir)
+{
+ dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir)
+{
+ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100644
index 00000000000..d2487fdf21d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+#include "mali_kbase_device_internal.h"
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags: flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ * depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif /* _KBASE_CACHE_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.c b/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100644
index 00000000000..fb615ae02ea
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+ struct kbase_platform_funcs_conf *platform_funcs_p;
+
+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+ if (platform_funcs_p && platform_funcs_p->platform_init_func)
+ return platform_funcs_p->platform_init_func(kbdev);
+
+ return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+ struct kbase_platform_funcs_conf *platform_funcs_p;
+
+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+ if (platform_funcs_p && platform_funcs_p->platform_term_func)
+ platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+ KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+ *clock_speed = 100;
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100644
index 00000000000..5fb226dfd68
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,327 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#if !MALI_CUSTOMER_RELEASE
+/* This flag is set for internal builds so we can run tests without credentials. */
+#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1
+#else
+#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0
+#endif
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+ /**
+ * platform_init_func - platform specific init function pointer
+ * @kbdev - kbase_device pointer
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Function pointer for platform specific initialization or NULL if no
+ * initialization function is required. This function will be called
+	 * before any other callbacks listed in struct kbase_attribute
+	 * (such as Power Management callbacks).
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed (and possibly initialized) in here.
+ */
+ int (*platform_init_func)(struct kbase_device *kbdev);
+ /**
+ * platform_term_func - platform specific termination function pointer
+ * @kbdev - kbase_device pointer
+ *
+ * Function pointer for platform specific termination or NULL if no
+ * termination function is required. This function will be called
+	 * after any other callbacks listed in struct kbase_attribute
+ * (such as Power Management callbacks).
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed (and possibly terminated) in here.
+ */
+ void (*platform_term_func)(struct kbase_device *kbdev);
+};
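+
+/*
+ * Illustrative sketch (editor's example, not part of the imported driver):
+ * a platform integration might supply the hooks like this; all names
+ * below are hypothetical.
+ *
+ *	static int example_platform_init(struct kbase_device *kbdev)
+ *	{
+ *		// e.g. claim clocks/regulators and stash state in
+ *		// kbdev->platform_context
+ *		return 0;
+ *	}
+ *
+ *	static void example_platform_term(struct kbase_device *kbdev)
+ *	{
+ *		// release whatever example_platform_init() acquired
+ *	}
+ *
+ *	static struct kbase_platform_funcs_conf example_platform_funcs = {
+ *		.platform_init_func = example_platform_init,
+ *		.platform_term_func = example_platform_term,
+ *	};
+ */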
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+ /** Callback for when the GPU is idle and the power to it can be switched off.
+ *
+ * The system integrator can decide whether to either do nothing, just switch off
+ * the clocks to the GPU, or to completely power down the GPU.
+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callback's responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_off_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the GPU is about to become active and power must be supplied.
+ *
+ * This function must not return until the GPU is powered and clocked sufficiently for register access to
+ * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback.
+ * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callback's responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+ *
+ * The return value of the first call to this function is ignored.
+ *
+ * @return 1 if the GPU state may have been lost, 0 otherwise.
+ */
+ int (*power_on_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the system is requesting a suspend and GPU power
+ * must be switched off.
+ *
+ * Note that if this callback is present, then this may be called
+ * without a preceding call to power_off_callback. Therefore this
+ * callback must be able to take any action that might otherwise happen
+ * in power_off_callback.
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed and modified in here. It is the platform \em
+ * callbacks responsibility to initialize and terminate this pointer if
+ * used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the system is resuming from a suspend and GPU
+ * power must be switched on.
+ *
+ * Note that if this callback is present, then this may be called
+ * without a following call to power_on_callback. Therefore this
+ * callback must be able to take any action that might otherwise happen
+ * in power_on_callback.
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed and modified in here. It is the platform \em
+ * callbacks responsibility to initialize and terminate this pointer if
+ * used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_resume_callback)(struct kbase_device *kbdev);
+
+ /** Callback for handling runtime power management initialization.
+ *
+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+ * will become active from calls made to the OS from within this function.
+ * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ *
+	 * @return 0 on success, else negative error code.
+ */
+ int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+ /** Callback for handling runtime power management termination.
+ *
+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+ * should no longer be called by the OS on completion of this function.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ */
+ void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ */
+ void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+ int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+};
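+
+/*
+ * Illustrative sketch (editor's example, not part of the imported driver):
+ * a minimal integration that only gates the GPU power in the on/off
+ * hooks; all names below are hypothetical.
+ *
+ *	static void example_power_off(struct kbase_device *kbdev)
+ *	{
+ *		// e.g. disable the GPU clock and regulator
+ *	}
+ *
+ *	static int example_power_on(struct kbase_device *kbdev)
+ *	{
+ *		// e.g. re-enable the GPU regulator and clock
+ *		return 1;	// 1: GPU state may have been lost
+ *	}
+ *
+ *	static struct kbase_pm_callback_conf example_pm_callbacks = {
+ *		.power_off_callback = example_power_off,
+ *		.power_on_callback = example_power_on,
+ *		// remaining hooks left NULL (assumed optional here)
+ *	};
+ */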
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so it will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned, the caller assumes the maximum GPU speed, as stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
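+
+/*
+ * Illustrative sketch (editor's example, not part of the imported driver):
+ * a fixed-rate implementation matching the kbase_cpu_clk_speed_func
+ * signature; the 1000 MHz figure is a made-up placeholder.
+ */
+static inline int example_cpu_clk_speed(u32 *clock_speed)
+{
+	*clock_speed = 1000;	/* assumed fixed 1 GHz CPU clock, in MHz */
+	return 0;
+}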
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+ u64 start;
+ u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+ u32 job_irq_number;
+ u32 mmu_irq_number;
+ u32 gpu_irq_number;
+ struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+ const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ * Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that is outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code, such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other value causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other value causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+ /** @} *//* end group kbase_config */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_CONFIG_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100644
index 00000000000..ce5d0703911
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * Default Job Scheduler initial runtime of a context for the CFS Policy,
+ * in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
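+
+/*
+ * Worked example (editor's note): with the defaults in this file, the
+ * initial runtime is DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES *
+ * DEFAULT_JS_CTX_TIMESLICE_NS = 1 * 50000000 ns = 50 ms.
+ */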
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specifically, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* the cost of a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+ /**
+ * Use unrestricted Address ID width on the AXI bus.
+ */
+ KBASE_AID_32 = 0x0,
+
+ /**
+ * Restrict GPU to a half of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_16 = 0x3,
+
+ /**
+ * Restrict GPU to a quarter of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_8 = 0x2,
+
+ /**
+ * Restrict GPU to an eighth of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_4 = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ * KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ * KBASE_AID_16 - use 16 IDs (4 ID bits)
+ * KBASE_AID_8 - use 8 IDs (3 ID bits)
+ * KBASE_AID_4 - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ * KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ * KBASE_AID_16 - use 16 IDs (4 ID bits)
+ * KBASE_AID_8 - use 8 IDs (3 ID bits)
+ * KBASE_AID_4 - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for using alternative hardware counters.
+ */
+#define DEFAULT_ALTERNATIVE_HWC false
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granularity. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granularity
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100644
index 00000000000..e066b286c51
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,274 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_instr.h>
+
+#define MEMPOOL_PAGES 16384
+
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+ struct kbase_context *kctx;
+ int mali_err;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-initialized, as a lot of code assumes it's zeroed out on create */
+ kctx = vzalloc(sizeof(*kctx));
+
+ if (!kctx)
+ goto out;
+
+ /* creating a context is considered a disjoint event */
+ kbase_disjoint_event(kbdev);
+
+ kctx->kbdev = kbdev;
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+ kctx->is_compat = is_compat;
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+ atomic_set(&kctx->setup_complete, 0);
+ atomic_set(&kctx->setup_in_progress, 0);
+ kctx->infinite_cache_active = 0;
+ spin_lock_init(&kctx->mm_update_lock);
+ kctx->process_mm = NULL;
+ atomic_set(&kctx->nonmapped_pages, 0);
+ kctx->slots_pullable = 0;
+
+ if (kbase_mem_allocator_init(&kctx->osalloc, MEMPOOL_PAGES, kctx->kbdev) != 0)
+ goto free_kctx;
+
+ kctx->pgd_allocator = &kctx->osalloc;
+ atomic_set(&kctx->used_pages, 0);
+
+ if (kbase_jd_init(kctx))
+ goto free_allocator;
+
+ mali_err = kbasep_js_kctx_init(kctx);
+ if (mali_err)
+ goto free_jd; /* safe to call kbasep_js_kctx_term in this case */
+
+ mali_err = kbase_event_init(kctx);
+ if (mali_err)
+ goto free_jd;
+
+ mutex_init(&kctx->reg_lock);
+
+ INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+#ifdef CONFIG_KDS
+ INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+
+ mali_err = kbase_mmu_init(kctx);
+ if (mali_err)
+ goto free_event;
+
+ kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+ if (!kctx->pgd)
+ goto free_mmu;
+
+ if (kbase_mem_allocator_alloc(&kctx->osalloc, 1, &kctx->aliasing_sink_page) != 0)
+ goto no_sink_page;
+
+ kctx->tgid = current->tgid;
+ kctx->pid = current->pid;
+ init_waitqueue_head(&kctx->event_queue);
+
+ kctx->cookies = KBASE_COOKIE_MASK;
+
+ /* Make sure page 0 is not used... */
+ if (kbase_region_tracker_init(kctx))
+ goto no_region_tracker;
+#ifdef CONFIG_GPU_TRACEPOINTS
+ atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+ kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+ mutex_init(&kctx->vinstr_cli_lock);
+
+ return kctx;
+
+no_region_tracker:
+no_sink_page:
+ kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0);
+ /* VM lock needed for the call to kbase_mmu_free_pgd */
+ kbase_gpu_vm_lock(kctx);
+ kbase_mmu_free_pgd(kctx);
+ kbase_gpu_vm_unlock(kctx);
+free_mmu:
+ kbase_mmu_term(kctx);
+free_event:
+ kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call even if the corresponding init was not reached (assuming kctx was sufficiently zeroed) */
+ kbasep_js_kctx_term(kctx);
+ kbase_jd_exit(kctx);
+free_allocator:
+ kbase_mem_allocator_term(&kctx->osalloc);
+free_kctx:
+ vfree(kctx);
+out:
+ return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+ dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+ kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ int pages;
+ unsigned long pending_regions_to_clean;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+ /* Ensure the core is powered up for the destroy process */
+ /* A suspend won't happen here, because we're in a syscall from a userspace
+ * thread. */
+ kbase_pm_context_active(kbdev);
+
+ kbase_jd_zap_context(kctx);
+ kbase_event_cleanup(kctx);
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* MMU is disabled as part of scheduling out the context */
+ kbase_mmu_free_pgd(kctx);
+
+ /* drop the aliasing sink page now that it can't be mapped anymore */
+ kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0);
+
+ /* free pending region setups */
+ pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+ while (pending_regions_to_clean) {
+ unsigned int cookie = __ffs(pending_regions_to_clean);
+
+ BUG_ON(!kctx->pending_regions[cookie]);
+
+ kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+ kctx->pending_regions[cookie] = NULL;
+ pending_regions_to_clean &= ~(1UL << cookie);
+ }
+
+ kbase_region_tracker_term(kctx);
+ kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call even if the corresponding init was not reached (assuming kctx was sufficiently zeroed) */
+ kbasep_js_kctx_term(kctx);
+
+ kbase_jd_exit(kctx);
+
+ kbase_pm_context_idle(kbdev);
+
+ kbase_mmu_term(kctx);
+
+ pages = atomic_read(&kctx->used_pages);
+ if (pages != 0)
+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+ kbase_mem_allocator_term(&kctx->osalloc);
+ WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+ vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+ int err = 0;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ unsigned long irq_flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* Validate flags */
+ if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+
+ /* Translate the flags */
+ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+ js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
+
+ if ((flags & BASE_CONTEXT_HINT_ONLY_COMPUTE) != 0)
+ js_kctx_info->ctx.flags |= (u32) KBASE_CTX_FLAG_HINT_ONLY_COMPUTE;
+
+ /* Latch the initial attributes into the Job Scheduler */
+ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+ spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock,
+ irq_flags);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100644
index 00000000000..8cffa55fa98
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,3930 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_instr.h>
+#include <mali_kbase_gator.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <backend/gpu/mali_kbase_devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+#include <mali_kbase_cpuprops.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include <mali_kbase_hwaccess_backend.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/compat.h> /* is_compat_task */
+#include <linux/version.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+#include <platform/mali_kbase_platform_fake.h>
+#endif /*CONFIG_MALI_PLATFORM_FAKE */
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+
+#include <mali_kbase_config.h>
+
+#ifdef CONFIG_MACH_MANTA
+#include <plat/devs.h>
+#endif
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+/* GPU IRQ Tags */
+#define JOB_IRQ_TAG 0
+#define MMU_IRQ_TAG 1
+#define GPU_IRQ_TAG 2
+
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+#define KBASE_DRV_NAME "mali"
+
+static const char kbase_drv_name[] = KBASE_DRV_NAME;
+
+static int kbase_dev_nr;
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static inline void __compile_time_asserts(void)
+{
+ CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+#ifdef CONFIG_KDS
+
+struct kbasep_kds_resource_set_file_data {
+ struct kds_resource_set *lock;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file);
+
+static const struct file_operations kds_resource_fops = {
+ .release = kds_resource_release
+};
+
+struct kbase_kds_resource_list_data {
+ struct kds_resource **kds_resources;
+ unsigned long *kds_access_bitmap;
+ int num_elems;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file)
+{
+ struct kbasep_kds_resource_set_file_data *data;
+
+ data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
+ if (NULL != data) {
+ if (NULL != data->lock)
+ kds_resource_set_release(&data->lock);
+
+ kfree(data);
+ }
+ return 0;
+}
+
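+/*
+ * Build the KDS resource and exclusive-access lists for a set of external
+ * resources. For each entry the enclosing GPU VA region is looked up and,
+ * for UMP imports, the region's KDS resource is collected; one bit per
+ * resource in kds_access_bitmap marks exclusive access. On success the
+ * caller owns the two allocations; on any failure after the first
+ * allocation they are freed here.
+ */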
+static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list)
+{
+ struct base_external_resource *res = ext_res;
+ int res_id;
+
+ /* assume we have to wait for all */
+
+ KBASE_DEBUG_ASSERT(0 != num_elems);
+ resources_list->kds_resources = kmalloc_array(num_elems,
+ sizeof(struct kds_resource *), GFP_KERNEL);
+
+ if (NULL == resources_list->kds_resources)
+ return -ENOMEM;
+
+ KBASE_DEBUG_ASSERT(0 != num_elems);
+ resources_list->kds_access_bitmap = kzalloc(
+ sizeof(unsigned long) *
+ ((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG),
+ GFP_KERNEL);
+
+ if (NULL == resources_list->kds_access_bitmap) {
+ /* bitmap allocation failed; free the resources array
+ * allocated above */
+ kfree(resources_list->kds_resources);
+ return -ENOMEM;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+ for (res_id = 0; res_id < num_elems; res_id++, res++) {
+ int exclusive;
+ struct kbase_va_region *reg;
+ struct kds_resource *kds_res = NULL;
+
+ exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE;
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+ /* did we find a matching region object? */
+ if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+ break;
+
+ /* no need to check reg->alloc as only regions with an alloc have
+ * a size, and kbase_region_tracker_find_region_enclosing_address
+ * only returns regions with size > 0 */
+ switch (reg->gpu_alloc->type) {
+#if defined(CONFIG_UMP) && defined(CONFIG_KDS)
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
+ break;
+#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */
+ default:
+ break;
+ }
+
+ /* no kds resource for the region ? */
+ if (!kds_res)
+ break;
+
+ resources_list->kds_resources[res_id] = kds_res;
+
+ if (exclusive)
+ set_bit(res_id, resources_list->kds_access_bitmap);
+ }
+ kbase_gpu_vm_unlock(kctx);
+
+ /* did the loop run to completion? */
+ if (res_id == num_elems)
+ return 0;
+
+ /* Clean up as the resource list is not valid. */
+ kfree(resources_list->kds_resources);
+ kfree(resources_list->kds_access_bitmap);
+
+ return -EINVAL;
+}
+
+static bool kbasep_validate_kbase_pointer(
+ struct kbase_context *kctx, union kbase_pointer *p)
+{
+ if (kctx->is_compat) {
+ if (p->compat_value == 0)
+ return false;
+ } else {
+ if (NULL == p->value)
+ return false;
+ }
+ return true;
+}
+
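+/*
+ * Lock a set of external buffers on behalf of user space: validate the
+ * arguments, copy the resource list from user space, wrap the eventual
+ * kds_resource_set in an anonymous inode file (so the lock is released
+ * when the returned fd is closed), then block in kds_waitall() until all
+ * requested resources are held.
+ */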
+static int kbase_external_buffer_lock(struct kbase_context *kctx,
+ struct kbase_uk_ext_buff_kds_data *args, u32 args_size)
+{
+ struct base_external_resource *ext_res_copy;
+ size_t ext_resource_size;
+ int ret = -EINVAL;
+ int fd = -EBADF;
+ struct base_external_resource __user *ext_res_user;
+ int __user *file_desc_usr;
+ struct kbasep_kds_resource_set_file_data *fdata;
+ struct kbase_kds_resource_list_data resource_list_data;
+
+ if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data))
+ return -EINVAL;
+
+ /* Check user space has provided valid data */
+ if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) ||
+ !kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) ||
+ (0 == args->num_res) ||
+ (args->num_res > KBASE_MAXIMUM_EXT_RESOURCES))
+ return -EINVAL;
+
+ ext_resource_size = sizeof(struct base_external_resource) * args->num_res;
+
+ KBASE_DEBUG_ASSERT(0 != ext_resource_size);
+ ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL);
+
+ if (!ext_res_copy)
+ return -EINVAL;
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat) {
+ ext_res_user = compat_ptr(args->external_resource.compat_value);
+ file_desc_usr = compat_ptr(args->file_descriptor.compat_value);
+ } else {
+#endif /* CONFIG_COMPAT */
+ ext_res_user = args->external_resource.value;
+ file_desc_usr = args->file_descriptor.value;
+#ifdef CONFIG_COMPAT
+ }
+#endif /* CONFIG_COMPAT */
+
+ /* Copy the external resources to lock from user space */
+ if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size))
+ goto out;
+
+ /* Allocate data to be stored in the file */
+ fdata = kmalloc(sizeof(*fdata), GFP_KERNEL);
+
+ if (!fdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Parse given elements and create resource and access lists */
+ ret = kbasep_kds_allocate_resource_list_data(kctx,
+ ext_res_copy, args->num_res, &resource_list_data);
+ if (!ret) {
+ long err;
+
+ fdata->lock = NULL;
+
+ fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0);
+
+ err = copy_to_user(file_desc_usr, &fd, sizeof(fd));
+
+ /* If the file descriptor was valid and we successfully copied
+ * it to user space, then we can try and lock the requested
+ * kds resources.
+ */
+ if ((fd >= 0) && (0 == err)) {
+ struct kds_resource_set *lock;
+
+ lock = kds_waitall(args->num_res,
+ resource_list_data.kds_access_bitmap,
+ resource_list_data.kds_resources,
+ KDS_WAIT_BLOCKING);
+
+ if (IS_ERR_OR_NULL(lock)) {
+ ret = -EINVAL;
+ } else {
+ ret = 0;
+ fdata->lock = lock;
+ }
+ } else {
+ ret = -EINVAL;
+ }
+
+ kfree(resource_list_data.kds_resources);
+ kfree(resource_list_data.kds_access_bitmap);
+ }
+
+ if (ret) {
+ /* If the file was opened successfully then close it which will
+ * clean up the file data, otherwise we clean up the file data
+ * ourself.
+ */
+ if (fd >= 0)
+ sys_close(fd);
+ else
+ kfree(fdata);
+ }
+out:
+ kfree(ext_res_copy);
+
+ return ret;
+}
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ unsigned int lpu_id;
+ struct kbasep_kctx_list_element *element;
+
+ /* Create LPU objects. */
+ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+ gpu_js_features *lpu =
+ &kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+ kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, (u32)*lpu);
+ }
+
+ /* Create GPU object and make it retain all LPUs. */
+ kbase_tlstream_tl_summary_new_gpu(
+ kbdev,
+ kbdev->gpu_props.props.raw_props.gpu_id,
+ kbdev->gpu_props.num_cores);
+
+ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+ void *lpu =
+ &kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+ kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, kbdev);
+ }
+
+ /* Create object for each known context. */
+ mutex_lock(&kbdev->kctx_list_lock);
+ list_for_each_entry(element, &kbdev->kctx_list, link) {
+ kbase_tlstream_tl_summary_new_ctx(
+ element->kctx,
+ (u32)(element->kctx->id));
+ }
+ /* Before releasing the lock, reset the body stream buffers.
+ * This prevents context creation messages from being directed to
+ * both the summary and the body stream. */
+ kbase_tlstream_reset_body_streams();
+ mutex_unlock(&kbdev->kctx_list_lock);
+ /* Static objects are placed into the summary packet, which needs to
+ * be transmitted first. Flush all streams to make it available to
+ * user space. */
+ kbase_tlstream_flush_streams();
+}
+#endif
+
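+/*
+ * Version handshake between the user and kernel sides of the UK interface.
+ * The caller's requested major version is matched against the versions
+ * this driver can speak: legacy versions 6 and 7 are emulated when the
+ * corresponding BASE_LEGACY_UK*_SUPPORT options are enabled, an exact
+ * major match negotiates the lowest common minor, and anything else gets
+ * our actual version back so user space can decide whether to bail.
+ */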
+static void kbase_api_handshake(struct uku_version_check_args *version)
+{
+ switch (version->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ case 6:
+ /* We are backwards compatible with version 6,
+ * so pretend to be the old version */
+ version->major = 6;
+ version->minor = 1;
+ break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ case 7:
+ /* We are backwards compatible with version 7,
+ * so pretend to be the old version */
+ version->major = 7;
+ version->minor = 1;
+ break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+ case BASE_UK_VERSION_MAJOR:
+ /* set minor to be the lowest common */
+ version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+ (int)version->minor);
+ break;
+ default:
+ /* We return our actual version regardless if it
+ * matches the version returned by userspace -
+ * userspace can bail if it can't handle this
+ * version */
+ version->major = BASE_UK_VERSION_MAJOR;
+ version->minor = BASE_UK_VERSION_MINOR;
+ break;
+ }
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is a subset of the common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ */
+enum mali_error {
+ MALI_ERROR_NONE = 0,
+ MALI_ERROR_OUT_OF_GPU_MEMORY,
+ MALI_ERROR_OUT_OF_MEMORY,
+ MALI_ERROR_FUNCTION_FAILED,
+};
+
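+/*
+ * Central UK call dispatcher. Each call carries a union uk_header whose
+ * id selects the function and whose ret field reports a mali_error back
+ * to user space; a non-zero C return value instead signals a transport
+ * error to kbase_ioctl(). Before normal operation a context must complete
+ * the version handshake and a single KBASE_FUNC_SET_FLAGS setup call.
+ */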
+static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+ struct kbase_device *kbdev;
+ union uk_header *ukh = args;
+ u32 id;
+
+ KBASE_DEBUG_ASSERT(ukh != NULL);
+
+ kbdev = kctx->kbdev;
+ id = ukh->id;
+ ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+ if (UKP_FUNC_ID_CHECK_VERSION == id) {
+ struct uku_version_check_args *version_check;
+
+ if (args_size != sizeof(struct uku_version_check_args)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ return 0;
+ }
+ version_check = (struct uku_version_check_args *)args;
+ kbase_api_handshake(version_check);
+ /* save the proposed version number for later use */
+ kctx->api_version = KBASE_API_VERSION(version_check->major,
+ version_check->minor);
+ ukh->ret = MALI_ERROR_NONE;
+ return 0;
+ }
+
+ /* block calls until version handshake */
+ if (kctx->api_version == 0)
+ return -EINVAL;
+
+ if (!atomic_read(&kctx->setup_complete)) {
+ struct kbase_uk_set_flags *kbase_set_flags;
+
+ /* setup pending, try to signal that we'll do the setup,
+ * if setup was already in progress, err this call
+ */
+ if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1))
+ return -EINVAL;
+
+ /* if unexpected call, stay stuck in setup mode;
+ * KBASE_FUNC_SET_FLAGS is the only call accepted during setup
+ */
+ if (id != KBASE_FUNC_SET_FLAGS)
+ return -EINVAL;
+
+ kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+ /* if not matching the expected call, stay in setup mode */
+ if (sizeof(*kbase_set_flags) != args_size)
+ goto bad_size;
+
+ /* if bad flags, will stay stuck in setup mode */
+ if (kbase_context_set_create_flags(kctx,
+ kbase_set_flags->create_flags) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+ atomic_set(&kctx->setup_complete, 1);
+ return 0;
+ }
+
+ /* setup complete, perform normal operation */
+ switch (id) {
+ case KBASE_FUNC_MEM_ALLOC:
+ {
+ struct kbase_uk_mem_alloc *mem = args;
+ struct kbase_va_region *reg;
+
+ if (sizeof(*mem) != args_size)
+ goto bad_size;
+
+ reg = kbase_mem_alloc(kctx, mem->va_pages,
+ mem->commit_pages, mem->extent,
+ &mem->flags, &mem->gpu_va,
+ &mem->va_alignment);
+ if (!reg)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ case KBASE_FUNC_MEM_IMPORT:
+ {
+ struct kbase_uk_mem_import *mem_import = args;
+ int __user *phandle;
+ int handle;
+
+ if (sizeof(*mem_import) != args_size)
+ goto bad_size;
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ phandle = compat_ptr(mem_import->phandle.compat_value);
+ else
+#endif
+ phandle = mem_import->phandle.value;
+
+ switch (mem_import->type) {
+ case BASE_MEM_IMPORT_TYPE_UMP:
+ get_user(handle, phandle);
+ break;
+ case BASE_MEM_IMPORT_TYPE_UMM:
+ get_user(handle, phandle);
+ break;
+ default:
+ mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+ break;
+ }
+
+ if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID ||
+ kbase_mem_import(kctx, mem_import->type,
+ handle, &mem_import->gpu_va,
+ &mem_import->va_pages,
+ &mem_import->flags))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ case KBASE_FUNC_MEM_ALIAS: {
+ struct kbase_uk_mem_alias *alias = args;
+ struct base_mem_aliasing_info __user *user_ai;
+ struct base_mem_aliasing_info *ai;
+
+ if (sizeof(*alias) != args_size)
+ goto bad_size;
+
+ if (alias->nents > 2048) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ if (!alias->nents) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ user_ai = compat_ptr(alias->ai.compat_value);
+ else
+#endif
+ user_ai = alias->ai.value;
+
+ ai = vmalloc(sizeof(*ai) * alias->nents);
+
+ if (!ai) {
+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+ break;
+ }
+
+ if (copy_from_user(ai, user_ai,
+ sizeof(*ai) * alias->nents)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ goto copy_failed;
+ }
+
+ alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+ alias->stride,
+ alias->nents, ai,
+ &alias->va_pages);
+ if (!alias->gpu_va) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ goto no_alias;
+ }
+no_alias:
+copy_failed:
+ vfree(ai);
+ break;
+ }
+ case KBASE_FUNC_MEM_COMMIT:
+ {
+ struct kbase_uk_mem_commit *commit = args;
+
+ if (sizeof(*commit) != args_size)
+ goto bad_size;
+
+ if (commit->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_commit(kctx, commit->gpu_addr,
+ commit->pages,
+ (base_backing_threshold_status *)&commit->result_subcode))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+ break;
+ }
+
+ case KBASE_FUNC_MEM_QUERY:
+ {
+ struct kbase_uk_mem_query *query = args;
+
+ if (sizeof(*query) != args_size)
+ goto bad_size;
+
+ if (query->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+ query->query != KBASE_MEM_QUERY_VA_SIZE &&
+ query->query != KBASE_MEM_QUERY_FLAGS) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_query(kctx, query->gpu_addr,
+ query->query, &query->value) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+ break;
+ }
+
+ case KBASE_FUNC_MEM_FLAGS_CHANGE:
+ {
+ struct kbase_uk_mem_flags_change *fc = args;
+
+ if (sizeof(*fc) != args_size)
+ goto bad_size;
+
+ if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_flags_change(kctx, fc->gpu_va, fc->flags, fc->mask))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+ break;
+ }
+ case KBASE_FUNC_MEM_FREE:
+ {
+ struct kbase_uk_mem_free *mem = args;
+
+ if (sizeof(*mem) != args_size)
+ goto bad_size;
+
+ if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_free(kctx, mem->gpu_addr))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ case KBASE_FUNC_JOB_SUBMIT:
+ {
+ struct kbase_uk_job_submit *job = args;
+
+ if (sizeof(*job) != args_size)
+ goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ if (kbase_jd_submit(kctx, job, 0) != 0)
+#else
+ if (kbase_jd_submit(kctx, job) != 0)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ case KBASE_FUNC_JOB_SUBMIT_UK6:
+ {
+ struct kbase_uk_job_submit *job = args;
+
+ if (sizeof(*job) != args_size)
+ goto bad_size;
+
+ if (kbase_jd_submit(kctx, job, 1) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+#endif
+
+ case KBASE_FUNC_SYNC:
+ {
+ struct kbase_uk_sync_now *sn = args;
+
+ if (sizeof(*sn) != args_size)
+ goto bad_size;
+
+ if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifndef CONFIG_MALI_CACHE_COHERENT
+ if (kbase_sync_now(kctx, &sn->sset) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif
+ break;
+ }
+
+ case KBASE_FUNC_DISJOINT_QUERY:
+ {
+ struct kbase_uk_disjoint_query *dquery = args;
+
+ if (sizeof(*dquery) != args_size)
+ goto bad_size;
+
+ /* Get the disjointness counter value. */
+ dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+ break;
+ }
+
+ case KBASE_FUNC_POST_TERM:
+ {
+ kbase_event_close(kctx);
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_SETUP:
+ {
+ struct kbase_uk_hwcnt_setup *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ mutex_lock(&kctx->vinstr_cli_lock);
+ if (kbase_instr_hwcnt_setup(kctx, setup) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ mutex_unlock(&kctx->vinstr_cli_lock);
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_DUMP:
+ {
+ /* args ignored */
+ mutex_lock(&kctx->vinstr_cli_lock);
+ if (kbase_instr_hwcnt_dump(kctx) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ mutex_unlock(&kctx->vinstr_cli_lock);
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_CLEAR:
+ {
+ /* args ignored */
+ mutex_lock(&kctx->vinstr_cli_lock);
+ if (kbase_vinstr_clear(kbdev->vinstr_ctx,
+ kctx->vinstr_cli) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ mutex_unlock(&kctx->vinstr_cli_lock);
+ break;
+ }
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ case KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE:
+ {
+ struct kbase_uk_cpuprops *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ if (kbase_cpuprops_uk_get_props(kctx, setup) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+
+ case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+ {
+ struct kbase_uk_gpuprops *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ if (kbase_gpuprops_uk_get_props(kctx, setup) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ case KBASE_FUNC_FIND_CPU_OFFSET:
+ {
+ struct kbase_uk_find_cpu_offset *find = args;
+
+ if (sizeof(*find) != args_size)
+ goto bad_size;
+
+ if (find->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+ goto out_bad;
+ }
+
+ if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ } else {
+ int err;
+
+ err = kbasep_find_enclosing_cpu_mapping_offset(
+ kctx,
+ find->gpu_addr,
+ (uintptr_t) find->cpu_addr,
+ (size_t) find->size,
+ &find->offset);
+
+ if (err)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ }
+ break;
+ }
+ case KBASE_FUNC_GET_VERSION:
+ {
+ struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+ if (sizeof(*get_version) != args_size)
+ goto bad_size;
+
+ /* version buffer size check is made in compile time assert */
+ memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+ get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+ break;
+ }
+
+ case KBASE_FUNC_STREAM_CREATE:
+ {
+#ifdef CONFIG_SYNC
+ struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+ if (sizeof(*screate) != args_size)
+ goto bad_size;
+
+ if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+ /* not NULL terminated */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_stream_create(screate->name, &screate->fd) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+#else /* CONFIG_SYNC */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+ break;
+ }
+ case KBASE_FUNC_FENCE_VALIDATE:
+ {
+#ifdef CONFIG_SYNC
+ struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+ if (sizeof(*fence_validate) != args_size)
+ goto bad_size;
+
+ if (kbase_fence_validate(fence_validate->fd) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+#endif /* CONFIG_SYNC */
+ break;
+ }
+
+ case KBASE_FUNC_EXT_BUFFER_LOCK:
+ {
+#ifdef CONFIG_KDS
+ switch (kbase_external_buffer_lock(kctx,
+ (struct kbase_uk_ext_buff_kds_data *)args,
+ args_size)) {
+ case 0:
+ ukh->ret = MALI_ERROR_NONE;
+ break;
+ case -ENOMEM:
+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+ break;
+ default:
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ }
+#endif /* CONFIG_KDS */
+ break;
+ }
+
+ case KBASE_FUNC_SET_TEST_DATA:
+ {
+#if MALI_UNIT_TEST
+ struct kbase_uk_set_test_data *set_data = args;
+
+ shared_kernel_test_data = set_data->test_data;
+ shared_kernel_test_data.kctx.value = (void __user *)kctx;
+ shared_kernel_test_data.mm.value = (void __user *)current->mm;
+ ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+ break;
+ }
+
+ case KBASE_FUNC_INJECT_ERROR:
+ {
+#ifdef CONFIG_MALI_ERROR_INJECT
+ unsigned long flags;
+ struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+ /* take the register operation spinlock */
+ spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+ if (job_atom_inject_error(&params) != 0)
+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+ /* release the register operation spinlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+ break;
+ }
+
+ case KBASE_FUNC_MODEL_CONTROL:
+ {
+#ifdef CONFIG_MALI_NO_MALI
+ unsigned long flags;
+ struct kbase_model_control_params params =
+ ((struct kbase_uk_model_control_params *)args)->params;
+
+ /* take the register operation spinlock */
+ spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+ if (gpu_model_control(kbdev->model, &params) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+ /* release the register operation spinlock */
+#endif /* CONFIG_MALI_NO_MALI */
+ break;
+ }
+
+ case KBASE_FUNC_GET_PROFILING_CONTROLS:
+ {
+ struct kbase_uk_profiling_controls *controls =
+ (struct kbase_uk_profiling_controls *)args;
+ u32 i;
+
+ if (sizeof(*controls) != args_size)
+ goto bad_size;
+
+ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+ controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+ break;
+ }
+
+ /* used only for testing purposes; these controls are to be set by gator through gator API */
+ case KBASE_FUNC_SET_PROFILING_CONTROLS:
+ {
+ struct kbase_uk_profiling_controls *controls =
+ (struct kbase_uk_profiling_controls *)args;
+ u32 i;
+
+ if (sizeof(*controls) != args_size)
+ goto bad_size;
+
+ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+ _mali_profiling_control(i, controls->profiling_controls[i]);
+
+ break;
+ }
+
+ case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+ {
+ struct kbase_uk_debugfs_mem_profile_add *add_data =
+ (struct kbase_uk_debugfs_mem_profile_add *)args;
+ char *buf;
+ char __user *user_buf;
+
+ if (sizeof(*add_data) != args_size)
+ goto bad_size;
+
+ if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+ dev_err(kbdev->dev, "buffer too big");
+ goto out_bad;
+ }
+
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ user_buf = compat_ptr(add_data->buf.compat_value);
+ else
+#endif
+ user_buf = add_data->buf.value;
+
+ buf = kmalloc(add_data->len, GFP_KERNEL);
+ if (!buf)
+ goto out_bad;
+
+ if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ kfree(buf);
+ goto out_bad;
+ }
+ kbasep_mem_profile_debugfs_insert(kctx, buf,
+ add_data->len);
+
+ break;
+ }
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ case KBASE_FUNC_TLSTREAM_ACQUIRE:
+ {
+ struct kbase_uk_tlstream_acquire *tlstream_acquire =
+ args;
+
+ if (sizeof(*tlstream_acquire) != args_size)
+ goto bad_size;
+
+ if (0 != kbase_tlstream_acquire(
+ kctx,
+ &tlstream_acquire->fd)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ } else if (0 <= tlstream_acquire->fd) {
+ /* Summary stream was cleared during acquire.
+ * Create static timeline objects that will be
+ * read by client. */
+ kbase_create_timeline_objects(kctx);
+ }
+ break;
+ }
+ case KBASE_FUNC_TLSTREAM_FLUSH:
+ {
+ struct kbase_uk_tlstream_flush *tlstream_flush =
+ args;
+
+ if (sizeof(*tlstream_flush) != args_size)
+ goto bad_size;
+
+ kbase_tlstream_flush_streams();
+ break;
+ }
+#if MALI_UNIT_TEST
+ case KBASE_FUNC_TLSTREAM_TEST:
+ {
+ struct kbase_uk_tlstream_test *tlstream_test = args;
+
+ if (sizeof(*tlstream_test) != args_size)
+ goto bad_size;
+
+ kbase_tlstream_test(
+ tlstream_test->tpw_count,
+ tlstream_test->msg_delay,
+ tlstream_test->msg_count,
+ tlstream_test->aux_msg);
+ break;
+ }
+ case KBASE_FUNC_TLSTREAM_STATS:
+ {
+ struct kbase_uk_tlstream_stats *tlstream_stats = args;
+
+ if (sizeof(*tlstream_stats) != args_size)
+ goto bad_size;
+
+ kbase_tlstream_stats(
+ &tlstream_stats->bytes_collected,
+ &tlstream_stats->bytes_generated);
+ break;
+ }
+#endif /* MALI_UNIT_TEST */
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+ /* Used to signal that the job core dump on fault has terminated, and to
+ * release the refcount of the context so that it can be removed. It
+ * requires BASE_UK_VERSION_MAJOR to be at least 8 and
+ * BASE_UK_VERSION_MINOR to be at least 1 in the UK interface.
+ */
+ case KBASE_FUNC_DUMP_FAULT_TERM:
+ {
+#if 2 == MALI_INSTRUMENTATION_LEVEL
+ if (atomic_read(&kctx->jctx.sched_info.ctx.fault_count) > 0 &&
+ kctx->jctx.sched_info.ctx.is_scheduled)
+ kbasep_js_dump_fault_term(kbdev, kctx);
+
+ break;
+#endif /* 2 == MALI_INSTRUMENTATION_LEVEL */
+
+ /* This IOCTL should only be called when instr=2 at compile time. */
+ goto out_bad;
+ }
+
+ case KBASE_FUNC_GET_CONTEXT_ID:
+ {
+ struct kbase_uk_context_id *info = args;
+
+ info->id = kctx->id;
+ break;
+ }
+
+ default:
+ dev_err(kbdev->dev, "unknown ioctl %u", id);
+ goto out_bad;
+ }
+
+ return 0;
+
+ bad_size:
+ dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+ return -EINVAL;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+ return dev_get_drvdata(dev);
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+ mutex_lock(&kbase_dev_list_lock);
+ return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+ mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+ struct kbase_device *kbdev = NULL;
+ struct list_head *entry;
+ const struct list_head *dev_list = kbase_dev_list_get();
+
+ list_for_each(entry, dev_list) {
+ struct kbase_device *tmp;
+
+ tmp = list_entry(entry, struct kbase_device, entry);
+ if (tmp->mdev.minor == minor || minor == -1) {
+ kbdev = tmp;
+ get_device(kbdev->dev);
+ break;
+ }
+ }
+ kbase_dev_list_put(dev_list);
+
+ return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+ put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+ struct kbase_device *kbdev = NULL;
+ struct kbase_context *kctx;
+ int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+ char kctx_name[64];
+#endif
+
+ kbdev = kbase_find_device(iminor(inode));
+
+ if (!kbdev)
+ return -ENODEV;
+
+ kctx = kbase_create_context(kbdev, is_compat_task());
+ if (!kctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ init_waitqueue_head(&kctx->event_queue);
+ filp->private_data = kctx;
+
+ kctx->infinite_cache_active = kbdev->infinite_cache_active_default;
+
+#ifdef CONFIG_DEBUG_FS
+ snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+ kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+ kbdev->debugfs_ctx_directory);
+
+ if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+ ret = -ENOMEM;
+ kbase_destroy_context(kctx);
+ goto out;
+ }
+
+#ifdef CONFIG_MALI_CACHE_COHERENT
+ /* if cache is completely coherent at hardware level, then remove the
+ * infinite cache control support from debugfs.
+ */
+#else
+ debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry,
+ &kctx->infinite_cache_active);
+#endif /* CONFIG_MALI_CACHE_COHERENT */
+ kbasep_mem_profile_debugfs_add(kctx);
+
+ kbasep_jd_debugfs_ctx_add(kctx);
+ kbase_debug_mem_view_init(filp);
+#endif
+
+ dev_dbg(kbdev->dev, "created base context\n");
+
+ {
+ struct kbasep_kctx_list_element *element;
+
+ element = kzalloc(sizeof(*element), GFP_KERNEL);
+ if (element) {
+ mutex_lock(&kbdev->kctx_list_lock);
+ element->kctx = kctx;
+ list_add(&element->link, &kbdev->kctx_list);
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ kbase_tlstream_tl_new_ctx(
+ element->kctx,
+ (u32)(element->kctx->id));
+#endif
+ mutex_unlock(&kbdev->kctx_list_lock);
+ } else {
+ /* we don't treat this as a fail - just warn about it */
+ dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+ }
+ }
+ return 0;
+
+ out:
+ kbase_release_device(kbdev);
+ return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+ struct kbase_context *kctx = filp->private_data;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_kctx_list_element *element, *tmp;
+ bool found_element = false;
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ kbase_tlstream_tl_del_ctx(kctx);
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+ debugfs_remove_recursive(kctx->kctx_dentry);
+ kbasep_mem_profile_debugfs_remove(kctx);
+#endif
+
+ mutex_lock(&kbdev->kctx_list_lock);
+ list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+ if (element->kctx == kctx) {
+ list_del(&element->link);
+ kfree(element);
+ found_element = true;
+ }
+ }
+ mutex_unlock(&kbdev->kctx_list_lock);
+ if (!found_element)
+ dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+ filp->private_data = NULL;
+
+ mutex_lock(&kctx->vinstr_cli_lock);
+ /* If this client was performing hwcnt dumping and did not explicitly
+ * detach itself, remove it from the vinstr core now */
+ kbase_vinstr_detach_client(kctx->kbdev->vinstr_ctx, kctx->vinstr_cli);
+ mutex_unlock(&kctx->vinstr_cli_lock);
+
+ kbase_destroy_context(kctx);
+
+ dev_dbg(kbdev->dev, "deleted base context\n");
+ kbase_release_device(kbdev);
+ return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
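+/*
+ * ioctl entry point: the argument block (at most CALL_MAX_SIZE bytes) is
+ * copied into an 8-byte aligned kernel buffer, dispatched, and the
+ * (possibly updated) block is copied back so results reach user space.
+ */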
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull }; /* alignment fixup */
+ u32 size = _IOC_SIZE(cmd);
+ struct kbase_context *kctx = filp->private_data;
+
+ if (size > CALL_MAX_SIZE)
+ return -ENOTTY;
+
+ if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+ dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+ return -EFAULT;
+ }
+
+ if (kbase_dispatch(kctx, &msg, size) != 0)
+ return -EFAULT;
+
+ if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+ dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+ return -EFAULT;
+ }
+ return 0;
+}
+
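+/*
+ * Read as many completed job events as fit in the user buffer. The call
+ * blocks until at least one event is available unless O_NONBLOCK is set,
+ * and returns -EPIPE if the first event seen is DRV_TERMINATED.
+ */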
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+ struct kbase_context *kctx = filp->private_data;
+ struct base_jd_event_v2 uevent;
+ int out_count = 0;
+
+ if (count < sizeof(uevent))
+ return -ENOBUFS;
+
+ do {
+ while (kbase_event_dequeue(kctx, &uevent)) {
+ if (out_count > 0)
+ goto out;
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(kctx->event_queue, kbase_event_pending(kctx)))
+ return -ERESTARTSYS;
+ }
+ if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+ if (out_count == 0)
+ return -EPIPE;
+ goto out;
+ }
+
+ if (copy_to_user(buf, &uevent, sizeof(uevent)))
+ return -EFAULT;
+
+ buf += sizeof(uevent);
+ out_count++;
+ count -= sizeof(uevent);
+ } while (count >= sizeof(uevent));
+
+ out:
+ return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+ struct kbase_context *kctx = filp->private_data;
+
+ poll_wait(filp, &kctx->event_queue, wait);
+ if (kbase_event_pending(kctx))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+ wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+ /* Enforce that user space opened the device with O_CLOEXEC so that
+ * execve() always closes the file descriptor in a child process.
+ */
+ if (0 == (flags & O_CLOEXEC))
+ return -EINVAL;
+
+ return 0;
+}
+
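+/*
+ * On 64-bit kernels, GPUs with the 33BIT_VA feature cannot address the
+ * whole user VA space, so for non-compat tasks the mapping is placed by
+ * a top-down search of the VMA rb-tree (in the style of the kernel's
+ * unmapped_area_topdown) for a gap below the 2^33 boundary; otherwise
+ * the mm default is used.
+ */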
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+ const unsigned long addr, const unsigned long len,
+ const unsigned long pgoff, const unsigned long flags)
+{
+#ifdef CONFIG_64BIT
+ /* based on get_unmapped_area, but simplified slightly because some
+ * values are known in advance */
+ struct kbase_context *kctx = filp->private_data;
+
+ if (!kctx->is_compat && !addr &&
+ kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long low_limit, high_limit, gap_start, gap_end;
+
+ /* Hardware has smaller VA than userspace, ensure the page
+ * comes from a VA which can be used on the GPU */
+
+ gap_end = (1UL<<33);
+ if (gap_end < len)
+ return -ENOMEM;
+ high_limit = gap_end - len;
+ low_limit = PAGE_SIZE + len;
+
+ gap_start = mm->highest_vm_end;
+ if (gap_start <= high_limit)
+ goto found_highest;
+
+ if (RB_EMPTY_ROOT(&mm->mm_rb))
+ return -ENOMEM;
+ vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+ if (vma->rb_subtree_gap < len)
+ return -ENOMEM;
+
+ while (true) {
+ gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+ if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+ struct vm_area_struct *right =
+ rb_entry(vma->vm_rb.rb_right,
+ struct vm_area_struct, vm_rb);
+ if (right->rb_subtree_gap >= len) {
+ vma = right;
+ continue;
+ }
+ }
+check_current:
+ gap_end = vma->vm_start;
+ if (gap_end < low_limit)
+ return -ENOMEM;
+ if (gap_start <= high_limit &&
+ gap_end - gap_start >= len)
+ goto found;
+
+ if (vma->vm_rb.rb_left) {
+ struct vm_area_struct *left =
+ rb_entry(vma->vm_rb.rb_left,
+ struct vm_area_struct, vm_rb);
+
+ if (left->rb_subtree_gap >= len) {
+ vma = left;
+ continue;
+ }
+ }
+ while (true) {
+ struct rb_node *prev = &vma->vm_rb;
+
+ if (!rb_parent(prev))
+ return -ENOMEM;
+ vma = rb_entry(rb_parent(prev),
+ struct vm_area_struct, vm_rb);
+ if (prev == vma->vm_rb.rb_right) {
+ gap_start = vma->vm_prev ?
+ vma->vm_prev->vm_end : 0;
+ goto check_current;
+ }
+ }
+ }
+
+found:
+ if (gap_end > (1UL<<33))
+ gap_end = (1UL<<33);
+
+found_highest:
+ gap_end -= len;
+
+ VM_BUG_ON(gap_end < PAGE_SIZE);
+ VM_BUG_ON(gap_end < gap_start);
+ return gap_end;
+ }
+#endif
+ /* No special requirements - fallback to the default version */
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+
+static const struct file_operations kbase_fops = {
+ .owner = THIS_MODULE,
+ .open = kbase_open,
+ .release = kbase_release,
+ .read = kbase_read,
+ .poll = kbase_poll,
+ .unlocked_ioctl = kbase_ioctl,
+ .compat_ioctl = kbase_ioctl,
+ .mmap = kbase_mmap,
+ .check_flags = kbase_check_flags,
+ .get_unmapped_area = kbase_get_unmapped_area,
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+ writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+ return readl(kbdev->reg + offset);
+}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+
+/** Show callback for the @c power_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_policy *current_policy;
+ const struct kbase_pm_policy *const *policy_list;
+ int policy_count;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ current_policy = kbase_pm_get_policy(kbdev);
+
+ policy_count = kbase_pm_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+ if (policy_list[i] == current_policy)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+ }
+
+ if (ret < PAGE_SIZE - 1) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ } else {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c power_policy sysfs file.
+ *
+ * This function is called when the @c power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls @ref kbase_pm_set_policy to change the
+ * policy.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_policy *new_policy = NULL;
+ const struct kbase_pm_policy *const *policy_list;
+ int policy_count;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ policy_count = kbase_pm_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count; i++) {
+ if (sysfs_streq(policy_list[i]->name, buf)) {
+ new_policy = policy_list[i];
+ break;
+ }
+ }
+
+ if (!new_policy) {
+ dev_err(dev, "power_policy: policy not found\n");
+ return -EINVAL;
+ }
+
+ kbase_pm_set_policy(kbdev, new_policy);
+
+ return count;
+}
+
+/** The sysfs file @c power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/** Show callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_ca_policy *current_policy;
+ const struct kbase_pm_ca_policy *const *policy_list;
+ int policy_count;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ current_policy = kbase_pm_ca_get_policy(kbdev);
+
+ policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+ if (policy_list[i] == current_policy)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+ }
+
+ if (ret < PAGE_SIZE - 1) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ } else {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called when the @c core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls @ref kbase_pm_set_policy to change
+ * the policy.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_ca_policy *new_policy = NULL;
+ const struct kbase_pm_ca_policy *const *policy_list;
+ int policy_count;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count; i++) {
+ if (sysfs_streq(policy_list[i]->name, buf)) {
+ new_policy = policy_list[i];
+ break;
+ }
+ }
+
+ if (!new_policy) {
+ dev_err(dev, "core_availability_policy: policy not found\n");
+ return -EINVAL;
+ }
+
+ kbase_pm_ca_set_policy(kbdev, new_policy);
+
+ return count;
+}
+
+/** The sysfs file @c core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/** Show callback for the @c core_mask sysfs file.
+ *
+ * This function is called to get the contents of the @c core_mask sysfs
+ * file.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask);
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->shader_present_bitmap);
+
+ return ret;
+}
+
+/** Store callback for the @c core_mask sysfs file.
+ *
+ * This function is called when the @c core_mask sysfs file is written to.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ u64 new_core_mask;
+ int rc;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ rc = kstrtoull(buf, 16, &new_core_mask);
+ if (rc)
+ return rc;
+
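+ /* The new mask must only name cores that are physically present, and
+ * must keep at least one core of coherency group 0 available. */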
+ if ((new_core_mask & kbdev->shader_present_bitmap) != new_core_mask ||
+ !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) {
+ dev_err(dev, "power_policy: invalid core specification\n");
+ return -EINVAL;
+ }
+
+ if (kbdev->pm.debug_core_mask != new_core_mask) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbase_pm_set_debug_core_mask(kbdev, new_core_mask);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ }
+
+ return count;
+}
+
+/** The sysfs file @c core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+/**
+ * struct sc_split_config
+ * @tag: Short name
+ * @human_readable: Long name
+ * @js0_mask: Mask for job slot 0
+ * @js1_mask: Mask for job slot 1
+ * @js2_mask: Mask for job slot 2
+ *
+ * Structure containing a single shader affinity split configuration.
+ */
+struct sc_split_config {
+ char const *tag;
+ char const *human_readable;
+ u64 js0_mask;
+ u64 js1_mask;
+ u64 js2_mask;
+};
+
+/*
+ * Array of available shader affinity split configurations.
+ */
+static struct sc_split_config const sc_split_configs[] = {
+ /* All must be the first config (default). */
+ {
+ "all", "All cores",
+ 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
+ },
+ {
+ "mp1", "MP1 shader core",
+ 0x1, 0x1, 0x1
+ },
+ {
+ "mp2", "MP2 shader core",
+ 0x3, 0x3, 0x3
+ },
+ {
+ "mp4", "MP4 shader core",
+ 0xF, 0xF, 0xF
+ },
+ {
+ "mp1_vf", "MP1 vertex + MP1 fragment shader core",
+ 0x2, 0x1, 0xFFFFFFFFFFFFFFFFULL
+ },
+ {
+ "mp2_vf", "MP2 vertex + MP2 fragment shader core",
+ 0xA, 0x5, 0xFFFFFFFFFFFFFFFFULL
+ },
+ /* This must be the last config. */
+ {
+ NULL, NULL,
+ 0x0, 0x0, 0x0
+ },
+};
+
+/* Pointer to the currently active shader split configuration. */
+static struct sc_split_config const *current_sc_split_config = &sc_split_configs[0];
+
+/** Show callback for the @c sc_split sysfs file
+ *
+ * Returns the current shader core affinity policy.
+ */
+static ssize_t show_split(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ ssize_t ret;
+ /* We know we are given a buffer which is PAGE_SIZE long. Our strings are
+ * all guaranteed to be shorter than that at this time, so no length check
+ * is needed. */
+ ret = scnprintf(buf, PAGE_SIZE, "Current sc_split: '%s'\n", current_sc_split_config->tag);
+ return ret;
+}
+
+/** Store callback for the @c sc_split sysfs file.
+ *
+ * This function is called when the @c sc_split sysfs file is written to.
+ * It modifies the system shader core affinity configuration to allow
+ * system profiling with different hardware configurations.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_split(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct sc_split_config const *config = &sc_split_configs[0];
+
+ /* Try to match: loop until we hit the last "NULL" entry */
+ while (config->tag) {
+ if (sysfs_streq(config->tag, buf)) {
+ current_sc_split_config = config;
+ mali_js0_affinity_mask = config->js0_mask;
+ mali_js1_affinity_mask = config->js1_mask;
+ mali_js2_affinity_mask = config->js2_mask;
+ dev_dbg(dev, "Setting sc_split: '%s'\n", config->tag);
+ return count;
+ }
+ config++;
+ }
+
+ /* No match found in config list */
+ dev_err(dev, "sc_split: invalid value\n");
+ dev_err(dev, " Possible settings: mp[1|2|4], mp[1|2]_vf\n");
+ return -ENOENT;
+}
+
+/** The sysfs file @c sc_split
+ *
+ * This is used for configuring/querying the current shader core work affinity
+ * configuration.
+ */
+static DEVICE_ATTR(sc_split, S_IRUGO|S_IWUSR, show_split, set_split);
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+
+/** Store callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called when the @c js_timeouts sysfs file is written
+ * to. The file accepts eight values separated by whitespace. The values
+ * correspond to the JS_SOFT_STOP_TICKS, JS_SOFT_STOP_TICKS_CL,
+ * JS_HARD_STOP_TICKS_SS, JS_HARD_STOP_TICKS_CL,
+ * JS_HARD_STOP_TICKS_DUMPING, JS_RESET_TICKS_SS, JS_RESET_TICKS_CL and
+ * JS_RESET_TICKS_DUMPING configuration values (in that order), with the
+ * difference that the js_timeouts values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfs file allows the current values in use by the
+ * job scheduler to be overridden. Note that a value needs to be other
+ * than 0 for it to override the current job scheduler value.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int items;
+ long js_soft_stop_ms;
+ long js_soft_stop_ms_cl;
+ long js_hard_stop_ms_ss;
+ long js_hard_stop_ms_cl;
+ long js_hard_stop_ms_dumping;
+ long js_reset_ms_ss;
+ long js_reset_ms_cl;
+ long js_reset_ms_dumping;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+ &js_soft_stop_ms, &js_soft_stop_ms_cl,
+ &js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+ &js_hard_stop_ms_dumping, &js_reset_ms_ss,
+ &js_reset_ms_cl, &js_reset_ms_dumping);
+
+ if (items == 8) {
+ u64 ticks;
+
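+ /* Convert each timeout from milliseconds to scheduler ticks:
+ * ticks = ms * 1000000 / scheduling_period_ns, with a negative
+ * value restoring the default (stored as -1). */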
+ if (js_soft_stop_ms >= 0) {
+ ticks = js_soft_stop_ms * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_soft_stop_ticks = ticks;
+ } else {
+ kbdev->js_soft_stop_ticks = -1;
+ }
+
+ if (js_soft_stop_ms_cl >= 0) {
+ ticks = js_soft_stop_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_soft_stop_ticks_cl = ticks;
+ } else {
+ kbdev->js_soft_stop_ticks_cl = -1;
+ }
+
+ if (js_hard_stop_ms_ss >= 0) {
+ ticks = js_hard_stop_ms_ss * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_ss = ticks;
+ } else {
+ kbdev->js_hard_stop_ticks_ss = -1;
+ }
+
+ if (js_hard_stop_ms_cl >= 0) {
+ ticks = js_hard_stop_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_cl = ticks;
+ } else {
+ kbdev->js_hard_stop_ticks_cl = -1;
+ }
+
+ if (js_hard_stop_ms_dumping >= 0) {
+ ticks = js_hard_stop_ms_dumping * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_dumping = ticks;
+ } else {
+ kbdev->js_hard_stop_ticks_dumping = -1;
+ }
+
+ if (js_reset_ms_ss >= 0) {
+ ticks = js_reset_ms_ss * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_reset_ticks_ss = ticks;
+ } else {
+ kbdev->js_reset_ticks_ss = -1;
+ }
+
+ if (js_reset_ms_cl >= 0) {
+ ticks = js_reset_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_reset_ticks_cl = ticks;
+ } else {
+ kbdev->js_reset_ticks_cl = -1;
+ }
+
+ if (js_reset_ms_dumping >= 0) {
+ ticks = js_reset_ms_dumping * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_reset_ticks_dumping = ticks;
+ } else {
+ kbdev->js_reset_ticks_dumping = -1;
+ }
+
+ kbdev->js_timeouts_updated = true;
+
+ dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%ld ms)\n",
+ (unsigned long)kbdev->js_soft_stop_ticks,
+ js_soft_stop_ms);
+ dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%ld ms)\n",
+ (unsigned long)kbdev->js_soft_stop_ticks_cl,
+ js_soft_stop_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%ld ms)\n",
+ (unsigned long)kbdev->js_hard_stop_ticks_ss,
+ js_hard_stop_ms_ss);
+ dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%ld ms)\n",
+ (unsigned long)kbdev->js_hard_stop_ticks_cl,
+ js_hard_stop_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%ld ms)\n",
+ (unsigned long)
+ kbdev->js_hard_stop_ticks_dumping,
+ js_hard_stop_ms_dumping);
+ dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%ld ms)\n",
+ (unsigned long)kbdev->js_reset_ticks_ss,
+ js_reset_ms_ss);
+ dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%ld ms)\n",
+ (unsigned long)kbdev->js_reset_ticks_cl,
+ js_reset_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%ld ms)\n",
+ (unsigned long)kbdev->js_reset_ticks_dumping,
+ js_reset_ms_dumping);
+
+ return count;
+ }
+
+ dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+ "Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+ "Write 0 for no change, -1 to restore default timeout\n");
+ return -EINVAL;
+}
+
+/** Show callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. It returns the values last written to the js_timeouts sysfs file.
+ * If the file has not been written to yet, the values will be the current
+ * settings in use.
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+ u64 ms;
+ unsigned long js_soft_stop_ms;
+ unsigned long js_soft_stop_ms_cl;
+ unsigned long js_hard_stop_ms_ss;
+ unsigned long js_hard_stop_ms_cl;
+ unsigned long js_hard_stop_ms_dumping;
+ unsigned long js_reset_ms_ss;
+ unsigned long js_reset_ms_cl;
+ unsigned long js_reset_ms_dumping;
+ unsigned long ticks;
+ u32 scheduling_period_ns;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ /* If no contexts have been scheduled since js_timeouts was last written
+ * to, the new timeouts might not have been latched yet. So check if an
+ * update is pending and use the new values if necessary. */
+ if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+ scheduling_period_ns = kbdev->js_scheduling_period_ns;
+ else
+ scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+ ticks = kbdev->js_soft_stop_ticks;
+ else
+ ticks = kbdev->js_data.soft_stop_ticks;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_soft_stop_ms = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+ ticks = kbdev->js_soft_stop_ticks_cl;
+ else
+ ticks = kbdev->js_data.soft_stop_ticks_cl;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_soft_stop_ms_cl = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+ ticks = kbdev->js_hard_stop_ticks_ss;
+ else
+ ticks = kbdev->js_data.hard_stop_ticks_ss;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_ss = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+ ticks = kbdev->js_hard_stop_ticks_cl;
+ else
+ ticks = kbdev->js_data.hard_stop_ticks_cl;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_cl = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+ ticks = kbdev->js_hard_stop_ticks_dumping;
+ else
+ ticks = kbdev->js_data.hard_stop_ticks_dumping;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_dumping = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+ ticks = kbdev->js_reset_ticks_ss;
+ else
+ ticks = kbdev->js_data.gpu_reset_ticks_ss;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_ss = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+ ticks = kbdev->js_reset_ticks_cl;
+ else
+ ticks = kbdev->js_data.gpu_reset_ticks_cl;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_cl = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+ ticks = kbdev->js_reset_ticks_dumping;
+ else
+ ticks = kbdev->js_data.gpu_reset_ticks_dumping;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_dumping = (unsigned long)ms;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+ js_soft_stop_ms, js_soft_stop_ms_cl,
+ js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+ js_hard_stop_ms_dumping, js_reset_ms_ss,
+ js_reset_ms_cl, js_reset_ms_dumping);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_SOFT_STOP_TICKS
+ * JS_SOFT_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
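+/* Example usage from a shell (the sysfs path is platform-specific and the
+ * values are purely illustrative; that the write format takes eight values
+ * mirroring the output of show_js_timeouts() is an assumption here - see
+ * set_js_timeouts() for the authoritative parser):
+ *
+ *   cat /sys/devices/platform/mali.0/js_timeouts
+ *   echo "800 800 1200 1200 3000 1600 1600 3500" > \
+ *       /sys/devices/platform/mali.0/js_timeouts
+ */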
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ * file
+ * @dev: The device the sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written and, if valid, updates the
+ * js_scheduling_period value.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int ret;
+ unsigned int js_scheduling_period;
+ u32 new_scheduling_period_ns;
+ u32 old_period;
+ u64 ticks;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = kstrtouint(buf, 0, &js_scheduling_period);
+ if (ret || !js_scheduling_period) {
+ dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+ "Use format <js_scheduling_period_ms>\n");
+ return -EINVAL;
+ }
+
+ new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+ /* Update scheduling timeouts */
+ mutex_lock(&kbdev->js_data.runpool_mutex);
+
+ /* If no contexts have been scheduled since js_timeouts was last written
+ * to, the new timeouts might not have been latched yet. So check if an
+ * update is pending and use the new values if necessary. */
+
+ /* Use previous 'new' scheduling period as a base if present. */
+ if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns)
+ old_period = kbdev->js_scheduling_period_ns;
+ else
+ old_period = kbdev->js_data.scheduling_period_ns;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+ ticks = (u64)kbdev->js_soft_stop_ticks * old_period;
+ else
+ ticks = (u64)kbdev->js_data.soft_stop_ticks *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_soft_stop_ticks = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+ ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period;
+ else
+ ticks = (u64)kbdev->js_data.soft_stop_ticks_cl *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+ ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period;
+ else
+ ticks = (u64)kbdev->js_data.hard_stop_ticks_ss *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+ ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period;
+ else
+ ticks = (u64)kbdev->js_data.hard_stop_ticks_cl *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+ ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period;
+ else
+ ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_dumping = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+ ticks = (u64)kbdev->js_reset_ticks_ss * old_period;
+ else
+ ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_reset_ticks_ss = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+ ticks = (u64)kbdev->js_reset_ticks_cl * old_period;
+ else
+ ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_reset_ticks_cl = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+ ticks = (u64)kbdev->js_reset_ticks_dumping * old_period;
+ else
+ ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_reset_ticks_dumping = ticks ? ticks : 1;
+
+ kbdev->js_scheduling_period_ns = new_scheduling_period_ns;
+ kbdev->js_timeouts_updated = true;
+
+ mutex_unlock(&kbdev->js_data.runpool_mutex);
+
+ dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+ js_scheduling_period);
+
+ return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ * entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the scheduling period value.
+ *
+ * This function is called to get the current period used for JS scheduling.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ u32 period;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+ period = kbdev->js_scheduling_period_ns;
+ else
+ period = kbdev->js_data.scheduling_period_ns;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+ period / 1000000);
+
+ return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+ show_js_scheduling_period, set_js_scheduling_period);
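+/* Example usage from a shell (illustrative path):
+ *
+ *   echo 100 > /sys/devices/platform/mali.0/js_scheduling_period
+ *
+ * sets the scheduling period to 100ms; set_js_scheduling_period() rescales
+ * the configured tick counts so that the timeouts keep roughly the same
+ * wall-clock durations under the new period.
+ */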
+
+#if !MALI_CUSTOMER_RELEASE
+/** Store callback for the @c force_replay sysfs file.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (!strncmp("limit=", buf, MIN(6, count))) {
+ unsigned int force_replay_limit;
+ int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+ if (items == 1) {
+ kbdev->force_replay_random = false;
+ kbdev->force_replay_limit = force_replay_limit;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ }
+ } else if (!strncmp("random_limit", buf, MIN(12, count))) {
+ kbdev->force_replay_random = true;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ } else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+ kbdev->force_replay_random = false;
+ kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ } else if (!strncmp("core_req=", buf, MIN(9, count))) {
+ unsigned int core_req;
+ int items = sscanf(buf, "core_req=%x", &core_req);
+
+ if (items == 1) {
+ kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+ return count;
+ }
+ }
+ dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+ return -EINVAL;
+}
+
+/** Show callback for the @c force_replay sysfs file.
+ *
+ * This function is called to get the contents of the @c force_replay sysfs
+ * file. It returns the last value written to the force_replay sysfs file.
+ * If the file has not been written to yet, the values will be 0.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kbdev->force_replay_random)
+ ret = scnprintf(buf, PAGE_SIZE,
+ "limit=0\nrandom_limit\ncore_req=%x\n",
+ kbdev->force_replay_core_req);
+ else
+ ret = scnprintf(buf, PAGE_SIZE,
+ "limit=%u\nnorandom_limit\ncore_req=%x\n",
+ kbdev->force_replay_limit,
+ kbdev->force_replay_core_req);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** The sysfs file @c force_replay.
+ *
+ * This is used to force the replay of job atoms for testing purposes; see
+ * set_force_replay() for the accepted settings.
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+ set_force_replay);
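+/* Example writes (illustrative; the settings are parsed by
+ * set_force_replay()):
+ *
+ *   echo "limit=5" > force_replay        - set a fixed replay limit of 5
+ *   echo "random_limit" > force_replay   - use a random replay limit
+ *   echo "norandom_limit" > force_replay - disable the forced replay limit
+ *   echo "core_req=10" > force_replay    - set forced core requirements (hex)
+ */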
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int ret;
+ int softstop_always;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = kstrtoint(buf, 0, &softstop_always);
+ if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+ dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+ "Use format <soft_stop_always>\n");
+ return -EINVAL;
+ }
+
+ kbdev->js_data.softstop_always = (bool) softstop_always;
+ dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+ (kbdev->js_data.softstop_always) ?
+ "Enabled" : "Disabled");
+ return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present.
+ * The ability to enable soft-stops when only a single context is present can
+ * be used for debug and unit-testing purposes (see the t6xx_stress_1
+ * unit-test as an example of where this feature is used).
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
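+/* Example (illustrative path): enable soft-stops for a single context with
+ *
+ *   echo 1 > /sys/devices/platform/mali.0/js_softstop_always
+ *
+ * and write 0 to restore the default behaviour.
+ */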
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+ KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+ /* This must be the last enum */
+ KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+ char *str;
+ kbasep_debug_command_func *func;
+};
+
+/** Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+ {
+ .str = "dumptrace",
+ .func = &kbasep_trace_dump,
+ }
+};
+
+/** Show callback for the @c debug_command sysfs file.
+ *
+ * This function is called to get the contents of the @c debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c debug_command sysfs file.
+ *
+ * This function is called when the @c debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @ref debug_commands to issue the command.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+ if (sysfs_streq(debug_commands[i].str, buf)) {
+ debug_commands[i].func(kbdev);
+ return count;
+ }
+ }
+
+ /* Debug Command not found */
+ dev_err(dev, "debug_command: command not known\n");
+ return -EINVAL;
+}
+
+/** The sysfs file @c debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
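+/* Example usage (illustrative path):
+ *
+ *   cat /sys/devices/platform/mali.0/debug_command    - list the commands
+ *   echo dumptrace > /sys/devices/platform/mali.0/debug_command
+ */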
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry. This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id. For
+ * example:
+ *
+ * Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ static const struct gpu_product_id_name {
+ unsigned id;
+ char *name;
+ } gpu_product_id_names[] = {
+ { .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+ { .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+ { .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+ { .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+ { .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+ { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+ { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+ { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+#ifdef MALI_INCLUDE_TMIX
+ { .id = GPU_ID_PI_TMIX, .name = "Mali-TMIx" },
+#endif /* MALI_INCLUDE_TMIX */
+ };
+ const char *product_name = "(Unknown Mali GPU)";
+ struct kbase_device *kbdev;
+ u32 gpu_id;
+ unsigned product_id;
+ unsigned i;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+ for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+ if (gpu_product_id_names[i].id == product_id) {
+ product_name = gpu_product_id_names[i].name;
+ break;
+ }
+ }
+
+ return scnprintf(buf, PAGE_SIZE, "%s MP%d r%dp%d 0x%04X\n",
+ product_name, kbdev->gpu_props.num_cores,
+ (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+ (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+ product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
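+/* Example read (illustrative path; the output line is in the format
+ * produced by kbase_show_gpuinfo() above):
+ *
+ *   $ cat /sys/devices/platform/mali.0/gpuinfo
+ *   Mali-T60x MP4 r0p0 0x6956
+ */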
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written and, if valid, updates the DVFS period variable.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int ret;
+ int dvfs_period;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = kstrtoint(buf, 0, &dvfs_period);
+ if (ret || dvfs_period <= 0) {
+ dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+ "Use format <dvfs_period_ms>\n");
+ return -EINVAL;
+ }
+
+ kbdev->pm.dvfs_period = dvfs_period;
+ dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+ return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the DVFS period value.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+ return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+ set_dvfs_period);
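+/* Example (illustrative path): set a 200ms DVFS sampling period with
+ *
+ *   echo 200 > /sys/devices/platform/mali.0/dvfs_period
+ */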
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int items;
+ u64 gpu_poweroff_time;
+ unsigned int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+ &poweroff_shader_ticks,
+ &poweroff_gpu_ticks);
+ if (items != 3) {
+ dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+ "Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+ return -EINVAL;
+ }
+
+ kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+ kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+ kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+ return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the power management settings.
+ *
+ * This function is called to get the current poweroff timer period and the
+ * shader/GPU poweroff tick counts.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+ ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+ kbdev->pm.poweroff_shader_ticks,
+ kbdev->pm.poweroff_gpu_ticks);
+
+ return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+ set_pm_poweroff);
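+/* Example (illustrative values): a 400000ns (400us) poweroff timer, with
+ * idle shaders powered off after 2 ticks and the GPU after 4 ticks:
+ *
+ *   echo "400000 2 4" > /sys/devices/platform/mali.0/pm_poweroff
+ */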
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written and, if valid, updates the reset timeout.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int ret;
+ int reset_timeout;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = kstrtoint(buf, 0, &reset_timeout);
+ if (ret || reset_timeout <= 0) {
+ dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+ "Use format <reset_timeout_ms>\n");
+ return -EINVAL;
+ }
+
+ kbdev->reset_timeout_ms = reset_timeout;
+ dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+ return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the reset timeout value.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+ return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+ set_reset_timeout);
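+/* Example (illustrative path): set a 500ms reset timeout with
+ *
+ *   echo 500 > /sys/devices/platform/mali.0/reset_timeout
+ */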
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+ return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+ int err = -ENOMEM;
+
+ if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+ dev_err(kbdev->dev, "Register window unavailable\n");
+ err = -EIO;
+ goto out_region;
+ }
+
+ kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+ if (!kbdev->reg) {
+ dev_err(kbdev->dev, "Can't remap register window\n");
+ err = -EINVAL;
+ goto out_ioremap;
+ }
+
+ return 0;
+
+ out_ioremap:
+ release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+ return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+ iounmap(kbdev->reg);
+ release_mem_region(kbdev->reg_start, kbdev->reg_size);
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+ kbase_pm_context_active(kbdev);
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+ kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+ struct kbase_device *kbdev; \
+ kbdev = (struct kbase_device *)data; \
+ kbdev->hw_quirks_##type = (u32)val; \
+ trigger_quirks_reload(kbdev); \
+ return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+ struct kbase_device *kbdev;\
+ kbdev = (struct kbase_device *)data;\
+ *val = kbdev->hw_quirks_##type;\
+ return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+ type##_quirks_set, "%llu\n")
+
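+/* Instantiate debugfs get/set accessors and file operations for the shader
+ * core (sc), tiler and MMU hardware quirk values; each expansion defines a
+ * fops_<type>_quirks used with debugfs_create_file() in
+ * kbase_device_debugfs_init() below. */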
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+ struct dentry *debugfs_ctx_defaults_directory;
+ int err;
+
+ kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+ NULL);
+ if (!kbdev->mali_debugfs_directory) {
+ dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+ kbdev->mali_debugfs_directory);
+ if (!kbdev->debugfs_ctx_directory) {
+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+ kbdev->debugfs_ctx_directory);
+ if (!debugfs_ctx_defaults_directory) {
+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ kbasep_gpu_memory_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+ debugfs_create_file("quirks_sc", 0644,
+ kbdev->mali_debugfs_directory, kbdev,
+ &fops_sc_quirks);
+ debugfs_create_file("quirks_tiler", 0644,
+ kbdev->mali_debugfs_directory, kbdev,
+ &fops_tiler_quirks);
+ debugfs_create_file("quirks_mmu", 0644,
+ kbdev->mali_debugfs_directory, kbdev,
+ &fops_mmu_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_COH_USER
+ debugfs_create_bool("infinite_cache", 0644,
+ debugfs_ctx_defaults_directory,
+ &kbdev->infinite_cache_active_default);
+#endif /* CONFIG_MALI_COH_USER */
+
+#if KBASE_TRACE_ENABLE
+ kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+ return 0;
+
+out:
+ debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+ return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+ debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+
+static int kbase_common_device_init(struct kbase_device *kbdev)
+{
+ int err;
+ struct mali_base_gpu_core_props *core_props;
+ enum {
+ inited_mem = (1u << 0),
+ inited_js = (1u << 1),
+ inited_debug = (1u << 2),
+ inited_js_softstop = (1u << 3),
+ inited_js_timeouts = (1u << 4),
+#if !MALI_CUSTOMER_RELEASE
+ inited_force_replay = (1u << 5),
+#endif /* !MALI_CUSTOMER_RELEASE */
+ inited_pm_runtime_init = (1u << 6),
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ inited_sc_split = (1u << 7),
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ inited_timeline = (1u << 8),
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+#ifdef CONFIG_MALI_DEVFREQ
+ inited_devfreq = (1u << 9),
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ inited_tlstream = (1u << 10),
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+ inited_backend_early = (1u << 11),
+ inited_backend_late = (1u << 12),
+ inited_device = (1u << 13),
+ inited_gpuinfo = (1u << 14),
+ inited_dvfs_period = (1u << 15),
+ inited_pm_poweroff = (1u << 16),
+ inited_reset_timeout = (1u << 17),
+ inited_js_scheduling_period = (1u << 18),
+ inited_vinstr = (1u << 19)
+ };
+
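+ /* Bitmask of the stages above that completed successfully; on failure,
+ * out_partial unwinds only the stages recorded here, in reverse order
+ * of initialisation. */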
+ int inited = 0;
+#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
+ u32 ve_logic_tile = 0;
+#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
+
+ dev_set_drvdata(kbdev->dev, kbdev);
+
+ err = kbase_backend_early_init(kbdev);
+ if (err)
+ goto out_partial;
+ inited |= inited_backend_early;
+
+ scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+ kbase_dev_nr++);
+
+ kbase_disjoint_init(kbdev);
+
+ /* obtain min/max configured gpu frequencies */
+ core_props = &(kbdev->gpu_props.props.core_props);
+
+ /* For versatile express platforms, min and max values of GPU frequency
+ * depend on the type of the logic tile; these values may not be known
+ * at build time, so in some cases a platform config file with wrong
+ * GPU frequency values may be included. To ensure the correct values of
+ * min and max GPU frequency are obtained, the type of the logic tile is
+ * read from the corresponding register on the platform and the frequency
+ * values are assigned accordingly. */
+#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
+ ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+ switch (ve_logic_tile) {
+ case 0x217:
+ /* Virtex 6, HBI0217 */
+ core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN;
+ core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX;
+ break;
+ case 0x247:
+ /* Virtex 7, HBI0247 */
+ core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN;
+ core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX;
+ break;
+ default:
+ /* all other logic tiles, i.e., Virtex 5 HBI0192
+ * or unsuccessful reading from the platform -
+ * fall back to the config_platform default */
+ core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+ break;
+ }
+#else
+ core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
+
+ kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+ err = kbase_device_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "Can't initialize device (%d)\n", err);
+ goto out_partial;
+ }
+
+ inited |= inited_device;
+
+ kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+ if (!kbdev->vinstr_ctx) {
+ dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n");
+ goto out_partial;
+ }
+
+ inited |= inited_vinstr;
+
+ if (kbdev->pm.callback_power_runtime_init) {
+ err = kbdev->pm.callback_power_runtime_init(kbdev);
+ if (err)
+ goto out_partial;
+
+ inited |= inited_pm_runtime_init;
+ }
+
+ err = kbase_mem_init(kbdev);
+ if (err)
+ goto out_partial;
+
+ inited |= inited_mem;
+
+ kbdev->system_coherency = COHERENCY_NONE;
+
+
+ err = kbasep_js_devdata_init(kbdev);
+ if (err)
+ goto out_partial;
+
+ inited |= inited_js;
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ err = device_create_file(kbdev->dev, &dev_attr_sc_split);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create sc_split sysfs file\n");
+ goto out_partial;
+ }
+
+ inited |= inited_sc_split;
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+#ifdef CONFIG_MALI_DEBUG
+
+ err = device_create_file(kbdev->dev, &dev_attr_debug_command);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create debug_command sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_debug;
+
+ err = device_create_file(kbdev->dev, &dev_attr_js_softstop_always);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create js_softstop_always sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_js_softstop;
+#endif /* CONFIG_MALI_DEBUG */
+
+ err = device_create_file(kbdev->dev, &dev_attr_js_timeouts);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create js_timeouts sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_js_timeouts;
+
+#if !MALI_CUSTOMER_RELEASE
+ err = device_create_file(kbdev->dev, &dev_attr_force_replay);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create force_replay sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_force_replay;
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+ err = device_create_file(kbdev->dev, &dev_attr_gpuinfo);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create gpuinfo sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_gpuinfo;
+
+ err = device_create_file(kbdev->dev, &dev_attr_dvfs_period);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create dvfs_period sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_dvfs_period;
+
+ err = device_create_file(kbdev->dev, &dev_attr_pm_poweroff);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create pm_poweroff sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_pm_poweroff;
+
+ err = device_create_file(kbdev->dev, &dev_attr_reset_timeout);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create reset_timeout sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_reset_timeout;
+
+ err = device_create_file(kbdev->dev, &dev_attr_js_scheduling_period);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create js_scheduling_period sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_js_scheduling_period;
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ err = kbase_tlstream_init();
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't initialize timeline stream\n");
+ goto out_partial;
+ }
+ inited |= inited_tlstream;
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+
+ err = kbase_backend_late_init(kbdev);
+ if (err)
+ goto out_partial;
+ inited |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+ err = kbase_devfreq_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't initialize devfreq\n");
+ goto out_partial;
+ }
+ inited |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#ifdef SECURE_CALLBACKS
+ kbdev->secure_ops = SECURE_CALLBACKS;
+#endif
+
+ err = kbase_device_debugfs_init(kbdev);
+ if (err)
+ goto out_partial;
+
+ /* initialise the kctx list */
+ mutex_init(&kbdev->kctx_list_lock);
+ INIT_LIST_HEAD(&kbdev->kctx_list);
+
+ kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+ kbdev->mdev.name = kbdev->devname;
+ kbdev->mdev.fops = &kbase_fops;
+ kbdev->mdev.parent = get_device(kbdev->dev);
+
+ err = misc_register(&kbdev->mdev);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't register misc dev %s\n",
+ kbdev->devname);
+ goto out_misc;
+ }
+
+ err = device_create_file(kbdev->dev, &dev_attr_power_policy);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create power_policy sysfs file\n");
+ goto out_file;
+ }
+
+ err = device_create_file(kbdev->dev,
+ &dev_attr_core_availability_policy);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create core_availability_policy sysfs file\n");
+ goto out_file_core_availability_policy;
+ }
+
+ err = device_create_file(kbdev->dev, &dev_attr_core_mask);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create core_mask sysfs file\n");
+ goto out_file_core_mask;
+ }
+
+ {
+ const struct list_head *dev_list = kbase_dev_list_get();
+
+ list_add(&kbdev->entry, &kbase_dev_list);
+ kbase_dev_list_put(dev_list);
+ }
+
+ dev_info(kbdev->dev, "Probed as %s\n",
+ dev_name(kbdev->mdev.this_device));
+
+ return 0;
+
+out_file_core_mask:
+ device_remove_file(kbdev->dev, &dev_attr_core_availability_policy);
+out_file_core_availability_policy:
+ device_remove_file(kbdev->dev, &dev_attr_power_policy);
+out_file:
+ misc_deregister(&kbdev->mdev);
+out_misc:
+ put_device(kbdev->dev);
+ kbase_device_debugfs_term(kbdev);
+out_partial:
+ if (inited & inited_vinstr)
+ kbase_vinstr_term(kbdev->vinstr_ctx);
+#ifdef CONFIG_MALI_DEVFREQ
+ if (inited & inited_devfreq)
+ kbase_devfreq_term(kbdev);
+#endif /* CONFIG_MALI_DEVFREQ */
+ if (inited & inited_backend_late)
+ kbase_backend_late_term(kbdev);
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ if (inited & inited_tlstream)
+ kbase_tlstream_term();
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+
+ if (inited & inited_js_scheduling_period)
+ device_remove_file(kbdev->dev, &dev_attr_js_scheduling_period);
+ if (inited & inited_reset_timeout)
+ device_remove_file(kbdev->dev, &dev_attr_reset_timeout);
+ if (inited & inited_pm_poweroff)
+ device_remove_file(kbdev->dev, &dev_attr_pm_poweroff);
+ if (inited & inited_dvfs_period)
+ device_remove_file(kbdev->dev, &dev_attr_dvfs_period);
+#if !MALI_CUSTOMER_RELEASE
+ if (inited & inited_force_replay)
+ device_remove_file(kbdev->dev, &dev_attr_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+ if (inited & inited_js_timeouts)
+ device_remove_file(kbdev->dev, &dev_attr_js_timeouts);
+#ifdef CONFIG_MALI_DEBUG
+ if (inited & inited_js_softstop)
+ device_remove_file(kbdev->dev, &dev_attr_js_softstop_always);
+
+ if (inited & inited_debug)
+ device_remove_file(kbdev->dev, &dev_attr_debug_command);
+
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ if (inited & inited_sc_split)
+ device_remove_file(kbdev->dev, &dev_attr_sc_split);
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+ if (inited & inited_gpuinfo)
+ device_remove_file(kbdev->dev, &dev_attr_gpuinfo);
+
+ if (inited & inited_js)
+ kbasep_js_devdata_halt(kbdev);
+
+ if (inited & inited_mem)
+ kbase_mem_halt(kbdev);
+
+ if (inited & inited_js)
+ kbasep_js_devdata_term(kbdev);
+
+ if (inited & inited_mem)
+ kbase_mem_term(kbdev);
+
+ if (inited & inited_pm_runtime_init) {
+ if (kbdev->pm.callback_power_runtime_term)
+ kbdev->pm.callback_power_runtime_term(kbdev);
+ }
+
+ if (inited & inited_device)
+ kbase_device_term(kbdev);
+
+ if (inited & inited_backend_early)
+ kbase_backend_early_term(kbdev);
+
+ return err;
+}
+
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+ struct kbase_device *kbdev;
+ struct resource *reg_res;
+ int err;
+ int i;
+
+#ifdef CONFIG_OF
+ err = kbase_platform_early_init();
+ if (err) {
+ dev_err(&pdev->dev, "Early platform initialization failed\n");
+ return err;
+ }
+#endif
+
+ kbdev = kbase_device_alloc();
+ if (!kbdev) {
+ dev_err(&pdev->dev, "Can't allocate device\n");
+ err = -ENOMEM;
+ goto out;
+ }
+#ifdef CONFIG_MALI_NO_MALI
+ err = gpu_device_create(kbdev);
+ if (err) {
+ dev_err(&pdev->dev, "Can't initialize dummy model\n");
+ goto out_midg;
+ }
+#endif /* CONFIG_MALI_NO_MALI */
+
+ kbdev->dev = &pdev->dev;
+
+ /* 3 IRQ resources */
+ for (i = 0; i < 3; i++) {
+ struct resource *irq_res;
+ int irqtag;
+
+ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+ if (!irq_res) {
+ dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+ err = -ENOENT;
+ goto out_platform_irq;
+ }
+
+#ifdef CONFIG_OF
+ if (!strcmp(irq_res->name, "JOB")) {
+ irqtag = JOB_IRQ_TAG;
+ } else if (!strcmp(irq_res->name, "MMU")) {
+ irqtag = MMU_IRQ_TAG;
+ } else if (!strcmp(irq_res->name, "GPU")) {
+ irqtag = GPU_IRQ_TAG;
+ } else {
+ dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+ irq_res->name);
+ err = -EINVAL;
+ goto out_irq_name;
+ }
+#else
+ irqtag = i;
+#endif /* CONFIG_OF */
+ kbdev->irqs[irqtag].irq = irq_res->start;
+ kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK);
+ }
+
+ /* the first memory resource is the physical address of the GPU registers */
+ reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!reg_res) {
+ dev_err(kbdev->dev, "Invalid register resource\n");
+ err = -ENOENT;
+ goto out_platform_mem;
+ }
+
+ kbdev->reg_start = reg_res->start;
+ kbdev->reg_size = resource_size(reg_res);
+
+ err = kbase_common_reg_map(kbdev);
+ if (err)
+ goto out_reg_map;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+ && defined(CONFIG_REGULATOR)
+ kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+ if (IS_ERR_OR_NULL(kbdev->regulator)) {
+ dev_info(kbdev->dev, "Continuing without Mali regulator control\n");
+ kbdev->regulator = NULL;
+ /* Allow probe to continue without regulator */
+ }
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_enable(kbdev->dev);
+#endif
+ kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+ if (IS_ERR_OR_NULL(kbdev->clock)) {
+ dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+ kbdev->clock = NULL;
+ /* Allow probe to continue without clock. */
+ } else {
+ err = clk_prepare_enable(kbdev->clock);
+ if (err) {
+ dev_err(kbdev->dev,
+ "Failed to prepare and enable clock (%d)\n", err);
+ goto out_clock_prepare;
+ }
+ }
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \
+ && defined(CONFIG_PM_OPP)
+ /* Register the OPPs if they are available in device tree */
+ if (of_init_opp_table(kbdev->dev) < 0)
+ dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif
+
+
+ err = kbase_common_device_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "Failed kbase_common_device_init\n");
+ goto out_common_init;
+ }
+ return 0;
+
+out_common_init:
+ clk_disable_unprepare(kbdev->clock);
+out_clock_prepare:
+ clk_put(kbdev->clock);
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_disable(kbdev->dev);
+#endif
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+ && defined(CONFIG_REGULATOR)
+ regulator_put(kbdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+ kbase_common_reg_unmap(kbdev);
+out_reg_map:
+out_platform_mem:
+#ifdef CONFIG_OF
+out_irq_name:
+#endif
+out_platform_irq:
+#ifdef CONFIG_MALI_NO_MALI
+ gpu_device_destroy(kbdev);
+out_midg:
+#endif /* CONFIG_MALI_NO_MALI */
+ kbase_device_free(kbdev);
+out:
+ return err;
+}
+
+static int kbase_common_device_remove(struct kbase_device *kbdev)
+{
+ kbase_vinstr_term(kbdev->vinstr_ctx);
+#ifdef CONFIG_DEBUG_FS
+ debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+#endif
+#ifdef CONFIG_MALI_DEVFREQ
+ kbase_devfreq_term(kbdev);
+#endif
+
+ kbase_backend_late_term(kbdev);
+
+ if (kbdev->pm.callback_power_runtime_term)
+ kbdev->pm.callback_power_runtime_term(kbdev);
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_disable(kbdev->dev);
+#endif
+ device_remove_file(kbdev->dev, &dev_attr_js_scheduling_period);
+ device_remove_file(kbdev->dev, &dev_attr_reset_timeout);
+ device_remove_file(kbdev->dev, &dev_attr_pm_poweroff);
+ device_remove_file(kbdev->dev, &dev_attr_dvfs_period);
+ device_remove_file(kbdev->dev, &dev_attr_power_policy);
+ device_remove_file(kbdev->dev, &dev_attr_core_availability_policy);
+ device_remove_file(kbdev->dev, &dev_attr_core_mask);
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ kbase_tlstream_term();
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+
+#ifdef CONFIG_MALI_DEBUG
+ device_remove_file(kbdev->dev, &dev_attr_js_softstop_always);
+ device_remove_file(kbdev->dev, &dev_attr_debug_command);
+#endif /* CONFIG_MALI_DEBUG */
+ device_remove_file(kbdev->dev, &dev_attr_js_timeouts);
+#if !MALI_CUSTOMER_RELEASE
+ device_remove_file(kbdev->dev, &dev_attr_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ device_remove_file(kbdev->dev, &dev_attr_sc_split);
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+ device_remove_file(kbdev->dev, &dev_attr_gpuinfo);
+
+ kbasep_js_devdata_halt(kbdev);
+ kbase_mem_halt(kbdev);
+
+ kbasep_js_devdata_term(kbdev);
+ kbase_mem_term(kbdev);
+ kbase_backend_early_term(kbdev);
+
+ {
+ const struct list_head *dev_list = kbase_dev_list_get();
+
+ list_del(&kbdev->entry);
+ kbase_dev_list_put(dev_list);
+ }
+ misc_deregister(&kbdev->mdev);
+ put_device(kbdev->dev);
+ kbase_common_reg_unmap(kbdev);
+ kbase_device_term(kbdev);
+ if (kbdev->clock) {
+ clk_disable_unprepare(kbdev->clock);
+ clk_put(kbdev->clock);
+ kbdev->clock = NULL;
+ }
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+ && defined(CONFIG_REGULATOR)
+ regulator_put(kbdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+#ifdef CONFIG_MALI_NO_MALI
+ gpu_device_destroy(kbdev);
+#endif /* CONFIG_MALI_NO_MALI */
+ kbase_device_free(kbdev);
+
+ return 0;
+}
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+ struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ return kbase_common_device_remove(kbdev);
+}
+
+/** Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @param dev The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+ kbase_pm_suspend(kbdev);
+ return 0;
+}
+
+/** Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @param dev The device to resume
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_resume_device(kbdev->devfreq);
+#endif
+ return 0;
+}
+
+/** Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in which it will
+ * not be able to communicate with the CPU(s) and RAM due to power management.
+ *
+ * @param dev The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+#ifdef CONFIG_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+ if (kbdev->pm.backend.callback_power_runtime_off) {
+ kbdev->pm.backend.callback_power_runtime_off(kbdev);
+ dev_dbg(dev, "runtime suspend\n");
+ }
+ return 0;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @param dev The device to resume
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef CONFIG_PM_RUNTIME
+int kbase_device_runtime_resume(struct device *dev)
+{
+ int ret = 0;
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kbdev->pm.backend.callback_power_runtime_on) {
+ ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+ dev_dbg(dev, "runtime resume\n");
+ }
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_resume_device(kbdev->devfreq);
+#endif
+
+ return ret;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** Runtime idle callback from the OS.
+ *
+ * This is called by Linux when the device appears to be inactive and might be
+ * placed into a low power state.
+ *
+ * @param dev The device to check
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef CONFIG_PM_RUNTIME
+static int kbase_device_runtime_idle(struct device *dev)
+{
+ /* Avoid pm_runtime_suspend being called */
+ return 1;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+ .suspend = kbase_device_suspend,
+ .resume = kbase_device_resume,
+#ifdef CONFIG_PM_RUNTIME
+ .runtime_suspend = kbase_device_runtime_suspend,
+ .runtime_resume = kbase_device_runtime_resume,
+ .runtime_idle = kbase_device_runtime_idle,
+#endif /* CONFIG_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+ { .compatible = "arm,malit6xx" },
+ { .compatible = "arm,mali-midgard" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
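+/* A device tree node matching this table might look like the following
+ * sketch (illustrative; the unit address, reg and interrupt values are
+ * board-specific, while the three interrupt names are required by
+ * kbase_platform_device_probe() above):
+ *
+ *   gpu@fc010000 {
+ *       compatible = "arm,mali-midgard";
+ *       reg = <0xfc010000 0x4000>;
+ *       interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+ *       interrupt-names = "JOB", "MMU", "GPU";
+ *   };
+ */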
+
+static struct platform_driver kbase_platform_driver = {
+ .probe = kbase_platform_device_probe,
+ .remove = kbase_platform_device_remove,
+ .driver = {
+ .name = kbase_drv_name,
+ .owner = THIS_MODULE,
+ .pm = &kbase_pm_ops,
+ .of_match_table = of_match_ptr(kbase_dt_ids),
+ },
+};
+
+/*
+ * When using Device Tree, the driver no longer provides a shortcut to create
+ * the Mali platform device.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+ int ret;
+
+ ret = kbase_platform_early_init();
+ if (ret)
+ return ret;
+
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ ret = kbase_platform_fake_register();
+ if (ret)
+ return ret;
+#endif
+#endif
+ ret = platform_driver_register(&kbase_platform_driver);
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ if (ret)
+ kbase_platform_fake_unregister();
+#endif
+#endif
+ return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+ platform_driver_unregister(&kbase_platform_driver);
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ kbase_platform_fake_unregister();
+#endif
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+ __stringify(BASE_UK_VERSION_MAJOR) "." \
+ __stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+ trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+ trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+ trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+ trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+ trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+ trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+ trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+ trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c
new file mode 100644
index 00000000000..5b62539dfba
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c
@@ -0,0 +1,125 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include "mali_kbase.h"
+#ifdef BASE_LEGACY_UK7_SUPPORT
+
+#include "mali_kbase_cpuprops.h"
+#include "mali_kbase_uku.h"
+#include <mali_kbase_config.h>
+#include <mali_kbase_config_defaults.h>
+#include <linux/cache.h>
+#include <linux/cpufreq.h>
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+#include <asm/cputype.h>
+#endif
+
+#define KBASE_DEFAULT_CPU_NUM 0
+
+#define L1_DCACHE_LINE_SIZE_LOG2 L1_CACHE_SHIFT
+
+/*
+ * Macros used to extract cpu id info
+ * see documentation for Main ID register
+ */
+#define KBASE_CPUPROPS_ID_GET_REV(cpuid) ((cpuid) & 0x0F) /* [3:0] Revision */
+#define KBASE_CPUPROPS_ID_GET_PART_NR(cpuid) (((cpuid) >> 4) & 0xFFF) /* [15:4] Part number */
+#define KBASE_CPUPROPS_ID_GET_ARCH(cpuid) (((cpuid) >> 16) & 0x0F) /* [19:16] Architecture */
+#define KBASE_CPUPROPS_ID_GET_VARIANT(cpuid) (((cpuid) >> 20) & 0x0F) /* [23:20] Variant */
+#define KBASE_CPUPROPS_ID_GET_CODE(cpuid) (((cpuid) >> 24) & 0xFF) /* [31:24] ASCII code of implementer trademark */
+
+/* Below value sourced from OSK */
+#define L1_DCACHE_SIZE ((u32)0x00008000)
+
+/**
+ * kbasep_cpuprops_uk_get_cpu_id_info - Retrieves detailed CPU info from given
+ * cpu_val (ID reg)
+ * @kbase_props: CPU props to be filled-in with cpu id info
+ *
+ */
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props)
+{
+ kbase_props->props.cpu_id.id = read_cpuid_id();
+
+ kbase_props->props.cpu_id.valid = 1;
+ kbase_props->props.cpu_id.rev = KBASE_CPUPROPS_ID_GET_REV(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.part = KBASE_CPUPROPS_ID_GET_PART_NR(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.arch = KBASE_CPUPROPS_ID_GET_ARCH(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.variant = KBASE_CPUPROPS_ID_GET_VARIANT(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.implementer = KBASE_CPUPROPS_ID_GET_CODE(kbase_props->props.cpu_id.id);
+}
+#else
+static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props)
+{
+ kbase_props->props.cpu_id.id = 0;
+ kbase_props->props.cpu_id.valid = 0;
+ kbase_props->props.cpu_id.rev = 0;
+ kbase_props->props.cpu_id.part = 0;
+ kbase_props->props.cpu_id.arch = 0;
+ kbase_props->props.cpu_id.variant = 0;
+ kbase_props->props.cpu_id.implementer = 'N';
+}
+#endif
+
+/*
+ * This function (and file!) is kept for backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+
+int kbase_cpuprops_uk_get_props(struct kbase_context *kctx,
+ struct kbase_uk_cpuprops * const props)
+{
+ unsigned int max_cpu_freq;
+
+ props->props.cpu_l1_dcache_line_size_log2 = L1_DCACHE_LINE_SIZE_LOG2;
+ props->props.cpu_l1_dcache_size = L1_DCACHE_SIZE;
+ props->props.cpu_flags = BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN;
+
+ props->props.nr_cores = num_possible_cpus();
+ props->props.cpu_page_size_log2 = PAGE_SHIFT;
+ props->props.available_memory_size = totalram_pages << PAGE_SHIFT;
+
+ kbasep_cpuprops_uk_get_cpu_id_info(props);
+
+ /* check if kernel supports dynamic frequency scaling */
+ max_cpu_freq = cpufreq_quick_get_max(KBASE_DEFAULT_CPU_NUM);
+ if (max_cpu_freq != 0) {
+ /* convert from kHz to MHz */
+ props->props.max_cpu_clock_speed_mhz = max_cpu_freq / 1000;
+ } else {
+ /* fallback if CONFIG_CPU_FREQ turned off */
+ int err;
+ kbase_cpu_clk_speed_func get_clock_speed;
+
+ get_clock_speed = (kbase_cpu_clk_speed_func) CPU_SPEED_FUNC;
+ err = get_clock_speed(&props->props.max_cpu_clock_speed_mhz);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+#endif /* BASE_LEGACY_UK7_SUPPORT */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h
new file mode 100644
index 00000000000..daa46e502e2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#ifdef BASE_LEGACY_UK7_SUPPORT
+
+/**
+ * @file mali_kbase_cpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_CPUPROPS_H_
+#define _KBASE_CPUPROPS_H_
+
+/* Forward declarations */
+struct kbase_uk_cpuprops;
+
+/**
+ * This file is kept for backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+
+/**
+ * @brief Provides CPU properties data.
+ *
+ * Fill the struct kbase_uk_cpuprops with values from CPU configuration.
+ *
+ * @param kctx The kbase context
+ * @param kbase_props A copy of the struct kbase_uk_cpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_cpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_cpuprops * const kbase_props);
+
+#endif /*_KBASE_CPUPROPS_H_*/
+#endif /* BASE_LEGACY_UK7_SUPPORT */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.c b/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100644
index 00000000000..fb57ac2e31a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+ NULL,
+ NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+ kbasep_debug_assert_registered_cb.func = func;
+ kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+ if (kbasep_debug_assert_registered_cb.func != NULL)
+ kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.h b/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100644
index 00000000000..5fff2892bb5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If set to 0, a trace containing the file, line, and function is displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If non-zero, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disables the assert checks if set to 1. Asserts are disabled by default in release builds. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
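+
+/*
+ * This can be overridden from the build system; a minimal Kbuild sketch
+ * (hypothetical fragment, shown for illustration only):
+ *
+ *   ccflags-y += -DKBASE_DEBUG_DISABLE_ASSERTS=1
+ */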
+
+/** Function type that is called on a KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+ kbase_debug_assert_hook *func;
+ void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+ "In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated with the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+ do { \
+ pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+ pr_err(__VA_ARGS__);\
+ pr_err("\n");\
+ } while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_DEBUG_ASSERT_MSG with the stringified expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+ KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+ /**
+ * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+ * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ * @param ... Message to display when @a expr is false, as a format string followed by format arguments.
+ */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+ do { \
+ if (!(expr)) { \
+ KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+ KBASE_CALL_ASSERT_HOOK();\
+ BUG();\
+ } \
+ } while (false)
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif /* CONFIG_MALI_DEBUG */
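+
+/*
+ * Minimal usage sketch for the macros above (kctx and nr_pages are
+ * hypothetical locals):
+ *
+ *   KBASE_DEBUG_ASSERT(kctx != NULL);
+ *   KBASE_DEBUG_ASSERT_MSG(nr_pages > 0, "bad page count: %zu", nr_pages);
+ *   KBASE_DEBUG_CODE(dev_dbg(kctx->kbdev->dev, "debug-only trace"));
+ */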
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif /* _KBASE_DEBUG_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100644
index 00000000000..1a3198e5b53
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,251 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+ struct list_head node;
+
+ struct kbase_mem_phy_alloc *alloc;
+ unsigned long flags;
+
+ u64 start_pfn;
+ size_t nr_pages;
+};
+
+struct debug_mem_data {
+ struct list_head mapping_list;
+ struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+ struct list_head *lh;
+ size_t offset;
+};
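+
+/*
+ * The seq_file position (*pos) is treated as a global page index across the
+ * concatenation of all mappings: debug_mem_start() below resolves it into a
+ * (mapping, page offset) pair, and debug_mem_next() advances one GPU page at
+ * a time, moving to the next mapping once the current one is exhausted.
+ */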
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+ struct debug_mem_data *mem_data = m->private;
+ struct debug_mem_seq_off *data;
+ struct debug_mem_mapping *map;
+ loff_t pos = *_pos;
+
+ list_for_each_entry(map, &mem_data->mapping_list, node) {
+ if (pos >= map->nr_pages) {
+ pos -= map->nr_pages;
+ } else {
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return NULL;
+ data->lh = &map->node;
+ data->offset = pos;
+ return data;
+ }
+ }
+
+ /* Beyond the end */
+ return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+ kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct debug_mem_data *mem_data = m->private;
+ struct debug_mem_seq_off *data = v;
+ struct debug_mem_mapping *map;
+
+ map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+ if (data->offset < map->nr_pages - 1) {
+ data->offset++;
+ ++*pos;
+ return data;
+ }
+
+ if (list_is_last(data->lh, &mem_data->mapping_list))
+ return NULL;
+
+ data->lh = data->lh->next;
+ data->offset = 0;
+ ++*pos;
+
+ return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+ struct debug_mem_data *mem_data = m->private;
+ struct debug_mem_seq_off *data = v;
+ struct debug_mem_mapping *map;
+ int i, j;
+ struct page *page;
+ uint32_t *mapping;
+ pgprot_t prot = PAGE_KERNEL;
+
+ map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+ kbase_gpu_vm_lock(mem_data->kctx);
+
+ if (data->offset >= map->alloc->nents) {
+ seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+ data->offset) << PAGE_SHIFT);
+ goto out;
+ }
+
+ if (!(map->flags & KBASE_REG_CPU_CACHED))
+ prot = pgprot_writecombine(prot);
+
+ page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+ for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+ seq_printf(m, "%016llx:", i + ((map->start_pfn +
+ data->offset) << PAGE_SHIFT));
+
+ for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+ seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+ seq_putc(m, '\n');
+ }
+
+ vunmap(mapping);
+
+ seq_putc(m, '\n');
+
+out:
+ kbase_gpu_vm_unlock(mem_data->kctx);
+ return 0;
+}
+
+static const struct seq_operations ops = {
+ .start = debug_mem_start,
+ .next = debug_mem_next,
+ .stop = debug_mem_stop,
+ .show = debug_mem_show,
+};
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+ struct file *kctx_file = i->i_private;
+ struct kbase_context *kctx = kctx_file->private_data;
+ struct rb_node *p;
+ struct debug_mem_data *mem_data;
+ int ret;
+
+ ret = seq_open(file, &ops);
+
+ if (ret)
+ return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		seq_release(i, file);
+		return -ENOMEM;
+	}
+	mem_data->kctx = kctx;
+
+ INIT_LIST_HEAD(&mem_data->mapping_list);
+
+ get_file(kctx_file);
+
+ kbase_gpu_vm_lock(kctx);
+
+ for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+ struct kbase_va_region *reg;
+ struct debug_mem_mapping *mapping;
+
+ reg = rb_entry(p, struct kbase_va_region, rblink);
+
+ if (reg->gpu_alloc == NULL)
+ /* Empty region - ignore */
+ continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping)
+			continue; /* skip this region on allocation failure */
+
+ mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+ mapping->start_pfn = reg->start_pfn;
+ mapping->nr_pages = reg->nr_pages;
+ mapping->flags = reg->flags;
+ list_add_tail(&mapping->node, &mem_data->mapping_list);
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+
+ ((struct seq_file *)file->private_data)->private = mem_data;
+
+ return 0;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+ struct file *kctx_file = inode->i_private;
+ struct seq_file *sfile = file->private_data;
+ struct debug_mem_data *mem_data = sfile->private;
+ struct debug_mem_mapping *mapping;
+
+ seq_release(inode, file);
+
+ while (!list_empty(&mem_data->mapping_list)) {
+ mapping = list_first_entry(&mem_data->mapping_list,
+ struct debug_mem_mapping, node);
+ kbase_mem_phy_alloc_put(mapping->alloc);
+ list_del(&mapping->node);
+ kfree(mapping);
+ }
+
+ kfree(mem_data);
+
+ fput(kctx_file);
+
+ return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+ .open = debug_mem_open,
+ .release = debug_mem_release,
+ .read = seq_read,
+ .llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view debugfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+ struct kbase_context *kctx = kctx_file->private_data;
+
+ debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+ &kbase_debug_mem_view_fops);
+}
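+
+/*
+ * Once created, the entry is read like any other debugfs file; assuming
+ * debugfs is mounted at the usual location and a hypothetical context
+ * directory name:
+ *
+ *   cat /sys/kernel/debug/mali/ctx/<ctx>/mem_view
+ *
+ * Each output line is a GPU virtual address followed by four 32-bit words.
+ */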
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100644
index 00000000000..20ab51a776c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100644
index 00000000000..8b107f20cc4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1126 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Definitions (types, defines, etc.) common to kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mem_alloc.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif /* CONFIG_SYNC */
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif				/* CONFIG_PM_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT 1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT 500
+
+/**
+ * Prevent soft-stops from occurring in scheduling situations
+ *
+ * This is not a workaround for HW issues; it is used when more predictable
+ * scheduling is desired.
+ *
+ * Independently of this setting, soft stop may still be disabled due to HW
+ * issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note If not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+/**
+ * Prevent hard-stops from occurring in scheduling situations
+ *
+ * This is not a workaround for HW issues; it is used when more predictable
+ * scheduling is desired.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note If not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS 3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS 16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL 0
+#else
+#define MIDGARD_MMU_TOPLEVEL 1
+#endif
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
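+/* With KBASE_TRACE_SIZE_LOG2 == 8 this yields a 256-entry ring and a mask of
+ * 0xFF, so indices can advance with a cheap bitwise AND, e.g.
+ * next_in = (next_in + 1) & KBASE_TRACE_MASK (see kbasep_trace_add()). */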
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has previously been soft-stopped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has previously been retried */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom has a failed same-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has a failed cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom has been submitted to JSCTX ringbuffers */
+#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in secure mode */
+#define KBASE_KATOM_FLAG_SECURE (1<<11)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ * entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+ (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+struct kbase_jd_atom_dependency {
+ struct kbase_jd_atom *atom;
+ u8 dep_type;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return read-only reference to the dependent atom.
+ */
+static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+ LOCAL_ASSERT(dep != NULL);
+
+ return (const struct kbase_jd_atom * const)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+ LOCAL_ASSERT(dep != NULL);
+
+ return dep->dep_type;
+}
+
+/**
+ * @brief Setter function for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep The kbase jd atom dependency.
+ * @param[in] a The ATOM to be set as a dependency.
+ * @param type The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+ struct kbase_jd_atom *a, u8 type)
+{
+ struct kbase_jd_atom_dependency *dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+
+ dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+ dep->atom = a;
+ dep->dep_type = type;
+}
+
+/**
+ * @brief Clears a dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+ struct kbase_jd_atom_dependency *dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+
+ dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+ dep->atom = NULL;
+ dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
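+
+/*
+ * Minimal usage sketch (katom and other_katom are hypothetical atoms;
+ * BASE_JD_DEP_TYPE_DATA is assumed to be provided by the base kernel API):
+ *
+ *   kbase_jd_katom_dep_set(&katom->dep[0], other_katom,
+ *			    BASE_JD_DEP_TYPE_DATA);
+ *   ...
+ *   kbase_jd_katom_dep_clear(&katom->dep[0]);
+ */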
+
+enum kbase_atom_gpu_rb_state {
+ /* Atom is not currently present in slot ringbuffer */
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+ /* Atom is in slot ringbuffer but is blocked on a previous atom */
+ KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+ /* Atom is in slot ringbuffer but is waiting for cores to become
+ * available */
+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+ /* Atom is in slot ringbuffer but is blocked on affinity */
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+ /* Atom is in slot ringbuffer but is waiting for secure mode switch */
+ KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE,
+ /* Atom is in slot ringbuffer and ready to run */
+ KBASE_ATOM_GPU_RB_READY,
+ /* Atom is in slot ringbuffer and has been submitted to the GPU */
+ KBASE_ATOM_GPU_RB_SUBMITTED,
+ /* Atom must be returned to JS as soon as it reaches the head of the
+ * ringbuffer due to a previous failure */
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS
+};
+
+struct kbase_ext_res {
+ u64 gpu_address;
+ struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+ struct work_struct work;
+ ktime_t start_timestamp;
+ u64 time_spent_us; /**< Total time spent on the GPU in microseconds */
+
+ struct base_jd_udata udata;
+ struct kbase_context *kctx;
+
+ struct list_head dep_head[2];
+ struct list_head dep_item[2];
+ const struct kbase_jd_atom_dependency dep[2];
+
+ u16 nr_extres;
+ struct kbase_ext_res *extres;
+
+ u32 device_nr;
+ u64 affinity;
+ u64 jc;
+ enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+ struct list_head node;
+ struct kds_resource_set *kds_rset;
+ bool kds_dep_satisfied;
+#endif /* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+ struct sync_fence *fence;
+ struct sync_fence_waiter sync_waiter;
+#endif /* CONFIG_SYNC */
+
+ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+ enum base_jd_event_code event_code;
+ base_jd_core_req core_req; /**< core requirements */
+ /** Job Slot to retry submitting to if submission from IRQ handler failed
+ *
+	 * NOTE: see if this can be unified into another member, e.g. the event */
+ int retry_submit_on_slot;
+
+ union kbasep_js_policy_job_info sched_info;
+ /* JS atom priority with respect to other atoms on its kctx. */
+ int sched_priority;
+
+ int poking; /* BASE_HW_ISSUE_8316 */
+
+ wait_queue_head_t completed;
+ enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+ int work_id;
+#endif
+ /* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+ int slot_nr;
+
+ u32 atom_flags;
+
+ /* Number of times this atom has been retried. Used by replay soft job.
+ */
+ int retry_count;
+
+ enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+ u64 need_cache_flush_cores_retained;
+
+ atomic_t blocked;
+
+ /* Pointer to atom that this atom has cross-slot dependency on */
+ struct kbase_jd_atom *x_pre_dep;
+ /* Pointer to atom that has cross-slot dependency on this atom */
+ struct kbase_jd_atom *x_post_dep;
+
+
+ struct kbase_jd_atom_backend backend;
+};
+
+static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom)
+{
+ return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+ struct mutex lock;
+ struct kbasep_js_kctx_info sched_info;
+ struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+ /** Tracks all job-dispatch jobs. This includes those not tracked by
+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+ u32 job_nr;
+
+ /** Waitq that reflects whether there are no jobs (including SW-only
+ * dependency jobs). This is set when no jobs are present on the ctx,
+ * and clear when there are jobs.
+ *
+ * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+ * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+ * and SW-only dependency jobs.
+ *
+ * This waitq can be waited upon to find out when the context jobs are all
+ * done/cancelled (including those that might've been blocked on
+ * dependencies) - and so, whether it can be terminated. However, it should
+ * only be terminated once it is neither present in the policy-queue (see
+ * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
+ * kbasep_js_kctx_info::ctx::is_scheduled).
+ *
+ * Since the waitq is only set under kbase_jd_context::lock,
+ * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarantee that the setter has completed its work on the kbase_context
+ *
+ * This must be updated atomically with:
+ * - kbase_jd_context::job_nr */
+ wait_queue_head_t zero_jobs_wait;
+
+ /** Job Done workqueue. */
+ struct workqueue_struct *job_done_wq;
+
+ spinlock_t tb_lock;
+ u32 *tb;
+ size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+ struct kds_callback kds_cb;
+#endif /* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+ atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+ u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316 */
+enum {
+ KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0,
+ KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316 */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+ u64 transtab;
+ u64 memattr;
+};
+
+/**
+ * Important: Our code assumes that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+ int number;
+
+ struct workqueue_struct *pf_wq;
+ struct work_struct work_pagefault;
+ struct work_struct work_busfault;
+ enum kbase_mmu_fault_type fault_type;
+ u32 fault_status;
+ u64 fault_addr;
+ struct mutex transaction_mutex;
+
+ struct kbase_mmu_setup current_setup;
+
+ /* BASE_HW_ISSUE_8316 */
+ struct workqueue_struct *poke_wq;
+ struct work_struct poke_work;
+ /** Protected by kbasep_js_device_data::runpool_irq::lock */
+ int poke_refcount;
+ /** Protected by kbasep_js_device_data::runpool_irq::lock */
+ kbase_as_poke_state poke_state;
+ struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+ return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+ return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+ atomic_t used_pages; /* Tracks usage of OS shared memory. Updated
+ when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+ * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef KBASE_TRACE_CODE_MAKE_CODE
+ /* Comma on its own, to extend the list */
+ ,
+ /* Must be the last in the enum */
+ KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1)
+
+struct kbase_trace {
+ struct timespec timestamp;
+ u32 thread_id;
+ u32 cpu;
+ void *ctx;
+ bool katom;
+ int atom_number;
+ u64 atom_udata[2];
+ u64 gpu_addr;
+ unsigned long info_val;
+ u8 code;
+ u8 jobslot;
+ u8 refcount;
+ u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statistical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+ /* helper for tests */
+ KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+ /** Event reserved for backwards compatibility with 'init' events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+ /** The power state of the device has changed.
+ *
+ * Specifically, the device has reached a desired or available state.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+ /** The GPU is becoming active.
+ *
+ * This event is sent when the first context is about to use the GPU.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+ /** The GPU is becoming idle.
+ *
+ * This event is sent when the last context has finished using the GPU.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+ /** Event reserved for backwards compatibility with 'policy_change'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+ /** Event reserved for backwards compatibility with 'system_suspend'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+ /** Event reserved for backwards compatibility with 'system_resume'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+ /** The job scheduler is requesting to power up/down cores.
+ *
+ * This event is sent when:
+ * - powered down cores are needed to complete a job
+ * - powered up cores are not needed anymore
+ */
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+ KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+ atomic_t jd_atoms_in_flight;
+ u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+ /* Note: strictly speaking, not needed, because it's in sync with
+ * kbase_device::jm_slots[]::submitted_nr
+ *
+ * But it's kept as an example of how to add global timeline tracking
+ * information
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock when
+ * accessing this */
+ u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+ /* Last UID for each PM event */
+ atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+ /* Counter for generating PM event UIDs */
+ atomic_t pm_event_uid_counter;
+ /*
+	 * L2 transition state - true indicates that the transition is ongoing.
+	 * Expected to be protected by pm.power_change_lock.
+	 */
+ bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+ struct list_head link;
+ struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+ /**
+ * The lock protecting Power Management structures accessed outside of
+ * IRQ.
+ *
+ * This lock must also be held whenever the GPU is being powered on or
+ * off.
+ */
+ struct mutex lock;
+
+ /** The reference count of active contexts on this device. */
+ int active_count;
+ /** Flag indicating suspending/suspended */
+ bool suspending;
+ /* Wait queue set when active_count == 0 */
+ wait_queue_head_t zero_active_count_wait;
+
+ /**
+ * A bit mask identifying the available shader cores that are specified
+ * via sysfs
+ */
+ u64 debug_core_mask;
+
+ /**
+ * Lock protecting the power state of the device.
+ *
+ * This lock must be held when accessing the shader_available_bitmap,
+ * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and
+ * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition
+ * and shader_poweroff_pending fields of kbase_pm_device_data. It is
+ * also held when the hardware power registers are being written to, to
+ * ensure that two threads do not conflict over the power transitions
+ * that the hardware should make.
+ */
+ spinlock_t power_change_lock;
+
+ /**
+ * Callback for initializing the runtime power management.
+ *
+ * @param kbdev The kbase device
+ *
+ * @return 0 on success, else error code
+ */
+ int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+ /**
+ * Callback for terminating the runtime power management.
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+ /* Time in milliseconds between each dvfs sample */
+ u32 dvfs_period;
+
+ /* Period of GPU poweroff timer */
+ ktime_t gpu_poweroff_time;
+
+ /* Number of ticks of GPU poweroff timer before shader is powered off */
+ int poweroff_shader_ticks;
+
+ /* Number of ticks of GPU poweroff timer before GPU is powered off */
+ int poweroff_gpu_ticks;
+
+ struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_secure_ops - Platform specific functions for GPU secure mode
+ * operations
+ * @secure_mode_enable: Callback to enable secure mode on the GPU
+ * @secure_mode_disable: Callback to disable secure mode on the GPU
+ */
+struct kbase_secure_ops {
+ /**
+ * secure_mode_enable() - Enable secure mode on the GPU
+ * @kbdev: The kbase device
+ *
+ * Return: 0 on success, non-zero on error
+ */
+ int (*secure_mode_enable)(struct kbase_device *kbdev);
+
+ /**
+ * secure_mode_disable() - Disable secure mode on the GPU
+ * @kbdev: The kbase device
+ *
+ * Return: 0 on success, non-zero on error
+ */
+ int (*secure_mode_disable)(struct kbase_device *kbdev);
+};
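+
+/*
+ * A platform integration supplies these callbacks and points
+ * kbase_device::secure_ops at them; a minimal sketch with hypothetical
+ * function names:
+ *
+ *   static int my_secure_enable(struct kbase_device *kbdev)  { return 0; }
+ *   static int my_secure_disable(struct kbase_device *kbdev) { return 0; }
+ *
+ *   static struct kbase_secure_ops my_secure_ops = {
+ *	     .secure_mode_enable  = my_secure_enable,
+ *	     .secure_mode_disable = my_secure_disable,
+ *   };
+ */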
+
+
+#define DEVNAME_SIZE 16
+
+struct kbase_device {
+ s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+ u32 hw_quirks_sc;
+ u32 hw_quirks_tiler;
+ u32 hw_quirks_mmu;
+
+ struct list_head entry;
+ struct device *dev;
+ struct miscdevice mdev;
+ u64 reg_start;
+ size_t reg_size;
+ void __iomem *reg;
+ struct {
+ int irq;
+ int flags;
+ } irqs[3];
+#ifdef CONFIG_HAVE_CLK
+ struct clk *clock;
+#endif
+#ifdef CONFIG_REGULATOR
+ struct regulator *regulator;
+#endif
+ char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+ void *model;
+ struct kmem_cache *irq_slab;
+ struct workqueue_struct *irq_workq;
+ atomic_t serving_job_irq;
+ atomic_t serving_gpu_irq;
+ atomic_t serving_mmu_irq;
+ spinlock_t reg_op_lock;
+#endif /* CONFIG_MALI_NO_MALI */
+
+ struct kbase_pm_device_data pm;
+ struct kbasep_js_device_data js_data;
+ struct kbasep_mem_device memdev;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ struct kbase_as as[BASE_MAX_NR_AS];
+
+ spinlock_t mmu_mask_change;
+
+ struct kbase_gpu_props gpu_props;
+
+ /** List of SW workarounds for HW issues */
+ unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+ /** List of features available */
+ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+ /* Cached present bitmaps - these are the same as the corresponding hardware registers */
+ u64 shader_present_bitmap;
+ u64 tiler_present_bitmap;
+ u64 l2_present_bitmap;
+
+ /* Bitmaps of cores that are currently in use (running jobs).
+ * These should be kept up to date by the job scheduler.
+ *
+ * pm.power_change_lock should be held when accessing these members.
+ *
+ * kbase_pm_check_transitions_nolock() should be called when bits are
+ * cleared to update the power management system and allow transitions to
+ * occur. */
+ u64 shader_inuse_bitmap;
+
+ /* Refcount for cores in use */
+ u32 shader_inuse_cnt[64];
+
+ /* Bitmaps of cores the JS needs for jobs ready to run */
+ u64 shader_needed_bitmap;
+
+ /* Refcount for cores needed */
+ u32 shader_needed_cnt[64];
+
+ u32 tiler_inuse_cnt;
+
+ u32 tiler_needed_cnt;
+
+ /* struct for keeping track of the disjoint information
+ *
+ * The state is > 0 if the GPU is in a disjoint state. Otherwise 0
+ * The count is the number of disjoint events that have occurred on the GPU
+ */
+ struct {
+ atomic_t count;
+ atomic_t state;
+ } disjoint_event;
+
+ /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+ u32 l2_users_count;
+
+	/* Bitmaps of cores that are currently available (powered up, and the power policy is happy for jobs to be
+	 * submitted to these cores). These are updated by the power management code. The job scheduler should avoid
+ * submitting new jobs to any cores that are not marked as available.
+ *
+ * pm.power_change_lock should be held when accessing these members.
+ */
+ u64 shader_available_bitmap;
+ u64 tiler_available_bitmap;
+ u64 l2_available_bitmap;
+
+ u64 shader_ready_bitmap;
+ u64 shader_transitioning_bitmap;
+
+ s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */
+ s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */
+
+ /* Structure used for instrumentation and HW counters dumping */
+ struct {
+ /* The lock should be used when accessing any of the following members */
+ spinlock_t lock;
+
+ struct kbase_context *kctx;
+ u64 addr;
+
+ struct kbase_context *suspended_kctx;
+ struct kbase_uk_hwcnt_setup suspended_state;
+
+ struct kbase_instr_backend backend;
+ } hwcnt;
+
+ struct kbase_vinstr_context *vinstr_ctx;
+
+	/* Value to be written to the irq_throttle register each time an irq is served */
+ atomic_t irq_throttle_cycles;
+
+#if KBASE_TRACE_ENABLE
+ spinlock_t trace_lock;
+ u16 trace_first_out;
+ u16 trace_next_in;
+ struct kbase_trace *trace_rbuf;
+#endif
+
+ /* This is used to override the current job scheduler values for
+ * JS_SCHEDULING_PERIOD_NS
+ * JS_SOFT_STOP_TICKS
+ * JS_SOFT_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ *
+ * These values are set via the js_timeouts sysfs file.
+ */
+ u32 js_scheduling_period_ns;
+ int js_soft_stop_ticks;
+ int js_soft_stop_ticks_cl;
+ int js_hard_stop_ticks_ss;
+ int js_hard_stop_ticks_cl;
+ int js_hard_stop_ticks_dumping;
+ int js_reset_ticks_ss;
+ int js_reset_ticks_cl;
+ int js_reset_ticks_dumping;
+ bool js_timeouts_updated;
+
+ u32 reset_timeout_ms;
+
+ struct mutex cacheclean_lock;
+
+ /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+ void *platform_context;
+
+ /* List of kbase_contexts created */
+ struct list_head kctx_list;
+ struct mutex kctx_list_lock;
+
+#ifdef CONFIG_MALI_MIDGARD_RT_PM
+ struct delayed_work runtime_pm_workqueue;
+#endif
+
+#ifdef CONFIG_PM_DEVFREQ
+ struct devfreq_dev_profile devfreq_profile;
+ struct devfreq *devfreq;
+ unsigned long current_freq;
+ unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+ struct devfreq_cooling_device *devfreq_cooling;
+#endif
+#endif
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+ /* directory for debugfs entries */
+ struct dentry *mali_debugfs_directory;
+ /* Root directory for per context entry */
+ struct dentry *debugfs_ctx_directory;
+#endif /* CONFIG_DEBUG_FS */
+
+ /* fbdump profiling controls set by gator */
+ u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+ /* Number of jobs that are run before a job is forced to fail and
+ * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+ * failures. */
+ int force_replay_limit;
+ /* Count of jobs between forced failures. Incremented on each job. A
+ * job is forced to fail once this is greater than or equal to
+ * force_replay_limit. */
+ int force_replay_count;
+ /* Core requirement for jobs to be failed and replayed. May be zero. */
+ base_jd_core_req force_replay_core_req;
+ /* true if force_replay_limit should be randomized. The random
+ * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+ */
+ bool force_replay_random;
+#endif
+
+
+ /* Total number of created contexts */
+ atomic_t ctx_num;
+
+ struct kbase_hwaccess_data hwaccess;
+
+ /* Count of page/bus faults waiting for workqueues to process */
+ atomic_t faults_pending;
+
+ /* true if GPU is powered off or power off operation is in progress */
+ bool poweroff_pending;
+
+
+ /* defaults for new context created for this device */
+ u32 infinite_cache_active_default;
+
+ /* system coherency mode */
+ u32 system_coherency;
+
+ /* Secure operations */
+ struct kbase_secure_ops *secure_ops;
+};
+
+/* JSCTX ringbuffer size must always be a power of 2 */
+#define JSCTX_RB_SIZE 256
+#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1)
+
+/**
+ * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer
+ * @atom_id: Atom ID
+ */
+struct jsctx_rb_entry {
+ u16 atom_id;
+};
+
+/**
+ * struct jsctx_rb - JS context atom ring buffer
+ * @entries: Array of size %JSCTX_RB_SIZE which holds the &struct
+ * kbase_jd_atom pointers which make up the contents of the ring
+ * buffer.
+ * @read_idx: Index into @entries. Indicates the next entry in @entries to
+ * read, and is incremented when pulling an atom, and decremented
+ * when unpulling.
+ * HW access lock must be held when accessing.
+ * @write_idx: Index into @entries. Indicates the next entry to use when
+ * adding atoms into the ring buffer, and is incremented when
+ * adding a new atom.
+ * jctx->lock must be held when accessing.
+ * @running_idx: Index into @entries. Indicates the last valid entry, and is
+ *		incremented when removing atoms from the ring buffer.
+ * HW access lock must be held when accessing.
+ *
+ * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom.
+ */
+struct jsctx_rb {
+ struct jsctx_rb_entry entries[JSCTX_RB_SIZE];
+
+ u16 read_idx; /* HW access lock must be held when accessing */
+ u16 write_idx; /* jctx->lock must be held when accessing */
+ u16 running_idx; /* HW access lock must be held when accessing */
+};
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
+ (((minor) & 0xFFF) << 8) | \
+ ((0 & 0xFF) << 0))
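+/* Packs major into bits 31:20 and minor into bits 19:8, leaving bits 7:0
+ * zero; e.g. KBASE_API_VERSION(8, 6) == (8 << 20) | (6 << 8). */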
+
+struct kbase_context {
+ struct kbase_device *kbdev;
+ int id; /* System wide unique id */
+ unsigned long api_version;
+ phys_addr_t pgd;
+ struct list_head event_list;
+ struct mutex event_mutex;
+ bool event_closed;
+ struct workqueue_struct *event_workq;
+
+ bool is_compat;
+
+ atomic_t setup_complete;
+ atomic_t setup_in_progress;
+
+ u64 *mmu_teardown_pages;
+
+ phys_addr_t aliasing_sink_page;
+
+ struct mutex reg_lock; /* To be converted to a rwlock? */
+ struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
+
+ unsigned long cookies;
+ struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+ wait_queue_head_t event_queue;
+ pid_t tgid;
+ pid_t pid;
+
+ struct kbase_jd_context jctx;
+ atomic_t used_pages;
+ atomic_t nonmapped_pages;
+
+ struct kbase_mem_allocator osalloc;
+ struct kbase_mem_allocator *pgd_allocator;
+
+ struct list_head waiting_soft_jobs;
+#ifdef CONFIG_KDS
+ struct list_head waiting_kds_resource;
+#endif
+ /** This is effectively part of the Run Pool, because it only has a valid
+ * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+ *
+ * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
+ * this.
+ *
+ * If the context relating to this as_nr is required, you must use
+ * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+ * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
+ * to ensure the context doesn't disappear (but this has restrictions on what other locks
+ * you can take whilst doing this) */
+ int as_nr;
+
+ /* NOTE:
+ *
+ * Flags are in jctx.sched_info.ctx.flags
+ * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+ *
+ * All other flags must be added there */
+ spinlock_t mm_update_lock;
+ struct mm_struct *process_mm;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+ /* Content of mem_profile file */
+ char *mem_profile_data;
+ /* Size of @c mem_profile_data */
+ size_t mem_profile_size;
+ /* Spinlock guarding data */
+ spinlock_t mem_profile_lock;
+ struct dentry *kctx_dentry;
+#endif /* CONFIG_DEBUG_FS */
+
+ struct jsctx_rb jsctx_rb
+ [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+ /* Number of atoms currently pulled from this context */
+ atomic_t atoms_pulled;
+ /* Number of atoms currently pulled from this context, per slot */
+ atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+ /* true if last kick() caused atoms to be pulled from this context */
+ bool pulled;
+ /* true if infinite cache is to be enabled for new allocations. Existing
+ * allocations will not change. bool stored as a u32 per Linux API */
+ u32 infinite_cache_active;
+ /* Bitmask of slots that can be pulled from */
+ u32 slots_pullable;
+
+ /* true if address space assignment is pending */
+ bool as_pending;
+
+ /* Backend specific data */
+ struct kbase_context_backend backend;
+
+ /* Work structure used for deferred ASID assignment */
+ struct work_struct work;
+
+ /* Only one userspace vinstr client per kbase context */
+ struct kbase_vinstr_client *vinstr_cli;
+ struct mutex vinstr_cli_lock;
+};
+
+enum kbase_reg_access_type {
+ REG_READ,
+ REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+ /* (1ULL << 8) bit is reserved */
+ SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */
+ SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */
+};
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS 100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+#endif /* _KBASE_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100644
index 00000000000..01e41462f17
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,659 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+ * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+ return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+ const char format[] = "mali_mmu%d";
+ char name[sizeof(format)];
+ const char poke_format[] = "mali_mmu%d_poker";
+ char poke_name[sizeof(poke_format)];
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+ snprintf(name, sizeof(name), format, i);
+
+ kbdev->as[i].number = i;
+ kbdev->as[i].fault_addr = 0ULL;
+
+ kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+ if (!kbdev->as[i].pf_wq)
+ return -EINVAL;
+
+ mutex_init(&kbdev->as[i].transaction_mutex);
+ INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+ INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+ struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+ struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+ kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+ if (!kbdev->as[i].poke_wq) {
+ destroy_workqueue(kbdev->as[i].pf_wq);
+ return -EINVAL;
+ }
+ KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+ INIT_WORK(poke_work, kbasep_as_do_poke);
+
+ hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+ poke_timer->function = kbasep_as_poke_timer_callback;
+
+ kbdev->as[i].poke_refcount = 0;
+ kbdev->as[i].poke_state = 0u;
+ }
+
+ return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+ destroy_workqueue(kbdev->as[i].pf_wq);
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+ int i, err;
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ err = kbase_device_as_init(kbdev, i);
+ if (err)
+ goto free_workqs;
+ }
+
+ return 0;
+
+free_workqs:
+	while (i-- > 0)
+		kbase_device_as_term(kbdev, i);
+
+ return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+ int i;
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+ kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+ int i, err;
+
+ spin_lock_init(&kbdev->mmu_mask_change);
+ /* Get the list of workarounds for issues on the current HW
+ * (identified by the GPU_ID register)
+ */
+ err = kbase_hw_set_issues_mask(kbdev);
+ if (err)
+ goto fail;
+ /* Set the list of features available on the current HW
+ * (identified by the GPU_ID register)
+ */
+ kbase_hw_set_features_mask(kbdev);
+
+#if defined(CONFIG_ARM64)
+ set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif /* CONFIG_ARM64 */
+
+ /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+ * device structure was created by device-tree
+ */
+ if (!kbdev->dev->dma_mask)
+ kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+ err = dma_set_mask(kbdev->dev,
+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+ if (err)
+ goto dma_set_mask_failed;
+
+ err = dma_set_coherent_mask(kbdev->dev,
+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+ if (err)
+ goto dma_set_mask_failed;
+
+ err = kbase_mem_lowlevel_init(kbdev);
+ if (err)
+ goto mem_lowlevel_init_failed;
+
+ kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+ err = kbase_device_all_as_init(kbdev);
+ if (err)
+ goto term_lowlevel_mem;
+
+ spin_lock_init(&kbdev->hwcnt.lock);
+
+ err = kbasep_trace_init(kbdev);
+ if (err)
+ goto term_as;
+
+ mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+ kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+ for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+ atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+ /* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+ for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+ kbdev->kbase_profiling_controls[i] = 0;
+
+ kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+ atomic_set(&kbdev->ctx_num, 0);
+
+ err = kbase_instr_backend_init(kbdev);
+ if (err)
+ goto term_trace;
+
+ kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+ kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+ kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+
+ return 0;
+term_trace:
+ kbasep_trace_term(kbdev);
+term_as:
+ kbase_device_all_as_term(kbdev);
+term_lowlevel_mem:
+ kbase_mem_lowlevel_term(kbdev);
+mem_lowlevel_init_failed:
+dma_set_mask_failed:
+fail:
+ return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+ kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+ kbase_instr_backend_term(kbdev);
+
+ kbasep_trace_term(kbdev);
+
+ kbase_device_all_as_term(kbdev);
+
+ kbase_mem_lowlevel_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+ kfree(kbdev);
+}
+
+void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(tb);
+
+ /* set up the header */
+ /* magic number in the first 4 bytes */
+ tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no).
+	 * A write offset of 0 means the buffer has never been written to.
+	 * Entry offsets 1 to (wrap_offset - 1) are used once tracing has
+	 * started.
+	 *
+	 * Header word layout, as decoded by
+	 * kbase_device_trace_register_access():
+	 *   bit  0      transaction-in-progress flag
+	 *   bits 15:1   15-bit wrap counter
+	 *   bits 31:16  write offset, in entries
+	 */
+ tb[1] = 0;
+
+ /* install trace buffer */
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / 8; /* each entry is two u32s (8 bytes) */
+ kctx->jctx.tb = tb;
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+ kctx->jctx.tb = NULL;
+ kctx->jctx.tb_wrap_offset = 0;
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+ if (kctx->jctx.tb) {
+ u16 wrap_count;
+ u16 write_offset;
+ u32 *tb = kctx->jctx.tb;
+ u32 header_word;
+
+ header_word = tb[1];
+ KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+ wrap_count = (header_word >> 1) & 0x7FFF;
+ write_offset = (header_word >> 16) & 0xFFFF;
+
+ /* mark as transaction in progress */
+ tb[1] |= 0x1;
+ mb();
+
+ /* calculate new offset */
+ write_offset++;
+ if (write_offset == kctx->jctx.tb_wrap_offset) {
+ /* wrap */
+ write_offset = 1;
+ wrap_count++;
+ wrap_count &= 0x7FFF; /* 15bit wrap counter */
+ }
+
+ /* store the trace entry at the selected offset */
+ tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+ tb[write_offset * 2 + 1] = reg_value;
+ mb();
+
+ /* new header word */
+ header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */
+ tb[1] = header_word;
+ }
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
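
The header word in tb[1] plus the two-word entries written above make the
trace buffer self-describing. The following is a minimal decoding sketch for
a consumer that can see the same buffer; the function name and the use of
printf are illustrative, only the bit layout comes from the code above:

	#include <stdint.h>
	#include <stdio.h>

	static void decode_latest_entry(const uint32_t *tb)
	{
		uint32_t header = tb[1];
		uint16_t write_offset;
		uint32_t word0, word1;

		if (header & 0x1)
			return;	/* transaction in progress, try again */

		write_offset = (header >> 16) & 0xFFFF;
		if (write_offset == 0)
			return;	/* buffer has never been written */

		/* each entry is two u32 words stored at tb[offset * 2] */
		word0 = tb[write_offset * 2];
		word1 = tb[write_offset * 2 + 1];

		printf("%s reg 0x%04x value 0x%08x\n",
		       (word0 & 0x1) ? "WRITE" : "READ",
		       word0 & ~0x3u, word1);
	}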
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+ struct kbase_trace *rbuf;
+
+ rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+ if (!rbuf)
+		return -ENOMEM;
+
+ kbdev->trace_rbuf = rbuf;
+ spin_lock_init(&kbdev->trace_lock);
+ return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+ kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+ s32 written = 0;
+
+ /* Initial part of message */
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+ if (trace_msg->katom)
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+ /* NOTE: Could add function callbacks to handle different message types */
+ /* Jobslot present */
+ if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+ /* Refcount present */
+ if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+ /* Rest of message */
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
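
For orientation, a fully populated message leaves this formatter as a single
comma-separated line of the form (all values illustrative; the code field is
one of the strings in kbasep_trace_code_string):

	123.456789,1042,2,<CODE_NAME>,ffffffc012345678,atom 3 (ud: 0x1 0x2),00012000,1,2,0x00000004

i.e. seconds.microseconds, thread id, cpu, trace code, context pointer,
optional atom details, gpu address, jobslot, refcount and info_val, with
jobslot and refcount left empty when their flags are not set.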
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+ char buffer[DEBUG_MESSAGE_SIZE];
+
+ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+ dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+ unsigned long irqflags;
+ struct kbase_trace *trace_msg;
+
+ spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+ trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+ /* Fill the message */
+ trace_msg->thread_id = task_pid_nr(current);
+ trace_msg->cpu = task_cpu(current);
+
+ getnstimeofday(&trace_msg->timestamp);
+
+ trace_msg->code = code;
+ trace_msg->ctx = ctx;
+
+ if (NULL == katom) {
+ trace_msg->katom = false;
+ } else {
+ trace_msg->katom = true;
+ trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+ trace_msg->atom_udata[0] = katom->udata.blob[0];
+ trace_msg->atom_udata[1] = katom->udata.blob[1];
+ }
+
+ trace_msg->gpu_addr = gpu_addr;
+ trace_msg->jobslot = jobslot;
+ trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+ trace_msg->info_val = info_val;
+ trace_msg->flags = flags;
+
+ /* Update the ringbuffer indices */
+ kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+ if (kbdev->trace_next_in == kbdev->trace_first_out)
+ kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+ /* Done */
+
+ spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ kbdev->trace_first_out = kbdev->trace_next_in;
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 start;
+ u32 end;
+
+ dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ start = kbdev->trace_first_out;
+ end = kbdev->trace_next_in;
+
+ while (start != end) {
+ struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+ kbasep_trace_dump_msg(kbdev, trace_msg);
+
+ start = (start + 1) & KBASE_TRACE_MASK;
+ }
+ dev_dbg(kbdev->dev, "TRACE_END");
+
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+ KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)param;
+
+ kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+ struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+ u32 start;
+ u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct trace_seq_state *state = s->private;
+ int i;
+
+ if (*pos > KBASE_TRACE_SIZE)
+ return NULL;
+ i = state->start + *pos;
+ if ((state->end >= state->start && i >= state->end) ||
+ i >= state->end + KBASE_TRACE_SIZE)
+ return NULL;
+
+ i &= KBASE_TRACE_MASK;
+
+ return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+ struct trace_seq_state *state = s->private;
+ int i;
+
+ (*pos)++;
+
+ i = (state->start + *pos) & KBASE_TRACE_MASK;
+ if (i == state->end)
+ return NULL;
+
+ return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+ struct kbase_trace *trace_msg = data;
+ char buffer[DEBUG_MESSAGE_SIZE];
+
+ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+ seq_printf(s, "%s\n", buffer);
+ return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+ .start = kbasep_trace_seq_start,
+ .next = kbasep_trace_seq_next,
+ .stop = kbasep_trace_seq_stop,
+ .show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+ struct kbase_device *kbdev = inode->i_private;
+ unsigned long flags;
+
+ struct trace_seq_state *state;
+
+ state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+ if (!state)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ state->start = kbdev->trace_first_out;
+ state->end = kbdev->trace_next_in;
+ memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+ return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+ .open = kbasep_trace_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+ debugfs_create_file("mali_trace", S_IRUGO,
+ kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
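
With the file registered above, a snapshot of the ring buffer can be read
from userspace as plain text, one formatted message per line. A minimal
sketch -- the directory part of the path is an assumption, since it depends
on where mali_debugfs_directory is created (commonly under
/sys/kernel/debug); only the "mali_trace" name comes from the code above:

	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/kernel/debug/mali/mali_trace", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}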
+
+#else /* KBASE_TRACE_ENABLE */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+ return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+ CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+ switch (control) {
+ case FBDUMP_CONTROL_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RATE:
+ /* fall through */
+ case SW_COUNTER_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RESIZE_FACTOR:
+ kbdev->kbase_profiling_controls[control] = value;
+ break;
+ default:
+ dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+ break;
+ }
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+ u32 ret_value = 0;
+
+ switch (control) {
+ case FBDUMP_CONTROL_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RATE:
+ /* fall through */
+ case SW_COUNTER_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RESIZE_FACTOR:
+ ret_value = kbdev->kbase_profiling_controls[control];
+ break;
+ default:
+ dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+ break;
+ }
+
+ return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime.
+ */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+ struct kbase_device *kbdev = NULL;
+
+	/* find the first device, i.e. call with -1 */
+ kbdev = kbase_find_device(-1);
+
+ if (NULL != kbdev)
+ kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
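
As an illustration of the control path above, a profiler would enable
frame-buffer dumping with two calls; the rate value here is a made-up
example, not a recommended setting:

	/* controls are the FBDUMP_CONTROL_* cases handled above */
	_mali_profiling_control(FBDUMP_CONTROL_RATE, 16);
	_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);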
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100644
index 00000000000..f70bcccf405
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_set(&kbdev->disjoint_event.count, 0);
+ atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_inc(&kbdev->disjoint_event.state);
+
+ kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+ kbase_disjoint_event(kbdev);
+
+ atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ if (atomic_read(&kbdev->disjoint_event.state))
+ kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
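
The helpers above are intended to bracket operations that make GPU timing
discontinuous, with kbase_disjoint_event_potential() called from submission
paths so events are only counted while such an operation is in flight. A
sketch of the pattern -- the work in the middle is hypothetical:

	static void do_disjoint_prone_work(struct kbase_device *kbdev)
	{
		kbase_disjoint_state_up(kbdev);

		/* ... e.g. soft-stop jobs or reset the GPU ... */

		kbase_disjoint_state_down(kbdev);
	}

While the state is raised, each kbase_disjoint_event_potential() call bumps
the counter reported by kbase_disjoint_event_get().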
diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100644
index 00000000000..784acecae17
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,195 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct base_jd_udata data;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+ data = katom->udata;
+
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
+ kbase_tlstream_tl_del_atom(katom);
+#endif
+
+ mutex_lock(&kctx->jctx.lock);
+ katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+ mutex_unlock(&kctx->jctx.lock);
+
+ wake_up(&katom->completed);
+
+ return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+ int ret;
+
+ KBASE_DEBUG_ASSERT(ctx);
+
+ mutex_lock(&ctx->event_mutex);
+ ret = (!list_empty(&ctx->event_list)) || (true == ctx->event_closed);
+ mutex_unlock(&ctx->event_mutex);
+
+ return ret;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+ struct kbase_jd_atom *atom;
+
+ KBASE_DEBUG_ASSERT(ctx);
+
+ mutex_lock(&ctx->event_mutex);
+
+ if (list_empty(&ctx->event_list)) {
+ if (!ctx->event_closed) {
+ mutex_unlock(&ctx->event_mutex);
+ return -1;
+ }
+
+ /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+ mutex_unlock(&ctx->event_mutex);
+ uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+ memset(&uevent->udata, 0, sizeof(uevent->udata));
+ dev_dbg(ctx->kbdev->dev,
+ "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+ BASE_JD_EVENT_DRV_TERMINATED);
+ return 0;
+ }
+
+ /* normal event processing */
+ atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+ list_del(ctx->event_list.next);
+
+ mutex_unlock(&ctx->event_mutex);
+
+ dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+ uevent->event_code = atom->event_code;
+ uevent->atom_number = (atom - ctx->jctx.atoms);
+ uevent->udata = kbase_event_process(ctx, atom);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
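
Together, kbase_event_pending() and kbase_event_dequeue() support a simple
consumer loop. A sketch, with the ioctl plumbing and wait mechanism omitted
and the function name illustrative:

	static void drain_events(struct kbase_context *kctx)
	{
		struct base_jd_event_v2 uevent;

		while (kbase_event_dequeue(kctx, &uevent) == 0) {
			if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED)
				break;	/* context is shutting down */
			/* hand uevent.atom_number / uevent.udata to userspace */
		}
	}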
+
+static void kbase_event_post_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *atom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_context *ctx = atom->kctx;
+
+ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+ kbase_jd_free_external_resources(atom);
+
+ if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+ if (atom->event_code == BASE_JD_EVENT_DONE) {
+ /* Don't report the event */
+ kbase_event_process(ctx, atom);
+ return;
+ }
+ }
+
+ if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+ /* Don't report the event */
+ kbase_event_process(ctx, atom);
+ return;
+ }
+
+ mutex_lock(&ctx->event_mutex);
+ list_add_tail(&atom->dep_item[0], &ctx->event_list);
+ mutex_unlock(&ctx->event_mutex);
+
+ kbase_event_wakeup(ctx);
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+ KBASE_DEBUG_ASSERT(ctx);
+ KBASE_DEBUG_ASSERT(ctx->event_workq);
+ KBASE_DEBUG_ASSERT(atom);
+
+ INIT_WORK(&atom->work, kbase_event_post_worker);
+ queue_work(ctx->event_workq, &atom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+ mutex_lock(&kctx->event_mutex);
+ kctx->event_closed = true;
+ mutex_unlock(&kctx->event_mutex);
+ kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+ INIT_LIST_HEAD(&kctx->event_list);
+ mutex_init(&kctx->event_mutex);
+ kctx->event_closed = false;
+ kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+ if (NULL == kctx->event_workq)
+		return -ENOMEM;
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+ flush_workqueue(kctx->event_workq);
+ destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this
+	 * must be the last thread using it (we are about to tear down the
+	 * context, and its event_mutex with it)
+	 */
+ while (!list_empty(&kctx->event_list)) {
+ struct base_jd_event_v2 event;
+
+ kbase_event_dequeue(kctx, &event);
+ }
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100644
index 00000000000..ce65b5562a2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP 2
+#define GATOR_JOB_SLOT_SOFT_STOPPED 3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif /* _KBASE_GATOR_H_ */
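
As a worked example of the encoding above, a job-slot start event on slot 1
packs the event type into bits 31..24 and the slot number into bits 23..16:

	u32 ev = GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, 1);	/* == 0x01010000 */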
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100644
index 00000000000..6ef4e39c5cc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,322 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+#include "mali_kbase_instr.h"
+
+#define MALI_MAX_CORES_PER_GROUP 4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8
+#define MALI_COUNTERS_PER_BLOCK 64
+#define MALI_BYTES_PER_COUNTER 4
+
+struct kbase_gator_hwcnt_handles {
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ u64 hwcnt_gpu_va;
+ void *hwcnt_cpu_va;
+ struct kbase_vmap_struct hwcnt_map;
+};
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+ uint32_t gpu_id;
+ const char * const *hardware_counters;
+ struct kbase_device *kbdev;
+
+ if (!total_counters)
+ return NULL;
+
+ /* Get the first device - it doesn't matter in this case */
+ kbdev = kbase_find_device(-1);
+ if (!kbdev)
+ return NULL;
+
+ gpu_id = kbdev->gpu_props.props.core_props.product_id;
+
+ switch (gpu_id) {
+ /* If we are using a Mali-T60x device */
+ case GPU_ID_PI_T60X:
+ hardware_counters = hardware_counters_mali_t60x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t60x);
+ break;
+ /* If we are using a Mali-T62x device */
+ case GPU_ID_PI_T62X:
+ hardware_counters = hardware_counters_mali_t62x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t62x);
+ break;
+ /* If we are using a Mali-T72x device */
+ case GPU_ID_PI_T72X:
+ hardware_counters = hardware_counters_mali_t72x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t72x);
+ break;
+ /* If we are using a Mali-T76x device */
+ case GPU_ID_PI_T76X:
+ hardware_counters = hardware_counters_mali_t76x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t76x);
+ break;
+ /* If we are using a Mali-T82x device */
+ case GPU_ID_PI_T82X:
+ hardware_counters = hardware_counters_mali_t82x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t82x);
+ break;
+ /* If we are using a Mali-T83x device */
+ case GPU_ID_PI_T83X:
+ hardware_counters = hardware_counters_mali_t83x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t83x);
+ break;
+ /* If we are using a Mali-T86x device */
+ case GPU_ID_PI_T86X:
+ hardware_counters = hardware_counters_mali_t86x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t86x);
+ break;
+ /* If we are using a Mali-T88x device */
+ case GPU_ID_PI_TFRX:
+ hardware_counters = hardware_counters_mali_t88x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t88x);
+ break;
+ default:
+ hardware_counters = NULL;
+ *total_counters = 0;
+ dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id);
+ break;
+ }
+
+ /* Release the kbdev reference. */
+ kbase_release_device(kbdev);
+
+ /* If we return a string array take a reference on the module (or fail). */
+ if (hardware_counters && !try_module_get(THIS_MODULE))
+ return NULL;
+
+ return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+ /* Release the module reference. */
+ module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+ struct kbase_gator_hwcnt_handles *hand;
+ struct kbase_uk_hwcnt_setup setup;
+ int err;
+ uint32_t dump_size = 0, i = 0;
+ struct kbase_va_region *reg;
+ u64 flags;
+ u64 nr_pages;
+ u16 va_alignment = 0;
+
+ if (!in_out_info)
+ return NULL;
+
+ hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+ if (!hand)
+ return NULL;
+
+ /* Get the first device */
+ hand->kbdev = kbase_find_device(-1);
+ if (!hand->kbdev)
+ goto free_hand;
+
+ /* Create a kbase_context */
+ hand->kctx = kbase_create_context(hand->kbdev, true);
+ if (!hand->kctx)
+ goto release_device;
+
+ in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+ in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+ in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+ /* If we are using a Mali-T6xx or Mali-T72x device */
+ if (in_out_info->gpu_id == GPU_ID_PI_T60X ||
+ in_out_info->gpu_id == GPU_ID_PI_T62X ||
+ in_out_info->gpu_id == GPU_ID_PI_T72X) {
+ uint32_t cg, j;
+ uint64_t core_mask;
+
+		/* There are 8 hardware counter blocks per core group */
+ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+ MALI_MAX_NUM_BLOCKS_PER_GROUP *
+ in_out_info->nr_core_groups, GFP_KERNEL);
+
+ if (!in_out_info->hwc_layout)
+ goto destroy_context;
+
+ dump_size = in_out_info->nr_core_groups *
+ MALI_MAX_NUM_BLOCKS_PER_GROUP *
+ MALI_COUNTERS_PER_BLOCK *
+ MALI_BYTES_PER_COUNTER;
+
+ for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+ for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+ if (core_mask & (1u << j))
+ in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ }
+
+ in_out_info->hwc_layout[i++] = TILER_BLOCK;
+ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+ if (0 == cg)
+ in_out_info->hwc_layout[i++] = JM_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ }
+ /* If we are using any other device */
+ } else {
+ uint32_t nr_l2, nr_sc, j;
+ uint64_t core_mask;
+
+ nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+ nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+
+ /* The job manager and tiler sets of counters
+ * are always present */
+ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL);
+
+ if (!in_out_info->hwc_layout)
+ goto destroy_context;
+
+ dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+ in_out_info->hwc_layout[i++] = JM_BLOCK;
+ in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+ for (j = 0; j < nr_l2; j++)
+ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+ while (core_mask != 0ull) {
+ if ((core_mask & 1ull) != 0ull)
+ in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ core_mask >>= 1;
+ }
+ }
+
+ in_out_info->nr_hwc_blocks = i;
+
+ in_out_info->size = dump_size;
+
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+ nr_pages = PFN_UP(dump_size);
+ reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
+ &flags, &hand->hwcnt_gpu_va, &va_alignment);
+ if (!reg)
+ goto free_layout;
+
+ hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va,
+ dump_size, &hand->hwcnt_map);
+
+ if (!hand->hwcnt_cpu_va)
+ goto free_buffer;
+
+ in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
+
+ /*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/
+ setup.dump_buffer = hand->hwcnt_gpu_va;
+ setup.jm_bm = in_out_info->bitmask[0];
+ setup.tiler_bm = in_out_info->bitmask[1];
+ setup.shader_bm = in_out_info->bitmask[2];
+ setup.mmu_l2_bm = in_out_info->bitmask[3];
+
+ err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
+ if (err)
+ goto free_unmap;
+
+ kbase_instr_hwcnt_clear(hand->kctx);
+
+ return hand;
+
+free_unmap:
+ kbase_vunmap(hand->kctx, &hand->hwcnt_map);
+
+free_buffer:
+ kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);
+
+free_layout:
+ kfree(in_out_info->hwc_layout);
+
+destroy_context:
+ kbase_destroy_context(hand->kctx);
+
+release_device:
+ kbase_release_device(hand->kbdev);
+
+free_hand:
+ kfree(hand);
+
+ return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
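
As a worked example of the sizing logic above: on a hypothetical GPU outside
the T6xx/T72x branch with one L2 slice and four shader cores all present in
the core mask, the layout is JM, tiler, one MMU/L2 block and four shader
blocks, so dump_size = (2 + 4 + 1) * 64 counters * 4 bytes = 1792 bytes,
which PFN_UP() rounds up to a single page.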
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+ if (in_out_info)
+ kfree(in_out_info->hwc_layout);
+
+ if (opaque_handles) {
+ kbase_instr_hwcnt_disable(opaque_handles->kctx);
+ kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map);
+ kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va);
+ kbase_destroy_context(opaque_handles->kctx);
+ kbase_release_device(opaque_handles->kbdev);
+ kfree(opaque_handles);
+ }
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+ struct kbase_gator_hwcnt_handles *opaque_handles,
+ uint32_t * const success)
+{
+ bool ret_res, success_res;
+
+ if (opaque_handles && success) {
+ ret_res = kbase_instr_hwcnt_dump_complete(opaque_handles->kctx,
+ &success_res);
+ *success = (uint32_t)success_res;
+ return (uint32_t)(ret_res != 0);
+ }
+ return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+ if (opaque_handles)
+ return (kbase_instr_hwcnt_request_dump(
+ opaque_handles->kctx) == 0);
+
+ return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100644
index 00000000000..ef9ac0f7b63
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used when compiling the gator kernel module to select which
+ * DDK API calling convention to use. If not defined (legacy DDK) gator
+ * assumes version 1. The version to DDK release mapping is:
+ * Version 1 API: DDK versions r1px, r2px
+ * Version 2 API: DDK versions r3px, r4px
+ * Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_instr_hwcnt_dump_irq() to queue a non-blocking request
+ * for a counter dump. If this returns a non-zero value the request has been
+ * queued, otherwise the driver has been unable to do so (typically because
+ * another user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_instr_hwcnt_dump_complete() to test whether the
+ * previously requested dump has been successful. If this returns non-zero the
+ * counter dump has resolved, but the value of *success must also be tested as
+ * the dump may not have been successful. If it returns zero the counter dump
+ * was abandoned due to the device being busy (typically because another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ * kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ * In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ * hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ * hwcnt_name # pointer to name list
+ *
+ * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ * # Iterate over each 64-counter block in this GPU configuration
+ * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ * hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ * # Skip reserved type blocks - they contain no counters at all
+ * if( type == RESERVED_BLOCK ) {
+ * continue;
+ * }
+ *
+ * size_t name_offset = type * 64;
+ * size_t data_offset = i * 64;
+ *
+ * # Iterate over the names of the counters in this block type
+ * for( j = 0; j < 64; j++) {
+ * const char * name = hwcnt_name[name_offset+j];
+ *
+ * # Skip empty name strings - there is no counter here
+ * if( name[0] == '\0' ) {
+ * continue;
+ * }
+ *
+ * u32 data = hwcnt_data[data_offset+j];
+ *
+ * printk( "COUNTER: %s DATA: %u\n", name, data );
+ * }
+ * }
+ *
+ *
+ * Note that in most implementations you typically want to either SUM or
+ * AVERAGE multiple instances of the same counter if, for example, you have
+ * multiple shader cores or multiple L2 caches. The most sensible view for
+ * analysis is to AVERAGE shader core counters, but to SUM L2 cache and MMU
+ * counters. (A runnable C version of the loop above is sketched after the
+ * end of this header.)
+ *
+ * 7] Go to step 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ * kbase_gator_hwcnt_term_names(). This function must only be called if
+ * init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+ JM_BLOCK = 0,
+ TILER_BLOCK,
+ SHADER_BLOCK,
+ MMU_L2_BLOCK,
+ RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+ /* Passed from Gator to kbase */
+
+ /* the bitmask of enabled hardware counters for each counter block */
+ uint16_t bitmask[4];
+
+ /* Passed from kbase to Gator */
+
+ /* ptr to counter dump memory */
+ void *kernel_dump_buffer;
+
+ /* size of counter dump memory */
+ uint32_t size;
+
+ /* the ID of the Mali device */
+ uint32_t gpu_id;
+
+ /* the number of shader cores in the GPU */
+ uint32_t nr_cores;
+
+ /* the number of core groups */
+ uint32_t nr_core_groups;
+
+ /* the memory layout of the performance counters */
+ enum hwc_type *hwc_layout;
+
+	/* the total number of hardware counter blocks */
+ uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ * specific information that will be returned to Gator. On entry Gator must have populated the
+ * 'bitmask' field with the counters it wishes to enable for each class of counter block.
+ * Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ * the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                    kbase_gator_hwcnt_init_names() for the index values of each counter for the current GPU.
+ *
+ * @return Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the
+ * Mali specific information that will be returned to Gator.
+ * @param opaque_handles A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles A wrapper structure for kbase structures.
+ * @param[out] success Non-zero on success, zero on failure.
+ *
+ * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *         completed dump may not have dumped successfully, so the caller must test for both
+ * a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles A wrapper structure for kbase structures.
+ *
+ * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
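
The pseudocode in the usage comment above translates into C along these
lines; a sketch assuming "info" and "names" come from successful calls to
kbase_gator_hwcnt_init() and kbase_gator_hwcnt_init_names():

	#include <stdint.h>
	#include <stdio.h>

	static void print_counters(const struct kbase_gator_hwcnt_info *info,
				   const char * const *names)
	{
		const uint32_t *data = info->kernel_dump_buffer;
		uint32_t i, j;

		for (i = 0; i < info->nr_hwc_blocks; i++) {
			enum hwc_type type = info->hwc_layout[i];

			if (type == RESERVED_BLOCK)
				continue;	/* reserved blocks hold no counters */

			for (j = 0; j < 64; j++) {
				const char *name = names[type * 64 + j];

				if (name[0] == '\0')
					continue;	/* hole in the counter map */

				printf("COUNTER: %s DATA: %u\n",
				       name, data[i * 64 + j]);
			}
		}
	}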
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100644
index 00000000000..d124e82edd0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2159 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T60x_MESSAGES_SENT",
+ "T60x_MESSAGES_RECEIVED",
+ "T60x_GPU_ACTIVE",
+ "T60x_IRQ_ACTIVE",
+ "T60x_JS0_JOBS",
+ "T60x_JS0_TASKS",
+ "T60x_JS0_ACTIVE",
+ "",
+ "T60x_JS0_WAIT_READ",
+ "T60x_JS0_WAIT_ISSUE",
+ "T60x_JS0_WAIT_DEPEND",
+ "T60x_JS0_WAIT_FINISH",
+ "T60x_JS1_JOBS",
+ "T60x_JS1_TASKS",
+ "T60x_JS1_ACTIVE",
+ "",
+ "T60x_JS1_WAIT_READ",
+ "T60x_JS1_WAIT_ISSUE",
+ "T60x_JS1_WAIT_DEPEND",
+ "T60x_JS1_WAIT_FINISH",
+ "T60x_JS2_JOBS",
+ "T60x_JS2_TASKS",
+ "T60x_JS2_ACTIVE",
+ "",
+ "T60x_JS2_WAIT_READ",
+ "T60x_JS2_WAIT_ISSUE",
+ "T60x_JS2_WAIT_DEPEND",
+ "T60x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T60x_TI_JOBS_PROCESSED",
+ "T60x_TI_TRIANGLES",
+ "T60x_TI_QUADS",
+ "T60x_TI_POLYGONS",
+ "T60x_TI_POINTS",
+ "T60x_TI_LINES",
+ "T60x_TI_VCACHE_HIT",
+ "T60x_TI_VCACHE_MISS",
+ "T60x_TI_FRONT_FACING",
+ "T60x_TI_BACK_FACING",
+ "T60x_TI_PRIM_VISIBLE",
+ "T60x_TI_PRIM_CULLED",
+ "T60x_TI_PRIM_CLIPPED",
+ "T60x_TI_LEVEL0",
+ "T60x_TI_LEVEL1",
+ "T60x_TI_LEVEL2",
+ "T60x_TI_LEVEL3",
+ "T60x_TI_LEVEL4",
+ "T60x_TI_LEVEL5",
+ "T60x_TI_LEVEL6",
+ "T60x_TI_LEVEL7",
+ "T60x_TI_COMMAND_1",
+ "T60x_TI_COMMAND_2",
+ "T60x_TI_COMMAND_3",
+ "T60x_TI_COMMAND_4",
+ "T60x_TI_COMMAND_4_7",
+ "T60x_TI_COMMAND_8_15",
+ "T60x_TI_COMMAND_16_63",
+ "T60x_TI_COMMAND_64",
+ "T60x_TI_COMPRESS_IN",
+ "T60x_TI_COMPRESS_OUT",
+ "T60x_TI_COMPRESS_FLUSH",
+ "T60x_TI_TIMESTAMPS",
+ "T60x_TI_PCACHE_HIT",
+ "T60x_TI_PCACHE_MISS",
+ "T60x_TI_PCACHE_LINE",
+ "T60x_TI_PCACHE_STALL",
+ "T60x_TI_WRBUF_HIT",
+ "T60x_TI_WRBUF_MISS",
+ "T60x_TI_WRBUF_LINE",
+ "T60x_TI_WRBUF_PARTIAL",
+ "T60x_TI_WRBUF_STALL",
+ "T60x_TI_ACTIVE",
+ "T60x_TI_LOADING_DESC",
+ "T60x_TI_INDEX_WAIT",
+ "T60x_TI_INDEX_RANGE_WAIT",
+ "T60x_TI_VERTEX_WAIT",
+ "T60x_TI_PCACHE_WAIT",
+ "T60x_TI_WRBUF_WAIT",
+ "T60x_TI_BUS_READ",
+ "T60x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_TI_UTLB_STALL",
+ "T60x_TI_UTLB_REPLAY_MISS",
+ "T60x_TI_UTLB_REPLAY_FULL",
+ "T60x_TI_UTLB_NEW_MISS",
+ "T60x_TI_UTLB_HIT",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T60x_FRAG_ACTIVE",
+ "T60x_FRAG_PRIMITIVES",
+ "T60x_FRAG_PRIMITIVES_DROPPED",
+ "T60x_FRAG_CYCLES_DESC",
+ "T60x_FRAG_CYCLES_PLR",
+ "T60x_FRAG_CYCLES_VERT",
+ "T60x_FRAG_CYCLES_TRISETUP",
+ "T60x_FRAG_CYCLES_RAST",
+ "T60x_FRAG_THREADS",
+ "T60x_FRAG_DUMMY_THREADS",
+ "T60x_FRAG_QUADS_RAST",
+ "T60x_FRAG_QUADS_EZS_TEST",
+ "T60x_FRAG_QUADS_EZS_KILLED",
+ "T60x_FRAG_THREADS_LZS_TEST",
+ "T60x_FRAG_THREADS_LZS_KILLED",
+ "T60x_FRAG_CYCLES_NO_TILE",
+ "T60x_FRAG_NUM_TILES",
+ "T60x_FRAG_TRANS_ELIM",
+ "T60x_COMPUTE_ACTIVE",
+ "T60x_COMPUTE_TASKS",
+ "T60x_COMPUTE_THREADS",
+ "T60x_COMPUTE_CYCLES_DESC",
+ "T60x_TRIPIPE_ACTIVE",
+ "T60x_ARITH_WORDS",
+ "T60x_ARITH_CYCLES_REG",
+ "T60x_ARITH_CYCLES_L0",
+ "T60x_ARITH_FRAG_DEPEND",
+ "T60x_LS_WORDS",
+ "T60x_LS_ISSUES",
+ "T60x_LS_RESTARTS",
+ "T60x_LS_REISSUES_MISS",
+ "T60x_LS_REISSUES_VD",
+ "T60x_LS_REISSUE_ATTRIB_MISS",
+ "T60x_LS_NO_WB",
+ "T60x_TEX_WORDS",
+ "T60x_TEX_BUBBLES",
+ "T60x_TEX_WORDS_L0",
+ "T60x_TEX_WORDS_DESC",
+ "T60x_TEX_ISSUES",
+ "T60x_TEX_RECIRC_FMISS",
+ "T60x_TEX_RECIRC_DESC",
+ "T60x_TEX_RECIRC_MULTI",
+ "T60x_TEX_RECIRC_PMISS",
+ "T60x_TEX_RECIRC_CONF",
+ "T60x_LSC_READ_HITS",
+ "T60x_LSC_READ_MISSES",
+ "T60x_LSC_WRITE_HITS",
+ "T60x_LSC_WRITE_MISSES",
+ "T60x_LSC_ATOMIC_HITS",
+ "T60x_LSC_ATOMIC_MISSES",
+ "T60x_LSC_LINE_FETCHES",
+ "T60x_LSC_DIRTY_LINE",
+ "T60x_LSC_SNOOPS",
+ "T60x_AXI_TLB_STALL",
+ "T60x_AXI_TLB_MIESS",
+ "T60x_AXI_TLB_TRANSACTION",
+ "T60x_LS_TLB_MISS",
+ "T60x_LS_TLB_HIT",
+ "T60x_AXI_BEATS_READ",
+ "T60x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T60x_MMU_HIT",
+ "T60x_MMU_NEW_MISS",
+ "T60x_MMU_REPLAY_FULL",
+ "T60x_MMU_REPLAY_MISS",
+ "T60x_MMU_TABLE_WALK",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_UTLB_HIT",
+ "T60x_UTLB_NEW_MISS",
+ "T60x_UTLB_REPLAY_FULL",
+ "T60x_UTLB_REPLAY_MISS",
+ "T60x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_L2_EXT_WRITE_BEATS",
+ "T60x_L2_EXT_READ_BEATS",
+ "T60x_L2_ANY_LOOKUP",
+ "T60x_L2_READ_LOOKUP",
+ "T60x_L2_SREAD_LOOKUP",
+ "T60x_L2_READ_REPLAY",
+ "T60x_L2_READ_SNOOP",
+ "T60x_L2_READ_HIT",
+ "T60x_L2_CLEAN_MISS",
+ "T60x_L2_WRITE_LOOKUP",
+ "T60x_L2_SWRITE_LOOKUP",
+ "T60x_L2_WRITE_REPLAY",
+ "T60x_L2_WRITE_SNOOP",
+ "T60x_L2_WRITE_HIT",
+ "T60x_L2_EXT_READ_FULL",
+ "T60x_L2_EXT_READ_HALF",
+ "T60x_L2_EXT_WRITE_FULL",
+ "T60x_L2_EXT_WRITE_HALF",
+ "T60x_L2_EXT_READ",
+ "T60x_L2_EXT_READ_LINE",
+ "T60x_L2_EXT_WRITE",
+ "T60x_L2_EXT_WRITE_LINE",
+ "T60x_L2_EXT_WRITE_SMALL",
+ "T60x_L2_EXT_BARRIER",
+ "T60x_L2_EXT_AR_STALL",
+ "T60x_L2_EXT_R_BUF_FULL",
+ "T60x_L2_EXT_RD_BUF_FULL",
+ "T60x_L2_EXT_R_RAW",
+ "T60x_L2_EXT_W_STALL",
+ "T60x_L2_EXT_W_BUF_FULL",
+ "T60x_L2_EXT_R_W_HAZARD",
+ "T60x_L2_TAG_HAZARD",
+ "T60x_L2_SNOOP_FULL",
+ "T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T62x_MESSAGES_SENT",
+ "T62x_MESSAGES_RECEIVED",
+ "T62x_GPU_ACTIVE",
+ "T62x_IRQ_ACTIVE",
+ "T62x_JS0_JOBS",
+ "T62x_JS0_TASKS",
+ "T62x_JS0_ACTIVE",
+ "",
+ "T62x_JS0_WAIT_READ",
+ "T62x_JS0_WAIT_ISSUE",
+ "T62x_JS0_WAIT_DEPEND",
+ "T62x_JS0_WAIT_FINISH",
+ "T62x_JS1_JOBS",
+ "T62x_JS1_TASKS",
+ "T62x_JS1_ACTIVE",
+ "",
+ "T62x_JS1_WAIT_READ",
+ "T62x_JS1_WAIT_ISSUE",
+ "T62x_JS1_WAIT_DEPEND",
+ "T62x_JS1_WAIT_FINISH",
+ "T62x_JS2_JOBS",
+ "T62x_JS2_TASKS",
+ "T62x_JS2_ACTIVE",
+ "",
+ "T62x_JS2_WAIT_READ",
+ "T62x_JS2_WAIT_ISSUE",
+ "T62x_JS2_WAIT_DEPEND",
+ "T62x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T62x_TI_JOBS_PROCESSED",
+ "T62x_TI_TRIANGLES",
+ "T62x_TI_QUADS",
+ "T62x_TI_POLYGONS",
+ "T62x_TI_POINTS",
+ "T62x_TI_LINES",
+ "T62x_TI_VCACHE_HIT",
+ "T62x_TI_VCACHE_MISS",
+ "T62x_TI_FRONT_FACING",
+ "T62x_TI_BACK_FACING",
+ "T62x_TI_PRIM_VISIBLE",
+ "T62x_TI_PRIM_CULLED",
+ "T62x_TI_PRIM_CLIPPED",
+ "T62x_TI_LEVEL0",
+ "T62x_TI_LEVEL1",
+ "T62x_TI_LEVEL2",
+ "T62x_TI_LEVEL3",
+ "T62x_TI_LEVEL4",
+ "T62x_TI_LEVEL5",
+ "T62x_TI_LEVEL6",
+ "T62x_TI_LEVEL7",
+ "T62x_TI_COMMAND_1",
+ "T62x_TI_COMMAND_2",
+ "T62x_TI_COMMAND_3",
+ "T62x_TI_COMMAND_4",
+ "T62x_TI_COMMAND_5_7",
+ "T62x_TI_COMMAND_8_15",
+ "T62x_TI_COMMAND_16_63",
+ "T62x_TI_COMMAND_64",
+ "T62x_TI_COMPRESS_IN",
+ "T62x_TI_COMPRESS_OUT",
+ "T62x_TI_COMPRESS_FLUSH",
+ "T62x_TI_TIMESTAMPS",
+ "T62x_TI_PCACHE_HIT",
+ "T62x_TI_PCACHE_MISS",
+ "T62x_TI_PCACHE_LINE",
+ "T62x_TI_PCACHE_STALL",
+ "T62x_TI_WRBUF_HIT",
+ "T62x_TI_WRBUF_MISS",
+ "T62x_TI_WRBUF_LINE",
+ "T62x_TI_WRBUF_PARTIAL",
+ "T62x_TI_WRBUF_STALL",
+ "T62x_TI_ACTIVE",
+ "T62x_TI_LOADING_DESC",
+ "T62x_TI_INDEX_WAIT",
+ "T62x_TI_INDEX_RANGE_WAIT",
+ "T62x_TI_VERTEX_WAIT",
+ "T62x_TI_PCACHE_WAIT",
+ "T62x_TI_WRBUF_WAIT",
+ "T62x_TI_BUS_READ",
+ "T62x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_TI_UTLB_STALL",
+ "T62x_TI_UTLB_REPLAY_MISS",
+ "T62x_TI_UTLB_REPLAY_FULL",
+ "T62x_TI_UTLB_NEW_MISS",
+ "T62x_TI_UTLB_HIT",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "T62x_SHADER_CORE_ACTIVE",
+ "T62x_FRAG_ACTIVE",
+ "T62x_FRAG_PRIMITIVES",
+ "T62x_FRAG_PRIMITIVES_DROPPED",
+ "T62x_FRAG_CYCLES_DESC",
+ "T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T62x_FRAG_CYCLES_VERT",
+ "T62x_FRAG_CYCLES_TRISETUP",
+ "T62x_FRAG_CYCLES_EZS_ACTIVE",
+ "T62x_FRAG_THREADS",
+ "T62x_FRAG_DUMMY_THREADS",
+ "T62x_FRAG_QUADS_RAST",
+ "T62x_FRAG_QUADS_EZS_TEST",
+ "T62x_FRAG_QUADS_EZS_KILLED",
+ "T62x_FRAG_THREADS_LZS_TEST",
+ "T62x_FRAG_THREADS_LZS_KILLED",
+ "T62x_FRAG_CYCLES_NO_TILE",
+ "T62x_FRAG_NUM_TILES",
+ "T62x_FRAG_TRANS_ELIM",
+ "T62x_COMPUTE_ACTIVE",
+ "T62x_COMPUTE_TASKS",
+ "T62x_COMPUTE_THREADS",
+ "T62x_COMPUTE_CYCLES_DESC",
+ "T62x_TRIPIPE_ACTIVE",
+ "T62x_ARITH_WORDS",
+ "T62x_ARITH_CYCLES_REG",
+ "T62x_ARITH_CYCLES_L0",
+ "T62x_ARITH_FRAG_DEPEND",
+ "T62x_LS_WORDS",
+ "T62x_LS_ISSUES",
+ "T62x_LS_RESTARTS",
+ "T62x_LS_REISSUES_MISS",
+ "T62x_LS_REISSUES_VD",
+ "T62x_LS_REISSUE_ATTRIB_MISS",
+ "T62x_LS_NO_WB",
+ "T62x_TEX_WORDS",
+ "T62x_TEX_BUBBLES",
+ "T62x_TEX_WORDS_L0",
+ "T62x_TEX_WORDS_DESC",
+ "T62x_TEX_ISSUES",
+ "T62x_TEX_RECIRC_FMISS",
+ "T62x_TEX_RECIRC_DESC",
+ "T62x_TEX_RECIRC_MULTI",
+ "T62x_TEX_RECIRC_PMISS",
+ "T62x_TEX_RECIRC_CONF",
+ "T62x_LSC_READ_HITS",
+ "T62x_LSC_READ_MISSES",
+ "T62x_LSC_WRITE_HITS",
+ "T62x_LSC_WRITE_MISSES",
+ "T62x_LSC_ATOMIC_HITS",
+ "T62x_LSC_ATOMIC_MISSES",
+ "T62x_LSC_LINE_FETCHES",
+ "T62x_LSC_DIRTY_LINE",
+ "T62x_LSC_SNOOPS",
+ "T62x_AXI_TLB_STALL",
+ "T62x_AXI_TLB_MIESS",
+ "T62x_AXI_TLB_TRANSACTION",
+ "T62x_LS_TLB_MISS",
+ "T62x_LS_TLB_HIT",
+ "T62x_AXI_BEATS_READ",
+ "T62x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T62x_MMU_HIT",
+ "T62x_MMU_NEW_MISS",
+ "T62x_MMU_REPLAY_FULL",
+ "T62x_MMU_REPLAY_MISS",
+ "T62x_MMU_TABLE_WALK",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_UTLB_HIT",
+ "T62x_UTLB_NEW_MISS",
+ "T62x_UTLB_REPLAY_FULL",
+ "T62x_UTLB_REPLAY_MISS",
+ "T62x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_L2_EXT_WRITE_BEATS",
+ "T62x_L2_EXT_READ_BEATS",
+ "T62x_L2_ANY_LOOKUP",
+ "T62x_L2_READ_LOOKUP",
+ "T62x_L2_SREAD_LOOKUP",
+ "T62x_L2_READ_REPLAY",
+ "T62x_L2_READ_SNOOP",
+ "T62x_L2_READ_HIT",
+ "T62x_L2_CLEAN_MISS",
+ "T62x_L2_WRITE_LOOKUP",
+ "T62x_L2_SWRITE_LOOKUP",
+ "T62x_L2_WRITE_REPLAY",
+ "T62x_L2_WRITE_SNOOP",
+ "T62x_L2_WRITE_HIT",
+ "T62x_L2_EXT_READ_FULL",
+ "T62x_L2_EXT_READ_HALF",
+ "T62x_L2_EXT_WRITE_FULL",
+ "T62x_L2_EXT_WRITE_HALF",
+ "T62x_L2_EXT_READ",
+ "T62x_L2_EXT_READ_LINE",
+ "T62x_L2_EXT_WRITE",
+ "T62x_L2_EXT_WRITE_LINE",
+ "T62x_L2_EXT_WRITE_SMALL",
+ "T62x_L2_EXT_BARRIER",
+ "T62x_L2_EXT_AR_STALL",
+ "T62x_L2_EXT_R_BUF_FULL",
+ "T62x_L2_EXT_RD_BUF_FULL",
+ "T62x_L2_EXT_R_RAW",
+ "T62x_L2_EXT_W_STALL",
+ "T62x_L2_EXT_W_BUF_FULL",
+ "T62x_L2_EXT_R_W_HAZARD",
+ "T62x_L2_TAG_HAZARD",
+ "T62x_L2_SNOOP_FULL",
+ "T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T72x_GPU_ACTIVE",
+ "T72x_IRQ_ACTIVE",
+ "T72x_JS0_JOBS",
+ "T72x_JS0_TASKS",
+ "T72x_JS0_ACTIVE",
+ "T72x_JS1_JOBS",
+ "T72x_JS1_TASKS",
+ "T72x_JS1_ACTIVE",
+ "T72x_JS2_JOBS",
+ "T72x_JS2_TASKS",
+ "T72x_JS2_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T72x_TI_JOBS_PROCESSED",
+ "T72x_TI_TRIANGLES",
+ "T72x_TI_QUADS",
+ "T72x_TI_POLYGONS",
+ "T72x_TI_POINTS",
+ "T72x_TI_LINES",
+ "T72x_TI_FRONT_FACING",
+ "T72x_TI_BACK_FACING",
+ "T72x_TI_PRIM_VISIBLE",
+ "T72x_TI_PRIM_CULLED",
+ "T72x_TI_PRIM_CLIPPED",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T72x_TI_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T72x_FRAG_ACTIVE",
+ "T72x_FRAG_PRIMITIVES",
+ "T72x_FRAG_PRIMITIVES_DROPPED",
+ "T72x_FRAG_THREADS",
+ "T72x_FRAG_DUMMY_THREADS",
+ "T72x_FRAG_QUADS_RAST",
+ "T72x_FRAG_QUADS_EZS_TEST",
+ "T72x_FRAG_QUADS_EZS_KILLED",
+ "T72x_FRAG_THREADS_LZS_TEST",
+ "T72x_FRAG_THREADS_LZS_KILLED",
+ "T72x_FRAG_CYCLES_NO_TILE",
+ "T72x_FRAG_NUM_TILES",
+ "T72x_FRAG_TRANS_ELIM",
+ "T72x_COMPUTE_ACTIVE",
+ "T72x_COMPUTE_TASKS",
+ "T72x_COMPUTE_THREADS",
+ "T72x_TRIPIPE_ACTIVE",
+ "T72x_ARITH_WORDS",
+ "T72x_ARITH_CYCLES_REG",
+ "T72x_LS_WORDS",
+ "T72x_LS_ISSUES",
+ "T72x_LS_RESTARTS",
+ "T72x_LS_REISSUES_MISS",
+ "T72x_TEX_WORDS",
+ "T72x_TEX_BUBBLES",
+ "T72x_TEX_ISSUES",
+ "T72x_LSC_READ_HITS",
+ "T72x_LSC_READ_MISSES",
+ "T72x_LSC_WRITE_HITS",
+ "T72x_LSC_WRITE_MISSES",
+ "T72x_LSC_ATOMIC_HITS",
+ "T72x_LSC_ATOMIC_MISSES",
+ "T72x_LSC_LINE_FETCHES",
+ "T72x_LSC_DIRTY_LINE",
+ "T72x_LSC_SNOOPS",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T72x_L2_EXT_WRITE_BEAT",
+ "T72x_L2_EXT_READ_BEAT",
+ "T72x_L2_READ_SNOOP",
+ "T72x_L2_READ_HIT",
+ "T72x_L2_WRITE_SNOOP",
+ "T72x_L2_WRITE_HIT",
+ "T72x_L2_EXT_WRITE_SMALL",
+ "T72x_L2_EXT_BARRIER",
+ "T72x_L2_EXT_AR_STALL",
+ "T72x_L2_EXT_W_STALL",
+ "T72x_L2_SNOOP_FULL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ ""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T76x_MESSAGES_SENT",
+ "T76x_MESSAGES_RECEIVED",
+ "T76x_GPU_ACTIVE",
+ "T76x_IRQ_ACTIVE",
+ "T76x_JS0_JOBS",
+ "T76x_JS0_TASKS",
+ "T76x_JS0_ACTIVE",
+ "",
+ "T76x_JS0_WAIT_READ",
+ "T76x_JS0_WAIT_ISSUE",
+ "T76x_JS0_WAIT_DEPEND",
+ "T76x_JS0_WAIT_FINISH",
+ "T76x_JS1_JOBS",
+ "T76x_JS1_TASKS",
+ "T76x_JS1_ACTIVE",
+ "",
+ "T76x_JS1_WAIT_READ",
+ "T76x_JS1_WAIT_ISSUE",
+ "T76x_JS1_WAIT_DEPEND",
+ "T76x_JS1_WAIT_FINISH",
+ "T76x_JS2_JOBS",
+ "T76x_JS2_TASKS",
+ "T76x_JS2_ACTIVE",
+ "",
+ "T76x_JS2_WAIT_READ",
+ "T76x_JS2_WAIT_ISSUE",
+ "T76x_JS2_WAIT_DEPEND",
+ "T76x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T76x_TI_JOBS_PROCESSED",
+ "T76x_TI_TRIANGLES",
+ "T76x_TI_QUADS",
+ "T76x_TI_POLYGONS",
+ "T76x_TI_POINTS",
+ "T76x_TI_LINES",
+ "T76x_TI_VCACHE_HIT",
+ "T76x_TI_VCACHE_MISS",
+ "T76x_TI_FRONT_FACING",
+ "T76x_TI_BACK_FACING",
+ "T76x_TI_PRIM_VISIBLE",
+ "T76x_TI_PRIM_CULLED",
+ "T76x_TI_PRIM_CLIPPED",
+ "T76x_TI_LEVEL0",
+ "T76x_TI_LEVEL1",
+ "T76x_TI_LEVEL2",
+ "T76x_TI_LEVEL3",
+ "T76x_TI_LEVEL4",
+ "T76x_TI_LEVEL5",
+ "T76x_TI_LEVEL6",
+ "T76x_TI_LEVEL7",
+ "T76x_TI_COMMAND_1",
+ "T76x_TI_COMMAND_2",
+ "T76x_TI_COMMAND_3",
+ "T76x_TI_COMMAND_4",
+ "T76x_TI_COMMAND_5_7",
+ "T76x_TI_COMMAND_8_15",
+ "T76x_TI_COMMAND_16_63",
+ "T76x_TI_COMMAND_64",
+ "T76x_TI_COMPRESS_IN",
+ "T76x_TI_COMPRESS_OUT",
+ "T76x_TI_COMPRESS_FLUSH",
+ "T76x_TI_TIMESTAMPS",
+ "T76x_TI_PCACHE_HIT",
+ "T76x_TI_PCACHE_MISS",
+ "T76x_TI_PCACHE_LINE",
+ "T76x_TI_PCACHE_STALL",
+ "T76x_TI_WRBUF_HIT",
+ "T76x_TI_WRBUF_MISS",
+ "T76x_TI_WRBUF_LINE",
+ "T76x_TI_WRBUF_PARTIAL",
+ "T76x_TI_WRBUF_STALL",
+ "T76x_TI_ACTIVE",
+ "T76x_TI_LOADING_DESC",
+ "T76x_TI_INDEX_WAIT",
+ "T76x_TI_INDEX_RANGE_WAIT",
+ "T76x_TI_VERTEX_WAIT",
+ "T76x_TI_PCACHE_WAIT",
+ "T76x_TI_WRBUF_WAIT",
+ "T76x_TI_BUS_READ",
+ "T76x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T76x_TI_UTLB_HIT",
+ "T76x_TI_UTLB_NEW_MISS",
+ "T76x_TI_UTLB_REPLAY_FULL",
+ "T76x_TI_UTLB_REPLAY_MISS",
+ "T76x_TI_UTLB_STALL",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T76x_FRAG_ACTIVE",
+ "T76x_FRAG_PRIMITIVES",
+ "T76x_FRAG_PRIMITIVES_DROPPED",
+ "T76x_FRAG_CYCLES_DESC",
+ "T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T76x_FRAG_CYCLES_VERT",
+ "T76x_FRAG_CYCLES_TRISETUP",
+ "T76x_FRAG_CYCLES_EZS_ACTIVE",
+ "T76x_FRAG_THREADS",
+ "T76x_FRAG_DUMMY_THREADS",
+ "T76x_FRAG_QUADS_RAST",
+ "T76x_FRAG_QUADS_EZS_TEST",
+ "T76x_FRAG_QUADS_EZS_KILLED",
+ "T76x_FRAG_THREADS_LZS_TEST",
+ "T76x_FRAG_THREADS_LZS_KILLED",
+ "T76x_FRAG_CYCLES_NO_TILE",
+ "T76x_FRAG_NUM_TILES",
+ "T76x_FRAG_TRANS_ELIM",
+ "T76x_COMPUTE_ACTIVE",
+ "T76x_COMPUTE_TASKS",
+ "T76x_COMPUTE_THREADS",
+ "T76x_COMPUTE_CYCLES_DESC",
+ "T76x_TRIPIPE_ACTIVE",
+ "T76x_ARITH_WORDS",
+ "T76x_ARITH_CYCLES_REG",
+ "T76x_ARITH_CYCLES_L0",
+ "T76x_ARITH_FRAG_DEPEND",
+ "T76x_LS_WORDS",
+ "T76x_LS_ISSUES",
+ "T76x_LS_REISSUE_ATTR",
+ "T76x_LS_REISSUES_VARY",
+ "T76x_LS_VARY_RV_MISS",
+ "T76x_LS_VARY_RV_HIT",
+ "T76x_LS_NO_UNPARK",
+ "T76x_TEX_WORDS",
+ "T76x_TEX_BUBBLES",
+ "T76x_TEX_WORDS_L0",
+ "T76x_TEX_WORDS_DESC",
+ "T76x_TEX_ISSUES",
+ "T76x_TEX_RECIRC_FMISS",
+ "T76x_TEX_RECIRC_DESC",
+ "T76x_TEX_RECIRC_MULTI",
+ "T76x_TEX_RECIRC_PMISS",
+ "T76x_TEX_RECIRC_CONF",
+ "T76x_LSC_READ_HITS",
+ "T76x_LSC_READ_OP",
+ "T76x_LSC_WRITE_HITS",
+ "T76x_LSC_WRITE_OP",
+ "T76x_LSC_ATOMIC_HITS",
+ "T76x_LSC_ATOMIC_OP",
+ "T76x_LSC_LINE_FETCHES",
+ "T76x_LSC_DIRTY_LINE",
+ "T76x_LSC_SNOOPS",
+ "T76x_AXI_TLB_STALL",
+ "T76x_AXI_TLB_MIESS",
+ "T76x_AXI_TLB_TRANSACTION",
+ "T76x_LS_TLB_MISS",
+ "T76x_LS_TLB_HIT",
+ "T76x_AXI_BEATS_READ",
+ "T76x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T76x_MMU_HIT",
+ "T76x_MMU_NEW_MISS",
+ "T76x_MMU_REPLAY_FULL",
+ "T76x_MMU_REPLAY_MISS",
+ "T76x_MMU_TABLE_WALK",
+ "T76x_MMU_REQUESTS",
+ "",
+ "",
+ "T76x_UTLB_HIT",
+ "T76x_UTLB_NEW_MISS",
+ "T76x_UTLB_REPLAY_FULL",
+ "T76x_UTLB_REPLAY_MISS",
+ "T76x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T76x_L2_EXT_WRITE_BEATS",
+ "T76x_L2_EXT_READ_BEATS",
+ "T76x_L2_ANY_LOOKUP",
+ "T76x_L2_READ_LOOKUP",
+ "T76x_L2_SREAD_LOOKUP",
+ "T76x_L2_READ_REPLAY",
+ "T76x_L2_READ_SNOOP",
+ "T76x_L2_READ_HIT",
+ "T76x_L2_CLEAN_MISS",
+ "T76x_L2_WRITE_LOOKUP",
+ "T76x_L2_SWRITE_LOOKUP",
+ "T76x_L2_WRITE_REPLAY",
+ "T76x_L2_WRITE_SNOOP",
+ "T76x_L2_WRITE_HIT",
+ "T76x_L2_EXT_READ_FULL",
+ "",
+ "T76x_L2_EXT_WRITE_FULL",
+ "T76x_L2_EXT_R_W_HAZARD",
+ "T76x_L2_EXT_READ",
+ "T76x_L2_EXT_READ_LINE",
+ "T76x_L2_EXT_WRITE",
+ "T76x_L2_EXT_WRITE_LINE",
+ "T76x_L2_EXT_WRITE_SMALL",
+ "T76x_L2_EXT_BARRIER",
+ "T76x_L2_EXT_AR_STALL",
+ "T76x_L2_EXT_R_BUF_FULL",
+ "T76x_L2_EXT_RD_BUF_FULL",
+ "T76x_L2_EXT_R_RAW",
+ "T76x_L2_EXT_W_STALL",
+ "T76x_L2_EXT_W_BUF_FULL",
+ "T76x_L2_EXT_R_BUF_FULL",
+ "T76x_L2_TAG_HAZARD",
+ "T76x_L2_SNOOP_FULL",
+ "T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T82x_MESSAGES_SENT",
+ "T82x_MESSAGES_RECEIVED",
+ "T82x_GPU_ACTIVE",
+ "T82x_IRQ_ACTIVE",
+ "T82x_JS0_JOBS",
+ "T82x_JS0_TASKS",
+ "T82x_JS0_ACTIVE",
+ "",
+ "T82x_JS0_WAIT_READ",
+ "T82x_JS0_WAIT_ISSUE",
+ "T82x_JS0_WAIT_DEPEND",
+ "T82x_JS0_WAIT_FINISH",
+ "T82x_JS1_JOBS",
+ "T82x_JS1_TASKS",
+ "T82x_JS1_ACTIVE",
+ "",
+ "T82x_JS1_WAIT_READ",
+ "T82x_JS1_WAIT_ISSUE",
+ "T82x_JS1_WAIT_DEPEND",
+ "T82x_JS1_WAIT_FINISH",
+ "T82x_JS2_JOBS",
+ "T82x_JS2_TASKS",
+ "T82x_JS2_ACTIVE",
+ "",
+ "T82x_JS2_WAIT_READ",
+ "T82x_JS2_WAIT_ISSUE",
+ "T82x_JS2_WAIT_DEPEND",
+ "T82x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T82x_TI_JOBS_PROCESSED",
+ "T82x_TI_TRIANGLES",
+ "T82x_TI_QUADS",
+ "T82x_TI_POLYGONS",
+ "T82x_TI_POINTS",
+ "T82x_TI_LINES",
+ "T82x_TI_FRONT_FACING",
+ "T82x_TI_BACK_FACING",
+ "T82x_TI_PRIM_VISIBLE",
+ "T82x_TI_PRIM_CULLED",
+ "T82x_TI_PRIM_CLIPPED",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T82x_TI_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T82x_FRAG_ACTIVE",
+ "T82x_FRAG_PRIMITIVES",
+ "T82x_FRAG_PRIMITIVES_DROPPED",
+ "T82x_FRAG_CYCLES_DESC",
+ "T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T82x_FRAG_CYCLES_VERT",
+ "T82x_FRAG_CYCLES_TRISETUP",
+ "T82x_FRAG_CYCLES_EZS_ACTIVE",
+ "T82x_FRAG_THREADS",
+ "T82x_FRAG_DUMMY_THREADS",
+ "T82x_FRAG_QUADS_RAST",
+ "T82x_FRAG_QUADS_EZS_TEST",
+ "T82x_FRAG_QUADS_EZS_KILLED",
+ "T82x_FRAG_THREADS_LZS_TEST",
+ "T82x_FRAG_THREADS_LZS_KILLED",
+ "T82x_FRAG_CYCLES_NO_TILE",
+ "T82x_FRAG_NUM_TILES",
+ "T82x_FRAG_TRANS_ELIM",
+ "T82x_COMPUTE_ACTIVE",
+ "T82x_COMPUTE_TASKS",
+ "T82x_COMPUTE_THREADS",
+ "T82x_COMPUTE_CYCLES_DESC",
+ "T82x_TRIPIPE_ACTIVE",
+ "T82x_ARITH_WORDS",
+ "T82x_ARITH_CYCLES_REG",
+ "T82x_ARITH_CYCLES_L0",
+ "T82x_ARITH_FRAG_DEPEND",
+ "T82x_LS_WORDS",
+ "T82x_LS_ISSUES",
+ "T82x_LS_REISSUE_ATTR",
+ "T82x_LS_REISSUES_VARY",
+ "T82x_LS_VARY_RV_MISS",
+ "T82x_LS_VARY_RV_HIT",
+ "T82x_LS_NO_UNPARK",
+ "T82x_TEX_WORDS",
+ "T82x_TEX_BUBBLES",
+ "T82x_TEX_WORDS_L0",
+ "T82x_TEX_WORDS_DESC",
+ "T82x_TEX_ISSUES",
+ "T82x_TEX_RECIRC_FMISS",
+ "T82x_TEX_RECIRC_DESC",
+ "T82x_TEX_RECIRC_MULTI",
+ "T82x_TEX_RECIRC_PMISS",
+ "T82x_TEX_RECIRC_CONF",
+ "T82x_LSC_READ_HITS",
+ "T82x_LSC_READ_OP",
+ "T82x_LSC_WRITE_HITS",
+ "T82x_LSC_WRITE_OP",
+ "T82x_LSC_ATOMIC_HITS",
+ "T82x_LSC_ATOMIC_OP",
+ "T82x_LSC_LINE_FETCHES",
+ "T82x_LSC_DIRTY_LINE",
+ "T82x_LSC_SNOOPS",
+ "T82x_AXI_TLB_STALL",
+ "T82x_AXI_TLB_MIESS",
+ "T82x_AXI_TLB_TRANSACTION",
+ "T82x_LS_TLB_MISS",
+ "T82x_LS_TLB_HIT",
+ "T82x_AXI_BEATS_READ",
+ "T82x_AXI_BEATS_WRITTEN",
+
+ /* L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T82x_MMU_HIT",
+ "T82x_MMU_NEW_MISS",
+ "T82x_MMU_REPLAY_FULL",
+ "T82x_MMU_REPLAY_MISS",
+ "T82x_MMU_TABLE_WALK",
+ "T82x_MMU_REQUESTS",
+ "",
+ "",
+ "T82x_UTLB_HIT",
+ "T82x_UTLB_NEW_MISS",
+ "T82x_UTLB_REPLAY_FULL",
+ "T82x_UTLB_REPLAY_MISS",
+ "T82x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T82x_L2_EXT_WRITE_BEATS",
+ "T82x_L2_EXT_READ_BEATS",
+ "T82x_L2_ANY_LOOKUP",
+ "T82x_L2_READ_LOOKUP",
+ "T82x_L2_SREAD_LOOKUP",
+ "T82x_L2_READ_REPLAY",
+ "T82x_L2_READ_SNOOP",
+ "T82x_L2_READ_HIT",
+ "T82x_L2_CLEAN_MISS",
+ "T82x_L2_WRITE_LOOKUP",
+ "T82x_L2_SWRITE_LOOKUP",
+ "T82x_L2_WRITE_REPLAY",
+ "T82x_L2_WRITE_SNOOP",
+ "T82x_L2_WRITE_HIT",
+ "T82x_L2_EXT_READ_FULL",
+ "",
+ "T82x_L2_EXT_WRITE_FULL",
+ "T82x_L2_EXT_R_W_HAZARD",
+ "T82x_L2_EXT_READ",
+ "T82x_L2_EXT_READ_LINE",
+ "T82x_L2_EXT_WRITE",
+ "T82x_L2_EXT_WRITE_LINE",
+ "T82x_L2_EXT_WRITE_SMALL",
+ "T82x_L2_EXT_BARRIER",
+ "T82x_L2_EXT_AR_STALL",
+ "T82x_L2_EXT_R_BUF_FULL",
+ "T82x_L2_EXT_RD_BUF_FULL",
+ "T82x_L2_EXT_R_RAW",
+ "T82x_L2_EXT_W_STALL",
+ "T82x_L2_EXT_W_BUF_FULL",
+ "T82x_L2_EXT_R_BUF_FULL",
+ "T82x_L2_TAG_HAZARD",
+ "T82x_L2_SNOOP_FULL",
+ "T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T83x_MESSAGES_SENT",
+ "T83x_MESSAGES_RECEIVED",
+ "T83x_GPU_ACTIVE",
+ "T83x_IRQ_ACTIVE",
+ "T83x_JS0_JOBS",
+ "T83x_JS0_TASKS",
+ "T83x_JS0_ACTIVE",
+ "",
+ "T83x_JS0_WAIT_READ",
+ "T83x_JS0_WAIT_ISSUE",
+ "T83x_JS0_WAIT_DEPEND",
+ "T83x_JS0_WAIT_FINISH",
+ "T83x_JS1_JOBS",
+ "T83x_JS1_TASKS",
+ "T83x_JS1_ACTIVE",
+ "",
+ "T83x_JS1_WAIT_READ",
+ "T83x_JS1_WAIT_ISSUE",
+ "T83x_JS1_WAIT_DEPEND",
+ "T83x_JS1_WAIT_FINISH",
+ "T83x_JS2_JOBS",
+ "T83x_JS2_TASKS",
+ "T83x_JS2_ACTIVE",
+ "",
+ "T83x_JS2_WAIT_READ",
+ "T83x_JS2_WAIT_ISSUE",
+ "T83x_JS2_WAIT_DEPEND",
+ "T83x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Tiler */
+ "",
+ "",
+ "",
+ "T83x_TI_JOBS_PROCESSED",
+ "T83x_TI_TRIANGLES",
+ "T83x_TI_QUADS",
+ "T83x_TI_POLYGONS",
+ "T83x_TI_POINTS",
+ "T83x_TI_LINES",
+ "T83x_TI_FRONT_FACING",
+ "T83x_TI_BACK_FACING",
+ "T83x_TI_PRIM_VISIBLE",
+ "T83x_TI_PRIM_CULLED",
+ "T83x_TI_PRIM_CLIPPED",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T83x_TI_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T83x_FRAG_ACTIVE",
+ "T83x_FRAG_PRIMITIVES",
+ "T83x_FRAG_PRIMITIVES_DROPPED",
+ "T83x_FRAG_CYCLES_DESC",
+ "T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T83x_FRAG_CYCLES_VERT",
+ "T83x_FRAG_CYCLES_TRISETUP",
+ "T83x_FRAG_CYCLES_EZS_ACTIVE",
+ "T83x_FRAG_THREADS",
+ "T83x_FRAG_DUMMY_THREADS",
+ "T83x_FRAG_QUADS_RAST",
+ "T83x_FRAG_QUADS_EZS_TEST",
+ "T83x_FRAG_QUADS_EZS_KILLED",
+ "T83x_FRAG_THREADS_LZS_TEST",
+ "T83x_FRAG_THREADS_LZS_KILLED",
+ "T83x_FRAG_CYCLES_NO_TILE",
+ "T83x_FRAG_NUM_TILES",
+ "T83x_FRAG_TRANS_ELIM",
+ "T83x_COMPUTE_ACTIVE",
+ "T83x_COMPUTE_TASKS",
+ "T83x_COMPUTE_THREADS",
+ "T83x_COMPUTE_CYCLES_DESC",
+ "T83x_TRIPIPE_ACTIVE",
+ "T83x_ARITH_WORDS",
+ "T83x_ARITH_CYCLES_REG",
+ "T83x_ARITH_CYCLES_L0",
+ "T83x_ARITH_FRAG_DEPEND",
+ "T83x_LS_WORDS",
+ "T83x_LS_ISSUES",
+ "T83x_LS_REISSUE_ATTR",
+ "T83x_LS_REISSUES_VARY",
+ "T83x_LS_VARY_RV_MISS",
+ "T83x_LS_VARY_RV_HIT",
+ "T83x_LS_NO_UNPARK",
+ "T83x_TEX_WORDS",
+ "T83x_TEX_BUBBLES",
+ "T83x_TEX_WORDS_L0",
+ "T83x_TEX_WORDS_DESC",
+ "T83x_TEX_ISSUES",
+ "T83x_TEX_RECIRC_FMISS",
+ "T83x_TEX_RECIRC_DESC",
+ "T83x_TEX_RECIRC_MULTI",
+ "T83x_TEX_RECIRC_PMISS",
+ "T83x_TEX_RECIRC_CONF",
+ "T83x_LSC_READ_HITS",
+ "T83x_LSC_READ_OP",
+ "T83x_LSC_WRITE_HITS",
+ "T83x_LSC_WRITE_OP",
+ "T83x_LSC_ATOMIC_HITS",
+ "T83x_LSC_ATOMIC_OP",
+ "T83x_LSC_LINE_FETCHES",
+ "T83x_LSC_DIRTY_LINE",
+ "T83x_LSC_SNOOPS",
+ "T83x_AXI_TLB_STALL",
+ "T83x_AXI_TLB_MIESS",
+ "T83x_AXI_TLB_TRANSACTION",
+ "T83x_LS_TLB_MISS",
+ "T83x_LS_TLB_HIT",
+ "T83x_AXI_BEATS_READ",
+ "T83x_AXI_BEATS_WRITTEN",
+
+ /* L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T83x_MMU_HIT",
+ "T83x_MMU_NEW_MISS",
+ "T83x_MMU_REPLAY_FULL",
+ "T83x_MMU_REPLAY_MISS",
+ "T83x_MMU_TABLE_WALK",
+ "T83x_MMU_REQUESTS",
+ "",
+ "",
+ "T83x_UTLB_HIT",
+ "T83x_UTLB_NEW_MISS",
+ "T83x_UTLB_REPLAY_FULL",
+ "T83x_UTLB_REPLAY_MISS",
+ "T83x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T83x_L2_EXT_WRITE_BEATS",
+ "T83x_L2_EXT_READ_BEATS",
+ "T83x_L2_ANY_LOOKUP",
+ "T83x_L2_READ_LOOKUP",
+ "T83x_L2_SREAD_LOOKUP",
+ "T83x_L2_READ_REPLAY",
+ "T83x_L2_READ_SNOOP",
+ "T83x_L2_READ_HIT",
+ "T83x_L2_CLEAN_MISS",
+ "T83x_L2_WRITE_LOOKUP",
+ "T83x_L2_SWRITE_LOOKUP",
+ "T83x_L2_WRITE_REPLAY",
+ "T83x_L2_WRITE_SNOOP",
+ "T83x_L2_WRITE_HIT",
+ "T83x_L2_EXT_READ_FULL",
+ "",
+ "T83x_L2_EXT_WRITE_FULL",
+ "T83x_L2_EXT_R_W_HAZARD",
+ "T83x_L2_EXT_READ",
+ "T83x_L2_EXT_READ_LINE",
+ "T83x_L2_EXT_WRITE",
+ "T83x_L2_EXT_WRITE_LINE",
+ "T83x_L2_EXT_WRITE_SMALL",
+ "T83x_L2_EXT_BARRIER",
+ "T83x_L2_EXT_AR_STALL",
+ "T83x_L2_EXT_R_BUF_FULL",
+ "T83x_L2_EXT_RD_BUF_FULL",
+ "T83x_L2_EXT_R_RAW",
+ "T83x_L2_EXT_W_STALL",
+ "T83x_L2_EXT_W_BUF_FULL",
+ "T83x_L2_EXT_R_BUF_FULL",
+ "T83x_L2_TAG_HAZARD",
+ "T83x_L2_SNOOP_FULL",
+ "T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T86x_MESSAGES_SENT",
+ "T86x_MESSAGES_RECEIVED",
+ "T86x_GPU_ACTIVE",
+ "T86x_IRQ_ACTIVE",
+ "T86x_JS0_JOBS",
+ "T86x_JS0_TASKS",
+ "T86x_JS0_ACTIVE",
+ "",
+ "T86x_JS0_WAIT_READ",
+ "T86x_JS0_WAIT_ISSUE",
+ "T86x_JS0_WAIT_DEPEND",
+ "T86x_JS0_WAIT_FINISH",
+ "T86x_JS1_JOBS",
+ "T86x_JS1_TASKS",
+ "T86x_JS1_ACTIVE",
+ "",
+ "T86x_JS1_WAIT_READ",
+ "T86x_JS1_WAIT_ISSUE",
+ "T86x_JS1_WAIT_DEPEND",
+ "T86x_JS1_WAIT_FINISH",
+ "T86x_JS2_JOBS",
+ "T86x_JS2_TASKS",
+ "T86x_JS2_ACTIVE",
+ "",
+ "T86x_JS2_WAIT_READ",
+ "T86x_JS2_WAIT_ISSUE",
+ "T86x_JS2_WAIT_DEPEND",
+ "T86x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Tiler */
+ "",
+ "",
+ "",
+ "T86x_TI_JOBS_PROCESSED",
+ "T86x_TI_TRIANGLES",
+ "T86x_TI_QUADS",
+ "T86x_TI_POLYGONS",
+ "T86x_TI_POINTS",
+ "T86x_TI_LINES",
+ "T86x_TI_VCACHE_HIT",
+ "T86x_TI_VCACHE_MISS",
+ "T86x_TI_FRONT_FACING",
+ "T86x_TI_BACK_FACING",
+ "T86x_TI_PRIM_VISIBLE",
+ "T86x_TI_PRIM_CULLED",
+ "T86x_TI_PRIM_CLIPPED",
+ "T86x_TI_LEVEL0",
+ "T86x_TI_LEVEL1",
+ "T86x_TI_LEVEL2",
+ "T86x_TI_LEVEL3",
+ "T86x_TI_LEVEL4",
+ "T86x_TI_LEVEL5",
+ "T86x_TI_LEVEL6",
+ "T86x_TI_LEVEL7",
+ "T86x_TI_COMMAND_1",
+ "T86x_TI_COMMAND_2",
+ "T86x_TI_COMMAND_3",
+ "T86x_TI_COMMAND_4",
+ "T86x_TI_COMMAND_5_7",
+ "T86x_TI_COMMAND_8_15",
+ "T86x_TI_COMMAND_16_63",
+ "T86x_TI_COMMAND_64",
+ "T86x_TI_COMPRESS_IN",
+ "T86x_TI_COMPRESS_OUT",
+ "T86x_TI_COMPRESS_FLUSH",
+ "T86x_TI_TIMESTAMPS",
+ "T86x_TI_PCACHE_HIT",
+ "T86x_TI_PCACHE_MISS",
+ "T86x_TI_PCACHE_LINE",
+ "T86x_TI_PCACHE_STALL",
+ "T86x_TI_WRBUF_HIT",
+ "T86x_TI_WRBUF_MISS",
+ "T86x_TI_WRBUF_LINE",
+ "T86x_TI_WRBUF_PARTIAL",
+ "T86x_TI_WRBUF_STALL",
+ "T86x_TI_ACTIVE",
+ "T86x_TI_LOADING_DESC",
+ "T86x_TI_INDEX_WAIT",
+ "T86x_TI_INDEX_RANGE_WAIT",
+ "T86x_TI_VERTEX_WAIT",
+ "T86x_TI_PCACHE_WAIT",
+ "T86x_TI_WRBUF_WAIT",
+ "T86x_TI_BUS_READ",
+ "T86x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T86x_TI_UTLB_HIT",
+ "T86x_TI_UTLB_NEW_MISS",
+ "T86x_TI_UTLB_REPLAY_FULL",
+ "T86x_TI_UTLB_REPLAY_MISS",
+ "T86x_TI_UTLB_STALL",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T86x_FRAG_ACTIVE",
+ "T86x_FRAG_PRIMITIVES",
+ "T86x_FRAG_PRIMITIVES_DROPPED",
+ "T86x_FRAG_CYCLES_DESC",
+ "T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T86x_FRAG_CYCLES_VERT",
+ "T86x_FRAG_CYCLES_TRISETUP",
+ "T86x_FRAG_CYCLES_EZS_ACTIVE",
+ "T86x_FRAG_THREADS",
+ "T86x_FRAG_DUMMY_THREADS",
+ "T86x_FRAG_QUADS_RAST",
+ "T86x_FRAG_QUADS_EZS_TEST",
+ "T86x_FRAG_QUADS_EZS_KILLED",
+ "T86x_FRAG_THREADS_LZS_TEST",
+ "T86x_FRAG_THREADS_LZS_KILLED",
+ "T86x_FRAG_CYCLES_NO_TILE",
+ "T86x_FRAG_NUM_TILES",
+ "T86x_FRAG_TRANS_ELIM",
+ "T86x_COMPUTE_ACTIVE",
+ "T86x_COMPUTE_TASKS",
+ "T86x_COMPUTE_THREADS",
+ "T86x_COMPUTE_CYCLES_DESC",
+ "T86x_TRIPIPE_ACTIVE",
+ "T86x_ARITH_WORDS",
+ "T86x_ARITH_CYCLES_REG",
+ "T86x_ARITH_CYCLES_L0",
+ "T86x_ARITH_FRAG_DEPEND",
+ "T86x_LS_WORDS",
+ "T86x_LS_ISSUES",
+ "T86x_LS_REISSUE_ATTR",
+ "T86x_LS_REISSUES_VARY",
+ "T86x_LS_VARY_RV_MISS",
+ "T86x_LS_VARY_RV_HIT",
+ "T86x_LS_NO_UNPARK",
+ "T86x_TEX_WORDS",
+ "T86x_TEX_BUBBLES",
+ "T86x_TEX_WORDS_L0",
+ "T86x_TEX_WORDS_DESC",
+ "T86x_TEX_ISSUES",
+ "T86x_TEX_RECIRC_FMISS",
+ "T86x_TEX_RECIRC_DESC",
+ "T86x_TEX_RECIRC_MULTI",
+ "T86x_TEX_RECIRC_PMISS",
+ "T86x_TEX_RECIRC_CONF",
+ "T86x_LSC_READ_HITS",
+ "T86x_LSC_READ_OP",
+ "T86x_LSC_WRITE_HITS",
+ "T86x_LSC_WRITE_OP",
+ "T86x_LSC_ATOMIC_HITS",
+ "T86x_LSC_ATOMIC_OP",
+ "T86x_LSC_LINE_FETCHES",
+ "T86x_LSC_DIRTY_LINE",
+ "T86x_LSC_SNOOPS",
+ "T86x_AXI_TLB_STALL",
+ "T86x_AXI_TLB_MIESS",
+ "T86x_AXI_TLB_TRANSACTION",
+ "T86x_LS_TLB_MISS",
+ "T86x_LS_TLB_HIT",
+ "T86x_AXI_BEATS_READ",
+ "T86x_AXI_BEATS_WRITTEN",
+
+ /* L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T86x_MMU_HIT",
+ "T86x_MMU_NEW_MISS",
+ "T86x_MMU_REPLAY_FULL",
+ "T86x_MMU_REPLAY_MISS",
+ "T86x_MMU_TABLE_WALK",
+ "T86x_MMU_REQUESTS",
+ "",
+ "",
+ "T86x_UTLB_HIT",
+ "T86x_UTLB_NEW_MISS",
+ "T86x_UTLB_REPLAY_FULL",
+ "T86x_UTLB_REPLAY_MISS",
+ "T86x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T86x_L2_EXT_WRITE_BEATS",
+ "T86x_L2_EXT_READ_BEATS",
+ "T86x_L2_ANY_LOOKUP",
+ "T86x_L2_READ_LOOKUP",
+ "T86x_L2_SREAD_LOOKUP",
+ "T86x_L2_READ_REPLAY",
+ "T86x_L2_READ_SNOOP",
+ "T86x_L2_READ_HIT",
+ "T86x_L2_CLEAN_MISS",
+ "T86x_L2_WRITE_LOOKUP",
+ "T86x_L2_SWRITE_LOOKUP",
+ "T86x_L2_WRITE_REPLAY",
+ "T86x_L2_WRITE_SNOOP",
+ "T86x_L2_WRITE_HIT",
+ "T86x_L2_EXT_READ_FULL",
+ "",
+ "T86x_L2_EXT_WRITE_FULL",
+ "T86x_L2_EXT_R_W_HAZARD",
+ "T86x_L2_EXT_READ",
+ "T86x_L2_EXT_READ_LINE",
+ "T86x_L2_EXT_WRITE",
+ "T86x_L2_EXT_WRITE_LINE",
+ "T86x_L2_EXT_WRITE_SMALL",
+ "T86x_L2_EXT_BARRIER",
+ "T86x_L2_EXT_AR_STALL",
+ "T86x_L2_EXT_R_BUF_FULL",
+ "T86x_L2_EXT_RD_BUF_FULL",
+ "T86x_L2_EXT_R_RAW",
+ "T86x_L2_EXT_W_STALL",
+ "T86x_L2_EXT_W_BUF_FULL",
+ "T86x_L2_EXT_R_BUF_FULL",
+ "T86x_L2_TAG_HAZARD",
+ "T86x_L2_SNOOP_FULL",
+ "T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T88x_MESSAGES_SENT",
+ "T88x_MESSAGES_RECEIVED",
+ "T88x_GPU_ACTIVE",
+ "T88x_IRQ_ACTIVE",
+ "T88x_JS0_JOBS",
+ "T88x_JS0_TASKS",
+ "T88x_JS0_ACTIVE",
+ "",
+ "T88x_JS0_WAIT_READ",
+ "T88x_JS0_WAIT_ISSUE",
+ "T88x_JS0_WAIT_DEPEND",
+ "T88x_JS0_WAIT_FINISH",
+ "T88x_JS1_JOBS",
+ "T88x_JS1_TASKS",
+ "T88x_JS1_ACTIVE",
+ "",
+ "T88x_JS1_WAIT_READ",
+ "T88x_JS1_WAIT_ISSUE",
+ "T88x_JS1_WAIT_DEPEND",
+ "T88x_JS1_WAIT_FINISH",
+ "T88x_JS2_JOBS",
+ "T88x_JS2_TASKS",
+ "T88x_JS2_ACTIVE",
+ "",
+ "T88x_JS2_WAIT_READ",
+ "T88x_JS2_WAIT_ISSUE",
+ "T88x_JS2_WAIT_DEPEND",
+ "T88x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Tiler */
+ "",
+ "",
+ "",
+ "T88x_TI_JOBS_PROCESSED",
+ "T88x_TI_TRIANGLES",
+ "T88x_TI_QUADS",
+ "T88x_TI_POLYGONS",
+ "T88x_TI_POINTS",
+ "T88x_TI_LINES",
+ "T88x_TI_VCACHE_HIT",
+ "T88x_TI_VCACHE_MISS",
+ "T88x_TI_FRONT_FACING",
+ "T88x_TI_BACK_FACING",
+ "T88x_TI_PRIM_VISIBLE",
+ "T88x_TI_PRIM_CULLED",
+ "T88x_TI_PRIM_CLIPPED",
+ "T88x_TI_LEVEL0",
+ "T88x_TI_LEVEL1",
+ "T88x_TI_LEVEL2",
+ "T88x_TI_LEVEL3",
+ "T88x_TI_LEVEL4",
+ "T88x_TI_LEVEL5",
+ "T88x_TI_LEVEL6",
+ "T88x_TI_LEVEL7",
+ "T88x_TI_COMMAND_1",
+ "T88x_TI_COMMAND_2",
+ "T88x_TI_COMMAND_3",
+ "T88x_TI_COMMAND_4",
+ "T88x_TI_COMMAND_5_7",
+ "T88x_TI_COMMAND_8_15",
+ "T88x_TI_COMMAND_16_63",
+ "T88x_TI_COMMAND_64",
+ "T88x_TI_COMPRESS_IN",
+ "T88x_TI_COMPRESS_OUT",
+ "T88x_TI_COMPRESS_FLUSH",
+ "T88x_TI_TIMESTAMPS",
+ "T88x_TI_PCACHE_HIT",
+ "T88x_TI_PCACHE_MISS",
+ "T88x_TI_PCACHE_LINE",
+ "T88x_TI_PCACHE_STALL",
+ "T88x_TI_WRBUF_HIT",
+ "T88x_TI_WRBUF_MISS",
+ "T88x_TI_WRBUF_LINE",
+ "T88x_TI_WRBUF_PARTIAL",
+ "T88x_TI_WRBUF_STALL",
+ "T88x_TI_ACTIVE",
+ "T88x_TI_LOADING_DESC",
+ "T88x_TI_INDEX_WAIT",
+ "T88x_TI_INDEX_RANGE_WAIT",
+ "T88x_TI_VERTEX_WAIT",
+ "T88x_TI_PCACHE_WAIT",
+ "T88x_TI_WRBUF_WAIT",
+ "T88x_TI_BUS_READ",
+ "T88x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T88x_TI_UTLB_HIT",
+ "T88x_TI_UTLB_NEW_MISS",
+ "T88x_TI_UTLB_REPLAY_FULL",
+ "T88x_TI_UTLB_REPLAY_MISS",
+ "T88x_TI_UTLB_STALL",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T88x_FRAG_ACTIVE",
+ "T88x_FRAG_PRIMITIVES",
+ "T88x_FRAG_PRIMITIVES_DROPPED",
+ "T88x_FRAG_CYCLES_DESC",
+ "T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T88x_FRAG_CYCLES_VERT",
+ "T88x_FRAG_CYCLES_TRISETUP",
+ "T88x_FRAG_CYCLES_EZS_ACTIVE",
+ "T88x_FRAG_THREADS",
+ "T88x_FRAG_DUMMY_THREADS",
+ "T88x_FRAG_QUADS_RAST",
+ "T88x_FRAG_QUADS_EZS_TEST",
+ "T88x_FRAG_QUADS_EZS_KILLED",
+ "T88x_FRAG_THREADS_LZS_TEST",
+ "T88x_FRAG_THREADS_LZS_KILLED",
+ "T88x_FRAG_CYCLES_NO_TILE",
+ "T88x_FRAG_NUM_TILES",
+ "T88x_FRAG_TRANS_ELIM",
+ "T88x_COMPUTE_ACTIVE",
+ "T88x_COMPUTE_TASKS",
+ "T88x_COMPUTE_THREADS",
+ "T88x_COMPUTE_CYCLES_DESC",
+ "T88x_TRIPIPE_ACTIVE",
+ "T88x_ARITH_WORDS",
+ "T88x_ARITH_CYCLES_REG",
+ "T88x_ARITH_CYCLES_L0",
+ "T88x_ARITH_FRAG_DEPEND",
+ "T88x_LS_WORDS",
+ "T88x_LS_ISSUES",
+ "T88x_LS_REISSUE_ATTR",
+ "T88x_LS_REISSUES_VARY",
+ "T88x_LS_VARY_RV_MISS",
+ "T88x_LS_VARY_RV_HIT",
+ "T88x_LS_NO_UNPARK",
+ "T88x_TEX_WORDS",
+ "T88x_TEX_BUBBLES",
+ "T88x_TEX_WORDS_L0",
+ "T88x_TEX_WORDS_DESC",
+ "T88x_TEX_ISSUES",
+ "T88x_TEX_RECIRC_FMISS",
+ "T88x_TEX_RECIRC_DESC",
+ "T88x_TEX_RECIRC_MULTI",
+ "T88x_TEX_RECIRC_PMISS",
+ "T88x_TEX_RECIRC_CONF",
+ "T88x_LSC_READ_HITS",
+ "T88x_LSC_READ_OP",
+ "T88x_LSC_WRITE_HITS",
+ "T88x_LSC_WRITE_OP",
+ "T88x_LSC_ATOMIC_HITS",
+ "T88x_LSC_ATOMIC_OP",
+ "T88x_LSC_LINE_FETCHES",
+ "T88x_LSC_DIRTY_LINE",
+ "T88x_LSC_SNOOPS",
+ "T88x_AXI_TLB_STALL",
+ "T88x_AXI_TLB_MIESS",
+ "T88x_AXI_TLB_TRANSACTION",
+ "T88x_LS_TLB_MISS",
+ "T88x_LS_TLB_HIT",
+ "T88x_AXI_BEATS_READ",
+ "T88x_AXI_BEATS_WRITTEN",
+
+ /* L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T88x_MMU_HIT",
+ "T88x_MMU_NEW_MISS",
+ "T88x_MMU_REPLAY_FULL",
+ "T88x_MMU_REPLAY_MISS",
+ "T88x_MMU_TABLE_WALK",
+ "T88x_MMU_REQUESTS",
+ "",
+ "",
+ "T88x_UTLB_HIT",
+ "T88x_UTLB_NEW_MISS",
+ "T88x_UTLB_REPLAY_FULL",
+ "T88x_UTLB_REPLAY_MISS",
+ "T88x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T88x_L2_EXT_WRITE_BEATS",
+ "T88x_L2_EXT_READ_BEATS",
+ "T88x_L2_ANY_LOOKUP",
+ "T88x_L2_READ_LOOKUP",
+ "T88x_L2_SREAD_LOOKUP",
+ "T88x_L2_READ_REPLAY",
+ "T88x_L2_READ_SNOOP",
+ "T88x_L2_READ_HIT",
+ "T88x_L2_CLEAN_MISS",
+ "T88x_L2_WRITE_LOOKUP",
+ "T88x_L2_SWRITE_LOOKUP",
+ "T88x_L2_WRITE_REPLAY",
+ "T88x_L2_WRITE_SNOOP",
+ "T88x_L2_WRITE_HIT",
+ "T88x_L2_EXT_READ_FULL",
+ "",
+ "T88x_L2_EXT_WRITE_FULL",
+ "T88x_L2_EXT_R_W_HAZARD",
+ "T88x_L2_EXT_READ",
+ "T88x_L2_EXT_READ_LINE",
+ "T88x_L2_EXT_WRITE",
+ "T88x_L2_EXT_WRITE_LINE",
+ "T88x_L2_EXT_WRITE_SMALL",
+ "T88x_L2_EXT_BARRIER",
+ "T88x_L2_EXT_AR_STALL",
+ "T88x_L2_EXT_R_BUF_FULL",
+ "T88x_L2_EXT_RD_BUF_FULL",
+ "T88x_L2_EXT_R_RAW",
+ "T88x_L2_EXT_W_STALL",
+ "T88x_L2_EXT_W_BUF_FULL",
+ "T88x_L2_EXT_R_BUF_FULL",
+ "T88x_L2_TAG_HAZARD",
+ "T88x_L2_SNOOP_FULL",
+ "T88x_L2_REPLAY_FULL"
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100644
index 00000000000..ca264049653
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,98 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if the data was successfully printed to the debugfs entry file,
+ *         -1 if an error was encountered
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+ ssize_t ret = 0;
+ struct list_head *entry;
+ const struct list_head *kbdev_list;
+
+ kbdev_list = kbase_dev_list_get();
+ list_for_each(entry, kbdev_list) {
+ struct kbase_device *kbdev = NULL;
+ struct kbasep_kctx_list_element *element;
+
+ kbdev = list_entry(entry, struct kbase_device, entry);
+ /* output the total memory usage for this device */
+ ret = seq_printf(sfile, "%-16s %10u\n",
+ kbdev->devname,
+ atomic_read(&(kbdev->memdev.used_pages)));
+ mutex_lock(&kbdev->kctx_list_lock);
+ list_for_each_entry(element, &kbdev->kctx_list, link) {
+ /* output the memory usage for each kctx
+ * opened on this device */
+ ret = seq_printf(sfile, " %s-0x%p %10u\n",
+ "kctx",
+ element->kctx,
+ atomic_read(&(element->kctx->used_pages)));
+ }
+ mutex_unlock(&kbdev->kctx_list_lock);
+ }
+ kbase_dev_list_put(kbdev_list);
+ return ret;
+}
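+
+/*
+ * Illustrative output of the gpu_memory file (names and numbers are made
+ * up): one line per device with its used-page count, followed by one
+ * indented line per kctx open on that device:
+ *
+ *   mali0                  1024
+ *     kctx-0xffffffc07a8e0000        512
+ */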
+
+/*
+ * File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_gpu_memory_seq_show, NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+ .open = kbasep_gpu_memory_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+ debugfs_create_file("gpu_memory", S_IRUGO,
+ kbdev->mali_debugfs_directory, NULL,
+ &kbasep_gpu_memory_debugfs_fops);
+}
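+
+/*
+ * Once registered, the report can be read from userspace. Assuming debugfs
+ * is mounted in the usual place and the device's debugfs directory is
+ * named "mali0" (it is derived from the device name), something like:
+ *
+ *   cat /sys/kernel/debug/mali0/gpu_memory
+ */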
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100644
index 00000000000..3cf30a4e767
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_H
+#define _KBASE_GPU_MEMORY_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif /* _KBASE_GPU_MEMORY_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100644
index 00000000000..4276665fbc9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value: The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size: The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+ (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
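+
+/*
+ * Worked example (illustrative): the product ID lives in bits [16, 32) of
+ * the raw GPU_ID register, so for a hypothetical gpu_id of 0x08601000:
+ *
+ *   KBASE_UBFX32(0x08601000, 16U, 16) == 0x0860
+ *
+ * The mask is computed as 1ULL << size so that it stays well-defined even
+ * for size == 32, where a 32-bit shift would be undefined behaviour.
+ */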
+
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+ kbase_gpu_clk_speed_func get_gpu_speed_mhz;
+ u32 gpu_speed_mhz;
+ int rc = 1;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+ /* The current GPU speed is requested from the system integrator via the
+ * GPU_SPEED_FUNC function. If that function fails or is not provided, we
+ * fall back to the device clock (when one is available), and finally
+ * report the maximum GPU speed as specified by GPU_FREQ_KHZ_MAX.
+ */
+ get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+ if (get_gpu_speed_mhz != NULL) {
+ rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+ /* Issue a warning message when the reported GPU speed falls outside the min/max range */
+ if (rc == 0) {
+ u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+
+ if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+ gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+ dev_warn(kctx->kbdev->dev, "GPU speed is outside of min/max range (got %lu kHz, min %lu kHz, max %lu kHz)\n",
+ (unsigned long)gpu_speed_khz,
+ (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+ (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+ }
+#endif /* CONFIG_MALI_DEBUG */
+ }
+ if (kctx->kbdev->clock) {
+ gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
+ rc = 0;
+ }
+ if (rc != 0)
+ gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+ kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+ memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+ /* Before API 8.2, userspace expects L3 cache info here, which was always 0 */
+ if (kctx->api_version < KBASE_API_VERSION(8, 2))
+ kbase_props->props.raw_props.suspend_size = 0;
+
+
+ return 0;
+}
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+ struct mali_base_gpu_coherent_group *current_group;
+ u64 group_present;
+ u64 group_mask;
+ u64 first_set, first_set_prev;
+ u32 num_groups = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != props);
+
+ props->coherency_info.coherency = props->raw_props.mem_features;
+ props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+ if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+ /* Group is l2 coherent */
+ group_present = props->raw_props.l2_present;
+ } else {
+ /* Group is l1 coherent */
+ group_present = props->raw_props.shader_present;
+ }
+
+ /*
+ * The coherent group mask can be computed from the l2 present
+ * register.
+ *
+ * For the coherent group n:
+ * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+ * where first_set is group_present with only its nth set-bit kept
+ * (i.e. the position from where a new group starts).
+ *
+ * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+ * The first mask is:
+ * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+ * = (0x0..010 - 1) & ~(0x0..01 - 1)
+ * = 0x0..00f
+ * The second mask is:
+ * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+ * = (0x0..100 - 1) & ~(0x0..010 - 1)
+ * = 0x0..0f0
+ * And so on until all the bits from group_present have been cleared
+ * (i.e. there is no group left).
+ */
+
+ current_group = props->coherency_info.group;
+ first_set = group_present & ~(group_present - 1);
+
+ while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+ group_present -= first_set; /* Clear the current group bit */
+ first_set_prev = first_set;
+
+ first_set = group_present & ~(group_present - 1);
+ group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+ /* Populate the coherent_group structure for each group */
+ current_group->core_mask = group_mask & props->raw_props.shader_present;
+ current_group->num_cores = hweight64(current_group->core_mask);
+
+ num_groups++;
+ current_group++;
+ }
+
+ if (group_present != 0)
+ pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+ props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+ struct kbase_gpuprops_regdump regdump;
+ int i;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+ /* Dump relevant registers */
+ kbase_backend_gpuprops_get(kbdev, &regdump);
+
+ gpu_props->raw_props.gpu_id = regdump.gpu_id;
+ gpu_props->raw_props.tiler_features = regdump.tiler_features;
+ gpu_props->raw_props.mem_features = regdump.mem_features;
+ gpu_props->raw_props.mmu_features = regdump.mmu_features;
+ gpu_props->raw_props.l2_features = regdump.l2_features;
+ gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+ gpu_props->raw_props.as_present = regdump.as_present;
+ gpu_props->raw_props.js_present = regdump.js_present;
+ gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
+ gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
+ gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
+
+ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+ gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+ gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+ gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+ gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+ gpu_props->raw_props.thread_features = regdump.thread_features;
+
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+ int i;
+
+ /* Populate the base_gpu_props structure */
+ gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+ gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+ gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+ gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+ gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+ gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+ gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+ gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+ gpu_props->l2_props.num_l2_slices = 1;
+ if (gpu_props->core_props.product_id == GPU_ID_PI_T76X)
+ gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+ gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+ gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+ if (gpu_props->raw_props.thread_max_threads == 0)
+ gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+ else
+ gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+ if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+ gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+ else
+ gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+ if (gpu_props->raw_props.thread_max_barrier_size == 0)
+ gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+ else
+ gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+ gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+ gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+ gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+ gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+ /* If values are not specified, then use defaults */
+ if (gpu_props->thread_props.max_registers == 0) {
+ gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+ gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+ gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+ }
+ /* Initialize the coherent_group structure for each group */
+ kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
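+
+/*
+ * Worked example (illustrative): a raw gpu_id of 0x08601000 decodes as
+ * version_status = 0 (bits [0,4)), minor_revision = 0x00 (bits [4,12)),
+ * major_revision = 1 (bits [12,16)) and product_id = 0x0860 (bits [16,32)),
+ * i.e. what mali_kbase_hw.c later logs as "0x0860 r1p0 status 0".
+ */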
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+ struct kbase_gpu_props *gpu_props;
+ struct gpu_raw_gpu_props *raw;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ gpu_props = &kbdev->gpu_props;
+ raw = &gpu_props->props.raw_props;
+
+ /* Initialize the base_gpu_props structure from the hardware */
+ kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+ /* Populate the derived properties */
+ kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+ /* Populate kbase-only fields */
+ gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+ gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+ gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+ gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+ gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+ gpu_props->num_cores = hweight64(raw->shader_present);
+ gpu_props->num_core_groups = hweight64(raw->l2_present);
+ gpu_props->num_address_spaces = hweight32(raw->as_present);
+ gpu_props->num_job_slots = hweight32(raw->js_present);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100644
index 00000000000..af97d97bf94
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward declaration - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx The struct kbase_context structure
+ * @param kbase_props A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif /* _KBASE_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100644
index 00000000000..e37e43c2fb0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ 123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+ u32 gpu_id;
+ u32 l2_features;
+ u32 suspend_size; /* API 8.2+ */
+ u32 tiler_features;
+ u32 mem_features;
+ u32 mmu_features;
+ u32 as_present;
+ u32 js_present;
+ u32 thread_max_threads;
+ u32 thread_max_workgroup_size;
+ u32 thread_max_barrier_size;
+ u32 thread_features;
+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+ u32 js_features[GPU_MAX_JOB_SLOTS];
+ u32 shader_present_lo;
+ u32 shader_present_hi;
+ u32 tiler_present_lo;
+ u32 tiler_present_hi;
+ u32 l2_present_lo;
+ u32 l2_present_hi;
+};
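+
+/*
+ * The *_present registers are 64-bit values split across _lo/_hi pairs;
+ * kbase_gpuprops_get_props() recombines them along the lines of:
+ *
+ *   ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo
+ */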
+
+struct kbase_gpu_cache_props {
+ u8 associativity;
+ u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+ u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+ u8 va_bits;
+ u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+ /* kernel-only properties */
+ u8 num_cores;
+ u8 num_core_groups;
+ u8 num_address_spaces;
+ u8 num_job_slots;
+
+ struct kbase_gpu_cache_props l2_props;
+
+ struct kbase_gpu_mem_props mem;
+ struct kbase_gpu_mmu_props mmu;
+
+ /**
+ * Implementation-specific IRQ throttle value (us); should be adjusted during integration.
+ */
+ int irq_throttle_time_us;
+
+ /* Properties shared with userspace */
+ base_gpu_props props;
+};
+
+#endif /* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100644
index 00000000000..1a5a21b3812
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+ const enum base_hw_feature *features;
+ u32 gpu_id;
+
+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+ gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+ switch (gpu_id) {
+ case GPU_ID_PI_TFRX:
+ /* FALLTHROUGH */
+ case GPU_ID_PI_T86X:
+ features = base_hw_features_tFxx;
+ break;
+ case GPU_ID_PI_T83X:
+ features = base_hw_features_t83x;
+ break;
+ case GPU_ID_PI_T82X:
+ features = base_hw_features_t82x;
+ break;
+ case GPU_ID_PI_T76X:
+ features = base_hw_features_t76x;
+ break;
+ case GPU_ID_PI_T72X:
+ features = base_hw_features_t72x;
+ break;
+ case GPU_ID_PI_T62X:
+ features = base_hw_features_t62x;
+ break;
+ case GPU_ID_PI_T60X:
+ features = base_hw_features_t60x;
+ break;
+ default:
+ features = base_hw_features_generic;
+ break;
+ }
+
+ for (; *features != BASE_HW_FEATURE_END; features++)
+ set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+ const enum base_hw_issue *issues;
+ u32 gpu_id;
+ u32 impl_tech;
+
+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+ if (impl_tech != IMPLEMENTATION_MODEL) {
+ switch (gpu_id) {
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+ issues = base_hw_issues_t60x_r0p0_15dev0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+ issues = base_hw_issues_t60x_r0p0_eac;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+ issues = base_hw_issues_t60x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+ issues = base_hw_issues_t62x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+ issues = base_hw_issues_t62x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+ issues = base_hw_issues_t62x_r1p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+ issues = base_hw_issues_t76x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+ issues = base_hw_issues_t76x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+ issues = base_hw_issues_t76x_r0p1_50rel0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+ issues = base_hw_issues_t76x_r0p2;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+ issues = base_hw_issues_t76x_r0p3;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+ issues = base_hw_issues_t76x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+ issues = base_hw_issues_t72x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+ issues = base_hw_issues_t72x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+ issues = base_hw_issues_t72x_r1p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+ issues = base_hw_issues_tFRx_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+ issues = base_hw_issues_tFRx_r0p2;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+ issues = base_hw_issues_tFRx_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+ issues = base_hw_issues_tFRx_r2p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+ issues = base_hw_issues_t86x_r0p2;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+ issues = base_hw_issues_t86x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+ issues = base_hw_issues_t86x_r2p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+ issues = base_hw_issues_t83x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+ issues = base_hw_issues_t83x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+ issues = base_hw_issues_t82x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+ issues = base_hw_issues_t82x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+ issues = base_hw_issues_t82x_r1p0;
+ break;
+ default:
+ dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
+ return -EINVAL;
+ }
+ } else {
+ /* Software model */
+ switch (gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT) {
+ case GPU_ID_PI_T60X:
+ issues = base_hw_issues_model_t60x;
+ break;
+ case GPU_ID_PI_T62X:
+ issues = base_hw_issues_model_t62x;
+ break;
+ case GPU_ID_PI_T72X:
+ issues = base_hw_issues_model_t72x;
+ break;
+ case GPU_ID_PI_T76X:
+ issues = base_hw_issues_model_t76x;
+ break;
+ case GPU_ID_PI_TFRX:
+ issues = base_hw_issues_model_tFRx;
+ break;
+ case GPU_ID_PI_T86X:
+ issues = base_hw_issues_model_t86x;
+ break;
+ case GPU_ID_PI_T83X:
+ issues = base_hw_issues_model_t83x;
+ break;
+ case GPU_ID_PI_T82X:
+ issues = base_hw_issues_model_t82x;
+ break;
+ default:
+ dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
+ return -EINVAL;
+ }
+ }
+
+ dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+ for (; *issues != BASE_HW_ISSUE_END; issues++)
+ set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.h b/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100644
index 00000000000..fce7d29558e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+ test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+ test_bit(feature, &(kbdev)->hw_features_mask[0])
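+
+/*
+ * Typical usage (sketch; BASE_HW_ISSUE_8408 is just one example of an
+ * issue from mali_base_hwconfig_issues.h, and the helper is hypothetical):
+ *
+ *   if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ *           apply_8408_workaround(kbdev);
+ */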
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100644
index 00000000000..b09be99e6b4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev: Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev: Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100644
index 00000000000..261453e8f1a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure */
+struct kbase_hwaccess_data {
+ struct kbase_context *active_kctx;
+
+ struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100644
index 00000000000..f93ca9d8680
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,35 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ * GPU
+ * @kbdev: Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+ struct kbase_gpuprops_regdump *regdump);
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100644
index 00000000000..5de2b7535bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @setup: HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx: Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return
+ * true, or call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ * completed.
+ * @kctx: Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
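+
+/*
+ * Illustrative dump sequence (sketch only; error handling elided):
+ *
+ *   err = kbase_instr_hwcnt_request_dump(kctx);
+ *   if (!err)
+ *           err = kbase_instr_hwcnt_wait_for_dump(kctx);
+ */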
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ * completed
+ * @kctx: Kbase context
+ * @success: Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+ bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_term() - Terminate the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100644
index 00000000000..a2fdf247881
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,283 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev: Device pointer
+ * @katom: Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context to it using kbase_backend_use_ctx(), or release it using
+ * kbase_backend_release_free_address_space().
+ *
+ * Return: Number of the free address space, or KBASEP_AS_NR_INVALID if none
+ * is available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_backend_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+ int as_nr);
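+
+/*
+ * Illustrative pairing of these calls (sketch, not taken from the driver;
+ * kbase_backend_use_ctx() is declared below, and a real scheduler must
+ * also handle the kctx->as_pending case):
+ *
+ *   int as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+ *
+ *   if (as_nr != KBASEP_AS_NR_INVALID &&
+ *       !kbase_backend_use_ctx(kbdev, kctx, as_nr))
+ *           kbase_backend_release_free_address_space(kbdev, as_nr);
+ */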
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ * provided address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer. May be NULL
+ * @as_nr: Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if no ASID was assigned. If
+ * kctx->as_pending is true then ASID assignment will complete at some
+ * point in the future and scheduling will be re-started; otherwise no
+ * ASIDs are available
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_backend_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if the context is now active, false otherwise (i.e. if the
+ * context does not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ * de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * Caller must hold as->transaction_mutex and runpool_irq.lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ * de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * Caller must hold as->transaction_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ * completing an atom.
+ * @kbdev: Device pointer
+ * @katom: Pointer to the atom to complete
+ *
+ * This function should only be called from jd_done_worker().
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ * and remove any others from the ringbuffers.
+ * @kbdev: Device pointer
+ * @end_timestamp: Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ * @js
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+ int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ * @js
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Atom currently at the tail of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+ int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ * slot.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Number of atoms currently on slot @js
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ * that are currently on the GPU.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Number of atoms on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ * has changed.
+ * @kbdev: Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can currently
+ *			       be submitted to slot @js.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Number of jobs that can be submitted to slot @js.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * started by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ * running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for a context to have zero jobs running,
+ *				 and to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
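+
+/*
+ * Illustrative pairing, per the note above (a sketch, not a definitive
+ * teardown sequence):
+ *
+ *	kbase_js_zap_context(kctx);
+ *	kbase_jm_wait_for_zero_jobs(kctx);
+ *	destroy_context(kctx);	(hypothetical; kctx is now safe to destroy)
+ */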
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ * not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
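+
+/*
+ * Illustrative sketch only: combining the two descriptions above, a reset
+ * might be driven as follows. The reset_complete() predicate on
+ * kbdev->reset_waitq is hypothetical; it is not defined by this header.
+ *
+ *	if (kbase_prepare_to_reset_gpu(kbdev))
+ *		kbase_reset_gpu(kbdev);
+ *	wait_event(kbdev->reset_waitq, reset_complete(kbdev));
+ */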
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx: The kbase context that contains the job(s) that should
+ * be hard-stopped
+ * @js: The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ * jobs from the context)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100644
index 00000000000..dbdcd3def22
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ * initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_hwaccess_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+ unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
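+
+/*
+ * Illustrative ordering sketch, derived from the descriptions above rather
+ * than from a definitive implementation:
+ *
+ *	kbase_hwaccess_pm_init(kbdev);
+ *	kbase_hwaccess_pm_powerup(kbdev, flags);  (after IRQ handlers installed)
+ *	...normal operation, suspend/resume as required...
+ *	kbase_hwaccess_pm_halt(kbdev);            (GPU is off on return)
+ *	kbase_hwaccess_pm_term(kbdev);
+ */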
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param new_core_mask The core mask to use
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+ u64 new_core_mask);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ * @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ * be NULL. The contents of this array must not be
+ * modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ * @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ * be NULL. The contents of this array must not be
+ * modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
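+
+/*
+ * Illustrative sketch only: enumerating the static policy list and selecting
+ * an entry. The choice of entry and all error handling are elided.
+ *
+ *	const struct kbase_pm_policy *const *policies;
+ *	int count = kbase_pm_list_policies(&policies);
+ *
+ *	if (count > 0)
+ *		kbase_pm_set_policy(kbdev, policies[0]);
+ */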
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100644
index 00000000000..89d26eaf09a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_time.h
+ * HW access time-related APIs
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev: Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time: Pointer to u64 to store system time in
+ * @ts: Pointer to struct timespec to store current monotonic
+ * time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+ u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If a GPU reset occurs then the counters are reset to zero and the delay may
+ * not be as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c
new file mode 100644
index 00000000000..9c2043ac948
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_instr.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx);
+
+ kctx = kbdev->hwcnt.kctx;
+ kbdev->hwcnt.suspended_kctx = kctx;
+
+ /* Relevant state was saved into hwcnt.suspended_state when enabling the
+ * counters */
+
+ if (kctx) {
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED);
+ kbase_instr_hwcnt_disable(kctx);
+ }
+}
+
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ kctx = kbdev->hwcnt.suspended_kctx;
+ kbdev->hwcnt.suspended_kctx = NULL;
+
+ if (kctx) {
+ int err;
+
+ err = kbase_instr_hwcnt_enable_internal(kbdev, kctx,
+ &kbdev->hwcnt.suspended_state);
+ WARN(err, "Failed to restore instrumented hardware counters on resume\n");
+ }
+}
+
+int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup)
+{
+ struct kbase_device *kbdev;
+ bool access_allowed;
+ int err;
+
+ kbdev = kctx->kbdev;
+
+ /* Determine if the calling task has access to this capability */
+ access_allowed = kbase_security_has_capability(kctx,
+ KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
+ KBASE_SEC_FLAG_NOAUDIT);
+ if (!access_allowed)
+ return -EINVAL;
+
+ /* Mark the context as active so the GPU is kept turned on */
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread. */
+ kbase_pm_context_active(kbdev);
+
+ /* Schedule the context in */
+ kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+ err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup);
+ if (err) {
+ /* Release the context. This had its own Power Manager Active
+ * reference */
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+ /* Also release our Power Manager Active reference */
+ kbase_pm_context_idle(kbdev);
+ }
+
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable);
+
+int kbase_instr_hwcnt_disable(struct kbase_context *kctx)
+{
+	int err;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ err = kbase_instr_hwcnt_disable_internal(kctx);
+ if (err)
+ goto out;
+
+ /* Release the context. This had its own Power Manager Active reference
+ */
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+ /* Also release our Power Manager Active reference */
+ kbase_pm_context_idle(kbdev);
+
+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+ kctx);
+out:
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable);
+
+int kbase_instr_hwcnt_setup(struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup)
+{
+ struct kbase_vinstr_context *vinstr_ctx = kctx->kbdev->vinstr_ctx;
+ u32 bitmap[4];
+
+ if (setup == NULL)
+ return -EINVAL;
+
+ bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+ bitmap[TILER_HWCNT_BM] = setup->tiler_bm;
+ bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+ bitmap[JM_HWCNT_BM] = setup->jm_bm;
+ if (setup->dump_buffer) {
+ if (kctx->vinstr_cli)
+ return -EBUSY;
+ kctx->vinstr_cli = kbase_vinstr_attach_client(vinstr_ctx, false,
+ setup->dump_buffer, bitmap);
+ if (!kctx->vinstr_cli)
+ return -ENOMEM;
+ } else {
+ kbase_vinstr_detach_client(vinstr_ctx, kctx->vinstr_cli);
+ kctx->vinstr_cli = NULL;
+ }
+
+ return 0;
+}
+
+int kbase_instr_hwcnt_dump(struct kbase_context *kctx)
+{
+ return kbase_vinstr_dump(kctx->kbdev->vinstr_ctx, kctx->vinstr_cli);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.h b/drivers/gpu/arm/midgard/mali_kbase_instr.h
new file mode 100644
index 00000000000..5613453a3c0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_instr.h
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Instrumentation API definitions
+ */
+
+#ifndef _KBASE_INSTR_H_
+#define _KBASE_INSTR_H_
+
+#include <mali_kbase_hwaccess_instr.h>
+
+/**
+ * kbase_instr_hwcnt_setup() - Configure HW counters collection
+ * @kctx: Kbase context
+ * @setup: &struct kbase_uk_hwcnt_setup containing configuration
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_setup(struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_enable() - Enable HW counters collection
+ * @kctx: Kbase context
+ * @setup: &struct kbase_uk_hwcnt_setup containing configuration
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable() - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump() - Trigger dump of HW counters and wait for
+ * completion
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_dump(struct kbase_context *kctx);
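+
+/*
+ * Illustrative flow, a sketch only: the setup fields follow
+ * struct kbase_uk_hwcnt_setup and are elided here, as is error handling.
+ *
+ *	struct kbase_uk_hwcnt_setup setup;	(dump_buffer, bitmaps, ...)
+ *
+ *	if (kbase_instr_hwcnt_enable(kctx, &setup) == 0) {
+ *		kbase_instr_hwcnt_dump(kctx);	(may sleep)
+ *		kbase_instr_hwcnt_disable(kctx);
+ *	}
+ */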
+
+/**
+ * kbase_instr_hwcnt_suspend() - GPU is suspending, stop HW counter collection
+ * @kbdev: Kbase device
+ *
+ * It's assumed that there's only one privileged context.
+ *
+ * Safe to call without a lock when doing an OS suspend, because the state
+ * only changes in response to user-space IOCTLs.
+ */
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_resume() - GPU is resuming, resume HW counter collection
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100644
index 00000000000..c5a86234049
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1726 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif /* CONFIG_UMP */
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \
+ ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == \
+ BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ * completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ return compat_ptr(p->compat_value);
+#endif
+ return p->value;
+}
+
+/* Runs an atom, either by handing it to the JS or, in the case of soft-jobs,
+ * by immediately running it.
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and, if it is
+ * KBASE_JD_ATOM_STATE_COMPLETED, must call jd_done_nolock().
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+
+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+ /* Dependency only atom */
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ return 0;
+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ /* Soft-job */
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (!kbase_replay_process(katom))
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ } else if (kbase_process_soft_job(katom) == 0) {
+ kbase_finish_soft_job(katom);
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ } else {
+ /* The job has not completed */
+ list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+ }
+ return 0;
+ }
+
+ katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ /* Queue an action about whether we should try scheduling a context */
+ return kbasep_js_add_job(kctx, katom);
+}
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(katom);
+
+ kctx = katom->kctx;
+
+ list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+ list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_jd_context *ctx;
+ struct kbase_device *kbdev;
+
+ katom = (struct kbase_jd_atom *)callback_parameter;
+ KBASE_DEBUG_ASSERT(katom);
+ ctx = &katom->kctx->jctx;
+ kbdev = katom->kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ mutex_lock(&ctx->lock);
+
+ /* KDS resource has already been satisfied (e.g. due to zapping) */
+ if (katom->kds_dep_satisfied)
+ goto out;
+
+ /* This atom's KDS dependency has now been met */
+ katom->kds_dep_satisfied = true;
+
+ /* Check whether the atom's other dependencies were already met. If
+ * katom is a GPU atom then the job scheduler may be able to represent
+ * the dependencies, hence we may attempt to submit it before they are
+ * met. Other atoms must have had both dependencies resolved */
+ if (IS_GPU_ATOM(katom) ||
+ (!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+ !kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+ /* katom dep complete, attempt to run it */
+ bool resched = false;
+
+ resched = jd_run_atom(katom);
+
+ if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+ /* The atom has already finished */
+ resched |= jd_done_nolock(katom);
+ }
+
+ if (resched)
+ kbase_js_sched_all(kbdev);
+ }
+ out:
+ mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+
+ /* Prevent job_done_nolock from being called twice on an atom when
+ * there is a race between job completion and cancellation */
+
+ if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+ /* Wait was cancelled - zap the atom */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ if (jd_done_nolock(katom))
+ kbase_js_sched_all(katom->kctx->kbdev);
+ }
+}
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ struct sg_table *sgt;
+ struct scatterlist *s;
+ int i;
+ phys_addr_t *pa;
+ int err;
+ size_t count = 0;
+ struct kbase_mem_phy_alloc *alloc;
+
+ alloc = reg->gpu_alloc;
+
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+ KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+ sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL);
+
+ if (IS_ERR_OR_NULL(sgt))
+ return -EINVAL;
+
+ /* save for later */
+ alloc->imported.umm.sgt = sgt;
+
+ pa = kbase_get_gpu_phy_pages(reg);
+ KBASE_DEBUG_ASSERT(pa);
+
+ for_each_sg(sgt->sgl, s, sgt->nents, i) {
+ int j;
+ size_t pages = PFN_UP(sg_dma_len(s));
+
+ WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+ "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+ sg_dma_len(s));
+
+ WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+ "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+ (unsigned long long) sg_dma_address(s));
+
+ for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++)
+ *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+ WARN_ONCE(j < pages,
+ "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+ alloc->imported.umm.dma_buf->size);
+ }
+
+ if (WARN_ONCE(count < reg->nr_pages,
+ "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+ alloc->imported.umm.dma_buf->size)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Update nents as we now have pages to map */
+ alloc->nents = count;
+
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+ if (err) {
+ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+ alloc->imported.umm.sgt = NULL;
+ }
+
+ return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+ KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+ alloc->imported.umm.sgt = NULL;
+ alloc->nents = 0;
+}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+ if (katom->kds_rset) {
+ struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+ /*
+ * As the atom is no longer waiting, remove it from
+ * the waiting list.
+ */
+
+ mutex_lock(&jctx->lock);
+ kbase_jd_kds_waiters_remove(katom);
+ mutex_unlock(&jctx->lock);
+
+ /* Release the kds resource or cancel if zapping */
+ kds_resource_set_release_sync(&katom->kds_rset);
+ }
+#endif /* CONFIG_KDS */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+ /* Prevent the KDS resource from triggering the atom in case of zapping */
+ if (katom->kds_rset)
+ katom->kds_dep_satisfied = true;
+#endif /* CONFIG_KDS */
+
+ kbase_gpu_vm_lock(katom->kctx);
+ /* only roll back if extres is non-NULL */
+ if (katom->extres) {
+ u32 res_no;
+
+ res_no = katom->nr_extres;
+ while (res_no-- > 0) {
+ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+ alloc->imported.umm.current_mapping_usage_count--;
+
+ if (0 == alloc->imported.umm.current_mapping_usage_count) {
+ struct kbase_va_region *reg;
+
+ reg = kbase_region_tracker_find_region_base_address(
+ katom->kctx,
+ katom->extres[res_no].gpu_address);
+
+ if (reg && reg->gpu_alloc == alloc)
+ kbase_mmu_teardown_pages(
+ katom->kctx,
+ reg->start_pfn,
+ kbase_reg_current_backed_size(reg));
+
+ kbase_jd_umm_unmap(katom->kctx, alloc);
+ }
+ }
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ kbase_mem_phy_alloc_put(katom->extres[res_no].alloc);
+ }
+ kfree(katom->extres);
+ katom->extres = NULL;
+ }
+ kbase_gpu_vm_unlock(katom->kctx);
+}
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive)
+{
+ u32 i;
+
+ for (i = 0; i < *kds_res_count; i++) {
+ /* Duplicate resource, ignore */
+ if (kds_resources[i] == kds_res)
+ return;
+ }
+
+ kds_resources[*kds_res_count] = kds_res;
+ if (exclusive)
+ set_bit(*kds_res_count, kds_access_bitmap);
+ (*kds_res_count)++;
+}
+#endif
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+ int err_ret_val = -EINVAL;
+ u32 res_no;
+#ifdef CONFIG_KDS
+ u32 kds_res_count = 0;
+ struct kds_resource **kds_resources = NULL;
+ unsigned long *kds_access_bitmap = NULL;
+#endif /* CONFIG_KDS */
+ struct base_external_resource *input_extres;
+
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+ /* no resources encoded, early out */
+ if (!katom->nr_extres)
+ return -EINVAL;
+
+ katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+ if (NULL == katom->extres) {
+ err_ret_val = -ENOMEM;
+ goto early_err_out;
+ }
+
+ /* copy user buffer to the end of our real buffer.
+ * Make sure the struct sizes haven't changed in a way
+ * we don't support */
+ BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+ input_extres = (struct base_external_resource *)
+ (((unsigned char *)katom->extres) +
+ (sizeof(*katom->extres) - sizeof(*input_extres)) *
+ katom->nr_extres);
+
+ if (copy_from_user(input_extres,
+ get_compat_pointer(katom->kctx, &user_atom->extres_list),
+ sizeof(*input_extres) * katom->nr_extres) != 0) {
+ err_ret_val = -EINVAL;
+ goto early_err_out;
+ }
+#ifdef CONFIG_KDS
+ /* assume we have to wait for all */
+ KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+ kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+ if (NULL == kds_resources) {
+ err_ret_val = -ENOMEM;
+ goto early_err_out;
+ }
+
+ KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
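+	/* one access bit per external resource, rounded up to whole longs */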
+ kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL);
+
+ if (NULL == kds_access_bitmap) {
+ err_ret_val = -ENOMEM;
+ goto early_err_out;
+ }
+#endif /* CONFIG_KDS */
+
+ /* need to keep the GPU VM locked while we set up UMM buffers */
+ kbase_gpu_vm_lock(katom->kctx);
+ for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+ struct base_external_resource *res;
+ struct kbase_va_region *reg;
+
+ res = &input_extres[res_no];
+ reg = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+ res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+ /* did we find a matching region object? */
+ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+ /* roll back */
+ goto failed_loop;
+ }
+
+ if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+ (reg->flags & KBASE_REG_SECURE)) {
+ katom->atom_flags |= KBASE_KATOM_FLAG_SECURE;
+ if ((katom->core_req & BASE_JD_REQ_FS) == 0) {
+ WARN_RATELIMIT(1, "Secure non-fragment jobs not supported");
+ goto failed_loop;
+ }
+ }
+
+ /* decide what needs to happen for this resource */
+ switch (reg->gpu_alloc->type) {
+ case BASE_TMEM_IMPORT_TYPE_UMP:
+ {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+ struct kds_resource *kds_res;
+
+ kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
+ if (kds_res)
+ add_kds_resource(kds_res, kds_resources, &kds_res_count,
+ kds_access_bitmap,
+ res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
+#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+ break;
+ }
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case BASE_TMEM_IMPORT_TYPE_UMM:
+ {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct kds_resource *kds_res;
+
+ kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf);
+ if (kds_res)
+ add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
+#endif
+ reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+ if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+ /* use a local variable to not pollute err_ret_val
+ * with a potential success value as some other gotos depend
+ * on the default error code stored in err_ret_val */
+ int tmp;
+
+ tmp = kbase_jd_umm_map(katom->kctx, reg);
+ if (tmp) {
+ /* failed to map this buffer, roll back */
+ err_ret_val = tmp;
+ reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+ goto failed_loop;
+ }
+ }
+ break;
+ }
+#endif
+ default:
+ goto failed_loop;
+ }
+
+		/* finish by updating our array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourselves, so no writes
+		 * until the last read for an element.
+		 */
+ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+ katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+ }
+ /* successfully parsed the extres array */
+ /* drop the vm lock before we call into kds */
+ kbase_gpu_vm_unlock(katom->kctx);
+
+#ifdef CONFIG_KDS
+ if (kds_res_count) {
+ int wait_failed;
+
+ /* We have resources to wait for with kds */
+ katom->kds_dep_satisfied = false;
+
+ wait_failed = kds_async_waitall(&katom->kds_rset,
+ &katom->kctx->jctx.kds_cb, katom, NULL,
+ kds_res_count, kds_access_bitmap,
+ kds_resources);
+
+ if (wait_failed)
+ goto failed_kds_setup;
+ else
+ kbase_jd_kds_waiters_add(katom);
+ } else {
+ /* Nothing to wait for, so kds dep met */
+ katom->kds_dep_satisfied = true;
+ }
+ kfree(kds_resources);
+ kfree(kds_access_bitmap);
+#endif /* CONFIG_KDS */
+
+ /* all done OK */
+ return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_KDS
+ failed_kds_setup:
+
+ /* lock before we unmap */
+ kbase_gpu_vm_lock(katom->kctx);
+#endif /* CONFIG_KDS */
+
+ failed_loop:
+ /* undo the loop work */
+ while (res_no-- > 0) {
+ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+ alloc->imported.umm.current_mapping_usage_count--;
+
+ if (0 == alloc->imported.umm.current_mapping_usage_count) {
+ struct kbase_va_region *reg;
+
+ reg = kbase_region_tracker_find_region_base_address(
+ katom->kctx,
+ katom->extres[res_no].gpu_address);
+
+ if (reg && reg->gpu_alloc == alloc)
+ kbase_mmu_teardown_pages(katom->kctx,
+ reg->start_pfn,
+ kbase_reg_current_backed_size(reg));
+
+ kbase_jd_umm_unmap(katom->kctx, alloc);
+ }
+ }
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ kbase_mem_phy_alloc_put(alloc);
+ }
+ kbase_gpu_vm_unlock(katom->kctx);
+
+ early_err_out:
+ kfree(katom->extres);
+ katom->extres = NULL;
+#ifdef CONFIG_KDS
+ kfree(kds_resources);
+ kfree(kds_access_bitmap);
+#endif /* CONFIG_KDS */
+ return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+ struct kbase_jd_atom *katom,
+ u8 d,
+ bool ctx_is_dying)
+{
+ u8 other_d = !d;
+
+ while (!list_empty(&katom->dep_head[d])) {
+ struct kbase_jd_atom *dep_atom;
+ u8 dep_type;
+
+ dep_atom = list_entry(katom->dep_head[d].next,
+ struct kbase_jd_atom, dep_item[d]);
+
+ list_del(katom->dep_head[d].next);
+
+ dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+ kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+ if (katom->event_code != BASE_JD_EVENT_DONE &&
+ (dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) {
+ /* Atom failed, so remove the other dependencies and immediately fail the atom */
+ if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+ list_del(&dep_atom->dep_item[other_d]);
+ kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]);
+ }
+#ifdef CONFIG_KDS
+ if (!dep_atom->kds_dep_satisfied) {
+ /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+ * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+ */
+ dep_atom->kds_dep_satisfied = true;
+ }
+#endif
+
+ dep_atom->event_code = katom->event_code;
+ KBASE_DEBUG_ASSERT(dep_atom->status !=
+ KBASE_JD_ATOM_STATE_UNUSED);
+ dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+ list_add_tail(&dep_atom->dep_item[0], out_list);
+ } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+#ifdef CONFIG_KDS
+ if (dep_atom->kds_dep_satisfied)
+#endif
+ list_add_tail(&dep_atom->dep_item[0], out_list);
+ }
+ }
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ kbdev->force_replay_count++;
+
+ if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+ kbdev->force_replay_count = 0;
+ katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+ if (kbdev->force_replay_random)
+ kbdev->force_replay_limit =
+ (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+ dev_info(kbdev->dev, "force_replay : promoting to error\n");
+ }
+}
+
+/** Test to see if an atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ int i;
+
+ if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+ (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+ return;
+
+ for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+ if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+ kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+ struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+ if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+ BASE_JD_REQ_SOFT_REPLAY &&
+ (dep_atom->core_req & kbdev->force_replay_core_req)
+ == kbdev->force_replay_core_req) {
+ jd_force_failure(kbdev, katom);
+ return;
+ }
+ }
+ }
+}
+#endif
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+ struct list_head completed_jobs;
+ struct list_head runnable_jobs;
+ bool need_to_try_schedule_context = false;
+ int i;
+
+ INIT_LIST_HEAD(&completed_jobs);
+ INIT_LIST_HEAD(&runnable_jobs);
+
+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+ jd_check_force_failure(katom);
+#endif
+
+
+	/* This is needed in case an atom is failed due to being invalid; this
+	 * can happen *before* the jobs that the atom depends on have completed */
+ for (i = 0; i < 2; i++) {
+ if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+ list_del(&katom->dep_item[i]);
+ kbase_jd_katom_dep_clear(&katom->dep[i]);
+ }
+ }
+
+ /* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+ * BASE_JD_EVENT_TILE_RANGE_FAULT.
+ *
+ * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+ * error code to BASE_JD_EVENT_DONE
+ */
+
+ if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+ if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+ /* Promote the failure to job done */
+ katom->event_code = BASE_JD_EVENT_DONE;
+ katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+ }
+ }
+
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ list_add_tail(&katom->dep_item[0], &completed_jobs);
+
+ while (!list_empty(&completed_jobs)) {
+ katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]);
+ list_del(completed_jobs.prev);
+
+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+ for (i = 0; i < 2; i++)
+ jd_resolve_dep(&runnable_jobs, katom, i,
+ js_kctx_info->ctx.is_dying);
+
+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+ kbase_jd_post_external_resources(katom);
+
+ while (!list_empty(&runnable_jobs)) {
+ struct kbase_jd_atom *node;
+
+ node = list_entry(runnable_jobs.next,
+ struct kbase_jd_atom, dep_item[0]);
+
+ list_del(runnable_jobs.next);
+
+ KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+ if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) {
+ need_to_try_schedule_context |= jd_run_atom(node);
+ } else {
+ node->event_code = katom->event_code;
+
+ if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(node))
+ /* Don't complete this atom */
+ continue;
+ } else if (node->core_req &
+ BASE_JD_REQ_SOFT_JOB) {
+ /* If this is a fence wait then remove it from the list of sync waiters. */
+ if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+ list_del(&node->dep_item[0]);
+
+ kbase_finish_soft_job(node);
+ }
+ node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ }
+
+ if (node->status == KBASE_JD_ATOM_STATE_COMPLETED)
+ list_add_tail(&node->dep_item[0], &completed_jobs);
+ }
+
+ /* Register a completed job as a disjoint event when the GPU
+ * is in a disjoint state (ie. being reset or replaying jobs).
+ */
+ kbase_disjoint_event_potential(kctx->kbdev);
+ kbase_event_post(kctx, katom);
+
+ /* Decrement and check the TOTAL number of jobs. This includes
+ * those not tracked by the scheduler: 'not ready to run' and
+ * 'dependency-only' jobs. */
+ if (--kctx->jctx.job_nr == 0)
+ wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter
+ * that we've got no more jobs (so we can be safely terminated) */
+ }
+
+ return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+ CORE_REQ_DEP_ONLY,
+ CORE_REQ_SOFT,
+ CORE_REQ_COMPUTE,
+ CORE_REQ_FRAGMENT,
+ CORE_REQ_VERTEX,
+ CORE_REQ_TILER,
+ CORE_REQ_FRAGMENT_VERTEX,
+ CORE_REQ_FRAGMENT_VERTEX_TILER,
+ CORE_REQ_FRAGMENT_TILER,
+ CORE_REQ_VERTEX_TILER,
+ CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+ "Dependency Only Job",
+ "Soft Job",
+ "Compute Shader Job",
+ "Fragment Shader Job",
+ "Vertex/Geometry Shader Job",
+ "Tiler Job",
+ "Fragment Shader + Vertex/Geometry Shader Job",
+ "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+ "Fragment Shader + Tiler Job",
+ "Vertex/Geometry Shader Job + Tiler Job",
+ "Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+ if (core_req & BASE_JD_REQ_SOFT_JOB)
+ return core_req_strings[CORE_REQ_SOFT];
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ return core_req_strings[CORE_REQ_COMPUTE];
+ switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+ case BASE_JD_REQ_DEP:
+ return core_req_strings[CORE_REQ_DEP_ONLY];
+ case BASE_JD_REQ_FS:
+ return core_req_strings[CORE_REQ_FRAGMENT];
+ case BASE_JD_REQ_CS:
+ return core_req_strings[CORE_REQ_VERTEX];
+ case BASE_JD_REQ_T:
+ return core_req_strings[CORE_REQ_TILER];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+ case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_VERTEX_TILER];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+ }
+ return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx,
+ const struct base_jd_atom_v2 *user_atom,
+ struct kbase_jd_atom *katom)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ base_jd_core_req core_req;
+ int queued = 0;
+ int i;
+ int sched_prio;
+ bool ret;
+
+ /* Update the TOTAL number of jobs. This includes those not tracked by
+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+ jctx->job_nr++;
+
+ core_req = user_atom->core_req;
+
+ katom->start_timestamp.tv64 = 0;
+ katom->time_spent_us = 0;
+ katom->udata = user_atom->udata;
+ katom->kctx = kctx;
+ katom->nr_extres = user_atom->nr_extres;
+ katom->extres = NULL;
+ katom->device_nr = user_atom->device_nr;
+ katom->affinity = 0;
+ katom->jc = user_atom->jc;
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+ katom->core_req = core_req;
+ katom->atom_flags = 0;
+ katom->retry_count = 0;
+ katom->need_cache_flush_cores_retained = 0;
+ katom->x_pre_dep = NULL;
+ katom->x_post_dep = NULL;
+#ifdef CONFIG_KDS
+ /* Start by assuming that the KDS dependencies are satisfied,
+ * kbase_jd_pre_external_resources will correct this if there are dependencies */
+ katom->kds_dep_satisfied = true;
+ katom->kds_rset = NULL;
+#endif /* CONFIG_KDS */
+
+	/* Don't do anything if the dependencies are malformed. This is done
+	 * in a separate pass so that both dependencies are checked at once;
+	 * otherwise it would add extra complexity to deal with the 1st
+	 * dependency (just added to the list) if only the 2nd one has an
+	 * invalid config.
+	 */
+ for (i = 0; i < 2; i++) {
+ int dep_atom_number = user_atom->pre_dep[i].atom_id;
+ base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+ if (dep_atom_number) {
+ if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+ dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+ katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_new_atom(
+ katom,
+ kbase_jd_atom_id(kctx, katom));
+ kbase_tlstream_tl_ret_atom_ctx(
+ katom, kctx);
+#endif
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+ }
+
+ /* Add dependencies */
+ for (i = 0; i < 2; i++) {
+ int dep_atom_number = user_atom->pre_dep[i].atom_id;
+ base_jd_dep_type dep_atom_type;
+ struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+ dep_atom_type = user_atom->pre_dep[i].dependency_type;
+ kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+ if (!dep_atom_number)
+ continue;
+
+ if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+ dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+ if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+ continue;
+			/* don't stop this atom if it has an order dependency
+			 * only on the failed one; try to submit it through
+			 * the normal path
+			 */
+ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+ dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+ continue;
+ }
+
+ if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) {
+ /* Remove the previous dependency */
+ list_del(&katom->dep_item[0]);
+ kbase_jd_katom_dep_clear(&katom->dep[0]);
+ }
+
+ /* Atom has completed, propagate the error code if any */
+ katom->event_code = dep_atom->event_code;
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_new_atom(
+ katom,
+ kbase_jd_atom_id(kctx, katom));
+ kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+#endif
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(katom)) {
+ ret = false;
+ goto out;
+ }
+ }
+ ret = jd_done_nolock(katom);
+
+ goto out;
+ } else {
+ /* Atom is in progress, add this atom to the list */
+ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+ kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+ queued = 1;
+ }
+ }
+
+ /* These must occur after the above loop to ensure that an atom that
+ * depends on a previous atom with the same number behaves as expected */
+ katom->event_code = BASE_JD_EVENT_DONE;
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_new_atom(
+ katom,
+ kbase_jd_atom_id(kctx, katom));
+ kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+#endif
+
+ /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+ if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+ dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+
+ /* Reject atoms with an invalid device_nr */
+ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+ (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+ dev_warn(kctx->kbdev->dev,
+ "Rejecting atom with invalid device_nr %d",
+ katom->device_nr);
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+
+ /* For invalid priority, be most lenient and choose the default */
+ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+ if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+ sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+ katom->sched_priority = sched_prio;
+
+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+ /* handle what we need to do to access the external resources */
+ if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+ /* setup failed (no access, bad resource, unknown resource types, etc.) */
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+
+ /* Validate the atom. Function will return error if the atom is
+ * malformed.
+ *
+ * Soft-jobs never enter the job scheduler but have their own initialize method.
+ *
+ * If either fail then we immediately complete the atom with an error.
+ */
+ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+ if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ } else {
+ /* Soft-job */
+ if (kbase_prepare_soft_job(katom) != 0) {
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+ katom->work_id = atomic_inc_return(&jctx->work_id);
+ trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+ if (queued && !IS_GPU_ATOM(katom)) {
+ ret = false;
+ goto out;
+ }
+#ifdef CONFIG_KDS
+ if (!katom->kds_dep_satisfied) {
+ /* Queue atom due to KDS dependency */
+ ret = false;
+ goto out;
+ }
+#endif /* CONFIG_KDS */
+
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(katom))
+ ret = false;
+ else
+ ret = jd_done_nolock(katom);
+
+ goto out;
+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ if (kbase_process_soft_job(katom) == 0) {
+ kbase_finish_soft_job(katom);
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ /* The job has not yet completed */
+ list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+ ret = false;
+ } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+ katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ ret = kbasep_js_add_job(kctx, katom);
+ /* If job was cancelled then resolve immediately */
+ if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+ ret = jd_done_nolock(katom);
+ } else {
+ /* This is a pure dependency. Resolve it immediately */
+ ret = jd_done_nolock(katom);
+ }
+
+ out:
+ return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data,
+ int uk6_atom)
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ int err = 0;
+ int i;
+ bool need_to_try_schedule_context = false;
+ struct kbase_device *kbdev;
+ void __user *user_addr;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail, so all errors with the jobs
+	 * are reported by immediately failing them (through the event system)
+	 */
+ kbdev = kctx->kbdev;
+
+ beenthere(kctx, "%s", "Enter");
+
+ if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) {
+ dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+ return -EINVAL;
+ }
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+	if ((uk6_atom && submit_data->stride !=
+			sizeof(struct base_jd_atom_v2_uk6)) ||
+			(!uk6_atom &&
+			 submit_data->stride != sizeof(base_jd_atom_v2))) {
+#else
+ if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+ return -EINVAL;
+ }
+
+ user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+ for (i = 0; i < submit_data->nr_atoms; i++) {
+ struct base_jd_atom_v2 user_atom;
+ struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ if (uk6_atom) {
+ struct base_jd_atom_v2_uk6 user_atom_v6;
+ base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+ if (copy_from_user(&user_atom_v6, user_addr,
+ sizeof(user_atom_v6))) {
+ err = -EINVAL;
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+ atomic_sub_return(
+ submit_data->nr_atoms - i,
+ &kctx->timeline.jd_atoms_in_flight));
+ break;
+ }
+ /* Convert from UK6 atom format to UK7 format */
+ user_atom.jc = user_atom_v6.jc;
+ user_atom.udata = user_atom_v6.udata;
+ user_atom.extres_list = user_atom_v6.extres_list;
+ user_atom.nr_extres = user_atom_v6.nr_extres;
+ user_atom.core_req = user_atom_v6.core_req;
+
+ /* atom number 0 is used for no dependency atoms */
+ if (!user_atom_v6.pre_dep[0])
+ dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+ base_jd_atom_dep_set(&user_atom.pre_dep[0],
+ user_atom_v6.pre_dep[0],
+ dep_types[0]);
+
+ /* atom number 0 is used for no dependency atoms */
+ if (!user_atom_v6.pre_dep[1])
+ dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+ base_jd_atom_dep_set(&user_atom.pre_dep[1],
+ user_atom_v6.pre_dep[1],
+ dep_types[1]);
+
+ user_atom.atom_number = user_atom_v6.atom_number;
+ user_atom.prio = user_atom_v6.prio;
+ user_atom.device_nr = user_atom_v6.device_nr;
+ } else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+ err = -EINVAL;
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+ break;
+ }
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ }
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+ user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+ mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+ compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+ BASE_JD_ATOM_COUNT,
+ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+ compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+ sizeof(user_atom.atom_number),
+ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+ katom = &jctx->atoms[user_atom.atom_number];
+
+ while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+ /* Atom number is already in use, wait for the atom to
+ * complete
+ */
+ mutex_unlock(&jctx->lock);
+
+ /* This thread will wait for the atom to complete. Due
+ * to thread scheduling we are not sure that the other
+ * thread that owns the atom will also schedule the
+ * context, so we force the scheduler to be active and
+ * hence eventually schedule this context at some point
+ * later.
+ */
+ kbase_js_sched_all(kbdev);
+
+ if (wait_event_killable(katom->completed,
+ katom->status == KBASE_JD_ATOM_STATE_UNUSED)) {
+ /* We're being killed so the result code
+ * doesn't really matter
+ */
+ return 0;
+ }
+ mutex_lock(&jctx->lock);
+ }
+
+ need_to_try_schedule_context |=
+ jd_submit_atom(kctx, &user_atom, katom);
+
+ /* Register a completed job as a disjoint event when the GPU is in a disjoint state
+	 * (i.e. being reset or replaying jobs).
+ */
+ kbase_disjoint_event_potential(kbdev);
+
+ mutex_unlock(&jctx->lock);
+ }
+
+ if (need_to_try_schedule_context)
+ kbase_js_sched_all(kbdev);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
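+
+/*
+ * Illustrative caller sketch (hypothetical; not part of this file, and
+ * ignoring the BASE_LEGACY_UK6_SUPPORT variant): an ioctl handler that has
+ * already copied a struct kbase_uk_job_submit in from userspace passes it
+ * straight through. Per-atom errors are delivered via the event system, so
+ * only setup errors (bad stride, submission disabled) reach the return code:
+ *
+ *	err = kbase_jd_submit(kctx, submit_data);
+ *	if (err)
+ *		return err;
+ */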
+
+/**
+ * jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+static void jd_done_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_jd_context *jctx;
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ u64 cache_jc = katom->jc;
+ struct kbasep_js_atom_retained_state katom_retained_state;
+ bool schedule = false;
+
+ /* Soft jobs should never reach this function */
+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+ kctx = katom->kctx;
+ jctx = &kctx->jctx;
+ kbdev = kctx->kbdev;
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+ kbase_backend_complete_wq(kbdev, katom);
+
+ /*
+ * Begin transaction on JD context and JS context
+ */
+ mutex_lock(&jctx->lock);
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* This worker only gets called on contexts that are scheduled *in*. This is
+ * because it only happens in response to an IRQ from a job that was
+ * running.
+ */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+ if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+ /* Atom has been promoted to stopped */
+ unsigned long flags;
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ mutex_unlock(&jctx->lock);
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ kbase_js_unpull(kctx, katom);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return;
+ }
+
+ if (katom->event_code != BASE_JD_EVENT_DONE)
+ dev_err(kbdev->dev,
+ "t6xx: GPU fault 0x%02lx from job slot %d\n",
+ (unsigned long)katom->event_code,
+ katom->slot_nr);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+ /* Retain state before the katom disappears */
+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+ if (!kbasep_js_has_atom_finished(&katom_retained_state)) {
+ mutex_lock(&js_devdata->runpool_mutex);
+ kbasep_js_clear_job_retry_submit(katom);
+ /* An atom that has been hard-stopped might have previously
+ * been soft-stopped and has just finished before the hard-stop
+ * occurred. For this reason, clear the hard-stopped flag */
+ katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ }
+
+ if (kbasep_js_has_atom_finished(&katom_retained_state))
+ schedule = true;
+
+ kbase_js_complete_atom_wq(kctx, katom);
+
+ KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+ kbasep_js_remove_job(kbdev, kctx, katom);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+ /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+ schedule |= jd_done_nolock(katom);
+
+ /* katom may have been freed now, do not use! */
+
+ /*
+ * Transaction complete
+ */
+ mutex_unlock(&jctx->lock);
+
+ /* Job is now no longer running, so can now safely release the context
+ * reference, and handle any actions that were logged against the atom's retained state */
+
+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+ if (schedule)
+ kbase_js_sched_all(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_jd_context *jctx;
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool need_to_try_schedule_context;
+ bool attr_state_changed;
+ struct kbase_device *kbdev;
+
+ /* Soft jobs should never reach this function */
+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+ kctx = katom->kctx;
+ kbdev = kctx->kbdev;
+ jctx = &kctx->jctx;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+ /* This only gets called on contexts that are scheduled out. Hence, we must
+ * make sure we don't de-ref the number of running jobs (there aren't
+ * any), nor must we try to schedule out the context (it's already
+ * scheduled out).
+ */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+ /* Scheduler: Remove the job from the system */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&jctx->lock);
+
+ need_to_try_schedule_context = jd_done_nolock(katom);
+ /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+ * schedule the context. There's also no need for the jsctx_mutex to have been taken
+ * around this too. */
+ KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+ /* katom may have been freed now, do not use! */
+ mutex_unlock(&jctx->lock);
+
+ if (attr_state_changed)
+ kbase_js_sched_all(kbdev);
+}
+
+/**
+ * jd_evict_worker - Work queue job evict function
+ * @data: a &struct work_struct
+ *
+ * Only called as part of evicting failed jobs. This is only called on jobs that
+ * were never submitted to HW Access. Jobs that were submitted are handled
+ * through jd_done_worker().
+ * Operates serially with the jd_done_worker() on the work queue.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or jd_done_worker(), because the atoms here must not be
+ * running (by virtue of having not been submitted to HW Access).
+ */
+static void jd_evict_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+ work);
+ struct kbase_jd_context *jctx;
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbase_device *kbdev;
+
+ /* Soft jobs should never reach this function */
+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+ kctx = katom->kctx;
+ kbdev = kctx->kbdev;
+ jctx = &kctx->jctx;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+ /* Scheduler: Remove the job from the system */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&jctx->lock);
+ jd_done_nolock(katom);
+ /* katom may have been freed now, do not use! */
+ mutex_unlock(&jctx->lock);
+
+ kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ * This can be called safely from atomic context.
+ * The caller must hold kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+ ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(katom);
+ kctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(kctx);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+ kbase_job_check_leave_disjoint(kbdev, katom);
+
+ katom->slot_nr = slot_nr;
+
+ WARN_ON(work_pending(&katom->work));
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, jd_done_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
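+
+/*
+ * Illustrative caller sketch (hypothetical): a job slot IRQ handler that
+ * holds runpool_irq.lock would report each job it removes from the hardware
+ * with kbase_jd_done(); the heavyweight completion work then runs later from
+ * jd_done_worker() on the context's job_done_wq:
+ *
+ *	katom->event_code = completion_code;
+ *	kbase_jd_done(katom, js, &end_timestamp, 0);
+ */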
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != katom);
+ kctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+ /* This should only be done from a context that is not scheduled */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+ WARN_ON(work_pending(&katom->work));
+
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, jd_cancel_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != katom);
+ kctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+ /* This should only be done from a context that is currently scheduled
+ */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+ WARN_ON(work_pending(&katom->work));
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, jd_evict_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+ struct kbase_jd_atom *katom;
+ struct list_head *entry, *tmp;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ kbdev = kctx->kbdev;
+
+ KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+ kbase_js_zap_context(kctx);
+ kbase_jm_wait_for_zero_jobs(kctx);
+
+ mutex_lock(&kctx->jctx.lock);
+
+ /*
+	 * While holding the struct kbase_jd_context lock, clean up jobs that are
+	 * known to kbase but are queued outside the job scheduler.
+ */
+
+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+ katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]);
+ kbase_cancel_soft_job(katom);
+ }
+
+
+#ifdef CONFIG_KDS
+
+	/* For each job waiting on a KDS resource, cancel the wait and force the
+	 * job to complete early; this is done so that we don't leave jobs
+	 * outstanding, waiting on KDS resources that may never be released once
+	 * contexts are zapped, which would result in a hang.
+	 *
+	 * Note that we can safely iterate over the list as the struct
+	 * kbase_jd_context lock is held; this prevents items being removed while
+	 * kbase_cancel_kds_wait_job() calls jd_done_nolock().
+ */
+
+ list_for_each(entry, &kctx->waiting_kds_resource) {
+ katom = list_entry(entry, struct kbase_jd_atom, node);
+
+ kbase_cancel_kds_wait_job(katom);
+ }
+#endif
+
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+ int i;
+ int mali_err = 0;
+#ifdef CONFIG_KDS
+ int err;
+#endif /* CONFIG_KDS */
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1);
+ if (NULL == kctx->jctx.job_done_wq) {
+ mali_err = -ENOMEM;
+ goto out1;
+ }
+
+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+ init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+ /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+ kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+ }
+
+ mutex_init(&kctx->jctx.lock);
+
+ init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+ spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+ err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+ if (0 != err) {
+ mali_err = -EINVAL;
+ goto out2;
+ }
+#endif /* CONFIG_KDS */
+
+ kctx->jctx.job_nr = 0;
+
+ return 0;
+
+#ifdef CONFIG_KDS
+ out2:
+ destroy_workqueue(kctx->jctx.job_done_wq);
+#endif /* CONFIG_KDS */
+ out1:
+ return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+ kds_callback_term(&kctx->jctx.kds_cb);
+#endif /* CONFIG_KDS */
+ /* Work queue is emptied by this */
+ destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100644
index 00000000000..b37f280a647
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,114 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase_jd_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if the data was successfully printed to the debugfs entry file,
+ * or an error code otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_context *kctx = sfile->private;
+ struct kbase_jd_atom *atoms;
+ unsigned long irq_flags;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* Print table heading */
+ seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
+
+ atoms = kctx->jctx.atoms;
+ /* General atom states */
+ mutex_lock(&kctx->jctx.lock);
+ /* JS-related states */
+ spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+ for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+ struct kbase_jd_atom *atom = &atoms[i];
+ s64 start_timestamp = 0;
+
+ if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+ continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w; a non-zero value means
+		 * it is valid */
+ if (ktime_to_ns(atom->start_timestamp))
+ start_timestamp = ktime_to_ns(
+ ktime_sub(ktime_get(), atom->start_timestamp));
+
+ seq_printf(sfile,
+ "%i,%u,%u,%u,%u %u,%lli,%llu\n",
+ i, atom->core_req, atom->status, atom->coreref_state,
+ (unsigned)(atom->dep[0].atom ?
+ atom->dep[0].atom - atoms : 0),
+ (unsigned)(atom->dep[1].atom ?
+ atom->dep[1].atom - atoms : 0),
+ (signed long long)start_timestamp,
+ (unsigned long long)(atom->time_spent_us ?
+ atom->time_spent_us * 1000 : start_timestamp)
+ );
+ }
+ spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+ mutex_unlock(&kctx->jctx.lock);
+
+ return 0;
+}
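+
+/*
+ * Illustrative output (hypothetical values): with the heading printed above,
+ * a used atom with core_req 65, status 2, no coreref state and a single
+ * pre-dependency on atom 3 might appear as:
+ *
+ *	12,65,2,0,3 0,1234567,1234
+ */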
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: 0 on success, or a negative error code on failure
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+ .open = kbasep_jd_debugfs_atoms_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* Expose all atoms */
+ debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+ &kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100644
index 00000000000..703e4cf6a5f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+/**
+ * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
+ *
+ * @kctx: Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx);
+
+#endif /*_KBASE_JD_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100644
index 00000000000..bc5ad805818
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,132 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on
+ * slot @js from the active context.
+ * @kbdev: Device pointer
+ * @js: Job slot to run on
+ * @nr_jobs_to_submit: Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+ int nr_jobs_to_submit)
+{
+ struct kbase_context *kctx;
+ int i;
+
+ kctx = kbdev->hwaccess.active_kctx;
+
+ if (!kctx)
+ return true;
+
+ for (i = 0; i < nr_jobs_to_submit; i++) {
+ struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+ if (!katom)
+ return true; /* Context has no jobs on this slot */
+
+ kbase_backend_run_atom(kbdev, katom);
+ }
+
+ return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+ u32 ret_mask = 0;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ while (js_mask) {
+ int js = ffs(js_mask) - 1;
+ int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+ if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+ ret_mask |= (1 << js);
+
+ js_mask &= ~(1 << js);
+ }
+
+ return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (!down_trylock(&js_devdata->schedule_sem)) {
+ kbase_jm_kick(kbdev, js_mask);
+ up(&js_devdata->schedule_sem);
+ }
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (!down_trylock(&js_devdata->schedule_sem)) {
+ kbase_jm_kick_all(kbdev);
+ up(&js_devdata->schedule_sem);
+ }
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ WARN_ON(atomic_read(&kctx->atoms_pulled));
+
+ if (kbdev->hwaccess.active_kctx == kctx)
+ kbdev->hwaccess.active_kctx = NULL;
+}
+
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+ katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+ kbase_js_complete_atom(katom, NULL);
+ } else {
+ kbase_js_unpull(katom->kctx, katom);
+ }
+}
+
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+ ktime_t *end_timestamp)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100644
index 00000000000..27aca3a699f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,106 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev: Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ * slots.
+ * @kbdev: Device pointer
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+ return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
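+
+/*
+ * Illustrative note: on a GPU reporting three job slots the mask computed
+ * above is ((1 << 3) - 1) == 0x7, i.e. the equivalent of:
+ *
+ *	u32 remaining = kbase_jm_kick(kbdev, 0x7);
+ */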
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev: Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev: Device pointer
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev: Device pointer
+ * @kctx: Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the runpool_irq lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ * been soft-stopped or will fail due to a
+ * dependency
+ * @kbdev: Device pointer
+ * @katom: Atom that has been stopped or will be failed
+ */
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev: Device pointer
+ * @katom: Atom that has completed
+ * @end_timestamp: Timestamp of atom completion
+ */
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+ ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100644
index 00000000000..c49d622b761
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,3177 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+ /* The context was descheduled - caller should try scheduling in a new
+ * one to keep the runpool full */
+ KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+ /* Ctx attributes were changed - caller should try scheduling all
+ * contexts */
+ KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+ KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+ KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+ KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+ BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+ BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+ BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+ struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+ kbasep_js_policy_ctx_job_cb callback);
+
+static bool kbase_js_evict_atom(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom_evict,
+ struct kbase_jd_atom *start_katom,
+ struct kbase_jd_atom *head_katom,
+ struct list_head *evict_list,
+ struct jsctx_rb *rb, int idx);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return refcnt;
+}
+
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+
+ return refcnt;
+}
+#else /* KBASE_TRACE_ENABLE */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+/*
+ * Private types
+ */
+enum {
+ JS_DEVDATA_INIT_NONE = 0,
+ JS_DEVDATA_INIT_CONSTANTS = (1 << 0),
+ JS_DEVDATA_INIT_POLICY = (1 << 1),
+ JS_DEVDATA_INIT_ALL = ((1 << 2) - 1)
+};
+
+enum {
+ JS_KCTX_INIT_NONE = 0,
+ JS_KCTX_INIT_CONSTANTS = (1 << 0),
+ JS_KCTX_INIT_POLICY = (1 << 1),
+ JS_KCTX_INIT_ALL = ((1 << 2) - 1)
+};
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+ base_jd_core_req core_req = 0u;
+
+ if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+ core_req |= BASE_JD_REQ_V;
+
+ if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+ core_req |= BASE_JD_REQ_CF;
+
+ if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+ core_req |= BASE_JD_REQ_CS;
+
+ if ((features & JS_FEATURE_TILER_JOB) != 0)
+ core_req |= BASE_JD_REQ_T;
+
+ if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+ core_req |= BASE_JD_REQ_FS;
+
+ return core_req;
+}
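+
+/*
+ * Illustrative example (hypothetical feature word): a JSn_FEATURES value
+ * with only the tiler and fragment bits set maps to the corresponding core
+ * requirements:
+ *
+ *	core_req = core_reqs_from_jsn_features(JS_FEATURE_TILER_JOB |
+ *			JS_FEATURE_FRAGMENT_JOB);
+ *	core_req == (BASE_JD_REQ_T | BASE_JD_REQ_FS)
+ */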
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+ mutex_lock(&kbdev->js_data.runpool_mutex);
+ kbase_backend_ctx_count_changed(kbdev);
+ mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the kbasep_js_device_data::runpool_irq::lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_per_as_data *js_per_as_data;
+ bool result = false;
+ int as_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ int new_refcnt;
+
+ KBASE_DEBUG_ASSERT(as_nr >= 0);
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+ new_refcnt = ++(js_per_as_data->as_busy_refcount);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+ NULL, 0u, new_refcnt);
+ result = true;
+ }
+
+ return result;
+}
+
+/**
+ * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this function returns
+ * true.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ return rb->read_idx == rb->write_idx;
+}
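+
+/*
+ * Illustrative note: read_idx, write_idx and running_idx are free-running
+ * counters, masked with JSCTX_RB_MASK only when indexing rb->entries[], so
+ * simple equality tests describe the buffer state:
+ *
+ *	read_idx == write_idx	=> nothing left to pull
+ *	running_idx != read_idx	=> pulled (running) atoms still present
+ */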
+
+/**
+ * jsctx_rb_is_empty(): - Check if all priority ring buffers are empty
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if the ring buffers for all priorities are empty, false
+ * otherwise.
+ */
+static inline bool
+jsctx_rb_is_empty(struct kbase_context *kctx, int js)
+{
+ int prio;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+ if (!jsctx_rb_is_empty_prio(kctx, js, prio))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * jsctx_rb_compact_prio(): - Compact a ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to compact.
+ * @prio: Priority id to compact.
+ */
+static inline void
+jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ u16 compact_idx = rb->write_idx - 1;
+ u16 end_idx = rb->running_idx - 1;
+ u16 i;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ for (i = compact_idx; i != end_idx; i--) {
+ if (rb->entries[i & JSCTX_RB_MASK].atom_id !=
+ KBASEP_ATOM_ID_INVALID) {
+ WARN_ON(compact_idx < rb->running_idx);
+ rb->entries[compact_idx & JSCTX_RB_MASK].atom_id =
+ rb->entries[i & JSCTX_RB_MASK].atom_id;
+
+ compact_idx--;
+ }
+ if (rb->read_idx == i)
+ rb->read_idx = compact_idx + 1;
+ }
+
+ rb->running_idx = compact_idx + 1;
+}
+
+/**
+ * jsctx_rb_compact(): - Compact all priority ring buffers
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to compact.
+ */
+static inline void
+jsctx_rb_compact(struct kbase_context *kctx, int js)
+{
+ int prio;
+
+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+ jsctx_rb_compact_prio(kctx, js, prio);
+}
+
+/**
+ * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to iterate.
+ * @prio: Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a ring buffer and invoke @callback for each entry in buffer, and
+ * remove the entry from the buffer.
+ *
+ * If entries are added to the ring buffer while this is running, those entries
+ * may or may not be covered. To ensure that all entries in the buffer have
+ * been enumerated when this function returns, jsctx->lock must be held when
+ * calling this function.
+ *
+ * The HW access lock, js_data.runpool_irq.lock, must always be held when
+ * calling this function.
+ */
+static void
+jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio,
+ kbasep_js_policy_ctx_job_cb callback)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ struct kbase_jd_atom *katom;
+ u16 write_idx = ACCESS_ONCE(rb->write_idx);
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* There must be no jobs currently in HW access */
+ WARN_ON(rb->read_idx != rb->running_idx);
+
+	/* Invoke the callback on all kbase_jd_atoms in the ring buffer, and
+	 * remove them from the buffer */
+ while (rb->read_idx != write_idx) {
+ int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id;
+
+ katom = kbase_jd_atom_from_id(kctx, id);
+
+ rb->read_idx++;
+ rb->running_idx++;
+
+ callback(kctx->kbdev, katom);
+ }
+}
+
+/**
+ * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback
+ * for each entry in buffer, and remove the entry from the buffer.
+ */
+static inline void
+jsctx_rb_foreach(struct kbase_context *kctx, int js,
+ kbasep_js_policy_ctx_job_cb callback)
+{
+ int prio;
+
+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+ jsctx_rb_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ int id;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ if (jsctx_rb_is_empty_prio(kctx, js, prio))
+ return NULL;
+
+ id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id;
+ return kbase_jd_atom_from_id(kctx, id);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and return a
+ * pointer to the next atom, unless the ring buffers for all priorities are
+ * empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+ int prio;
+
+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+ struct kbase_jd_atom *katom;
+
+ katom = jsctx_rb_peek_prio(kctx, js, prio);
+ if (katom)
+ return katom;
+ }
+
+ return NULL;
+}
+
+/**
+ * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a
+ * pointer to the last atom, unless that ring buffer is empty.
+ *
+ * The last atom is the atom that was added using jsctx_rb_add_atom() most
+ * recently.
+ *
+ * Return: Pointer to last atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ int id;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ if (jsctx_rb_is_empty_prio(kctx, js, prio))
+ return NULL;
+
+ id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id;
+ return kbase_jd_atom_from_id(kctx, id);
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int prio = katom->sched_priority;
+ int js = katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* Atoms must be pulled in the correct order. */
+ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+ rb->read_idx++;
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int prio = katom->sched_priority;
+ int js = katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* Atoms must be unpulled in correct order. */
+ WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id !=
+ kbase_jd_atom_id(kctx, katom));
+
+ rb->read_idx--;
+}
+
+/**
+ * jsctx_rb_add_atom(): - Add atom to ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to add.
+ *
+ * Add @katom to the ring buffer determined by the atom's priority and job slot
+ * number.
+ *
+ * If the ring buffer is full, -EBUSY will be returned.
+ *
+ * Return: On success 0 is returned, on failure a negative error code.
+ */
+static int
+jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int prio = katom->sched_priority;
+ int js = katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ /* Check if the ring buffer is full */
+ if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE)
+ return -EBUSY;
+
+ rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id =
+ kbase_jd_atom_id(kctx, katom);
+ rb->write_idx++;
+
+ return 0;
+}
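+
+/*
+ * Illustrative note (hypothetical values): the buffer counts as full once
+ * JSCTX_RB_SIZE entries separate the producer from the slowest consumer.
+ * With running_idx == 4 and write_idx == 4 + JSCTX_RB_SIZE,
+ * jsctx_rb_add_atom() returns -EBUSY until a completed atom advances
+ * running_idx via jsctx_rb_remove().
+ */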
+
+/**
+ * jsctx_rb_remove(): - Remove atom from ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to remove.
+ *
+ * Remove @katom from the ring buffer.
+ *
+ * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and
+ * atoms must be removed in the same order they were pulled from the ring
+ * buffer.
+ */
+static inline void
+jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int prio = katom->sched_priority;
+ int js = katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* Atoms must be completed in order. */
+ WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id !=
+ kbase_jd_atom_id(kctx, katom));
+
+ rb->running_idx++;
+}
+
+/**
+ * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @start_katom: Pointer to the first katom to evict.
+ * @head_katom: Pointer to head katom.
+ * @evict_list: Pointer to head of list where evicted atoms are added.
+ *
+ * Iterate over the ring buffer starting at @start_katom and evict
+ * @start_katom and its dependent atoms in the ring buffer.
+ *
+ * @evict_list and @head_katom are passed on to kbase_js_evict_atom(), which
+ * will examine the atom dependencies.
+ *
+ * jsctx_rb_evict() is only called by kbase_js_evict_deps().
+ */
+static void
+jsctx_rb_evict(struct kbase_context *kctx,
+ struct kbase_jd_atom *start_katom,
+ struct kbase_jd_atom *head_katom,
+ struct list_head *evict_list)
+{
+ int prio = start_katom->sched_priority;
+ int js = start_katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ bool atom_in_rb = false;
+	u16 i, start_idx = 0;	/* initialised to avoid a spurious
+				 * may-be-used-uninitialised warning */
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ for (i = rb->running_idx; i != rb->write_idx; i++) {
+ if (rb->entries[i & JSCTX_RB_MASK].atom_id ==
+ kbase_jd_atom_id(kctx, start_katom)) {
+ start_idx = i;
+ atom_in_rb = true;
+ break;
+ }
+ }
+
+ /* start_katom must still be in ring buffer. */
+ if (i == rb->write_idx || !atom_in_rb)
+ return;
+
+ /* Evict all dependencies on same slot. */
+ for (i = start_idx; i != rb->write_idx; i++) {
+ u8 katom_evict;
+
+ katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id;
+ if (katom_evict != KBASEP_ATOM_ID_INVALID) {
+ if (!kbase_js_evict_atom(kctx,
+ &kctx->jctx.atoms[katom_evict],
+ start_katom, head_katom,
+ evict_list, rb, i))
+ break;
+ }
+ }
+}
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+ struct kbasep_js_device_data *jsdd;
+ int err;
+ int i;
+ u16 as_present;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ jsdd = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(jsdd->init_status == JS_DEVDATA_INIT_NONE);
+
+ /* These two must be recalculated if nr_hw_address_spaces changes
+ * (e.g. for HW workarounds) */
+ as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+ kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+ bool use_workaround;
+
+ use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+ if (use_workaround) {
+ dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+ kbdev->nr_user_address_spaces = 1;
+ }
+ }
+#ifdef CONFIG_MALI_DEBUG
+ /* Soft-stop will be disabled on a single context by default unless
+ * softstop_always is set */
+ jsdd->softstop_always = false;
+#endif /* CONFIG_MALI_DEBUG */
+ jsdd->nr_all_contexts_running = 0;
+ jsdd->nr_user_contexts_running = 0;
+ jsdd->nr_contexts_pullable = 0;
+ atomic_set(&jsdd->nr_contexts_runnable, 0);
+ /* All ASs initially free */
+ jsdd->as_free = as_present;
+ /* No ctx allowed to submit */
+ jsdd->runpool_irq.submit_allowed = 0u;
+ memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+ sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+ memset(jsdd->runpool_irq.slot_affinities, 0,
+ sizeof(jsdd->runpool_irq.slot_affinities));
+ memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+ sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+ INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+ /* Config attributes */
+ jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+ jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+ jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+ else
+ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+ jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+ jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+ else
+ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+ jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+ jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+ jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+ jsdd->cfs_ctx_runtime_init_slices =
+ DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
+ jsdd->cfs_ctx_runtime_min_slices =
+ DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+
+ dev_dbg(kbdev->dev, "JS Config Attribs: ");
+ dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+ jsdd->scheduling_period_ns);
+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+ jsdd->soft_stop_ticks);
+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+ jsdd->soft_stop_ticks_cl);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+ jsdd->hard_stop_ticks_ss);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+ jsdd->hard_stop_ticks_cl);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+ jsdd->hard_stop_ticks_dumping);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+ jsdd->gpu_reset_ticks_ss);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+ jsdd->gpu_reset_ticks_cl);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+ jsdd->gpu_reset_ticks_dumping);
+ dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+ jsdd->ctx_timeslice_ns);
+ dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u",
+ jsdd->cfs_ctx_runtime_init_slices);
+ dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u",
+ jsdd->cfs_ctx_runtime_min_slices);
+
+ if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+ jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+ jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+ jsdd->hard_stop_ticks_dumping <
+ jsdd->gpu_reset_ticks_dumping)) {
+ dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+ return -EINVAL;
+ }
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+ dev_dbg(kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+ jsdd->soft_stop_ticks,
+ jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ dev_dbg(kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+ jsdd->hard_stop_ticks_ss,
+ jsdd->hard_stop_ticks_dumping,
+ jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ dev_dbg(kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+ /* setup the number of irq throttle cycles base on given time */
+ {
+ int time_us = kbdev->gpu_props.irq_throttle_time_us;
+ int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev,
+ time_us);
+
+ atomic_set(&kbdev->irq_throttle_cycles, cycles);
+ }
+
+ /* Clear the AS data, including setting NULL pointers */
+ memset(&jsdd->runpool_irq.per_as_data[0], 0,
+ sizeof(jsdd->runpool_irq.per_as_data));
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+ jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+ kbdev->gpu_props.props.raw_props.js_features[i]);
+
+ jsdd->init_status |= JS_DEVDATA_INIT_CONSTANTS;
+
+	/* On error, we could continue, since none of the resources below
+	 * rely on the ones above */
+
+ mutex_init(&jsdd->runpool_mutex);
+ mutex_init(&jsdd->queue_mutex);
+ spin_lock_init(&jsdd->runpool_irq.lock);
+ sema_init(&jsdd->schedule_sem, 1);
+
+ err = kbasep_js_policy_init(kbdev);
+ if (!err)
+ jsdd->init_status |= JS_DEVDATA_INIT_POLICY;
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+ INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+ INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+ }
+
+ jsdd->runpool_irq.secure_mode = false;
+ if (kbdev->secure_ops) {
+ /* Make sure secure mode is disabled */
+ kbdev->secure_ops->secure_mode_disable(kbdev);
+ }
+
+ /* On error, do no cleanup; this will be handled by the caller(s), since
+ * we've designed this resource to be safe to terminate on init-fail */
+ if (jsdd->init_status != JS_DEVDATA_INIT_ALL)
+ return -EINVAL;
+
+ return 0;
+}
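+
+/*
+ * Illustrative note (hypothetical values): the timeout validation above
+ * requires the tick thresholds to be strictly increasing, e.g.:
+ *
+ *	soft_stop_ticks(1) < hard_stop_ticks_ss(50) < gpu_reset_ticks_ss(55)
+ */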
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) {
+ s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+ /* The caller must de-register all contexts before calling this
+ */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+ KBASE_DEBUG_ASSERT(memcmp(
+ js_devdata->runpool_irq.ctx_attr_ref_count,
+ zero_ctx_attr_ref_count,
+ sizeof(zero_ctx_attr_ref_count)) == 0);
+ CSTD_UNUSED(zero_ctx_attr_ref_count);
+ }
+ if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY))
+ kbasep_js_policy_term(&js_devdata->policy);
+
+ js_devdata->init_status = JS_DEVDATA_INIT_NONE;
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ int err;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+ INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+ KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
+
+ js_kctx_info->ctx.nr_jobs = 0;
+ atomic_set(&js_kctx_info->ctx.fault_count, 0);
+ js_kctx_info->ctx.is_scheduled = false;
+ js_kctx_info->ctx.is_dying = false;
+ memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+ sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+ /* Initially, the context is disabled from submission until the create
+ * flags are set */
+ js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED;
+
+ js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
+
+	/* On error, we could continue, since none of the resources below
+	 * rely on the ones above */
+ mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+ init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+ err = kbasep_js_policy_init_ctx(kbdev, kctx);
+ if (!err)
+ js_kctx_info->init_status |= JS_KCTX_INIT_POLICY;
+
+ /* On error, do no cleanup; this will be handled by the caller(s), since
+ * we've designed this resource to be safe to terminate on init-fail */
+ if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
+ return -EINVAL;
+
+ return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ int js;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
+ /* The caller must de-register all jobs before calling this */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+ }
+
+ mutex_lock(&kbdev->js_data.queue_mutex);
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+ mutex_unlock(&kbdev->js_data.queue_mutex);
+
+ if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
+ kbasep_js_policy_term_ctx(js_policy, kctx);
+
+ js_kctx_info->init_status = JS_KCTX_INIT_NONE;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot
+ * pullable context queue
+ * @kbdev: Device pointer
+ * @kctx: Context to add to queue
+ * @js: Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the tail.
+ *
+ * This function should be used when queueing a context for the first time, or
+ * re-queueing a context that has been pulled from.
+ *
+ * Caller must hold kbasep_js_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js)
+{
+ bool ret = false;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+ &kbdev->js_data.ctx_list_pullable[js]);
+
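+	/* If the context previously had no pullable slots it becomes pullable
+	 * now: update the device-wide count, and if it also has no atoms
+	 * currently pulled it becomes runnable again. */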
+ if (!kctx->slots_pullable) {
+ kbdev->js_data.nr_contexts_pullable++;
+ ret = true;
+ if (!atomic_read(&kctx->atoms_pulled))
+ atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+ }
+ kctx->slots_pullable |= (1 << js);
+
+ return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ * per-slot pullable context queue
+ * @kbdev: Device pointer
+ * @kctx: Context to add to queue
+ * @js: Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Caller must hold kbasep_js_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js)
+{
+ bool ret = false;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+ &kbdev->js_data.ctx_list_pullable[js]);
+
+ if (!kctx->slots_pullable) {
+ kbdev->js_data.nr_contexts_pullable++;
+ ret = true;
+ if (!atomic_read(&kctx->atoms_pulled))
+ atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+ }
+ kctx->slots_pullable |= (1 << js);
+
+ return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot
+ * unpullable context queue
+ * @kbdev: Device pointer
+ * @kctx: Context to add to queue
+ * @js: Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold kbasep_js_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js)
+{
+ bool ret = false;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+ &kbdev->js_data.ctx_list_unpullable[js]);
+
+ if (kctx->slots_pullable == (1 << js)) {
+ kbdev->js_data.nr_contexts_pullable--;
+ ret = true;
+ if (!atomic_read(&kctx->atoms_pulled))
+ atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+ }
+ kctx->slots_pullable &= ~(1 << js);
+
+ return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or
+ * unpullable context queues
+ * @kbdev: Device pointer
+ * @kctx: Context to remove from queue
+ * @js: Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold kbasep_js_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js)
+{
+ bool ret = false;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ if (kctx->slots_pullable == (1 << js)) {
+ kbdev->js_data.nr_contexts_pullable--;
+ ret = true;
+ if (!atomic_read(&kctx->atoms_pulled))
+ atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+ }
+ kctx->slots_pullable &= ~(1 << js);
+
+ return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ * queue.
+ * @kbdev: Device pointer
+ * @js: Job slot to use
+ *
+ * Caller must hold kbasep_js_device_data::queue_mutex
+ *
+ * Return: Context to use for specified slot.
+ * NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+ struct kbase_device *kbdev,
+ int js)
+{
+ struct kbase_context *kctx;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+
+ if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+ return NULL;
+
+ kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+ struct kbase_context,
+ jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ * specified slot
+ * @kctx: Context pointer
+ * @js: Job slot to use
+ * @is_scheduled: true if the context is currently scheduled
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if context can be pulled from on specified slot
+ * false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+ bool is_scheduled)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_jd_atom *katom;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ js_devdata = &kctx->kbdev->js_data;
+
+ if (is_scheduled) {
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+ return false;
+ }
+ katom = jsctx_rb_peek(kctx, js);
+ if (!katom)
+ return false; /* ringbuffer empty */
+ if (atomic_read(&katom->blocked))
+ return false; /* next atom blocked */
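+	/* A cross-slot dependency is only satisfied once the pre-dep has
+	 * reached the slot ringbuffer; a fail-blocker must additionally wait
+	 * for the slot to drain. */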
+ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+ if (katom->x_pre_dep->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)
+ return false;
+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+ return false;
+ }
+
+ return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ bool ret = true;
+ bool has_dep = false, has_x_dep = false;
+ int js = kbase_js_get_slot(kbdev, katom);
+ int prio = katom->sched_priority;
+ int i;
+
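+	/* Check each of the atom's (at most two) dependencies against the
+	 * constraints that the ringbuffer representation can express. */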
+ for (i = 0; i < 2; i++) {
+ struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+ if (dep_atom) {
+ int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+ int dep_prio = dep_atom->sched_priority;
+
+ /* Dependent atom must already have been submitted */
+ if (!(dep_atom->atom_flags &
+ KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) {
+ ret = false;
+ break;
+ }
+
+			/* Dependencies with different priorities can't
+			 * be represented in the ringbuffer */
+ if (prio != dep_prio) {
+ ret = false;
+ break;
+ }
+
+ if (js == dep_js) {
+ /* Only one same-slot dependency can be
+ * represented in the ringbuffer */
+ if (has_dep) {
+ ret = false;
+ break;
+ }
+ has_dep = true;
+ } else {
+ /* Only one cross-slot dependency can be
+ * represented in the ringbuffer */
+ if (has_x_dep) {
+ ret = false;
+ break;
+ }
+ /* Each dependee atom can only have one
+ * cross-slot dependency */
+ if (dep_atom->x_post_dep) {
+ ret = false;
+ break;
+ }
+ /* The dependee atom can not already be in the
+ * HW access ringbuffer */
+ if (dep_atom->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+ ret = false;
+ break;
+ }
+ /* The dependee atom can not already have
+ * completed */
+ if (dep_atom->status !=
+ KBASE_JD_ATOM_STATE_IN_JS) {
+ ret = false;
+ break;
+ }
+ /* Cross-slot dependencies must not violate
+ * PRLAM-8987 affinity restrictions */
+ if (kbase_hw_has_issue(kbdev,
+ BASE_HW_ISSUE_8987) &&
+ (js == 2 || dep_js == 2)) {
+ ret = false;
+ break;
+ }
+ has_x_dep = true;
+ }
+
+ if (kbase_jd_katom_dep_type(&katom->dep[i]) ==
+ BASE_JD_DEP_TYPE_DATA &&
+ js == dep_js) {
+ struct kbase_jd_atom *last_atom =
+ jsctx_rb_peek_last(kctx, js,
+ prio);
+
+ /* Last atom on slot must be pre-dep for this
+ * atom */
+ if (last_atom != dep_atom) {
+ ret = false;
+ break;
+ }
+ }
+
+ /* Dependency can be represented in ringbuffers */
+ }
+ }
+
+ /* If dependencies can be represented by ringbuffer then clear them from
+ * atom structure */
+ if (ret) {
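+		/* Outstanding cross-slot dependencies are re-expressed via the
+		 * x_pre_dep/x_post_dep pointers and the X_DEP_BLOCKED flag
+		 * rather than the generic dependency list. */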
+ for (i = 0; i < 2; i++) {
+ struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+ if (dep_atom) {
+ int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+ if ((js != dep_js) &&
+ (dep_atom->status !=
+ KBASE_JD_ATOM_STATE_COMPLETED)
+ && (dep_atom->status !=
+ KBASE_JD_ATOM_STATE_HW_COMPLETED)
+ && (dep_atom->status !=
+ KBASE_JD_ATOM_STATE_UNUSED)) {
+
+ katom->atom_flags |=
+ KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+ katom->x_pre_dep = dep_atom;
+ dep_atom->x_post_dep = katom;
+ if (kbase_jd_katom_dep_type(
+ &katom->dep[i]) ==
+ BASE_JD_DEP_TYPE_DATA)
+ katom->atom_flags |=
+ KBASE_KATOM_FLAG_FAIL_BLOCKER;
+ }
+ if ((kbase_jd_katom_dep_type(&katom->dep[i])
+ == BASE_JD_DEP_TYPE_DATA) &&
+ (js == dep_js))
+ katom->atom_flags |=
+ KBASE_KATOM_FLAG_FAIL_PREV;
+
+ list_del(&katom->dep_item[i]);
+ kbase_jd_katom_dep_clear(&katom->dep[i]);
+ }
+ }
+ }
+
+ return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+ struct kbase_jd_atom *atom)
+{
+ unsigned long flags;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+
+ bool enqueue_required = false;
+ bool timer_sync = false;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ kbdev = kctx->kbdev;
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /*
+ * Begin Runpool transaction
+ */
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* Refcount ctx.nr_jobs */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+ ++(js_kctx_info->ctx.nr_jobs);
+
+ /* Setup any scheduling information */
+ kbasep_js_clear_job_retry_submit(atom);
+
+ /* Lock for state available during IRQ */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (!kbase_js_dep_validate(kctx, atom)) {
+ /* Dependencies could not be represented */
+ --(js_kctx_info->ctx.nr_jobs);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ goto out_unlock;
+ }
+
+ KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+ if (kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) {
+ /* Ringbuffer was full (should be impossible) - fail the job */
+ --(js_kctx_info->ctx.nr_jobs);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ atom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ goto out_unlock;
+ }
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+ kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
+
+ /* Context Attribute Refcounting */
+ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+ if (enqueue_required)
+ timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx,
+ atom->slot_nr);
+
+ /* If this context is active and the atom is the first on its slot,
+ * kick the job manager to attempt to fast-start the atom */
+ if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+ kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ if (timer_sync)
+ kbase_backend_ctx_count_changed(kbdev);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ /* End runpool transaction */
+
+ if (!js_kctx_info->ctx.is_scheduled) {
+ if (js_kctx_info->ctx.is_dying) {
+ /* A job got added while/after kbase_job_zap_context()
+ * was called on a non-scheduled context (e.g. KDS
+ * dependency resolved). Kill that job by killing the
+ * context. */
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+ false);
+ } else if (js_kctx_info->ctx.nr_jobs == 1) {
+ /* Handle Refcount going from 0 to 1: schedule the
+ * context on the Policy Queue */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+ dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+ /* Policy Queue was updated - caller must try to
+ * schedule the head context */
+ WARN_ON(!enqueue_required);
+ }
+ }
+out_unlock:
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+ kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+ /* De-refcount ctx.nr_jobs */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+ --(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ unsigned long flags;
+ struct kbasep_js_atom_retained_state katom_retained_state;
+ struct kbasep_js_device_data *js_devdata;
+ bool attr_state_changed;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+ kbasep_js_remove_job(kbdev, kctx, katom);
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* The atom has 'finished' (will not be re-run), so no need to call
+ * kbasep_js_has_atom_finished().
+ *
+ * This is because it returns false for soft-stopped atoms, but we
+ * want to override that, because we're cancelling an atom regardless of
+ * whether it was soft-stopped or not */
+ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+ &katom_retained_state);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ bool result;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ /* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+ kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+ int as_nr)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_context *found_kctx = NULL;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+ js_devdata = &kbdev->js_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ found_kctx = js_per_as_data->kctx;
+
+ if (found_kctx != NULL)
+ ++(js_per_as_data->as_busy_refcount);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return found_kctx;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+ struct kbase_device *kbdev, int as_nr)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_context *found_kctx = NULL;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ js_devdata = &kbdev->js_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ found_kctx = js_per_as_data->kctx;
+
+ if (found_kctx != NULL)
+ ++(js_per_as_data->as_busy_refcount);
+
+ return found_kctx;
+}
+
+/**
+ * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs
+ * after releasing a context and/or atom
+ *
+ * @kbdev: The kbase_device to operate on
+ * @kctx: The kbase_context to operate on
+ * @katom_retained_state: Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst
+ * kbasep_js_device_data.runpool_irq.lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ * changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state,
+ bool runpool_ctx_attr_change)
+{
+ struct kbasep_js_device_data *js_devdata;
+ kbasep_js_release_result result = 0;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (js_devdata->nr_user_contexts_running != 0) {
+ bool retry_submit = false;
+ int retry_jobslot;
+
+ if (katom_retained_state)
+ retry_submit = kbasep_js_get_atom_retry_submit_slot(
+ katom_retained_state, &retry_jobslot);
+
+ if (runpool_ctx_attr_change || retry_submit) {
+ /* A change in runpool ctx attributes might mean we can
+ * run more jobs than before */
+ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+ kctx, NULL, 0u, retry_jobslot);
+ }
+ }
+ return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and AS transaction mutexes
+ *
+ * This also starts more jobs running in the case of a ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ kbasep_js_release_result release_result = 0u;
+ bool runpool_ctx_attr_change = false;
+ int kctx_as_nr;
+ struct kbase_as *current_as;
+ int new_ref_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+ /* kctx->as_nr and js_per_as_data are only read from here. The caller's
+ * js_ctx_mutex provides a barrier that ensures they are up-to-date.
+ *
+ * They will not change whilst we're reading them, because the refcount
+ * is non-zero (and we ASSERT on that last fact).
+ */
+ kctx_as_nr = kctx->as_nr;
+ KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+ KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+ /*
+ * Transaction begins on AS and runpool_irq
+ *
+	 * Assert about our calling contract
+ */
+ current_as = &kbdev->as[kctx_as_nr];
+ mutex_lock(&kbdev->pm.lock);
+ mutex_lock(&current_as->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+ KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+ /* Update refcount */
+ new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+ /* Release the atom if it finished (i.e. wasn't soft-stopped) */
+ if (kbasep_js_has_atom_finished(katom_retained_state))
+ runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+ kbdev, kctx, katom_retained_state);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+ new_ref_count);
+
+ if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED &&
+ !kbase_pm_is_suspending(kbdev)) {
+ /* Context is kept scheduled into an address space even when
+ * there are no jobs, in this case we have to handle the
+ * situation where all jobs have been evicted from the GPU and
+ * submission is disabled.
+ *
+ * At this point we re-enable submission to allow further jobs
+ * to be executed
+ */
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+ }
+
+ /* Make a set of checks to see if the context should be scheduled out */
+ if (new_ref_count == 0 &&
+ (!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+ kbdev->pm.suspending)) {
+ /* Last reference, and we've been told to remove this context
+ * from the Run Pool */
+ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d",
+ kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+ kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_nret_gpu_ctx(kbdev, kctx);
+#endif
+
+ kbase_backend_release_ctx_irq(kbdev, kctx);
+
+ if (kbdev->hwaccess.active_kctx == kctx)
+ kbdev->hwaccess.active_kctx = NULL;
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing the atom's attributes must either happen before
+		 * this, or after 'is_scheduled' is changed, otherwise we
+		 * double-decrement the attributes */
+ runpool_ctx_attr_change |=
+ kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+ /* Releasing the context and katom retained state can allow
+ * more jobs to run */
+ release_result |=
+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+ kctx, katom_retained_state,
+ runpool_ctx_attr_change);
+
+ /*
+ * Transaction ends on AS and runpool_irq:
+ *
+ * By this point, the AS-related data is now clear and ready
+ * for re-use.
+ *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this code whilst we are.
+		 */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+ mutex_unlock(&current_as->transaction_mutex);
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Note: Don't reuse kctx_as_nr now */
+
+ /* Synchronize with any policy timers */
+ kbase_backend_ctx_count_changed(kbdev);
+
+ /* update book-keeping info */
+ js_kctx_info->ctx.is_scheduled = false;
+ /* Signal any waiter that the context is not scheduled, so is
+ * safe for termination - once the jsctx_mutex is also dropped,
+ * and jobs have finished. */
+ wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+ /* Queue an action to occur after we've dropped the lock */
+ release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED;
+ } else {
+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+ katom_retained_state, runpool_ctx_attr_change);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&current_as->transaction_mutex);
+ mutex_unlock(&kbdev->pm.lock);
+ }
+
+ return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_atom_retained_state katom_retained_state;
+
+ /* Setup a dummy katom_retained_state */
+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+ kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+ &katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx, bool has_pm_ref)
+{
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_policy = &kbdev->js_data.policy;
+ js_devdata = &kbdev->js_data;
+
+	/* This is called if and only if you've detached the context from
+	 * the Runpool or the Policy Queue, and not added it back to the Runpool
+	 */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+ if (js_kctx_info->ctx.is_dying) {
+ /* Dying: don't requeue, but kill all jobs on the context. This
+ * happens asynchronously */
+ dev_dbg(kbdev->dev,
+ "JS: ** Killing Context %p on RunPool Remove **", kctx);
+ kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+ }
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+ struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ base_jd_event_code event_code;
+ kbasep_js_release_result release_result;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ event_code = katom_retained_state->event_code;
+
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+#if 2 == MALI_INSTRUMENTATION_LEVEL
+	/* When any fault is detected, stop the context from submitting more
+	 * jobs and add a refcount to prevent the context from being removed
+	 * while the job core dump is in progress in user space. Once the dump
+	 * finishes, the refcount on the context is released, allowing it to
+	 * be removed. The test conditions ensure this mechanism is triggered
+	 * only in the cases that cmarp_event_handler handles.
+	 *
+	 * Currently, the cmar event handler only handles job exceptions, and
+	 * asserts in the cases where the event code of the atom does not
+	 * belong to the MMU exception or GPU exception classes. In order to
+	 * perform dump-on-error in those cases, changes to the cmar event
+	 * handler would need to be made.
+	 */
+ if ((BASE_JD_EVENT_NOT_STARTED != event_code) &&
+ (BASE_JD_EVENT_STOPPED != event_code) &&
+ (BASE_JD_EVENT_ACTIVE != event_code) &&
+ (!((event_code >= BASE_JD_EVENT_RANGE_KERNEL_ONLY_START) &&
+ (event_code <= BASE_JD_EVENT_RANGE_KERNEL_ONLY_END))) &&
+ ((event_code & BASE_JD_SW_EVENT) ||
+ event_code <= BASE_JD_EVENT_UNKNOWN) &&
+ (BASE_JD_EVENT_DONE != event_code)) {
+ unsigned long flags;
+
+ atomic_inc(&js_kctx_info->ctx.fault_count);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ kbasep_js_runpool_retain_ctx(kbdev, kctx);
+ }
+#endif /* 2 == MALI_INSTRUMENTATION_LEVEL */
+
+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+ katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeuing kctx */
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+ /* Drop the jsctx_mutex to allow scheduling in a new context */
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+ kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_dump_fault_term(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ atomic_dec(&kctx->jctx.sched_info.ctx.fault_count);
+}
+
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_atom_retained_state katom_retained_state;
+
+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+ &katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+ struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ kbasep_js_release_result release_result;
+ struct kbasep_js_atom_retained_state katom_retained_state_struct;
+ struct kbasep_js_atom_retained_state *katom_retained_state =
+ &katom_retained_state_struct;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+ katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeuing kctx */
+ mutex_unlock(&js_devdata->runpool_mutex);
+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+ /* Drop the jsctx_mutex to allow scheduling in a new context */
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* NOTE: could return release_result if the caller would like to know
+ * whether it should schedule a new context, but currently no callers do
+ */
+}
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user-specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used; if a timeout
+ * is negative then the default is restored. A value of zero leaves the current
+ * setting unchanged.
+ */
+static void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_data = &kbdev->js_data;
+
+ if (kbdev->js_scheduling_period_ns < 0)
+ js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+ else if (kbdev->js_scheduling_period_ns > 0)
+ js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns;
+
+ if (kbdev->js_soft_stop_ticks < 0)
+ js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+ else if (kbdev->js_soft_stop_ticks > 0)
+ js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks;
+
+ if (kbdev->js_soft_stop_ticks_cl < 0)
+ js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+ else if (kbdev->js_soft_stop_ticks_cl > 0)
+ js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl;
+
+ if (kbdev->js_hard_stop_ticks_ss < 0) {
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ js_data->hard_stop_ticks_ss =
+ DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+ else
+ js_data->hard_stop_ticks_ss =
+ DEFAULT_JS_HARD_STOP_TICKS_SS;
+ } else if (kbdev->js_hard_stop_ticks_ss > 0) {
+ js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss;
+ }
+
+ if (kbdev->js_hard_stop_ticks_cl < 0)
+ js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+ else if (kbdev->js_hard_stop_ticks_cl > 0)
+ js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl;
+
+ if (kbdev->js_hard_stop_ticks_dumping < 0)
+ js_data->hard_stop_ticks_dumping =
+ DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+ else if (kbdev->js_hard_stop_ticks_dumping > 0)
+ js_data->hard_stop_ticks_dumping =
+ kbdev->js_hard_stop_ticks_dumping;
+
+ if (kbdev->js_reset_ticks_ss < 0) {
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ js_data->gpu_reset_ticks_ss =
+ DEFAULT_JS_RESET_TICKS_SS_8408;
+ else
+ js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+ } else if (kbdev->js_reset_ticks_ss > 0) {
+ js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss;
+ }
+
+ if (kbdev->js_reset_ticks_cl < 0)
+ js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+ else if (kbdev->js_reset_ticks_cl > 0)
+ js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl;
+
+ if (kbdev->js_reset_ticks_dumping < 0)
+ js_data->gpu_reset_ticks_dumping =
+ DEFAULT_JS_RESET_TICKS_DUMPING;
+ else if (kbdev->js_reset_ticks_dumping > 0)
+ js_data->gpu_reset_ticks_dumping =
+ kbdev->js_reset_ticks_dumping;
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ struct kbase_as *new_address_space = NULL;
+ unsigned long flags;
+ bool kctx_suspended = false;
+ int as_nr;
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* Pick available address space for this context */
+ as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+
+ if (as_nr == KBASEP_AS_NR_INVALID)
+ return false; /* No address spaces currently available */
+
+ new_address_space = &kbdev->as[as_nr];
+
+ /*
+ * Atomic transaction on the Context and Run Pool begins
+ */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* Check to see if context is dying due to kbase_job_zap_context() */
+ if (js_kctx_info->ctx.is_dying) {
+ /* Roll back the transaction so far and return */
+ kbase_backend_release_free_address_space(kbdev, as_nr);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ return false;
+ }
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+ 0u,
+ kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+ if (js_devdata->nr_user_contexts_running == 0 &&
+ kbdev->js_timeouts_updated) {
+ /* Only when there are no other contexts submitting jobs:
+ * Latch in run-time job scheduler timeouts that were set
+ * through js_timeouts sysfs file */
+ kbase_js_set_timeouts(kbdev);
+
+ kbdev->js_timeouts_updated = false;
+ }
+
+ js_kctx_info->ctx.is_scheduled = true;
+
+ mutex_lock(&new_address_space->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Assign context to previously chosen address space */
+ if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&new_address_space->transaction_mutex);
+ /* If address space is not pending, then kbase_backend_use_ctx()
+ * failed. Roll back the transaction so far and return */
+ if (!kctx->as_pending) {
+ js_kctx_info->ctx.is_scheduled = false;
+
+ kbase_backend_release_free_address_space(kbdev, as_nr);
+ }
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ return false;
+ }
+
+ kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_ret_gpu_ctx(kbdev, kctx);
+#endif
+
+ /* Cause any future waiter-on-termination to wait until the context is
+ * descheduled */
+ wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+ /* Re-check for suspending: a suspend could've occurred, and all the
+ * contexts could've been removed from the runpool before we took this
+ * lock. In this case, we don't want to allow this context to run jobs,
+ * we just want it out immediately.
+ *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the runpool_irq locking. If a suspend occurs *after* that lock was
+	 * taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will clean up this context instead (by
+	 * virtue of being called strictly after the suspend flag is set, and of
+	 * waiting for this lock to drop) */
+ if (kbase_pm_is_suspending(kbdev)) {
+ /* Cause it to leave at some later point */
+ bool retained;
+
+ retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+ KBASE_DEBUG_ASSERT(retained);
+
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ kctx_suspended = true;
+ }
+
+ /* Transaction complete */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&new_address_space->transaction_mutex);
+
+ /* Synchronize with any policy timers */
+ kbase_backend_ctx_count_changed(kbdev);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ /* Note: after this point, the context could potentially get scheduled
+ * out immediately */
+
+ if (kctx_suspended) {
+ /* Finishing forcing out the context due to a suspend. Use a
+ * variant of kbasep_js_runpool_release_ctx() that doesn't
+ * schedule a new context, to prevent a risk of recursion back
+ * into this function */
+ kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+ return false;
+ }
+ return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ if (kctx->as_pending) {
+ /* Context waiting for AS to be assigned */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ return false;
+ }
+ if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
+ /* Context already has ASID - mark as active */
+ kbdev->hwaccess.active_kctx = kctx;
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ return true; /* Context already scheduled */
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbasep_js_device_data *js_devdata;
+ bool is_scheduled;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* This must never be attempted whilst suspending - i.e. it should only
+ * happen in response to a syscall from a user-space thread */
+ BUG_ON(kbase_pm_is_suspending(kbdev));
+
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* Mark the context as privileged */
+ js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED;
+
+ is_scheduled = js_kctx_info->ctx.is_scheduled;
+ if (!is_scheduled) {
+ /* Add the context to the pullable list */
+ if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0))
+ kbase_js_sync_timers(kbdev);
+
+ /* Fast-starting requires the jsctx_mutex to be dropped,
+ * because it works on multiple ctxs */
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ /* Try to schedule the context in */
+ kbase_js_sched_all(kbdev);
+
+ /* Wait for the context to be scheduled in */
+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+ kctx->jctx.sched_info.ctx.is_scheduled);
+ } else {
+ /* Already scheduled in - We need to retain it to keep the
+ * corresponding address space */
+ kbasep_js_runpool_retain_ctx(kbdev, kctx);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ }
+}
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool pending;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* We don't need to use the address space anymore */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED);
+ pending = kctx->as_pending;
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* Release the context - it will be scheduled out if there is no
+ * pending job */
+ if (!pending)
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+ kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int i;
+ u16 retained = 0u;
+ int nr_privileged_ctx = 0;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Prevent all contexts from submitting */
+ js_devdata->runpool_irq.submit_allowed = 0;
+
+ /* Retain each of the contexts, so we can cause it to leave even if it
+ * had no refcount to begin with */
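+	/* Build 'retained' so that, after this loop, bit i is set iff address
+	 * space i held a context whose busy refcount we incremented. */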
+ for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+ struct kbasep_js_per_as_data *js_per_as_data =
+ &js_devdata->runpool_irq.per_as_data[i];
+ struct kbase_context *kctx = js_per_as_data->kctx;
+
+ retained = retained << 1;
+
+ if (kctx) {
+ ++(js_per_as_data->as_busy_refcount);
+ retained |= 1u;
+ /* We can only cope with up to 1 privileged context -
+ * the instrumented context. It'll be suspended by
+ * disabling instrumentation */
+ if (kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED)
+ KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1);
+ }
+ }
+ CSTD_UNUSED(nr_privileged_ctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* De-ref the previous retain to ensure each context gets pulled out
+ * sometime later. */
+ for (i = 0;
+ i < BASE_MAX_NR_AS;
+ ++i, retained = retained >> 1) {
+ struct kbasep_js_per_as_data *js_per_as_data =
+ &js_devdata->runpool_irq.per_as_data[i];
+ struct kbase_context *kctx = js_per_as_data->kctx;
+
+ if (retained & 1u)
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+
+ /* Caller must wait for all Power Manager active references to be
+ * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int js;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+ mutex_lock(&js_devdata->queue_mutex);
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ struct kbase_context *kctx, *n;
+
+ list_for_each_entry_safe(kctx, n,
+ &kbdev->js_data.ctx_list_unpullable[js],
+ jctx.sched_info.ctx.ctx_list_entry[js]) {
+ struct kbasep_js_kctx_info *js_kctx_info;
+ unsigned long flags;
+ bool timer_sync = false;
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (!js_kctx_info->ctx.is_scheduled &&
+ kbase_js_ctx_pullable(kctx, js, false))
+ timer_sync = kbase_js_ctx_list_add_pullable(
+ kbdev, kctx, js);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+ flags);
+ if (timer_sync)
+ kbase_backend_ctx_count_changed(kbdev);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ }
+ }
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ /* Restart atom processing */
+ kbase_js_sched_all(kbdev);
+
+ /* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ if ((katom->core_req & BASE_JD_REQ_FS) &&
+ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+ BASE_JD_REQ_T)))
+ return false;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+ (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+ return false;
+
+ return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
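+	/* Fragment atoms always use slot 0. Compute-only atoms use slot 2 when
+	 * targeting the second core group, or on GPUs affected by HW issue
+	 * 8987; everything else runs on slot 1. */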
+ if (katom->core_req & BASE_JD_REQ_FS)
+ return 0;
+
+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ if (katom->device_nr == 1 &&
+ kbdev->gpu_props.num_core_groups == 2)
+ return 2;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ return 2;
+ }
+
+ return 1;
+}
+
+int kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom,
+ bool *enqueue_required)
+{
+ katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* If slot will transition from unpullable to pullable then add to
+ * pullable list */
+ if (jsctx_rb_is_empty(kctx, katom->slot_nr)) {
+ *enqueue_required = true;
+ } else {
+ *enqueue_required = false;
+ /* Check if there are lower priority jobs to soft stop */
+ kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+ }
+
+ /* Add atom to ring buffer. */
+ if (unlikely(jsctx_rb_add_atom(kctx, katom))) {
+ /* The ring buffer is full. This should be impossible as the
+		 * job dispatcher cannot submit enough atoms to exceed the
+ * ring buffer size. Fail the job.
+ */
+ WARN(1, "Job submit while JSCTX ringbuffer already full\n");
+ return -EINVAL;
+ }
+
+ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+
+ return 0;
+}
+
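+/*
+ * kbase_js_pull() hands the next runnable atom on slot @js to the caller,
+ * updating the pull accounting and taking a runpool reference on the
+ * context. NULL is returned if nothing can be pulled: submission is not
+ * currently allowed, a suspend is pending, or the next atom is blocked or
+ * subject to ordering restrictions.
+ */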
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+ struct kbase_jd_atom *katom;
+ struct kbasep_js_device_data *js_devdata;
+ int pulled;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ js_devdata = &kctx->kbdev->js_data;
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+ return NULL;
+ if (kbase_pm_is_suspending(kctx->kbdev))
+ return NULL;
+
+ katom = jsctx_rb_peek(kctx, js);
+ if (!katom)
+ return NULL;
+
+ if (atomic_read(&katom->blocked))
+ return NULL;
+
+ /* Due to ordering restrictions when unpulling atoms on failure, we do
+ * not allow multiple runs of fail-dep atoms from the same context to be
+ * present on the same slot */
+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
+ atomic_read(&kctx->atoms_pulled_slot[js])) {
+ struct kbase_jd_atom *prev_atom =
+ kbase_backend_inspect_tail(kctx->kbdev, js);
+
+ if (prev_atom && prev_atom->kctx != kctx)
+ return NULL;
+ }
+
+ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+ if (katom->x_pre_dep->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)
+ return NULL;
+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+ return NULL;
+ }
+
+ kctx->pulled = true;
+ pulled = atomic_inc_return(&kctx->atoms_pulled);
+ if (pulled == 1 && !kctx->slots_pullable)
+ atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+ atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+ jsctx_rb_pull(kctx, katom);
+
+ kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx);
+ katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+ katom->sched_info.cfs.ticks = 0;
+
+ return katom;
+}
+
+
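+/*
+ * js_return_worker - Deferred work run when an atom has been unpulled from
+ * the hardware backend: undo the pull accounting, update the per-slot
+ * context queues, re-enable submission if the context went idle without
+ * dying, and finally drop the context reference taken at pull time.
+ */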
+static void js_return_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+ work);
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+ struct kbasep_js_atom_retained_state retained_state;
+ int js = katom->slot_nr;
+ bool timer_sync = false;
+ bool context_idle = false;
+ unsigned long flags;
+
+ kbase_backend_complete_wq(kbdev, katom);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+ kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ atomic_dec(&kctx->atoms_pulled);
+ atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+ atomic_dec(&katom->blocked);
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+ jsctx_rb_is_empty(kctx, js))
+ timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js);
+
+ if (!atomic_read(&kctx->atoms_pulled)) {
+ if (!kctx->slots_pullable)
+ atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+
+ if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+ !js_kctx_info->ctx.is_dying) {
+ int num_slots = kbdev->gpu_props.num_job_slots;
+ int slot;
+
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+ for (slot = 0; slot < num_slots; slot++) {
+ if (kbase_js_ctx_pullable(kctx, slot, true))
+ timer_sync |=
+ kbase_js_ctx_list_add_pullable(
+ kbdev, kctx, slot);
+ }
+ }
+
+ kbase_jm_idle_ctx(kbdev, kctx);
+
+ context_idle = true;
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ if (context_idle)
+ kbase_pm_context_idle(kbdev);
+
+ if (timer_sync)
+ kbase_js_sync_timers(kbdev);
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+ &retained_state);
+
+ kbase_js_sched_all(kbdev);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ jsctx_rb_unpull(kctx, katom);
+
+ WARN_ON(work_pending(&katom->work));
+
+ /* Block re-submission until workqueue has run */
+ atomic_inc(&katom->blocked);
+
+ kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, js_return_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
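+/**
+ * kbase_js_evict_atom - Consider a single atom for eviction
+ * @kctx:        Context pointer
+ * @katom_evict: Atom being considered for eviction
+ * @start_katom: First atom of the current eviction run
+ * @head_katom:  Atom whose failure triggered the eviction
+ * @evict_list:  List to which further dependents requiring eviction are added
+ * @rb:          Ringbuffer containing @katom_evict
+ * @idx:         Index of @katom_evict within @rb
+ *
+ * If @katom_evict must be evicted, its ringbuffer entry is invalidated,
+ * @head_katom's event code is propagated to it and it is handed back to the
+ * job dispatcher.
+ *
+ * Return: false if @katom_evict must be left in place, true otherwise
+ */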
+static bool kbase_js_evict_atom(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom_evict,
+ struct kbase_jd_atom *start_katom,
+ struct kbase_jd_atom *head_katom,
+ struct list_head *evict_list,
+ struct jsctx_rb *rb, int idx)
+{
+ struct kbase_jd_atom *x_dep = katom_evict->x_post_dep;
+
+ if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
+ katom_evict != start_katom)
+ return false;
+
+ if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+ WARN_ON(katom_evict->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS);
+ WARN_ON(katom_evict->event_code != head_katom->event_code);
+
+ return false;
+ }
+
+ if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+ katom_evict != head_katom)
+ return false;
+
+ /* Evict cross dependency if present */
+ if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)
+ && (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER))
+ list_add_tail(&x_dep->dep_item[0], evict_list);
+
+ /* If cross dependency is present and does not have a data dependency
+ * then unblock */
+ if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)
+ && !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER))
+ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+ if (katom_evict != head_katom) {
+ rb->entries[idx & JSCTX_RB_MASK].atom_id =
+ KBASEP_ATOM_ID_INVALID;
+
+ katom_evict->event_code = head_katom->event_code;
+ katom_evict->atom_flags &=
+ ~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+
+ if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF)
+ kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL,
+ 0);
+ else
+ kbase_jd_evict(kctx->kbdev, katom_evict);
+ }
+
+ return true;
+}
+
+/**
+ * kbase_js_evict_deps - Evict dependencies
+ * @kctx: Context pointer
+ * @head_katom: Pointer to the atom to evict
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propagated to all dependent atoms.
+ *
+ * Context: Caller must hold both jctx and HW access locks
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+ struct kbase_jd_atom *head_katom)
+{
+ struct list_head evict_list;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ INIT_LIST_HEAD(&evict_list);
+
+ list_add_tail(&head_katom->dep_item[0], &evict_list);
+
+ while (!list_empty(&evict_list)) {
+ struct kbase_jd_atom *start_katom;
+
+ start_katom = list_entry(evict_list.prev, struct kbase_jd_atom,
+ dep_item[0]);
+ list_del(evict_list.prev);
+
+ jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list);
+ }
+}
+
+/**
+ * kbase_js_compact - Compact JSCTX ringbuffers
+ * @kctx: Context pointer
+ *
+ * Compact the JSCTX ringbuffers, removing any NULL entries
+ *
+ * Context: Caller must hold both jctx and HW access locks
+ */
+static void kbase_js_compact(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ int js;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ jsctx_rb_compact(kctx, js);
+}
+
+void kbase_js_complete_atom_wq(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ bool timer_sync = false;
+ int atom_slot;
+ bool context_idle = false;
+
+ kbdev = kctx->kbdev;
+ atom_slot = katom->slot_nr;
+
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
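+	/* Atoms that reached the JS ringbuffer carry pull accounting that must
+	 * be unwound; failed atoms additionally evict their dependents and
+	 * trigger a ringbuffer compaction. */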
+ if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) {
+ if (katom->event_code != BASE_JD_EVENT_DONE)
+ kbase_js_evict_deps(kctx, katom);
+
+ jsctx_rb_remove(kctx, katom);
+
+ context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+ atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+
+ if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable)
+ atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+
+ if (katom->event_code != BASE_JD_EVENT_DONE)
+ kbase_js_compact(kctx);
+ }
+
+ if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+ jsctx_rb_is_empty(kctx, atom_slot))
+ timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx,
+ atom_slot);
+
+ /*
+ * If submission is disabled on this context (most likely due to an
+ * atom failure) and there are now no atoms left in the system then
+ * re-enable submission so that context can be scheduled again.
+ */
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+ !atomic_read(&kctx->atoms_pulled) &&
+ !js_kctx_info->ctx.is_dying) {
+ int js;
+
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ if (kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |= kbase_js_ctx_list_add_pullable(
+ kbdev, kctx, js);
+ }
+ } else if (katom->x_post_dep &&
+ kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+ int js;
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ if (kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |= kbase_js_ctx_list_add_pullable(
+ kbdev, kctx, js);
+ }
+ }
+
+ if (context_idle)
+ kbase_jm_idle_ctx(kbdev, kctx);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ if (timer_sync)
+ kbase_backend_ctx_count_changed(kbdev);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ if (context_idle)
+ kbase_pm_context_idle(kbdev);
+}
+
+void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+ u64 microseconds_spent = 0;
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx = katom->kctx;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_device_data *js_devdata;
+
+ kbdev = kctx->kbdev;
+
+ js_policy = &kbdev->js_data.policy;
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+ katom->slot_nr), NULL, 0);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_nret_atom_lpu(
+ katom,
+ &kbdev->gpu_props.props.raw_props.js_features[
+ katom->slot_nr]);
+#endif
+ /* Calculate the job's time used */
+ if (end_timestamp != NULL) {
+ /* Only calculate this for jobs that actually ran on the HW (jobs
+ * removed from the 'next' registers never ran, so they really did
+ * take zero time) */
+ ktime_t tick_diff = ktime_sub(*end_timestamp,
+ katom->start_timestamp);
+
+ microseconds_spent = ktime_to_ns(tick_diff);
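+ /* Note: the value is in nanoseconds at this point; the do_div()
+ * below scales it down to microseconds */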
+
+ do_div(microseconds_spent, 1000);
+
+ /* Round up time spent to the minimum timer resolution */
+ if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+ microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+ }
+
+ /* Log the result of the job (completion status, and time spent). */
+ kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent);
+
+ kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+ /* Unblock cross dependency if present */
+ if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+ !(katom->x_post_dep->atom_flags &
+ KBASE_KATOM_FLAG_FAIL_BLOCKER)))
+ katom->x_post_dep->atom_flags &=
+ ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+ bool timer_sync = false;
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &js_devdata->policy;
+
+ down(&js_devdata->schedule_sem);
+ mutex_lock(&js_devdata->queue_mutex);
+
+ while (js_mask) {
+ int js;
+
+ js = ffs(js_mask) - 1;
+
+ while (1) {
+ struct kbase_context *kctx;
+ unsigned long flags;
+ bool context_idle = false;
+
+ kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+ if (!kctx) {
+ js_mask &= ~(1 << js);
+ break; /* No contexts on pullable list */
+ }
+
+ if (!atomic_read(&kctx->atoms_pulled)) {
+ context_idle = true;
+
+ if (kbase_pm_context_active_handle_suspend(
+ kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+ /* Suspend pending - return context to
+ * queue and stop scheduling */
+ mutex_lock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+ if (kbase_js_ctx_list_add_pullable_head(
+ kctx->kbdev, kctx, js))
+ kbase_js_sync_timers(kbdev);
+ mutex_unlock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ up(&js_devdata->schedule_sem);
+ return;
+ }
+ }
+
+ if (!kbase_js_use_ctx(kbdev, kctx)) {
+ mutex_lock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+ /* Context can not be used at this time */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock,
+ flags);
+ if (kctx->as_pending ||
+ kbase_js_ctx_pullable(kctx, js, false)
+ || (kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED))
+ timer_sync |=
+ kbase_js_ctx_list_add_pullable_head(
+ kctx->kbdev, kctx, js);
+ else
+ timer_sync |=
+ kbase_js_ctx_list_add_unpullable(
+ kctx->kbdev, kctx, js);
+ spin_unlock_irqrestore(
+ &js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+ if (context_idle)
+ kbase_pm_context_idle(kbdev);
+
+ /* No more jobs can be submitted on this slot */
+ js_mask &= ~(1 << js);
+ break;
+ }
+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ kctx->pulled = false;
+
+ if (!kbase_jm_kick(kbdev, 1 << js))
+ /* No more jobs can be submitted on this slot */
+ js_mask &= ~(1 << js);
+
+ if (!kctx->pulled) {
+ /* Failed to pull jobs - push to head of list */
+ if (kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |=
+ kbase_js_ctx_list_add_pullable_head(
+ kctx->kbdev,
+ kctx, js);
+ else
+ timer_sync |=
+ kbase_js_ctx_list_add_unpullable(
+ kctx->kbdev,
+ kctx, js);
+
+ if (context_idle) {
+ kbase_jm_idle_ctx(kbdev, kctx);
+ spin_unlock_irqrestore(
+ &js_devdata->runpool_irq.lock,
+ flags);
+ kbase_pm_context_idle(kbdev);
+ } else {
+ spin_unlock_irqrestore(
+ &js_devdata->runpool_irq.lock,
+ flags);
+ }
+ mutex_unlock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ js_mask &= ~(1 << js);
+ break; /* Could not run atoms on this slot */
+ }
+
+ /* Push to back of list */
+ if (kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |= kbase_js_ctx_list_add_pullable(
+ kctx->kbdev, kctx, js);
+ else
+ timer_sync |= kbase_js_ctx_list_add_unpullable(
+ kctx->kbdev, kctx, js);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+ flags);
+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ }
+ }
+
+ if (timer_sync)
+ kbase_js_sync_timers(kbdev);
+
+ mutex_unlock(&js_devdata->queue_mutex);
+ up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+ int js;
+
+ /*
+ * Critical assumption: No more submission is possible outside of the
+ * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+ * whilst the struct kbase_context is terminating.
+ */
+
+ /* First, atomically do the following:
+ * - mark the context as dying
+ * - try to evict it from the policy queue */
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ js_kctx_info->ctx.is_dying = true;
+
+ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+ /*
+ * At this point we know:
+ * - If eviction succeeded, it was in the policy queue, but now no
+ * longer is
+ * - We must cancel the jobs here. No Power Manager active reference to
+ * release.
+ * - This happens asynchronously - kbase_jd_zap_context() will wait for
+ * those jobs to be killed.
+ * - If eviction failed, then it wasn't in the policy queue. It is one
+ * of the following:
+ * - a. it didn't have any jobs, and so is not in the Policy Queue or
+ * the Run Pool (not scheduled)
+ * - Hence, no more work required to cancel jobs. No Power Manager
+ * active reference to release.
+ * - b. it was in the middle of a scheduling transaction (and thus must
+ * have at least 1 job). This can happen from a syscall or a
+ * kernel thread. We still hold the jsctx_mutex, and so the thread
+ * must be waiting inside kbasep_js_try_schedule_head_ctx(),
+ * before checking whether the runpool is full. That thread will
+ * continue after we drop the mutex, and will notice the context
+ * is dying. It will roll back the transaction, killing all jobs at
+ * the same time. kbase_jd_zap_context() will wait for those jobs
+ * to be killed.
+ * - Hence, no more work required to cancel jobs, or to release the
+ * Power Manager active reference.
+ * - c. it is scheduled, and may or may not be running jobs
+ * - We must cause it to leave the runpool by stopping it from
+ * submitting any more jobs. When it finally does leave,
+ * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+ * (because it is dying), release the Power Manager active reference,
+ * and will not requeue the context in the policy queue.
+ * kbase_jd_zap_context() will wait for those jobs to be killed.
+ * - Hence, work required just to make it leave the runpool. Cancelling
+ * jobs and releasing the Power manager active reference will be
+ * handled when it leaves the runpool.
+ */
+ if (!js_kctx_info->ctx.is_scheduled) {
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ if (!list_empty(
+ &kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+ list_del_init(
+ &kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+ }
+
+ /* The following events require us to kill off remaining jobs
+ * and update PM book-keeping:
+ * - we evicted it correctly (it must have jobs to be in the
+ * Policy Queue)
+ *
+ * These events need no action, but take this path anyway:
+ * - Case a: it didn't have any jobs, and was never in the Queue
+ * - Case b: scheduling transaction will be partially rolled-
+ * back (this already cancels the jobs)
+ */
+
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+ js_kctx_info->ctx.is_scheduled);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+ /* Only cancel jobs when we evicted from the policy
+ * queue. No Power Manager active reference was held.
+ *
+ * Having is_dying set ensures that this kills, and
+ * doesn't requeue */
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ } else {
+ unsigned long flags;
+ bool was_retained;
+
+ /* Case c: didn't evict, but it is scheduled - it's in the Run
+ * Pool */
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+ js_kctx_info->ctx.is_scheduled);
+ dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+ /* Disable the ctx from submitting any more jobs */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+ /* Retain and (later) release the context whilst it is now
+ * disallowed from submitting jobs - ensures that someone
+ * somewhere will be removing the context later on */
+ was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+ /* Since it's scheduled and we have the jsctx_mutex, it must be
+ * retained successfully */
+ KBASE_DEBUG_ASSERT(was_retained);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+ /* Cancel any remaining running jobs for this kctx - if any.
+ * Submit is disallowed which takes effect immediately, so no
+ * more new jobs will appear after we do this. */
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ kbase_job_slot_hardstop(kctx, js, NULL);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+ kctx);
+
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+ /* After this, you must wait on both the
+ * kbase_jd_context::zero_jobs_wait and the
+ * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+ * to be destroyed, and the context to be de-scheduled (if it was on the
+ * runpool).
+ *
+ * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+
+ return refcnt;
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx: Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+ kbasep_js_policy_ctx_job_cb callback)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ unsigned long flags;
+ u32 js;
+
+ kbdev = kctx->kbdev;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+ 0u, trace_get_refcnt(kbdev, kctx));
+
+ /* Invoke callback on jobs on each slot in turn */
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ jsctx_rb_foreach(kctx, js, callback);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100644
index 00000000000..79e7705932f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,1054 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_js_policy.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase and are available for use by the
+ * @ref kbase_js_policy "Job Scheduler Policy APIs"
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized or failed initialization, to give
+ * efficient error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized or failed initialization, to give
+ * efficient error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero initialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero initialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guaranteed not to update the Policy Queue. And so, the caller is
+ * guaranteed not to need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
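+
+/*
+ * Usage sketch (illustrative, not part of this driver): a caller honouring
+ * the return-value contract above, with none of the forbidden locks held,
+ * might do:
+ *
+ *   if (kbasep_js_add_job(kctx, katom))
+ *       kbase_js_sched_all(kbdev);
+ */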
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_atom_retained_state_copy(), to capture the 'retained state' of
+ * the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ * remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is
+ * more optimal for spinlock purposes than having kbasep_js_remove_job()
+ * handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call
+ * kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ * - it has not been removed with kbasep_js_policy_dequeue_job()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
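+
+/*
+ * Usage sketch (illustrative, not part of this driver): the full removal
+ * sequence described above, using the retained-state helpers declared later
+ * in this header:
+ *
+ *   struct kbasep_js_atom_retained_state retained;
+ *
+ *   kbasep_js_atom_retained_state_copy(&retained, katom);
+ *   kbasep_js_remove_job(kbdev, kctx, katom);
+ *   kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+ *                                                          &retained);
+ */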
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ * - it is not being killed with kbasep_jd_cancel()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that it stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally. If the runpool_irq::lock is already held, then
+ * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
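+
+/*
+ * Usage sketch (illustrative, not part of this driver): a typical
+ * lookup/release pairing for a context found via its address space:
+ *
+ *   struct kbase_context *kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_nr);
+ *
+ *   if (kctx) {
+ *       ... operate on kctx, which cannot be scheduled out here ...
+ *       kbasep_js_runpool_release_ctx(kbdev, kctx);
+ *   }
+ */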
+
+/**
+ * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based
+ * upon its current address space and ensure that it stays scheduled in.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to lookup
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * Note: This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock.
+ *
+ * Return: a valid struct kbase_context on success, which has been refcounted as
+ * being busy.
+ * NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+ struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handle the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero, the context \em might be scheduled out
+ * (depending on whether the Scheduling Policy has deemed it so, or it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then the following actions will be
+ * taken as part of descheduling the context:
+ * - For the context being descheduled:
+ * - If the context is in the process of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ * - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ * - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ * - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+
+/**
+ * @brief Release the refcount of the context and allow further submission
+ * of the context after the dump on error in user space terminates.
+ *
+ * Before this function is called, when a fault happens the kernel should
+ * have disallowed the context from further submission of jobs and
+ * retained the context to avoid it from being removed. This function
+ * releases the refcount of the context and allow further submission of
+ * jobs.
+ *
+ * This function should only be called when "instr=2" is set at compile time.
+ */
+void kbasep_js_dump_fault_term(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the
+ * function will wait for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space
+ * reserved) until kbasep_js_release_privileged_ctx() is called.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Management active refcounts to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time whenever the Job
+ * Scheduling Policy implements Job Timeouts (such as those done by CFS).
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] katom Pointer to the atom to submit
+ * @param[out] enqueue_required Whether the context now needs to be enqueued on
+ * the policy queue
+ *
+ * @return 0 if submit succeeded
+ * error code if the atom cannot be submitted at this
+ * time, due to insufficient space in the ringbuffer, or dependencies
+ * that cannot be represented.
+ */
+int kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom,
+ bool *enqueue_required);
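+
+/*
+ * Usage sketch (illustrative, not part of this driver; the handling of
+ * enqueue_required is inferred from the out-parameter's name):
+ *
+ *   int err;
+ *   bool enqueue_required;
+ *
+ *   err = kbase_js_dep_resolved_submit(kctx, katom, &enqueue_required);
+ *   if (err)
+ *       return err;
+ *   if (enqueue_required)
+ *       ... make the context pullable on the policy queue ...
+ */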
+
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx Context to pull from
+ * @param[in] js Job slot to pull from
+ * @return Pointer to an atom, or NULL if there are no atoms for this
+ * slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] katom Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
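+
+/*
+ * Usage sketch (illustrative, not part of this driver): because atoms must
+ * be unpulled in the reverse of their pull order, two soft-stopped atoms
+ * pulled as A then B must be returned as B then A:
+ *
+ *   kbase_js_unpull(kctx, katom_b);
+ *   kbase_js_unpull(kctx, katom_a);
+ */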
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependent atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ */
+void kbase_js_complete_atom_wq(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ */
+void kbase_js_complete_atom(struct kbase_jd_atom *katom,
+ ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] js_mask Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_js_zap_context - Attempt to deschedule a context that is being
+ * destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure that no further jobs can be
+ * submitted from the context.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] katom Atom to validate
+ * @return true if atom is valid
+ * false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value by comparing against true.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 test_bit;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+ test_bit = (u16) (1u << kctx->as_nr);
+
+ return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
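+
+/*
+ * Illustrative example (not driver code): with kctx->as_nr == 3, the helpers
+ * above and below operate on bit 3 of runpool_irq.submit_allowed, i.e. mask
+ * 0x0008; the u16 therefore tracks up to 16 address spaces.
+ */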
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 set_bit;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+ set_bit = (u16) (1u << kctx->as_nr);
+
+ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+ js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 clear_bit;
+ u16 clear_mask;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+ clear_bit = (u16) (1u << kctx->as_nr);
+ clear_mask = ~clear_bit;
+
+ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+ js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+ atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+ KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+ KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+ KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+ || (atom->retry_submit_on_slot == js));
+
+ atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+ retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+ retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+ retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+ retained_state->event_code = katom->event_code;
+ retained_state->core_req = katom->core_req;
+ retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+ retained_state->sched_priority = katom->sched_priority;
+ retained_state->device_nr = katom->device_nr;
+}
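+
+/*
+ * Usage sketch (illustrative, not part of this driver): a completion path
+ * that may or may not have an atom to account for can start from the
+ * 'invalid' state and overwrite it only when an atom is present:
+ *
+ *   struct kbasep_js_atom_retained_state retained;
+ *
+ *   kbasep_js_atom_retained_state_init_invalid(&retained);
+ *   if (katom)
+ *       kbasep_js_atom_retained_state_copy(&retained, katom);
+ *   kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+ *                                                          &retained);
+ */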
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return false if the atom has not finished
+ * @return !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return false if the retained state is invalid, and can be ignored
+ * @return !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+ int js = katom_retained_state->retry_submit_on_slot;
+
+ *res = js;
+ return (bool) (js >= 0);
+}
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug check the refcount of a context. Only use within ASSERTs.
+ *
+ * Obtains kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guaranteed to be kept constant.
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int result = -1;
+ int as_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID)
+ result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return result;
+}
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_context *found_kctx;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+ js_devdata = &kbdev->js_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ found_kctx = js_per_as_data->kctx;
+ KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return ticks / gpu_freq * 1000;
+}
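+
+/*
+ * Worked example (illustrative): with gpu_freq_khz_min == 100000 (100 MHz),
+ * kbasep_js_convert_us_to_gpu_ticks_min_freq(kbdev, 500) returns
+ * 500 * (100000 / 1000) == 50000 ticks. Converting 50000 ticks back with
+ * kbasep_js_convert_gpu_ticks_to_us_min_freq() gives
+ * 50000 / 100000 * 1000 == 0 in integer arithmetic, so the reverse
+ * conversions are only meaningful when ticks is large relative to the
+ * frequency.
+ */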
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Track total contexts */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+ ++(js_devdata->nr_all_contexts_running);
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ /* Track contexts that can submit jobs */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+ S8_MAX);
+ ++(js_devdata->nr_user_contexts_running);
+ }
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Track total contexts */
+ --(js_devdata->nr_all_contexts_running);
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ /* Track contexts that can submit jobs */
+ --(js_devdata->nr_user_contexts_running);
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+ }
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+ kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
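+
+/*
+ * Illustrative example (not driver code): with num_job_slots == 3 the mask
+ * computed above is (1 << 3) - 1 == 0x7, so slots 0, 1 and 2 are all
+ * submitted to.
+ */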
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ * to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treats priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note: This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ * KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+ if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+ return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+ return kbasep_js_atom_priority_to_relative[atom_prio];
+}
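+
+/*
+ * Illustrative example (assumed enum values; the definitions of base_jd_prio
+ * are authoritative): if BASE_JD_PRIO_HIGH were numerically 1 but the highest
+ * priority, the table would map it to scheduled priority 0, so lower values
+ * returned by this function always mean higher priority.
+ */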
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+ unsigned int prio_idx;
+
+ KBASE_DEBUG_ASSERT(0 <= sched_prio
+ && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+ prio_idx = (unsigned int)sched_prio;
+
+ return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100644
index 00000000000..49266007935
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,310 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+ ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+ /* First refcount indicates a state change */
+ runpool_state_changed = true;
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in the runpool's context attribute state.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure that
+ * kbasep_js_try_run_next_job_nolock() or similar is called sometime later.
+ * @return false indicates no change in the runpool's context attribute state.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+ --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+ /* Last de-refcount indicates a state change */
+ runpool_state_changed = true;
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in the runpool's context attribute state.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in the runpool's context attribute state.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+ ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+ if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+ /* Only ref-count the attribute on the runpool the first time this context sees this attribute */
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+ }
+
+ return runpool_state_changed;
+}
+
+/**
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in the runpool's context attribute state.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in the runpool's context attribute state.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+ if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+ /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+ }
+
+ /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release_attr() needs to check it too */
+ --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+ return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) {
+ /* This context never submits, so don't track any scheduling attributes */
+ return;
+ }
+
+ /* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != false) {
+ /* Compute context */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ }
+ /* NOTE: Whether this is a non-compute context depends on the jobs being
+ * run, e.g. it might be submitting jobs with BASE_JD_REQ_ONLY_COMPUTE */
+
+ /* ... More attributes can be added here ... */
+
+ /* The context should not have been scheduled yet, so ASSERT if this caused
+ * runpool state changes (note that other threads *can't* affect the value
+ * of runpool_state_changed, due to how it's calculated) */
+ KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+ CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ bool runpool_state_changed;
+ int i;
+
+ /* Retain any existing attributes */
+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+ /* The context is being scheduled in, so update the runpool with the new attributes */
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+ /* We don't need to know about state changed, because retaining a
+ * context occurs on scheduling it, and that itself will also try
+ * to run new atoms */
+ CSTD_UNUSED(runpool_state_changed);
+ }
+ }
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ bool runpool_state_changed = false;
+ int i;
+
+ /* Release any existing attributes */
+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+ /* The context is being scheduled out, so update the runpool on the removed attributes */
+ runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ bool runpool_state_changed = false;
+ base_jd_core_req core_req;
+
+ KBASE_DEBUG_ASSERT(katom);
+ core_req = katom->core_req;
+
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ else
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+ /* Atom that can run on slot1 or slot2, and can use all cores */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+ }
+
+ /* We don't need to know about state changed, because retaining an
+ * atom occurs on adding it, and that itself will also try to run
+ * new atoms */
+ CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ bool runpool_state_changed = false;
+ base_jd_core_req core_req;
+
+ KBASE_DEBUG_ASSERT(katom_retained_state);
+ core_req = katom_retained_state->core_req;
+
+ /* No-op for invalid atoms */
+ if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+ return false;
+
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ else
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+ /* Atom that can run on slot1 or slot2, and can use all cores */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+ }
+
+ return runpool_state_changed;
+}
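+
+/*
+ * Usage sketch (illustrative only): the retain/release pair above must be
+ * balanced over an atom's lifetime. Locking is elided here, and
+ * try_to_submit_more_jobs() is a hypothetical helper:
+ *
+ *	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, katom);
+ *	... the atom runs, then is permanently removed from the context ...
+ *	if (kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, &retained_state))
+ *		try_to_submit_more_jobs(kbdev);
+ */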
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100644
index 00000000000..ce9183326a5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs when the context is scheduled onto the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs when the context is scheduled out of the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in the runpool's context attribute state.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure that
+ * kbasep_js_try_run_next_job_nolock() or similar is called sometime later.
+ * @return false indicates no change in the runpool's context attribute state.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in the runpool's context attribute state.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure that
+ * kbasep_js_try_run_next_job_nolock() or similar is called sometime later.
+ * @return false indicates no change in the runpool's context attribute state.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+
+ return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+ /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+ return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
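+
+/*
+ * Usage sketch (illustrative only): detecting a mix of compute and
+ * non-compute contexts on the runpool, as mentioned in the context
+ * attribute documentation. The runpool_irq spinlock must be held, as
+ * required above:
+ *
+ *	unsigned long flags;
+ *	bool mixed;
+ *
+ *	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ *	mixed = kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+ *			KBASEP_JS_CTX_ATTR_COMPUTE) &&
+ *		kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+ *			KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+ *	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+ */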
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+ return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_CTX_ATTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100644
index 00000000000..7532f9c9e71
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,517 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_defs.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+/* Types used by the policies must go here */
+enum {
+ /** Context will not submit any jobs */
+ KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
+
+ /** Set if the context uses an address space and should be kept scheduled in */
+ KBASE_CTX_FLAG_PRIVILEGED = (1u << 1),
+
+ /** Kernel-side equivalent of BASE_CONTEXT_HINT_ONLY_COMPUTE. Non-mutable after creation flags set */
+ KBASE_CTX_FLAG_HINT_ONLY_COMPUTE = (1u << 2)
+
+ /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
+};
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+ base_jd_core_req core_req;
+ kbase_context_flags ctx_req;
+ u32 device_nr;
+};
+
+#include "mali_kbase_js_policy_cfs.h"
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy {
+ struct kbasep_js_policy_cfs cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_ctx_info {
+ struct kbasep_js_policy_cfs_ctx cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_job_info {
+ struct kbasep_js_policy_cfs_job cfs;
+};
+
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Without a limit, an unbounded number of GPU NULL jobs could be
+ * submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
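+
+/*
+ * Sketch (illustrative only) of how a submission loop might honour this
+ * limit inside the IRQ handler; slot_has_space() and submit_next_job() are
+ * hypothetical helpers standing in for the real submission path:
+ *
+ *	int submitted = 0;
+ *
+ *	while (submitted < KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ &&
+ *	       slot_has_space(kbdev, js) && submit_next_job(kbdev, js))
+ *		++submitted;
+ */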
+
+/**
+ * @brief The IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accommodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ *
+ * Examples of use:
+ * - Finding out when there is a mix of @ref BASE_CONTEXT_HINT_ONLY_COMPUTE
+ * and ! @ref BASE_CONTEXT_HINT_ONLY_COMPUTE contexts in the runpool
+ */
+enum kbasep_js_ctx_attr {
+ /** Attribute indicating a context that contains Compute jobs. That is,
+ * @ref BASE_CONTEXT_HINT_ONLY_COMPUTE is \b set and/or the context has jobs of type
+ * @ref BASE_JD_REQ_ONLY_COMPUTE
+ *
+ * @note A context can be both 'Compute' and 'Non Compute' if it contains
+ * both types of jobs.
+ */
+ KBASEP_JS_CTX_ATTR_COMPUTE,
+
+ /** Attribute indicating a context that contains Non-Compute jobs. That is,
+ * the context has some jobs that are \b not of type @ref
+ * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
+ * BASE_CONTEXT_HINT_ONLY_COMPUTE \b clear, but this depends on the HW
+ * workarounds in use in the Job Scheduling Policy.
+ *
+ * @note A context can be both 'Compute' and 'Non Compute' if it contains
+ * both types of jobs.
+ */
+ KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+ /** Attribute indicating that a context contains compute-job atoms that
+ * aren't restricted to a coherent group, and can run on all cores.
+ *
+ * Specifically, this is when the atom's \a core_req satisfy:
+ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 // uses slot 1 or slot 2
+ * - && (\a core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0 // not restricted to coherent groups
+ *
+ * Such atoms could be blocked from running if one of the coherent groups
+ * is being used by another job slot, so tracking this context attribute
+ * allows us to prevent such situations.
+ *
+ * @note This doesn't take into account the 1-coregroup case, where all
+ * compute atoms would effectively be able to run on 'all cores', but
+ * contexts will still not always get marked with this attribute. Instead,
+ * it is the caller's responsibility to take into account the number of
+ * coregroups when interpreting this attribute.
+ *
+ * @note Whilst Tiler atoms are normally combined with
+ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+ * enough to handle anyway.
+ */
+ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+ /** Must be the last in the enum */
+ KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+ /** Bit indicating that new atom should be started because this atom completed */
+ KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+ /** Bit indicating that the atom was evicted from the JS_NEXT registers */
+ KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
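+
+/*
+ * For example (illustrative only), the IRQ path might combine these bits,
+ * where evicted_from_next is a hypothetical local flag:
+ *
+ *	kbasep_js_atom_done_code done_code = KBASE_JS_ATOM_DONE_START_NEW_ATOMS;
+ *
+ *	if (evicted_from_next)
+ *		done_code |= KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT;
+ */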
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and kbasep_js_device_data::runpool_irq::lock
+ * must be held whilst accessing this data (including reads and atomic
+ * decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+ /**
+ * Ref count of whether this AS is busy, and must not be scheduled out
+ *
+ * When jobs are running this is always positive. However, it can still be
+ * positive when no jobs are running. If all you need is a heuristic to
+ * tell you whether jobs might be running, this should be sufficient.
+ */
+ int as_busy_refcount;
+
+ /** Pointer to the current context on this address space, or NULL for no context */
+ struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not roll over (which would be undefined behavior), and so the
+ * compiler is free to make optimizations based on that (i.e. to remove masking).
+ */
+struct kbasep_js_device_data {
+ /** Sub-structure to collect together Job Scheduling data used in IRQ context */
+ struct runpool_irq {
+ /**
+ * Lock for accessing Job Scheduling data used in IRQ context
+ *
+ * This lock must be held whenever this data is accessed (read, or
+ * write). Even for read-only access, memory barriers would be needed.
+ * In any case, it is likely that decisions based on only reading must
+ * also be atomic with respect to data held here and elsewhere in the
+ * Job Scheduler.
+ *
+ * This lock must also be held for accessing:
+ * - kbase_context::as_nr
+ * - kbase_device::jm_slots
+ * - Parts of the kbasep_js_policy, dependent on the policy (refer to
+ * the policy in question for more information)
+ * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
+ * the policy in question for more information)
+ */
+ spinlock_t lock;
+
+ /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+ * When bit 'N' is set in this, it indicates whether the context bound to address space
+ * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+ *
+ * It is placed here because it's much more memory efficient than having a u8 in
+ * struct kbasep_js_per_as_data to store this flag */
+ u16 submit_allowed;
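+
+ /*
+ * For example (illustrative only), whether the context bound to
+ * address space as_nr may submit could be tested as:
+ *
+ *	bool ok = (submit_allowed >> as_nr) & 1;
+ */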
+
+ /** Context Attributes:
+ * Each is large enough to hold a refcount of the number of contexts
+ * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+ *
+ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+ * the refcount. Hence, it's not worthwhile reducing this to
+ * bit-manipulation on u32s to save space (where in contrast, 4 bit
+ * sub-fields would be easy to do and would save space).
+ *
+ * Whilst this must not become negative, the sign bit is used for:
+ * - error detection in debug builds
+ * - Optimization: it is undefined for a signed int to overflow, and so
+ * the compiler can optimize for that never happening (thus, no masking
+ * is required on updating the variable) */
+ s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+ /** Data that is unique for each AS */
+ struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+ /*
+ * Affinity management and tracking
+ */
+ /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+ * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+ u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+ /** Refcount for each core owned by each slot. Used to generate the
+ * slot_affinities array of bitvectors
+ *
+ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+ * because it is refcounted only when a job is definitely about to be
+ * submitted to a slot, and is de-refcounted immediately after a job
+ * finishes */
+ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+
+ /*
+ * true when GPU is put into secure mode
+ */
+ bool secure_mode;
+ } runpool_irq;
+
+ /**
+ * Run Pool mutex, for managing contexts within the runpool.
+ * Unless otherwise specified, you must hold this lock whilst accessing any
+ * members that follow
+ *
+ * In addition, this is used to access:
+ * - the kbasep_js_kctx_info::runpool substructure
+ */
+ struct mutex runpool_mutex;
+
+ /**
+ * Queue Lock, used to access the Policy's queue of contexts independently
+ * of the Run Pool.
+ *
+ * Of course, you don't need the Run Pool lock to access this.
+ */
+ struct mutex queue_mutex;
+
+ /**
+ * Scheduling semaphore. This must be held when calling
+ * kbase_jm_kick()
+ */
+ struct semaphore schedule_sem;
+
+ /**
+ * List of contexts that can currently be pulled from
+ */
+ struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+ /**
+ * List of contexts that can not currently be pulled from, but have
+ * jobs currently running.
+ */
+ struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+ u16 as_free; /**< Bitpattern of free Address Spaces */
+
+ /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+ s8 nr_user_contexts_running;
+ /** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+ s8 nr_all_contexts_running;
+
+ /**
+ * Policy-specific information.
+ *
+ * Refer to the structure defined by the current policy to determine which
+ * locks must be held when accessing this.
+ */
+ union kbasep_js_policy policy;
+
+ /** Core Requirements to match up with base_js_atom's core_req memeber
+ * @note This is a write-once member, and so no locking is required to read */
+ base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+ u32 scheduling_period_ns; /**< Value for JS_SCHEDULING_PERIOD_NS */
+ u32 soft_stop_ticks; /**< Value for JS_SOFT_STOP_TICKS */
+ u32 soft_stop_ticks_cl; /**< Value for JS_SOFT_STOP_TICKS_CL */
+ u32 hard_stop_ticks_ss; /**< Value for JS_HARD_STOP_TICKS_SS */
+ u32 hard_stop_ticks_cl; /**< Value for JS_HARD_STOP_TICKS_CL */
+ u32 hard_stop_ticks_dumping; /**< Value for JS_HARD_STOP_TICKS_DUMPING */
+ u32 gpu_reset_ticks_ss; /**< Value for JS_RESET_TICKS_SS */
+ u32 gpu_reset_ticks_cl; /**< Value for JS_RESET_TICKS_CL */
+ u32 gpu_reset_ticks_dumping; /**< Value for JS_RESET_TICKS_DUMPING */
+ u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */
+ u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
+ u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
+
+ /** List of suspended soft jobs */
+ struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+ /* Support soft-stop on a single context */
+ bool softstop_always;
+#endif /* CONFIG_MALI_DEBUG */
+ /** The initialized-flag is placed at the end, to avoid cache-pollution (we should
+ * only be using this during init/term paths).
+ * @note This is a write-once member, and so no locking is required to read */
+ int init_status;
+
+ /* Number of contexts that can currently be pulled from */
+ u32 nr_contexts_pullable;
+
+ /* Number of contexts that can either be pulled from or are currently
+ * running */
+ atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+ /**
+ * Runpool substructure. This must only be accessed whilst the Run Pool
+ * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
+ *
+ * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
+ * held for certain sub-members.
+ *
+ * @note some of the members could be moved into struct kbasep_js_device_data for
+ * improved d-cache/tlb efficiency.
+ */
+ struct {
+ union kbasep_js_policy_ctx_info policy_ctx; /**< Policy-specific context */
+ } runpool;
+
+ /**
+ * Job Scheduler Context information sub-structure. These members are
+ * accessed regardless of whether the context is:
+ * - In the Policy's Run Pool
+ * - In the Policy's Queue
+ * - Not queued nor in the Run Pool.
+ *
+ * You must obtain the jsctx_mutex before accessing any other members of
+ * this substructure.
+ *
+ * You may not access any of these members from IRQ context.
+ */
+ struct {
+ struct mutex jsctx_mutex; /**< Job Scheduler Context lock */
+
+ /** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+ * the dispatcher, nor dependency-only jobs. See kbase_jd_context::job_nr
+ * for such jobs */
+ u32 nr_jobs;
+
+ /** Context Attributes:
+ * Each is large enough to hold a refcount of the number of atoms on
+ * the context. */
+ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+ kbase_context_flags flags;
+ /* NOTE: Unify the following flags into kbase_context_flags */
+ /**
+ * Is the context scheduled on the Run Pool?
+ *
+ * This is only ever updated whilst the jsctx_mutex is held.
+ */
+ bool is_scheduled;
+ /**
+ * Wait queue to wait for is_scheduled state changes.
+ */
+ wait_queue_head_t is_scheduled_wait;
+
+ bool is_dying; /**< Is the context in the process of being evicted? */
+
+ /** Link implementing JS queues. Context can be present on one
+ * list per job slot
+ */
+ struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+
+ atomic_t fault_count; /**< The no. of times the context is retained due to the fault job. */
+ } ctx;
+
+ /* The initialized-flag is placed at the end, to avoid cache-pollution (we should
+ * only be using this during init/term paths) */
+ int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+ /** Event code - to determine whether the atom has finished */
+ enum base_jd_event_code event_code;
+ /** core requirements */
+ base_jd_core_req core_req;
+ /* priority */
+ int sched_priority;
+ /** Job Slot to retry submitting to if submission from IRQ handler failed */
+ int retry_submit_on_slot;
+ /* Core group atom was executed on */
+ u32 device_nr;
+
+};
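+
+/*
+ * Usage sketch (illustrative only): take a copy of the retained state
+ * before the atom can disappear, then act on the copy. The argument order
+ * of kbasep_js_atom_retained_state_copy() is assumed here:
+ *
+ *	struct kbasep_js_atom_retained_state retained;
+ *
+ *	kbasep_js_atom_retained_state_copy(&retained, katom);
+ *	... jd_done_nolock() runs; katom may now be freed ...
+ *	if (kbasep_js_atom_retained_state_is_valid(&retained))
+ *		kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, &retained);
+ */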
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+ KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+ KBASE_JS_ATOM_SCHED_PRIO_MED,
+ KBASE_JS_ATOM_SCHED_PRIO_LOW,
+ KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
new file mode 100644
index 00000000000..2094586ff2d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -0,0 +1,763 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_policy.h
+ * Job Scheduler Policy APIs.
+ */
+
+#ifndef _KBASE_JS_POLICY_H_
+#define _KBASE_JS_POLICY_H_
+
+/**
+ * @page page_kbase_js_policy Job Scheduling Policies
+ * The Job Scheduling system is described in the following:
+ * - @subpage page_kbase_js_policy_overview
+ * - @subpage page_kbase_js_policy_operation
+ *
+ * The API details are as follows:
+ * - @ref kbase_jm
+ * - @ref kbase_js
+ * - @ref kbase_js_policy
+ */
+
+/**
+ * @page page_kbase_js_policy_overview Overview of the Policy System
+ *
+ * The Job Scheduler Policy manages:
+ * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
+ * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
+ * GPU's Job Slots (\em JSs).
+ * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
+ * Address Space
+ * - The amount of \em time a Job spends running on the GPU
+ *
+ * The Policy implements this management via 2 components:
+ * - A Policy Queue, which manages a set of contexts that are ready to run,
+ * but not currently running.
+ * - A Policy Run Pool, which manages the currently running contexts (one per Address
+ * Space) and the jobs to run on the Job Slots.
+ *
+ * Each Graphics Process in the system has at least one KBase Context. Therefore,
+ * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on
+ * the GPU.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
+ *
+ * The main operations on the queue are:
+ * - Enqueuing a Context to it
+ * - Dequeuing a Context from it, to run it.
+ * - Note: requeuing a context is much the same as enqueuing a context, but
+ * occurs when a context is scheduled out of the system to allow other contexts
+ * to run.
+ *
+ * These operations have much the same meaning for the Run Pool - Jobs are
+ * dequeued to run on a Jobslot, and requeued when they are scheduled out of
+ * the GPU.
+ *
+ * @note This is an over-simplification of the Policy APIs - there are more
+ * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
+ * takes at least two function calls: one to Dequeue from the Queue, one to add
+ * to the Run Pool.
+ *
+ * As indicated on the diagram, Jobs permanently leave the scheduling system
+ * when they are completed, otherwise they get dequeued/requeued until this
+ * happens. Similarly, Contexts leave the scheduling system when their jobs
+ * have all completed. However, Contexts may later return to the scheduling
+ * system (not shown on the diagram) if more Bags of Jobs are submitted to
+ * them.
+ */
+
+/**
+ * @page page_kbase_js_policy_operation Policy Operation
+ *
+ * We describe the actions that the Job Scheduler Core takes on the Policy in
+ * the following cases:
+ * - The IRQ Path
+ * - The Job Submission Path
+ * - The High Priority Job Submission Path
+ *
+ * This shows how the Policy APIs will be used by the Job Scheduler core.
+ *
+ * The following diagram shows an example Policy that contains a Low Priority
+ * queue, and a Real-time (High Priority) Queue. The RT queue is examined
+ * before the LowP one on dequeuing from the head. The Low Priority Queue is
+ * ordered by time, and the RT queue is ordered by time weighted by
+ * RT-priority. In addition, it shows that the Job Scheduler Core will start a
+ * Soft-Stop Timer (SS-Timer) when it dequeues and submits a job. The
+ * Soft-Stop time is set by a global configuration value, and must be a value
+ * appropriate for the policy. For example, this could include "don't run a
+ * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
+ *
+ * @section sec_kbase_js_policy_operation_prio Dealing with Priority
+ *
+ * Priority applies separately to a context as a whole, and to the jobs within
+ * a context. The jobs specify a priority in the base_jd_atom::prio member, but
+ * it is independent of the context priority. That is, it only affects
+ * scheduling of atoms within a context. Refer to @ref base_jd_prio for more
+ * details. The meaning of the context's priority value is up to the policy
+ * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
+ * policy could decide that an increase/decrease in priority by 1 results in an
+ * increase/decrease of 25% in the \em proportion of time spent scheduled in, an
+ * effective change in timeslice of 11%).
+ *
+ * It is up to the policy whether a boost in priority boosts the priority of
+ * the entire context (e.g. to such an extent where it may pre-empt other
+ * running contexts). If it chooses to do this, the Policy must make sure that
+ * only jobs from high-priority contexts are run, and that the context is
+ * scheduled out once only jobs from low priority contexts remain. This ensures
+ * that the low priority contexts do not gain from the priority boost, yet they
+ * still get scheduled correctly with respect to other low priority contexts.
+ *
+ *
+ * @section sec_kbase_js_policy_operation_irq IRQ Path
+ *
+ * The following happens on the IRQ path from the Job Scheduler Core:
+ * - Note the slot that completed (for later)
+ * - Log the time spent by the job (and implicitly, the time spent by the
+ * context)
+ * - call kbasep_js_policy_log_job_result() <em>in the context of the irq
+ * handler.</em>
+ * - This must happen regardless of whether the job completed successfully or
+ * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
+ * - What was the result of the job?
+ * - If Completed: job is just removed from the system
+ * - If Hard-stop or failure: job is removed from the system
+ * - If Soft-stop: queue the book-keeping work onto a work-queue: have a
+ * work-queue call kbasep_js_policy_enqueue_job()
+ * - Check the timeslice used by the owning context
+ * - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
+ * handler.</em>
+ * - If this returns true, clear the "allowed" flag.
+ * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
+ * jobs"
+ * - And so, should the context stay scheduled in?
+ * - If No, push onto a work-queue the work of scheduling out the old context,
+ * and getting a new one. That is:
+ * - kbasep_js_policy_runpool_remove_ctx() on old_ctx
+ * - kbasep_js_policy_enqueue_ctx() on old_ctx
+ * - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
+ * - kbasep_js_policy_runpool_add_ctx() on new_ctx
+ * - (all of this work is deferred on a work-queue to keep the IRQ handler quick; see the sketch after this list)
+ * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
+ * - kbasep_js_policy_dequeue_job() <em>in the context of the irq
+ * handler</em> with core_req set to that of the completing slot
+ * - if this returned true, submit the job to the completed slot.
+ * - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ * - If kbasep_js_policy_dequeue_job() returned false, submit some work to
+ * the work-queue to retry from outside of IRQ context (calling
+ * kbasep_js_policy_dequeue_job() from a work-queue).
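+ *
+ * A minimal sketch of the deferred context-switch step above (illustrative
+ * only; the worker name is hypothetical and locking/error handling are
+ * simplified):
+ *
+ *	static void example_ctx_switch_work(struct kbase_device *kbdev,
+ *					    struct kbase_context *old_ctx)
+ *	{
+ *		union kbasep_js_policy *js_policy = &kbdev->js_data.policy;
+ *		struct kbase_context *new_ctx;
+ *
+ *		kbasep_js_policy_runpool_remove_ctx(js_policy, old_ctx);
+ *		kbasep_js_policy_enqueue_ctx(js_policy, old_ctx);
+ *		if (kbasep_js_policy_dequeue_head_ctx(js_policy, &new_ctx))
+ *			kbasep_js_policy_runpool_add_ctx(js_policy, new_ctx);
+ *	}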
+ *
+ * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
+ * this sequence could loop a large number of times: this could happen if
+ * the jobs submitted completed on the GPU very quickly (in a few cycles), such
+ * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
+ * more jobs, causing us to loop until we run out of jobs.
+ *
+ * To mitigate this, we must limit the number of jobs submitted per slot during
+ * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
+ * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
+ * Mali-T600 with 3 job slots, this means that up to 6 jobs in total could be
+ * submitted per IRQ. Note that IRQ Throttling can make this situation commonplace: 6 jobs
+ * could complete but the IRQ for each of them is delayed by the throttling. By
+ * the time you get the IRQ, all 6 jobs could've completed, meaning you can
+ * submit jobs to fill all 6 HEAD+NEXT registers again.
+ *
+ * @note As much work is deferred as possible, which includes the scheduling
+ * out of a context and scheduling in a new context. However, we can still make
+ * starting a single high-priority context quick despite this:
+ * - On Mali-T600 family, there is one more AS than JSs.
+ * - This means we can very quickly schedule out one AS, no matter what the
+ * situation (because there will always be one AS that's not currently running
+ * on the job slot - it can only have a job in the NEXT register).
+ * - Even with this scheduling out, fair-share can still be guaranteed e.g. by
+ * a timeline-based Completely Fair Scheduler.
+ * - When our high-priority context comes in, we can do this quick-scheduling
+ * out immediately, and then schedule in the high-priority context without having to block.
+ * - This all assumes that the context to schedule out is of lower
+ * priority. Otherwise, we will have to block waiting for some other low
+ * priority context to finish its jobs. Note that it's likely (but not
+ * impossible) that the high-priority context \b is running jobs, by virtue of
+ * it being high priority.
+ * - Therefore, there is a high likelihood that on Mali-T600 at least one
+ * high-priority context can be started very quickly. For the general case, we
+ * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
+ * quickly. In any case, there is a high likelihood that we're able to start
+ * more than one high priority context quickly.
+ *
+ * In terms of the functions used in the IRQ handler directly, these are the
+ * performance considerations:
+ * - kbasep_js_policy_log_job_result():
+ * - This is just adding to a 64-bit value (possibly even a 32-bit value if we
+ * only store the time the job's recently spent - see below on 'priority weighting')
+ * - For priority weighting, a divide operation ('div') could happen, but
+ * this can happen in a deferred context (outside of IRQ) when scheduling out
+ * the ctx; as per our Engineering Specification, the contexts of different
+ * priority still stay scheduled in for the same timeslice, but higher priority
+ * ones scheduled back in more often.
+ * - That is, the weighted and unweighted times must be stored separately, and
+ * the weighted time is only updated \em outside of IRQ context.
+ * - Of course, this divide is more likely to be a 'multiply by inverse of the
+ * weight', assuming that the weight (priority) doesn't change.
+ * - kbasep_js_policy_should_remove_ctx():
+ * - This is usually just a comparison of the stored time value against some
+ * maximum value.
+ *
+ * @note all deferred work can be wrapped up into one call - we usually need to
+ * indicate that a job/bag is done outside of IRQ context anyway.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit Submission path
+ *
+ * Start with a Context with no jobs present, and assume equal priority of all
+ * contexts in the system. The following work all happens outside of IRQ
+ * Context :
+ * - As soon as a job is made 'ready to run', it must be registered with the Job
+ * Scheduler Policy:
+ * - 'Ready to run' means the job has satisfied its dependencies in the
+ * Kernel-side Job Dispatch system.
+ * - Call kbasep_js_policy_enqueue_job()
+ * - This indicates that the job should be scheduled (it is ready to run).
+ * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
+ * 'ready to run', we enqueue the context on the policy queue:
+ * - Call kbasep_js_policy_enqueue_ctx()
+ * - This indicates that the \em ctx should be scheduled (it is ready to run)
+ *
+ * Next, we need to handle adding a context to the Run Pool - if it's sensible
+ * to do so. This can happen due to two reasons:
+ * -# A context is enqueued as above, and there are ASs free for it to run on
+ * (e.g. it is the first context to be run, in which case it can be added to
+ * the Run Pool immediately after enqueuing on the Policy Queue)
+ * -# A previous IRQ caused another ctx to be scheduled out, requiring that the
+ * context at the head of the queue be scheduled in. Such steps would happen in
+ * a work queue (work deferred from the IRQ context).
+ *
+ * In both cases, we'd handle it as follows:
+ * - Get the context at the Head of the Policy Queue:
+ * - Call kbasep_js_policy_dequeue_head_ctx()
+ * - Assign the Context an Address Space (Assert that there will be one free,
+ * given the above two reasons)
+ * - Add this context to the Run Pool:
+ * - Call kbasep_js_policy_runpool_add_ctx()
+ * - Now see if a job should be run:
+ * - Mostly, this will be done in the IRQ handler at the completion of a
+ * previous job.
+ * - However, there are two cases where this cannot be done: a) The first job
+ * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
+ * are submitted at a low enough rate to not fill up all Job Slots (or, not to
+ * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
+ * - Hence, on each ctx <b>and job</b> submission we should try to see if we
+ * can run a job:
+ * - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
+ * - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
+ * slot
+ * - if we got one, submit it to the job slot.
+ * - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *
+ * The above case shows that we should attempt to run jobs in cases where a) a ctx
+ * has been added to the Run Pool, and b) new jobs have been added to a context
+ * in the Run Pool:
+ * - In the latter case, the context is in the runpool because it's got a job
+ * ready to run, or is already running a job
+ * - We could just wait until the IRQ handler fires, but for certain types of
+ * jobs this can take comparatively a long time to complete, e.g. GLES FS jobs
+ * generally take much longer to run than GLES CS jobs, which are vertex shader
+ * jobs.
+ * - Therefore, when a new job appears in the ctx, we must check the job-slots
+ * to see if they're free, and run the jobs as before.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
+ *
+ * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
+ * them can be scheduled in immediately to start high priority jobs. In general,
+ * (no. ASs) - (no. JSs) high priority contexts may be started immediately. The
+ * following describes how this happens:
+ *
+ * Similar to the previous section, consider what happens with a high-priority
+ * context (a context with a priority higher than that of any in the Run Pool)
+ * that starts out with no jobs:
+ * - A job becomes ready to run on the context, and so we enqueue the context
+ * on the Policy's Queue.
+ * - However, we'd like to schedule in this context immediately, instead of
+ * waiting for one of the Run Pool contexts' timeslice to expire
+ * - The policy's Enqueue function must detect this (because it is the policy
+ * that embodies the concept of priority), and take appropriate action
+ * - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
+ * Pool to see if a lower priority context should be scheduled out, and then
+ * schedule in the High Priority context.
+ * - For Mali-T600, we can always pick a context to schedule out immediately
+ * (because there are more ASs than JSs), and so scheduling out a victim context
+ * and scheduling in the high priority context can happen immediately.
+ * - If a policy implements fair-sharing, then this can still ensure the
+ * victim later on gets a fair share of the GPU.
+ * - As a note, consider whether the victim can be of equal/higher priority
+ * than the incoming context:
+ * - Usually, higher priority contexts will be the ones currently running
+ * jobs, and so the context with the lowest priority is usually not running
+ * jobs.
+ * - This makes it likely that the victim context is low priority, but
+ * it's not impossible for it to be a high priority one:
+ * - Suppose 3 high priority contexts are submitting only FS jobs, and one low
+ * priority context submitting CS jobs. Then, the context not running jobs will
+ * be one of the high priority contexts (because only 2 FS jobs can be
+ * queued/running on the GPU HW for Mali-T600).
+ * - The problem can be mitigated by extra action, but it's questionable
+ * whether we need to: we already have a high likelihood that there's at least
+ * one high priority context - that should be good enough.
+ * - And so, this method makes sure that at least one high priority context
+ * can be started very quickly, but additional high priority contexts could be
+ * delayed (up to one timeslice).
+ * - To improve this, use a GPU with a higher number of Address Spaces vs Job
+ * Slots.
+ * - At this point, let's assume this high priority context has been scheduled
+ * in immediately. The next step is to ensure it can start some jobs quickly.
+ * - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
+ * submit to.
+ * - The rest of the logic for starting the jobs is taken care of by the IRQ
+ * handler. All the policy needs to do is ensure that
+ * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
+ * context.
+ *
+ * @note in SS state, we currently only use 2 job-slots (even for T608, but
+ * this might change in future). In this case, it's always possible to schedule
+ * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
+ * time, this maximizes usage of the job-slots (only 2 are in use), because you
+ * can guarantee starting the jobs from the High Priority contexts immediately too.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_notes Notes
+ *
+ * - In this design, a separate 'init' is needed from dequeue/requeue, so that
+ * information can be retained between the dequeue/requeue calls. For example,
+ * the total time spent for a context/job could be logged between
+ * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
+ * initializes that information to some known state.
+ *
+ *
+ *
+ */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_policy Job Scheduler Policy APIs
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy structure
+ */
+union kbasep_js_policy;
+
+/**
+ * @brief Initialize the Job Scheduler Policy
+ */
+int kbasep_js_policy_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler Policy
+ */
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
+
+/**
+ * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Ctx Info structure
+ *
+ * This structure is embedded in the struct kbase_context structure. It is used to:
+ * - track information needed for the policy to schedule the context (e.g. time
+ * used, OS priority etc.)
+ * - link together kbase_contexts into a queue, so that a struct kbase_context can be
+ * obtained as the container of the policy ctx info. This allows the API to
+ * return what "the next context" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g process ID to get the priority of the originating
+ * process)
+ */
+union kbasep_js_policy_ctx_info;
+
+/**
+ * @brief Initialize a ctx for use with the Job Scheduler Policy
+ *
+ * This effectively initializes the union kbasep_js_policy_ctx_info structure within
+ * the struct kbase_context (itself located within the kctx->jctx.sched_info structure).
+ */
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Terminate resources associated with using a ctx in the Job Scheduler
+ * Policy.
+ */
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Enqueue a context onto the Job Scheduler Policy Queue
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out a low
+ * priority context from the Run Pool to allow the high priority context to be
+ * scheduled in.
+ *
+ * If the context has the privileged flag set, it will always be kept at the
+ * head of the queue.
+ *
+ * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ */
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if a context was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no contexts were available.
+ */
+bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);
+
+/**
+ * @brief Evict a context from the Job Scheduler Policy Queue
+ *
+ * This is only called as part of destroying a kbase_context.
+ *
+ * There are many reasons why this might fail during the lifetime of a
+ * context. For example, the context is in the process of being scheduled. In
+ * that case a thread doing the scheduling might have a pointer to it, but the
+ * context is neither in the Policy Queue, nor is it in the Run
+ * Pool. Crucially, neither the Policy Queue, the Run Pool, nor the Context
+ * itself is locked.
+ *
+ * Hence to find out where in the system the context is, it is important to do
+ * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if the context was evicted from the Policy Queue
+ * @return false if the context was not found in the Policy Queue
+ */
+bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Call a function on all jobs belonging to a non-queued, non-running
+ * context, optionally detaching the jobs from the context as it goes.
+ *
+ * At the time of the call, the context is guaranteed to be not currently
+ * scheduled on the Run Pool (is_scheduled == false), and not present in
+ * the Policy Queue. This is because one of the following functions was used
+ * recently on the context:
+ * - kbasep_js_policy_try_evict_ctx()
+ * - kbasep_js_policy_runpool_remove_ctx()
+ *
+ * In both cases, no subsequent call was made on the context to any of:
+ * - kbasep_js_policy_runpool_add_ctx()
+ * - kbasep_js_policy_enqueue_ctx()
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * \a detach_jobs must only be set when cancelling jobs (which occurs as part
+ * of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+ kbasep_js_policy_ctx_job_cb callback, bool detach_jobs);
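+
+/*
+ * Editor's note: an illustrative sketch, not part of the original patch. As
+ * the locks held around the callback forbid sleeping, a policy cancelling
+ * jobs can defer the heavy lifting to a workqueue. The (kbdev, katom)
+ * callback signature is assumed from the kbasep_js_policy_ctx_job_cb
+ * typedef, and katom->work plus the worker function are hypothetical:
+ *
+ *   static void example_cancel_cb(struct kbase_device *kbdev,
+ *                                 struct kbase_jd_atom *katom)
+ *   {
+ *           INIT_WORK(&katom->work, example_cancel_worker);
+ *           queue_work(system_wq, &katom->work);
+ *   }
+ */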
+
+/**
+ * @brief Add a context to the Job Scheduler Policy's Run Pool
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out low
+ * priority jobs that are currently running on the GPU.
+ *
+ * The number of contexts present in the Run Pool will never be more than the
+ * number of Address Spaces.
+ *
+ * The following guarantees are made about the state of the system when this
+ * is called:
+ * - kctx->as_nr member is valid
+ * - the context has its submit_allowed flag set
+ * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
+ * - The refcount of the context is guaranteed to be zero.
+ * - kbasep_js_kctx_info::ctx::is_scheduled will be true.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Remove a context from the Job Scheduler Policy's Run Pool
+ *
+ * The kctx->as_nr member is valid and the context has its submit_allowed flag
+ * set when this is called. The state of
+ * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
+ * valid. The refcount of the context is guaranteed to be zero.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Indicate whether a context should be removed from the Run Pool
+ * (should be scheduled out).
+ *
+ * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ */
+bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Synchronize with any timers acting upon the runpool
+ *
+ * The policy should check whether any timers it owns should be running. If
+ * they should not, the policy must cancel such timers and ensure they are not
+ * re-run by the time this function finishes.
+ *
+ * In particular, the timers must not be running when there are no more contexts
+ * on the runpool, because the GPU could be powered off soon after this call.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ */
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
+
+
+/**
+ * @brief Indicate whether a new context has a higher priority than the current context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
+ *
+ * This function must not sleep, because an IRQ spinlock might be held whilst
+ * this is called.
+ *
+ * @note There is nothing to stop the priority of \a current_ctx changing
+ * during or immediately after this function is called (because its jsctx_mutex
+ * cannot be held). Therefore, this function should only be seen as a heuristic
+ * guide as to whether \a new_ctx is higher priority than \a current_ctx
+ */
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);
+
+ /** @} *//* end group kbase_js_policy_ctx */
+
+/**
+ * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Job Info structure
+ *
+ * This structure is embedded in the struct kbase_jd_atom structure. It is used to:
+ * - track information needed for the policy to schedule the job (e.g. time
+ * used, etc.)
+ * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom can be
+ * obtained as the container of the policy job info. This allows the API to
+ * return what "the next job" should be.
+ */
+union kbasep_js_policy_job_info;
+
+/**
+ * @brief Initialize a job for use with the Job Scheduler Policy
+ *
+ * This function initializes the union kbasep_js_policy_job_info structure within the
+ * kbase_jd_atom. It will only initialize/allocate resources that are specific
+ * to the job.
+ *
+ * That is, this function makes \b no attempt to:
+ * - initialize any context/policy-wide information
+ * - enqueue the job on the policy.
+ *
+ * At some later point, the following functions must be called on the job, in this order:
+ * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
+ * - kbasep_js_policy_enqueue_job() to enqueue the job
+ *
+ * A job must only ever be initialized on the Policy once, and must be
+ * terminated on the Policy before the job is freed.
+ *
+ * The caller will not be holding any locks, and so this function will not
+ * modify any information in \a kctx or \a js_policy.
+ *
+ * @return 0 if initialization was correct.
+ */
+int kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
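+
+/*
+ * Editor's note: an illustrative sketch, not part of the original patch,
+ * showing the required ordering described above for a hypothetical caller
+ * submitting a new atom (lock names follow the documentation; error handling
+ * elided):
+ *
+ *   if (kbasep_js_policy_init_job(js_policy, kctx, katom) != 0)
+ *           return -EINVAL;      (no locks held yet; job-local state only)
+ *
+ *   mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ *   kbasep_js_policy_register_job(js_policy, kctx, katom);
+ *   mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ *
+ *   then, with the runpool locks additionally held:
+ *   kbasep_js_policy_enqueue_job(js_policy, katom);
+ */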
+
+/**
+ * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * Registers the job with the policy. This is used to track the job before it
+ * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
+ * it is used to update information under a lock that could not be updated at
+ * kbasep_js_policy_init_job() time (such as context/policy-wide data).
+ *
+ * @note This function will not fail, and hence does not allocate any
+ * resources. Any failures that could occur on registration will be caught
+ * during kbasep_js_policy_init_job() instead.
+ *
+ * A job must only ever be registered on the Policy once, and must be
+ * deregistered on the Policy on completion (whether or not that completion was
+ * success/failure).
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief De-register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * This must be used before terminating the resources associated with using a
+ * job in the Job Scheduler Policy. This function does not itself terminate any
+ * resources, at most it just updates information in the policy and context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
+ *
+ * The job returned by the policy will match at least one of the bits in the
+ * job slot's core requirements (but it may match more than one, or all @ref
+ * base_jd_core_req bits supported by the job slot).
+ *
+ * In addition, the requirements of the job returned will be a subset of those
+ * requested - the job returned will not have requirements that \a job_slot_idx
+ * cannot satisfy.
+ *
+ * The caller will submit the job to the GPU as soon as the GPU's NEXT register
+ * for the corresponding slot is empty. Of course, the GPU will then only run
+ * this new job when the currently executing job (in the jobslot's HEAD
+ * register) has completed.
+ *
+ * @return true if a job was available, and *katom_ptr points to
+ * the atom dequeued.
+ * @return false if no jobs were available among all ctxs in the Run Pool.
+ *
+ * @note base_jd_core_req is currently a u8 - beware of type conversion.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
+
+/**
+ * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
+ *
+ * This will be used to enqueue a job after its creation and also to requeue
+ * a job into the Run Pool that was previously dequeued (running). It notifies
+ * the policy that the job should be run again at some point later.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Log the result of a job: the time spent on a job/context, and whether
+ * the job failed or not.
+ *
+ * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context owning it,
+ * then this can also be used to log time on either/both the job and the
+ * containing context.
+ *
+ * The completion state of the job can be found by examining \a katom->event.event_code
+ *
+ * If the Job failed and the policy is implementing fair-sharing, then the
+ * policy must penalize the failing job/context:
+ * - At the very least, it should penalize the time taken by the amount of
+ * time spent processing the IRQ in SW. This is because a job in the NEXT slot
+ * waiting to run will be delayed until the failing job has had the IRQ
+ * cleared.
+ * - \b Optionally, the policy could apply other penalties. For example, it
+ * could apply a large penalty once the number of failing jobs crosses a
+ * threshold.
+ *
+ * The kbasep_js_device_data::runpool_mutex will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ *
+ * @param js_policy job scheduler policy
+ * @param katom job dispatch atom
+ * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
+ */
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
+
+ /** @} *//* end group kbase_js_policy_job */
+
+ /** @} *//* end group kbase_js_policy */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
new file mode 100644
index 00000000000..692460710ce
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler: Completely Fair Policy Implementation
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_policy_cfs.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#include <linux/sched/rt.h>
+#endif
+
+/**
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level, as whether dumping
+ * is enabled for a particular level is down to the integrator. However, it is
+ * used for now, as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/* Fixed point constants used for runtime weight calculations */
+#define WEIGHT_FIXEDPOINT_SHIFT 10
+#define WEIGHT_TABLE_SIZE 40
+#define WEIGHT_0_NICE (WEIGHT_TABLE_SIZE/2)
+#define WEIGHT_0_VAL (1 << WEIGHT_FIXEDPOINT_SHIFT)
+
+#define PROCESS_PRIORITY_MIN (-20)
+#define PROCESS_PRIORITY_MAX (19)
+
+/* Defines for easy asserts 'is scheduled'/'is queued'/'is neither queued nor scheduled' */
+#define KBASEP_JS_CHECKFLAG_QUEUED (1u << 0) /**< Check the queued state */
+#define KBASEP_JS_CHECKFLAG_SCHEDULED (1u << 1) /**< Check the scheduled state */
+#define KBASEP_JS_CHECKFLAG_IS_QUEUED (1u << 2) /**< Expect queued state to be set */
+#define KBASEP_JS_CHECKFLAG_IS_SCHEDULED (1u << 3) /**< Expect scheduled state to be set */
+
+enum {
+ KBASEP_JS_CHECK_NOTQUEUED = KBASEP_JS_CHECKFLAG_QUEUED,
+ KBASEP_JS_CHECK_NOTSCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED,
+ KBASEP_JS_CHECK_QUEUED = KBASEP_JS_CHECKFLAG_QUEUED | KBASEP_JS_CHECKFLAG_IS_QUEUED,
+ KBASEP_JS_CHECK_SCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED | KBASEP_JS_CHECKFLAG_IS_SCHEDULED
+};
+
+typedef u32 kbasep_js_check;
+
+/*
+ * Private Functions
+ */
+
+/* Table autogenerated using util built from: base/tools/gen_cfs_weight_of_prio/ */
+
+/* weight = 1.25 */
+static const int weight_of_priority[] = {
+ /* -20 */ 11, 14, 18, 23,
+ /* -16 */ 29, 36, 45, 56,
+ /* -12 */ 70, 88, 110, 137,
+ /* -8 */ 171, 214, 268, 335,
+ /* -4 */ 419, 524, 655, 819,
+ /* 0 */ 1024, 1280, 1600, 2000,
+ /* 4 */ 2500, 3125, 3906, 4883,
+ /* 8 */ 6104, 7630, 9538, 11923,
+ /* 12 */ 14904, 18630, 23288, 29110,
+ /* 16 */ 36388, 45485, 56856, 71070
+};
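+
+/*
+ * Editor's note: illustrative, not part of the original patch. Each entry is
+ * approximately 1.25x the previous one, with nice 0 anchored at WEIGHT_0_VAL
+ * (1024), i.e. weight(nice) ~= 1024 * 1.25^nice. For example:
+ *   nice  0 -> 1024  (factor 1.0)
+ *   nice  4 -> 2500  (factor ~2.44: runtime accrues ~2.4x faster)
+ *   nice -4 ->  419  (factor ~0.41: runtime accrues ~2.4x slower)
+ *   nice 19 -> 71070 (factor ~69.4)
+ */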
+
+/*
+ * Note: There is nothing to stop the priority of the ctx containing
+ * ctx_info changing during or immediately after this function is called
+ * (because its jsctx_mutex cannot be held during IRQ). Therefore, this
+ * function should only be seen as a heuristic guide as to the priority weight
+ * of the context.
+ */
+static u64 priority_weight(struct kbasep_js_policy_cfs_ctx *ctx_info, u64 time_us)
+{
+ u64 time_delta_us;
+ int priority;
+
+ priority = ctx_info->process_priority;
+
+ /* Adjust runtime_us using priority weight if required */
+ if (priority != 0 && time_us != 0) {
+ int clamped_priority;
+
+ /* Clamp values to min..max weights */
+ if (priority > PROCESS_PRIORITY_MAX)
+ clamped_priority = PROCESS_PRIORITY_MAX;
+ else if (priority < PROCESS_PRIORITY_MIN)
+ clamped_priority = PROCESS_PRIORITY_MIN;
+ else
+ clamped_priority = priority;
+
+ /* Fixed point multiplication */
+ time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]);
+ /* Remove fraction */
+ time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT;
+ /* Make sure the time always increases */
+ if (0 == time_delta_us)
+ time_delta_us++;
+ } else {
+ time_delta_us = time_us;
+ }
+
+ return time_delta_us;
+}
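+
+/*
+ * Editor's note: a worked example of the fixed-point step above, not part of
+ * the original patch. For time_us = 1000 and a clamped priority of +4 (table
+ * entry 2500):
+ *
+ *   time_delta_us = (1000 * 2500) >> 10 = 2500000 / 1024 = 2441
+ *
+ * so a nice +4 context is charged ~2.44x its real runtime, pushing it down
+ * the queue relative to nice 0 contexts.
+ */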
+
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+
+ return refcnt;
+}
+
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return refcnt;
+}
+#else /* KBASE_TRACE_ENABLE */
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+
+/*
+ * Non-private functions
+ */
+
+int kbasep_js_policy_init(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs *policy_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+ policy_info = &js_devdata->policy.cfs;
+
+ atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+ atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+
+ policy_info->head_runtime_us = 0;
+
+ return 0;
+}
+
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy)
+{
+ CSTD_UNUSED(js_policy);
+}
+
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbasep_js_policy_cfs *policy_info;
+ int policy;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ policy_info = &kbdev->js_data.policy.cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+ policy = current->policy;
+ if (policy == SCHED_FIFO || policy == SCHED_RR) {
+ ctx_info->process_rt_policy = true;
+ ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20;
+ } else {
+ ctx_info->process_rt_policy = false;
+ ctx_info->process_priority = (current->static_prio - MAX_RT_PRIO) - 20;
+ }
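+	/* Editor's note, not part of the original patch: both branches above
+	 * land in the nice range -20..19. For SCHED_FIFO/SCHED_RR,
+	 * rt_priority 1..99 yields ((99 - rt_priority) / 5) - 20, i.e. -20 for
+	 * rt_priority 99 down to -1 for rt_priority 1, so every realtime
+	 * process maps to a negative (high priority) nice value. For normal
+	 * tasks, static_prio 100..139 yields static_prio - 120, the usual
+	 * -20..19 nice value. */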
+
+ /* Initial runtime (relative to least-run context runtime)
+ *
+ * This uses the Policy Queue's most up-to-date head_runtime_us by using the
+ * queue mutex to issue memory barriers - also ensure future updates to
+ * head_runtime_us occur strictly after this context is initialized */
+ mutex_lock(&js_devdata->queue_mutex);
+
+	/* No need to hold the runpool_irq.lock here, because we're initializing
+ * the value, and the context is definitely not being updated in the
+ * runpool at this point. The queue_mutex ensures the memory barrier. */
+ ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
+
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ return 0;
+}
+
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+ /* No work to do */
+}
+
+/*
+ * Job Chain Management
+ */
+
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbase_context *parent_ctx;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ CSTD_UNUSED(js_policy);
+
+ parent_ctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+ ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us);
+
+ katom->time_spent_us += time_spent_us;
+}
+
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx)
+{
+ struct kbasep_js_policy_cfs_ctx *current_ctx_info;
+ struct kbasep_js_policy_cfs_ctx *new_ctx_info;
+
+ KBASE_DEBUG_ASSERT(current_ctx != NULL);
+ KBASE_DEBUG_ASSERT(new_ctx != NULL);
+ CSTD_UNUSED(js_policy);
+
+ current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+ new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	/* A realtime context always takes priority over a non-realtime one */
+	if (!current_ctx_info->process_rt_policy && new_ctx_info->process_rt_policy)
+		return true;
+
+	/* Contexts in the same scheduling class are treated as having (at
+	 * least) equal priority: only a realtime current_ctx outranks a
+	 * non-realtime new_ctx */
+	if (current_ctx_info->process_rt_policy ==
+			new_ctx_info->process_rt_policy)
+		return true;
+
+	return false;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
new file mode 100644
index 00000000000..b457d8215ab
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Completely Fair Job Scheduler Policy structure definitions
+ */
+
+#ifndef _KBASE_JS_POLICY_CFS_H_
+#define _KBASE_JS_POLICY_CFS_H_
+
+#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1)
+
+/**
+ * struct kbasep_js_policy_cfs - Data for the CFS policy
+ * @head_runtime_us: Number of microseconds the least-run context has been
+ * running for. The kbasep_js_device_data.queue_mutex must
+ * be held whilst updating this.
+ * Reads are possible without this mutex, but an older value
+ *                   might be read if no memory barriers are issued beforehand.
+ * @least_runtime_us: Number of microseconds the least-run context in the
+ * context queue has been running for.
+ * -1 if context queue is empty.
+ * @rt_least_runtime_us: Number of microseconds the least-run context in the
+ * realtime (priority) context queue has been running for.
+ * -1 if realtime context queue is empty
+ */
+struct kbasep_js_policy_cfs {
+ u64 head_runtime_us;
+ atomic64_t least_runtime_us;
+ atomic64_t rt_least_runtime_us;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_ctx - per-context data for the CFS policy
+ * @runtime_us: microseconds this context has been running for
+ * @process_rt_policy: set if the calling process's scheduling policy is
+ *                     realtime; such contexts use the priority queue.
+ *                     Non-mutable after ctx init
+ * @process_priority: calling process NICE priority, in the range -20..19
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when updating
+ * @runtime_us. Initializing will occur on context init and context enqueue
+ * (which can only occur in one thread at a time), but multi-thread access only
+ * occurs while the context is in the runpool.
+ *
+ * Reads are possible without the spinlock, but an older value might be read if
+ * no memory barriers are issued beforehand.
+ */
+struct kbasep_js_policy_cfs_ctx {
+ u64 runtime_us;
+ bool process_rt_policy;
+ int process_priority;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_job - per job information for CFS
+ * @ticks: number of ticks that this job has been executing for
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks.
+ */
+struct kbasep_js_policy_cfs_job {
+ u32 ticks;
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_linux.h b/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100644
index 00000000000..6d1e61fd41e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+ #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+ #define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100644
index 00000000000..2c7fb5a4032
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,1363 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_hwaccess_time.h>
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+/**
+ * @brief Check the zone compatibility of two regions.
+ */
+static int kbase_region_tracker_match_zone(struct kbase_va_region *reg1,
+ struct kbase_va_region *reg2)
+{
+ return ((reg1->flags & KBASE_REG_ZONE_MASK) ==
+ (reg2->flags & KBASE_REG_ZONE_MASK));
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone);
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg)
+{
+ u64 start_pfn = new_reg->start_pfn;
+ struct rb_node **link = &(kctx->reg_rbtree.rb_node);
+ struct rb_node *parent = NULL;
+
+ /* Find the right place in the tree using tree search */
+ while (*link) {
+ struct kbase_va_region *old_reg;
+
+ parent = *link;
+ old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+ /* RBTree requires no duplicate entries. */
+ KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+ if (old_reg->start_pfn > start_pfn)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ /* Put the new node there, and rebalance tree */
+ rb_link_node(&(new_reg->rblink), parent, link);
+ rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree));
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+ struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ u64 end_pfn = start_pfn + nr_pages;
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ u64 tmp_start_pfn, tmp_end_pfn;
+
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ tmp_start_pfn = reg->start_pfn;
+ tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+ /* If start is lower than this, go left. */
+ if (start_pfn < tmp_start_pfn)
+ rbnode = rbnode->rb_left;
+ /* If end is higher than this, then go right. */
+ else if (end_pfn > tmp_end_pfn)
+ rbnode = rbnode->rb_right;
+ else /* Enclosing */
+ return reg;
+ }
+
+ return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ u64 tmp_start_pfn, tmp_end_pfn;
+
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ tmp_start_pfn = reg->start_pfn;
+ tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+ /* If start is lower than this, go left. */
+ if (gpu_pfn < tmp_start_pfn)
+ rbnode = rbnode->rb_left;
+ /* If end is higher than this, then go right. */
+ else if (gpu_pfn >= tmp_end_pfn)
+ rbnode = rbnode->rb_right;
+ else /* Enclosing */
+ return reg;
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ if (reg->start_pfn > gpu_pfn)
+ rbnode = rbnode->rb_left;
+ else if (reg->start_pfn < gpu_pfn)
+ rbnode = rbnode->rb_right;
+ else
+ return reg;
+
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+	/* Note that this search is linear: with no target address in mind, it
+	   cannot benefit from the rbtree's ordering */
+ rbnode = rb_first(&(kctx->reg_rbtree));
+ while (rbnode) {
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ if ((reg->nr_pages >= nr_pages) &&
+ (reg->flags & KBASE_REG_FREE) &&
+ kbase_region_tracker_match_zone(reg, reg_reqs)) {
+ /* Check alignment */
+ u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+ if ((start_pfn >= reg->start_pfn) &&
+ (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+ ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+ return reg;
+ }
+ rbnode = rb_next(rbnode);
+ }
+
+ return NULL;
+}
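+
+/*
+ * Editor's note: a worked example of the power-of-two align-up above, not
+ * part of the original patch. With reg->start_pfn = 0x1003 and align = 0x100:
+ *
+ *   start_pfn = (0x1003 + 0xff) & ~0xff = 0x1102 & ~0xff = 0x1100
+ *
+ * i.e. the first 0x100-aligned pfn inside the candidate free region.
+ */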
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions. It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ struct rb_node *rbprev;
+ struct kbase_va_region *prev = NULL;
+ struct rb_node *rbnext;
+ struct kbase_va_region *next = NULL;
+
+ int merged_front = 0;
+ int merged_back = 0;
+ int err = 0;
+
+ /* Try to merge with the previous block first */
+ rbprev = rb_prev(&(reg->rblink));
+ if (rbprev) {
+ prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+ if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+ /* We're compatible with the previous VMA, merge with it */
+ prev->nr_pages += reg->nr_pages;
+ rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+ reg = prev;
+ merged_front = 1;
+ }
+ }
+
+ /* Try to merge with the next block second */
+ /* Note we do the lookup here as the tree may have been rebalanced. */
+ rbnext = rb_next(&(reg->rblink));
+ if (rbnext) {
+ /* We're compatible with the next VMA, merge with it */
+ next = rb_entry(rbnext, struct kbase_va_region, rblink);
+ if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+ next->start_pfn = reg->start_pfn;
+ next->nr_pages += reg->nr_pages;
+ rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+ merged_back = 1;
+ if (merged_front) {
+ /* We already merged with prev, free it */
+ kbase_free_alloced_region(reg);
+ }
+ }
+ }
+
+ /* If we failed to merge then we need to add a new block */
+ if (!(merged_front || merged_back)) {
+ /*
+ * We didn't merge anything. Add a new free
+ * placeholder and remove the original one.
+ */
+ struct kbase_va_region *free_reg;
+
+ free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+ if (!free_reg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+ }
+
+ out:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+ int err = 0;
+
+ /* Must be a free region */
+ KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+ /* start_pfn should be contained within at_reg */
+ KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+ /* at least nr_pages from start_pfn should be contained within at_reg */
+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+ new_reg->start_pfn = start_pfn;
+ new_reg->nr_pages = nr_pages;
+
+	/* New region covers the whole of at_reg, so replace it and delete the old one. */
+ if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+ rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+ kbase_free_alloced_region(at_reg);
+ }
+ /* New region replaces the start of the old one, so insert before. */
+ else if (at_reg->start_pfn == start_pfn) {
+ at_reg->start_pfn += nr_pages;
+ KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+ at_reg->nr_pages -= nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_reg);
+ }
+ /* New region replaces the end of the old one, so insert after. */
+ else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+ at_reg->nr_pages -= nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_reg);
+ }
+ /* New region splits the old one, so insert and create new */
+ else {
+ struct kbase_va_region *new_front_reg;
+
+ new_front_reg = kbase_alloc_free_region(kctx,
+ at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn,
+ at_reg->flags & KBASE_REG_ZONE_MASK);
+
+ if (new_front_reg) {
+ at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+ at_reg->start_pfn = start_pfn + nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_front_reg);
+ kbase_region_tracker_insert(kctx, new_reg);
+ } else {
+ err = -ENOMEM;
+ }
+ }
+
+ return err;
+}
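+
+/*
+ * Editor's note: a worked example of the final "split" case above, not part
+ * of the original patch. Given a free at_reg of [pfn 0x100, 0x50 pages) and a
+ * request for start_pfn = 0x120, nr_pages = 0x10, the outcome is:
+ *
+ *   new_front_reg: start 0x100, 0x20 pages  (free)
+ *   new_reg:       start 0x120, 0x10 pages  (the allocation)
+ *   at_reg:        start 0x130, 0x20 pages  (free remainder)
+ */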
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+ struct kbase_va_region *reg, u64 addr,
+ size_t nr_pages, size_t align)
+{
+ struct kbase_va_region *tmp;
+ u64 gpu_pfn = addr >> PAGE_SHIFT;
+ int err = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (!align)
+ align = 1;
+
+ /* must be a power of 2 */
+ KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+ KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+ /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+ if (gpu_pfn) {
+ struct device *dev = kctx->kbdev->dev;
+
+ KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+ tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+ if (!tmp) {
+ dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ if ((!kbase_region_tracker_match_zone(tmp, reg)) ||
+ (!(tmp->flags & KBASE_REG_FREE))) {
+ dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+ dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+ dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+ if (err) {
+ dev_warn(dev, "Failed to insert va region");
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ goto exit;
+ }
+
+ /* Path 2: Map any free address which meets the requirements. */
+ {
+ u64 start_pfn;
+
+ tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1);
+ err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages);
+ }
+
+ exit:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg)
+{
+ kctx->reg_rbtree = RB_ROOT;
+ kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* exec and custom_va_reg don't always exist */
+ if (exec_reg && custom_va_reg) {
+ kbase_region_tracker_insert(kctx, exec_reg);
+ kbase_region_tracker_insert(kctx, custom_va_reg);
+ }
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ do {
+ rbnode = rb_first(&(kctx->reg_rbtree));
+ if (rbnode) {
+ rb_erase(rbnode, &(kctx->reg_rbtree));
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ kbase_free_alloced_region(reg);
+ }
+ } while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+ struct kbase_va_region *same_va_reg;
+ struct kbase_va_region *exec_reg = NULL;
+ struct kbase_va_region *custom_va_reg = NULL;
+ size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+ u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+ u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+
+#if defined(CONFIG_ARM64)
+ same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+ same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+ if (kctx->is_compat)
+ same_va_bits = 32;
+ else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+ same_va_bits = 33;
+#endif
+
+ if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits)
+ return -EINVAL;
+
+ /* all have SAME_VA */
+ same_va_reg = kbase_alloc_free_region(kctx, 1,
+ (1ULL << (same_va_bits - PAGE_SHIFT)) - 2,
+ KBASE_REG_ZONE_SAME_VA);
+
+ if (!same_va_reg)
+ return -ENOMEM;
+
+#ifdef CONFIG_64BIT
+ /* only 32-bit clients have the other two zones */
+ if (kctx->is_compat) {
+#endif
+ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+ kbase_free_alloced_region(same_va_reg);
+ return -EINVAL;
+ }
+ /* If the current size of TMEM is out of range of the
+ * virtual address space addressable by the MMU then
+ * we should shrink it to fit
+ */
+ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+ custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+ exec_reg = kbase_alloc_free_region(kctx,
+ KBASE_REG_ZONE_EXEC_BASE,
+ KBASE_REG_ZONE_EXEC_SIZE,
+ KBASE_REG_ZONE_EXEC);
+
+ if (!exec_reg) {
+ kbase_free_alloced_region(same_va_reg);
+ return -ENOMEM;
+ }
+
+ custom_va_reg = kbase_alloc_free_region(kctx,
+ KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+ if (!custom_va_reg) {
+ kbase_free_alloced_region(same_va_reg);
+ kbase_free_alloced_region(exec_reg);
+ return -ENOMEM;
+ }
+#ifdef CONFIG_64BIT
+ }
+#endif
+
+ kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+ return 0;
+}
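+
+/*
+ * Editor's note: illustrative arithmetic, not part of the original patch.
+ * For a 64-bit client on arm64 with VA_BITS = 39 and 4K pages, the SAME_VA
+ * zone above spans pfns 1..(1 << (39 - 12)) - 2, i.e. just under 512 GiB of
+ * GPU VA; starting at pfn 1 keeps GPU address 0 permanently unmapped.
+ */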
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+ struct kbasep_mem_device *memdev;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ memdev = &kbdev->memdev;
+
+ /* Initialize memory usage */
+ atomic_set(&memdev->used_pages, 0);
+
+ /* nothing to do, zero-inited when struct kbase_device was created */
+ return 0;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+ struct kbasep_mem_device *memdev;
+ int pages;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ memdev = &kbdev->memdev;
+
+ pages = atomic_read(&memdev->used_pages);
+ if (pages != 0)
+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+ struct kbase_va_region *new_reg;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* zone argument should only contain zone related region flags */
+ KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+ KBASE_DEBUG_ASSERT(nr_pages > 0);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+ new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+ if (!new_reg)
+ return NULL;
+
+ new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+ new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+ new_reg->kctx = kctx;
+ new_reg->flags = zone | KBASE_REG_FREE;
+
+ new_reg->flags |= KBASE_REG_GROWABLE;
+
+ /* Set up default MEMATTR usage */
+ new_reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+
+ new_reg->start_pfn = start_pfn;
+ new_reg->nr_pages = nr_pages;
+
+ return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ if (!(reg->flags & KBASE_REG_FREE)) {
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+ /* To detect use-after-free in debug builds */
+ KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+ }
+ kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+ /* ASSERT that the context has a valid as_nr, which is only the case
+ * when it's scheduled in.
+ *
+ * as_nr won't change because the caller has the runpool_irq lock */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ kctx->kbdev->mmu_mode->update(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ /* ASSERT that the context has a valid as_nr, which is only the case
+ * when it's scheduled in.
+ *
+ * as_nr won't change because the caller has the runpool_irq lock */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+ kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+ int err;
+ size_t i = 0;
+ unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+#if defined(CONFIG_MALI_CACHE_COHERENT)
+ unsigned long attr =
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+#else
+ unsigned long attr =
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+#endif
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+
+ err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+ if (err)
+ return err;
+
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ u64 stride;
+ struct kbase_mem_phy_alloc *alloc;
+
+ alloc = reg->gpu_alloc;
+ stride = alloc->imported.alias.stride;
+ KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+ for (i = 0; i < alloc->imported.alias.nents; i++) {
+ if (alloc->imported.alias.aliased[i].alloc) {
+ err = kbase_mmu_insert_pages(kctx,
+ reg->start_pfn + (i * stride),
+ alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+ alloc->imported.alias.aliased[i].length,
+ reg->flags);
+ if (err)
+ goto bad_insert;
+
+ kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+ } else {
+ err = kbase_mmu_insert_single_page(kctx,
+ reg->start_pfn + i * stride,
+ kctx->aliasing_sink_page,
+ alloc->imported.alias.aliased[i].length,
+ (reg->flags & mask) | attr);
+
+ if (err)
+ goto bad_insert;
+ }
+ }
+ } else {
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags);
+ if (err)
+ goto bad_insert;
+ kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+ }
+
+ return err;
+
+bad_insert:
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ u64 stride;
+
+ stride = reg->gpu_alloc->imported.alias.stride;
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+ while (i--)
+ if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+ kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+ }
+ }
+
+ kbase_remove_va_region(kctx, reg);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ int err;
+
+ if (reg->start_pfn == 0)
+ return 0;
+
+ if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ size_t i;
+
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+ for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+ if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+ } else {
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+ }
+
+ if (err)
+ return err;
+
+ err = kbase_remove_va_region(kctx, reg);
+ return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct kbase_va_region *reg, unsigned long uaddr, size_t size)
+{
+ struct kbase_cpu_mapping *map;
+ struct list_head *pos;
+
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+
+ if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+ return NULL;
+
+ list_for_each(pos, &reg->cpu_alloc->mappings) {
+ map = list_entry(pos, struct kbase_cpu_mapping, mappings_list);
+ if (map->vm_start <= uaddr && map->vm_end >= uaddr + size)
+ return map;
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region);
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+ struct kbase_context *kctx, u64 gpu_addr,
+ unsigned long uaddr, size_t size, u64 * offset)
+{
+ struct kbase_cpu_mapping *map = NULL;
+ const struct kbase_va_region *reg;
+ int err = -EINVAL;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbase_gpu_vm_lock(kctx);
+
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+ if (reg && !(reg->flags & KBASE_REG_FREE)) {
+ map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr,
+ size);
+ if (map) {
+ *offset = (uaddr - PTR_TO_U64(map->vm_start)) +
+ (map->page_off << PAGE_SHIFT);
+ err = 0;
+ }
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return err;
+}
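+
+/*
+ * Editor's note: a worked example of the offset computation above, not part
+ * of the original patch. With map->vm_start = 0x7f0000001000,
+ * uaddr = 0x7f0000001234, map->page_off = 2 and 4K pages:
+ *
+ *   *offset = (0x7f0000001234 - 0x7f0000001000) + (2 << 12)
+ *           = 0x234 + 0x2000 = 0x2234
+ *
+ * i.e. the byte offset of uaddr from the start of the backing allocation.
+ */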
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+ phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+ off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+ struct page *cpu_page;
+
+ cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+ if (likely(cpu_pa == gpu_pa)) {
+ dma_addr_t dma_addr;
+
+ BUG_ON(!cpu_page);
+ BUG_ON(offset + size > PAGE_SIZE);
+
+ dma_addr = kbase_dma_addr(cpu_page) + offset;
+ if (sync_fn == KBASE_SYNC_TO_CPU)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+ size, DMA_BIDIRECTIONAL);
+ else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ size, DMA_BIDIRECTIONAL);
+ } else {
+ void *src = NULL;
+ void *dst = NULL;
+ struct page *gpu_page;
+
+ if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+ return;
+
+ gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+ if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+ src = ((unsigned char *)kmap(cpu_page)) + offset;
+ dst = ((unsigned char *)kmap(gpu_page)) + offset;
+ } else if (sync_fn == KBASE_SYNC_TO_CPU) {
+ dma_sync_single_for_cpu(kctx->kbdev->dev,
+ kbase_dma_addr(gpu_page) + offset,
+ size, DMA_BIDIRECTIONAL);
+ src = ((unsigned char *)kmap(gpu_page)) + offset;
+ dst = ((unsigned char *)kmap(cpu_page)) + offset;
+ }
+ memcpy(dst, src, size);
+ kunmap(gpu_page);
+ kunmap(cpu_page);
+ if (sync_fn == KBASE_SYNC_TO_DEVICE)
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(gpu_page) + offset,
+ size, DMA_BIDIRECTIONAL);
+ }
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+ struct base_syncset *set, enum kbase_sync_type sync_fn)
+{
+ int err = 0;
+ struct basep_syncset *sset = &set->basep_sset;
+ struct kbase_va_region *reg;
+ struct kbase_cpu_mapping *map;
+ unsigned long start;
+ size_t size;
+ phys_addr_t *cpu_pa;
+ phys_addr_t *gpu_pa;
+ u64 page_off, page_count;
+ u64 i;
+ off_t offset;
+
+ kbase_os_mem_map_lock(kctx);
+ kbase_gpu_vm_lock(kctx);
+
+ /* find the region where the virtual address is contained */
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+ sset->mem_handle);
+ if (!reg) {
+ dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+ sset->mem_handle);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (!(reg->flags & KBASE_REG_CPU_CACHED))
+ goto out_unlock;
+
+ start = (uintptr_t)sset->user_addr;
+ size = (size_t)sset->size;
+
+ map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
+ if (!map) {
+ dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+ start, sset->mem_handle);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ offset = start & (PAGE_SIZE - 1);
+ page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT);
+ page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ cpu_pa = kbase_get_cpu_phy_pages(reg);
+ gpu_pa = kbase_get_gpu_phy_pages(reg);
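+	/* Editor's note, not part of the original patch: for example, with
+	 * map->vm_start = 0x2000, map->page_off = 0, start = 0x2345 and
+	 * size = 0x3000 on 4K pages:
+	 *   offset     = 0x345
+	 *   page_off   = 0 + ((0x2345 - 0x2000) >> 12) = 0
+	 *   page_count = (0x3000 + 0x345 + 0xfff) >> 12 = 4
+	 * i.e. a partial first page, two whole middle pages, and a 0x345 byte
+	 * tail on the last page. */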
+
+ /* Sync first page */
+ if (cpu_pa[page_off]) {
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+ kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+ offset, sz, sync_fn);
+ }
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ /* we grow upwards, so bail on first non-present page */
+ if (!cpu_pa[page_off + i])
+ break;
+
+ kbase_sync_single(kctx, cpu_pa[page_off + i],
+ gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+ size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+ kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+ gpu_pa[page_off + page_count - 1], 0, sz,
+ sync_fn);
+ }
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ kbase_os_mem_map_unlock(kctx);
+ return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+ int err = -EINVAL;
+ struct basep_syncset *sset;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != syncset);
+
+ sset = &syncset->basep_sset;
+
+ switch (sset->type) {
+ case BASE_SYNCSET_OP_MSYNC:
+ err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE);
+ break;
+
+ case BASE_SYNCSET_OP_CSYNC:
+ err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU);
+ break;
+
+ default:
+ dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+ break;
+ }
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ int err;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ BUG_ON(!mutex_is_locked(&kctx->reg_lock));
+ err = kbase_gpu_munmap(kctx, reg);
+ if (err) {
+ dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+ goto out;
+ }
+#ifndef CONFIG_MALI_NO_MALI
+ if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+ /* Wait for GPU to flush write buffer before freeing physical pages */
+ kbase_wait_write_flush(kctx);
+ }
+#endif
+ /* This will also free the physical pages */
+ kbase_free_alloced_region(reg);
+
+ out:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+ int err = 0;
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ if (0 == gpu_addr) {
+ dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+ return -EINVAL;
+ }
+ kbase_gpu_vm_lock(kctx);
+
+ if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+ gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+ int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+ reg = kctx->pending_regions[cookie];
+ if (!reg) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* ask to unlink the cookie as we'll free it */
+
+ kctx->pending_regions[cookie] = NULL;
+ kctx->cookies |= (1UL << cookie);
+
+ kbase_free_alloced_region(reg);
+ } else {
+ /* A real GPU va */
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE)) {
+ dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+ gpu_addr);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+ /* SAME_VA must be freed through munmap */
+ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+ gpu_addr);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ err = kbase_mem_free_region(kctx, reg);
+ }
+
+ out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+ reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+ /* all memory is now growable */
+ reg->flags |= KBASE_REG_GROWABLE;
+
+ if (flags & BASE_MEM_GROW_ON_GPF)
+ reg->flags |= KBASE_REG_PF_GROW;
+
+ if (flags & BASE_MEM_PROT_CPU_WR)
+ reg->flags |= KBASE_REG_CPU_WR;
+
+ if (flags & BASE_MEM_PROT_CPU_RD)
+ reg->flags |= KBASE_REG_CPU_RD;
+
+ if (flags & BASE_MEM_PROT_GPU_WR)
+ reg->flags |= KBASE_REG_GPU_WR;
+
+ if (flags & BASE_MEM_PROT_GPU_RD)
+ reg->flags |= KBASE_REG_GPU_RD;
+
+ if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+ reg->flags |= KBASE_REG_GPU_NX;
+
+ if (flags & BASE_MEM_COHERENT_SYSTEM ||
+ flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+ reg->flags |= KBASE_REG_SHARE_BOTH;
+ else if (flags & BASE_MEM_COHERENT_LOCAL)
+ reg->flags |= KBASE_REG_SHARE_IN;
+}
+KBASE_EXPORT_TEST_API(kbase_update_region_flags);
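+
+/*
+ * Example translation (illustrative, derived from the code above):
+ * BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR
+ * yields KBASE_REG_CPU_RD | KBASE_REG_GPU_RD | KBASE_REG_GPU_WR |
+ * KBASE_REG_GPU_NX | KBASE_REG_GROWABLE, plus whatever cache policy bits
+ * kbase_cache_enabled() selects.
+ */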
+
+int kbase_alloc_phy_pages_helper(
+ struct kbase_mem_phy_alloc *alloc,
+ size_t nr_pages_requested)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+ if (nr_pages_requested == 0)
+ goto done; /*nothing to do*/
+
+ kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+ kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+ /* Increase mm counters before we allocate pages so that this
+ * allocation is visible to the OOM killer */
+ kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+ if (kbase_mem_allocator_alloc(&alloc->imported.kctx->osalloc,
+ nr_pages_requested, alloc->pages + alloc->nents) != 0)
+ goto no_alloc;
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_pagesalloc((s64)nr_pages_requested);
+#endif
+
+ alloc->nents += nr_pages_requested;
+done:
+ return 0;
+
+no_alloc:
+ kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+ return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+ struct kbase_mem_phy_alloc *alloc,
+ size_t nr_pages_to_free)
+{
+ bool syncback;
+ phys_addr_t *start_free;
+
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+ /* early out if nothing to do */
+ if (0 == nr_pages_to_free)
+ return 0;
+
+ start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+ kbase_mem_allocator_free(&alloc->imported.kctx->osalloc,
+ nr_pages_to_free,
+ start_free,
+ syncback);
+
+ alloc->nents -= nr_pages_to_free;
+ kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free);
+ kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages);
+ kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_pagesalloc(-(s64)nr_pages_to_free);
+#endif
+
+ return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+ struct kbase_mem_phy_alloc *alloc;
+
+ alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+ switch (alloc->type) {
+ case KBASE_MEM_TYPE_NATIVE: {
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ kbase_free_phy_pages_helper(alloc, alloc->nents);
+ break;
+ }
+ case KBASE_MEM_TYPE_ALIAS: {
+ /* just call put on the underlying phy allocs */
+ size_t i;
+ struct kbase_aliased *aliased;
+
+ aliased = alloc->imported.alias.aliased;
+ if (aliased) {
+ for (i = 0; i < alloc->imported.alias.nents; i++)
+ if (aliased[i].alloc)
+ kbase_mem_phy_alloc_put(aliased[i].alloc);
+ vfree(aliased);
+ }
+ break;
+ }
+ case KBASE_MEM_TYPE_RAW:
+ /* raw pages, external cleanup */
+ break;
+#ifdef CONFIG_UMP
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ ump_dd_release(alloc->imported.ump_handle);
+ break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case KBASE_MEM_TYPE_IMPORTED_UMM:
+ dma_buf_detach(alloc->imported.umm.dma_buf,
+ alloc->imported.umm.dma_attachment);
+ dma_buf_put(alloc->imported.umm.dma_buf);
+ break;
+#endif
+ case KBASE_MEM_TYPE_TB:{
+ void *tb;
+
+ tb = alloc->imported.kctx->jctx.tb;
+ kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+ vfree(tb);
+ break;
+ }
+ default:
+		WARN(1, "Unexpected free of type %d\n", alloc->type);
+ break;
+ }
+
+ /* Free based on allocation type */
+ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+ vfree(alloc);
+ else
+ kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT(vsize > 0);
+
+ /* validate user provided arguments */
+ if (size > vsize || vsize > reg->nr_pages)
+ goto out_term;
+
+ /* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1 and each entry is 8 bytes, the
+	 * computed size wraps to 8 bytes and the allocation wouldn't fail.
+ */
+ if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+ goto out_term;
+
+ KBASE_DEBUG_ASSERT(0 != vsize);
+
+ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+ goto out_term;
+
+ if (reg->cpu_alloc != reg->gpu_alloc) {
+ if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+ goto out_rollback;
+ }
+
+ return 0;
+
+out_rollback:
+ kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+ return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+ /* Only known input flags should be set. */
+ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+ return false;
+
+ /* At least one flag should be set */
+ if (flags == 0)
+ return false;
+
+ /* Either the GPU or CPU must be reading from the allocated memory */
+ if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+ return false;
+
+ /* Either the GPU or CPU must be writing to the allocated memory */
+ if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+ return false;
+
+ /* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+ return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	 * reason for allocating. */
+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+ return false;
+
+ return true;
+}
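+
+/*
+ * Examples of how the checks above combine (illustrative, derived directly
+ * from this function):
+ *
+ *	BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_RD -> true
+ *	BASE_MEM_PROT_GPU_EX | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_RD
+ *	    -> false (executable memory may not be GPU writable)
+ *	BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR
+ *	    -> false (no GPU access, so there is no reason to allocate)
+ */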
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+ /* Only known input flags should be set. */
+ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+ return false;
+
+ /* At least one flag should be set */
+ if (flags == 0)
+ return false;
+
+ /* Imported memory cannot be GPU executable */
+ if (flags & BASE_MEM_PROT_GPU_EX)
+ return false;
+
+ /* Imported memory cannot grow on page fault */
+ if (flags & BASE_MEM_GROW_ON_GPF)
+ return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	 * reason for importing. */
+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+ return false;
+
+ /* Secure memory cannot be read by the CPU */
+ if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+ return false;
+
+ return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100644
index 00000000000..d8a03526a64
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,774 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif /* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+ * The MMU reads in 8 page table entries from memory at a time. If we take more
+ * than one page fault within the same group of 8 pages and update the page
+ * tables accordingly, the MMU does not re-read the entries from memory for the
+ * subsequent updates and generates duplicate page faults, because the page
+ * table information it is using is no longer valid. */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+ struct list_head mappings_list;
+ struct kbase_mem_phy_alloc *alloc;
+ struct kbase_context *kctx;
+ struct kbase_va_region *region;
+ pgoff_t page_off;
+ int count;
+ unsigned long vm_start;
+ unsigned long vm_end;
+};
+
+enum kbase_memory_type {
+ KBASE_MEM_TYPE_NATIVE,
+ KBASE_MEM_TYPE_IMPORTED_UMP,
+ KBASE_MEM_TYPE_IMPORTED_UMM,
+ KBASE_MEM_TYPE_ALIAS,
+ KBASE_MEM_TYPE_TB,
+ KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+ struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+ u64 offset; /* in pages */
+ u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+ */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+ struct kref kref; /* number of users of this alloc */
+ atomic_t gpu_mappings;
+ size_t nents; /* 0..N */
+ phys_addr_t *pages; /* N elements, only 0..nents are valid */
+
+ /* kbase_cpu_mappings */
+ struct list_head mappings;
+
+ /* type of buffer */
+ enum kbase_memory_type type;
+
+ unsigned long properties;
+
+ /* member in union valid based on @a type */
+ union {
+#ifdef CONFIG_UMP
+ ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+ struct {
+ struct dma_buf *dma_buf;
+ struct dma_buf_attachment *dma_attachment;
+ unsigned int current_mapping_usage_count;
+ struct sg_table *sgt;
+ } umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+ struct {
+ u64 stride;
+ size_t nents;
+ struct kbase_aliased *aliased;
+ } alias;
+ /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+ struct kbase_context *kctx;
+ } imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ /* we only track mappings of NATIVE buffers */
+ if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+ atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ /* we only track mappings of NATIVE buffers */
+ if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+ if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+ pr_err("Mismatched %s:\n", __func__);
+ dump_stack();
+ }
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+ kref_get(&alloc->kref);
+ return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+ kref_put(&alloc->kref, kbase_mem_kref_free);
+ return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+ struct rb_node rblink;
+ struct list_head link;
+
+ struct kbase_context *kctx; /* Backlink to base context */
+
+ u64 start_pfn; /* The PFN in GPU space */
+ size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED (1ul << 5)
+
+#define KBASE_REG_GROWABLE (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK (3ul << 11)
+#define KBASE_REG_ZONE(x) (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD (1ul<<14)
+
+/* Aligned for GPU EX in SAME_VA */
+#define KBASE_REG_ALIGNED (1ul<<15)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE (1ul << 19)
+
+#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) /* Dedicated 16MB region for shader code */
+#define KBASE_REG_ZONE_EXEC_BASE ((1ULL << 32) >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
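+
+/*
+ * Worked example with 4 KB pages (illustrative): KBASE_REG_ZONE_EXEC_BASE
+ * is PFN 0x100000 (VA 4 GB) and spans 4096 pages (16 MB), so
+ * KBASE_REG_ZONE_CUSTOM_VA_BASE is PFN 0x101000 (VA 4 GB + 16 MB) and the
+ * custom VA zone runs up to PFN 2^32 (VA 2^44).
+ */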
+
+ unsigned long flags;
+
+ size_t extent; /* nr of pages alloc'd on PF */
+
+ struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+ struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+ /* non-NULL if this memory object is a kds_resource */
+ struct kds_resource *kds_res;
+
+};
+
+/* Common functions */
+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+ return reg->cpu_alloc->pages;
+}
+
+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+ return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ /* if no alloc object the backed size naturally is 0 */
+ if (!reg->cpu_alloc)
+ return 0;
+
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+ return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+ struct kbase_mem_phy_alloc *alloc;
+ const size_t alloc_size =
+ sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+
+ /* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */
+ if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+ / sizeof(*alloc->pages)))
+ return ERR_PTR(-ENOMEM);
+
+ /* Allocate based on the size to reduce internal fragmentation of vmem */
+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+ alloc = vzalloc(alloc_size);
+ else
+ alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+ if (!alloc)
+ return ERR_PTR(-ENOMEM);
+
+ /* Store allocation method */
+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+ alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+ kref_init(&alloc->kref);
+ atomic_set(&alloc->gpu_mappings, 0);
+ alloc->nents = 0;
+ alloc->pages = (void *)(alloc + 1);
+ INIT_LIST_HEAD(&alloc->mappings);
+ alloc->type = type;
+
+ return alloc;
+}
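+
+/*
+ * Illustrative usage sketch (not part of the original driver): create a
+ * tracking object and drop the reference when done. The kref release
+ * handler, kbase_mem_kref_free(), picks vfree()/kfree() to match how the
+ * object was allocated.
+ *
+ *	struct kbase_mem_phy_alloc *a;
+ *
+ *	a = kbase_alloc_create(nr_pages, KBASE_MEM_TYPE_RAW);
+ *	if (IS_ERR(a))
+ *		return PTR_ERR(a);
+ *	...
+ *	kbase_mem_phy_alloc_put(a);
+ */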
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+ struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+ reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+ KBASE_MEM_TYPE_NATIVE);
+ if (IS_ERR(reg->cpu_alloc))
+ return PTR_ERR(reg->cpu_alloc);
+ else if (!reg->cpu_alloc)
+ return -ENOMEM;
+ reg->cpu_alloc->imported.kctx = kctx;
+ if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) {
+ reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+ KBASE_MEM_TYPE_NATIVE);
+ reg->gpu_alloc->imported.kctx = kctx;
+ } else {
+ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ }
+
+ reg->flags &= ~KBASE_REG_FREE;
+ return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+ int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+ return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+ int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+ return new_val;
+}
+
+/**
+ * @brief Initialize low-level memory access for a kbase device
+ *
+ * Performs any low-level setup needed for a kbase device to access memory on
+ * the device.
+ *
+ * @param kbdev kbase device to initialize memory access for
+ * @return 0 on success, Linux error code on failure
+ */
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev);
+
+
+/**
+ * @brief Terminate low-level memory access for a kbase device
+ *
+ * Perform any low-level cleanup needed to clean
+ * after @ref kbase_mem_lowlevel_init
+ *
+ * @param kbdev kbase device to clean up for
+ */
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize an OS based memory allocator.
+ *
+ * Initializes an allocator.
+ * Must be called before any allocation is attempted.
+ * \a kbase_mem_allocator_alloc and \a kbase_mem_allocator_free are used
+ * to allocate and free memory.
+ * \a kbase_mem_allocator_term must be called to clean up the allocator.
+ * All memory obtained via \a kbase_mem_allocator_alloc must have been freed
+ * with \a kbase_mem_allocator_free before \a kbase_mem_allocator_term is called.
+ *
+ * @param allocator Allocator object to initialize
+ * @param max_size Maximum number of pages to keep on the freelist.
+ * @param kbdev The kbase device this allocator is used with
+ * @return 0 on success, an error code indicating what failed on
+ * error.
+ */
+int kbase_mem_allocator_init(struct kbase_mem_allocator *allocator,
+ unsigned int max_size,
+ struct kbase_device *kbdev);
+
+/**
+ * @brief Allocate memory via an OS based memory allocator.
+ *
+ * @param[in] allocator Allocator to obtain the memory from
+ * @param nr_pages Number of pages to allocate
+ * @param[out] pages Pointer to an array where the physical address of the allocated pages will be stored
+ * @return 0 if the pages were allocated, an error code indicating what failed on error
+ */
+int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages);
+
+/**
+ * @brief Free memory obtained for an OS based memory allocator.
+ *
+ * @param[in] allocator Allocator to free the memory back to
+ * @param nr_pages Number of pages to free
+ * @param[in] pages      Pointer to an array holding the physical addresses of the pages to free.
+ * @param[in] sync_back  true if the memory should be synced back to the CPU first
+ */
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back);
+
+/**
+ * @brief Terminate an OS based memory allocator.
+ *
+ * Frees all cached allocations and cleans up internal state.
+ * All allocated pages must have been freed with \a kbase_mem_allocator_free before
+ * this function is called.
+ *
+ * @param[in] allocator Allocator to terminate
+ */
+void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator);
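+
+/*
+ * Illustrative allocator lifecycle (not part of the original driver),
+ * following the rules documented above; the sizes are arbitrary:
+ *
+ *	struct kbase_mem_allocator alloc_obj;
+ *	phys_addr_t pages[16];
+ *
+ *	if (kbase_mem_allocator_init(&alloc_obj, 512, kbdev))
+ *		return -EINVAL;
+ *	if (kbase_mem_allocator_alloc(&alloc_obj, 16, pages) == 0)
+ *		kbase_mem_allocator_free(&alloc_obj, 16, pages, false);
+ *	kbase_mem_allocator_term(&alloc_obj);
+ */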
+
+
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t *phys, size_t nr,
+ unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t phys, size_t nr,
+ unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space.
+ *
+ * @kbdev: Kbase device
+ * @as_nr: Number of the address space for which the MMU
+ * should be set in unmapped mode.
+ *
+ * The caller must hold kbdev->as[as_nr].transaction_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in] kctx The kbase context to dump
+ * @param[in] nr_pages The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
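+
+/*
+ * Illustrative usage sketch (not part of the original driver), respecting
+ * the locking and ownership rules documented above:
+ *
+ *	void *buf;
+ *
+ *	kbase_gpu_vm_lock(kctx);
+ *	buf = kbase_mmu_dump(kctx, 16);
+ *	kbase_gpu_vm_unlock(kctx);
+ *	if (buf) {
+ *		... inspect the dump ...
+ *		vfree(buf);
+ *	}
+ */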
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa,
+ phys_addr_t gpu_pa, off_t offset, size_t size,
+ enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/**
+ * Set attributes for imported tmem region
+ *
+ * This function sets (extends with) requested attributes for given region
+ * of imported external memory
+ *
+ * @param[in] kctx The kbase context which the tmem belongs to
+ * @param[in] gpu_addr The base address of the tmem region
+ * @param[in] attributes The attributes of tmem region to be set
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_tmem_set_attributes(struct kbase_context *kctx, u64 gpu_addr, u32 attributes);
+
+/**
+ * Get attributes of imported tmem region
+ *
+ * This function retrieves the attributes of imported external memory
+ *
+ * @param[in] kctx The kbase context which the tmem belongs to
+ * @param[in] gpu_addr The base address of the tmem region
+ * @param[out] attributes The actual attributes of tmem region
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_tmem_get_attributes(struct kbase_context *kctx, u64 gpu_addr, u32 *const attributes);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to update the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+ kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+ kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * @brief Find the offset of the CPU mapping of a memory allocation containing
+ * a given address range
+ *
+ * Searches for a CPU mapping of any part of the region starting at @p gpu_addr
+ * that fully encloses the CPU virtual address range specified by @p uaddr and
+ * @p size. Returns a failure indication if only part of the address range lies
+ * within a CPU mapping, or the address range lies within a CPU mapping of a
+ * different region.
+ *
+ * @param[in,out] kctx The kernel base context used for the allocation.
+ * @param[in] gpu_addr GPU address of the start of the allocated region
+ * within which to search.
+ * @param[in] uaddr Start of the CPU virtual address range.
+ * @param[in] size Size of the CPU virtual address range (in bytes).
+ * @param[out] offset The offset from the start of the allocation to the
+ * specified CPU virtual address.
+ *
+ * @return 0 if offset was obtained successfully. Error code
+ * otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx,
+ u64 gpu_addr,
+ unsigned long uaddr,
+ size_t size,
+ u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Allocates physical pages.
+ *
+ * Allocates \a nr_pages_requested pages and updates the alloc object.
+ *
+ * @param[in] alloc              allocation object to add pages to
+ * @param[in] nr_pages_requested number of physical pages to allocate
+ *
+ * @return 0 if all pages have been successfully allocated. Error code otherwise.
+ */
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+ * @brief Free physical pages.
+ *
+ * Frees \a nr_pages_to_free pages and updates the alloc object.
+ *
+ * @param[in] alloc            allocation object to free pages from
+ * @param[in] nr_pages_to_free number of physical pages to free
+ *
+ * @return 0 on success.
+ */
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+ SetPagePrivate(p);
+ if (sizeof(dma_addr_t) > sizeof(p->private)) {
+ /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+ * use the fact that we only store DMA addresses of whole pages,
+ * so the low bits should be zero */
+ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+ set_page_private(p, dma_addr >> PAGE_SHIFT);
+ } else {
+ set_page_private(p, dma_addr);
+ }
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+ if (sizeof(dma_addr_t) > sizeof(p->private))
+ return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+ return (dma_addr_t)page_private(p);
+}
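+
+/*
+ * Illustrative round trip (not part of the original driver): because only
+ * page-aligned DMA addresses are ever stored, shifting by PAGE_SHIFT on
+ * 32-bit LPAE loses no information.
+ *
+ *	kbase_set_dma_addr(p, dma_addr);        // dma_addr is page aligned
+ *	WARN_ON(kbase_dma_addr(p) != dma_addr); // always holds
+ */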
+
+/**
+ * @brief Process a bus or page fault.
+ *
+ * This function will process a fault on a specific address space.
+ *
+ * @param[in] kbdev The @ref kbase_device the fault happened on
+ * @param[in] kctx  The @ref kbase_context for the faulting address space,
+ *                  if one was found
+ * @param[in] as    The address space that has the fault
+ */
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+ struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir: DMA data direction
+ */
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir: DMA data direction
+ */
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir);
+
+#endif /* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c
new file mode 100644
index 00000000000..642fa44bf35
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c
@@ -0,0 +1,292 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_alloc.c
+ * Base kernel memory APIs
+ */
+#include <mali_kbase.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/mempool.h>
+#include <linux/mm.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev)
+{
+}
+
+static unsigned long kbase_mem_allocator_count(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+ return atomic_read(&allocator->free_list_size);
+}
+
+static unsigned long kbase_mem_allocator_scan(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+ int i;
+ int freed;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+
+ might_sleep();
+
+ mutex_lock(&allocator->free_list_lock);
+ i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan);
+ freed = i;
+
+ atomic_sub(i, &allocator->free_list_size);
+
+ while (i--) {
+ struct page *p;
+
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head,
+ struct page, lru);
+ list_del(&p->lru);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ return atomic_read(&allocator->free_list_size);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_allocator_shrink(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ if (sc->nr_to_scan == 0)
+ return kbase_mem_allocator_count(s, sc);
+
+ return kbase_mem_allocator_scan(s, sc);
+}
+#endif
+
+int kbase_mem_allocator_init(struct kbase_mem_allocator *const allocator,
+ unsigned int max_size, struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ INIT_LIST_HEAD(&allocator->free_list_head);
+
+ allocator->kbdev = kbdev;
+
+ mutex_init(&allocator->free_list_lock);
+
+ atomic_set(&allocator->free_list_size, 0);
+
+ allocator->free_list_max_size = max_size;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+ allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink;
+#else
+ allocator->free_list_reclaimer.count_objects =
+ kbase_mem_allocator_count;
+ allocator->free_list_reclaimer.scan_objects = kbase_mem_allocator_scan;
+#endif
+ allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS;
+ /* Kernel versions prior to 3.1 :
+ * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+ allocator->free_list_reclaimer.batch = 0;
+#endif
+
+ register_shrinker(&allocator->free_list_reclaimer);
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_init);
+
+void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ unregister_shrinker(&allocator->free_list_reclaimer);
+ mutex_lock(&allocator->free_list_lock);
+ while (!list_empty(&allocator->free_list_head)) {
+ struct page *p;
+
+ p = list_first_entry(&allocator->free_list_head, struct page,
+ lru);
+ list_del(&p->lru);
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+ atomic_set(&allocator->free_list_size, 0);
+ mutex_unlock(&allocator->free_list_lock);
+ mutex_destroy(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_term);
+
+int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages)
+{
+ struct page *p;
+ int i;
+ int num_from_free_list;
+ struct list_head from_free_list = LIST_HEAD_INIT(from_free_list);
+ gfp_t gfp;
+
+ might_sleep();
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ /* take from the free list first */
+ mutex_lock(&allocator->free_list_lock);
+ num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size));
+ atomic_sub(num_from_free_list, &allocator->free_list_size);
+ for (i = 0; i < num_from_free_list; i++) {
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_move(&p->lru, &from_free_list);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ i = 0;
+
+	/* Allocate as many pages as possible from the pool of already allocated pages. */
+ list_for_each_entry(p, &from_free_list, lru) {
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ i++;
+ }
+
+ if (i == nr_pages)
+ return 0;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ gfp = GFP_USER | __GFP_ZERO;
+#else
+ gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+ if (current->flags & PF_KTHREAD) {
+ /* Don't trigger OOM killer from kernel threads, e.g. when
+ * growing memory on GPU page fault */
+ gfp |= __GFP_NORETRY;
+ }
+
+ /* If not all pages were sourced from the pool, request new ones. */
+ for (; i < nr_pages; i++) {
+ dma_addr_t dma_addr;
+
+ p = alloc_page(gfp);
+ if (NULL == p)
+ goto err_out_roll_back;
+
+ dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) {
+ __free_page(p);
+ goto err_out_roll_back;
+ }
+
+ kbase_set_dma_addr(p, dma_addr);
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ BUG_ON(dma_addr != pages[i]);
+ }
+
+ return 0;
+
+err_out_roll_back:
+ while (i--) {
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+
+ return -ENOMEM;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_alloc);
+
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back)
+{
+ int i = 0;
+ int page_count = 0;
+ int tofree;
+
+ LIST_HEAD(new_free_list_items);
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ might_sleep();
+
+	/* Start by freeing the overspill.
+	 * As we do this outside of the lock we might free too many pages
+	 * or leave too many on the free list, but max_size is just a ballpark
+	 * figure, so this is fine provided tofree doesn't exceed nr_pages.
+	 */
+ tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0);
+ tofree = nr_pages - MIN(tofree, nr_pages);
+ for (; i < tofree; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ pages[i] = (phys_addr_t)0;
+ __free_page(p);
+ }
+ }
+
+ for (; i < nr_pages; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ /* Sync back the memory to ensure that future cache
+ * invalidations don't trample on memory.
+ */
+ if (sync_back)
+ dma_sync_single_for_cpu(allocator->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ list_add(&p->lru, &new_free_list_items);
+ page_count++;
+ }
+ }
+ mutex_lock(&allocator->free_list_lock);
+ list_splice(&new_free_list_items, &allocator->free_list_head);
+ atomic_add(page_count, &allocator->free_list_size);
+ mutex_unlock(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_free);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h
new file mode 100644
index 00000000000..ab7b25953f2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h
@@ -0,0 +1,32 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+/* raw page handling */
+struct kbase_mem_allocator {
+ struct kbase_device *kbdev;
+ atomic_t free_list_size;
+ unsigned int free_list_max_size;
+ struct mutex free_list_lock;
+ struct list_head free_list_head;
+ struct shrinker free_list_reclaimer;
+};
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c
new file mode 100644
index 00000000000..df9761b1e2e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c
@@ -0,0 +1,402 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_alloc_carveout.c
+ * Base kernel memory APIs
+ */
+#include <mali_kbase.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/mempool.h>
+#include <linux/mm.h>
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/memblock.h>
+#include <linux/seq_file.h>
+#include <linux/version.h>
+
+
+/* This code does not support having multiple kbase devices, or rmmod/insmod */
+
+static unsigned long kbase_carveout_start_pfn = ~0UL;
+static unsigned long kbase_carveout_end_pfn;
+static LIST_HEAD(kbase_carveout_free_list);
+static DEFINE_MUTEX(kbase_carveout_free_list_lock);
+static unsigned int kbase_carveout_pages;
+static atomic_t kbase_carveout_used_pages;
+static atomic_t kbase_carveout_system_pages;
+
+static struct page *kbase_carveout_get_page(struct kbase_mem_allocator *allocator)
+{
+ struct page *p = NULL;
+ gfp_t gfp;
+
+ mutex_lock(&kbase_carveout_free_list_lock);
+ if (!list_empty(&kbase_carveout_free_list)) {
+ p = list_first_entry(&kbase_carveout_free_list, struct page, lru);
+ list_del(&p->lru);
+ atomic_inc(&kbase_carveout_used_pages);
+ }
+ mutex_unlock(&kbase_carveout_free_list_lock);
+
+ if (!p) {
+ dma_addr_t dma_addr;
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ gfp = GFP_USER | __GFP_ZERO;
+#else
+ gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+ if (current->flags & PF_KTHREAD) {
+ /* Don't trigger OOM killer from kernel threads, e.g.
+ * when growing memory on GPU page fault */
+ gfp |= __GFP_NORETRY;
+ }
+
+ p = alloc_page(gfp);
+ if (!p)
+ goto out;
+
+ dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) {
+ __free_page(p);
+ p = NULL;
+ goto out;
+ }
+
+ kbase_set_dma_addr(p, dma_addr);
+ BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p)));
+ atomic_inc(&kbase_carveout_system_pages);
+ }
+out:
+ return p;
+}
+
+static void kbase_carveout_put_page(struct page *p,
+ struct kbase_mem_allocator *allocator)
+{
+ if (page_to_pfn(p) >= kbase_carveout_start_pfn &&
+ page_to_pfn(p) <= kbase_carveout_end_pfn) {
+ mutex_lock(&kbase_carveout_free_list_lock);
+ list_add(&p->lru, &kbase_carveout_free_list);
+ atomic_dec(&kbase_carveout_used_pages);
+ mutex_unlock(&kbase_carveout_free_list_lock);
+ } else {
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ atomic_dec(&kbase_carveout_system_pages);
+ }
+}
+
+static int kbase_carveout_seq_show(struct seq_file *s, void *data)
+{
+ seq_printf(s, "carveout pages: %u\n", kbase_carveout_pages);
+ seq_printf(s, "used carveout pages: %u\n",
+ atomic_read(&kbase_carveout_used_pages));
+ seq_printf(s, "used system pages: %u\n",
+ atomic_read(&kbase_carveout_system_pages));
+ return 0;
+}
+
+static int kbasep_carveout_debugfs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, kbase_carveout_seq_show, NULL);
+}
+
+static const struct file_operations kbase_carveout_debugfs_fops = {
+ .open = kbasep_carveout_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static int kbase_carveout_init(struct device *dev)
+{
+ unsigned long pfn;
+ static int once;
+
+ mutex_lock(&kbase_carveout_free_list_lock);
+ BUG_ON(once);
+ once = 1;
+
+ for (pfn = kbase_carveout_start_pfn; pfn <= kbase_carveout_end_pfn; pfn++) {
+ struct page *p = pfn_to_page(pfn);
+ dma_addr_t dma_addr;
+
+ dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev, dma_addr))
+ goto out_rollback;
+
+ kbase_set_dma_addr(p, dma_addr);
+ BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p)));
+
+ list_add_tail(&p->lru, &kbase_carveout_free_list);
+ }
+
+ mutex_unlock(&kbase_carveout_free_list_lock);
+
+ debugfs_create_file("kbase_carveout", S_IRUGO, NULL, NULL,
+ &kbase_carveout_debugfs_fops);
+
+ return 0;
+
+out_rollback:
+ while (!list_empty(&kbase_carveout_free_list)) {
+ struct page *p;
+
+ p = list_first_entry(&kbase_carveout_free_list, struct page, lru);
+ dma_unmap_page(dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ list_del(&p->lru);
+ }
+
+ mutex_unlock(&kbase_carveout_free_list_lock);
+ return -ENOMEM;
+}
+
+int __init kbase_carveout_mem_reserve(phys_addr_t size)
+{
+ phys_addr_t mem;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) \
+ && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ACCESSIBLE);
+#else
+ mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+#endif
+ if (mem == 0) {
+		pr_warn("%s: Failed to allocate %llu bytes for kbase carveout\n",
+				__func__, (unsigned long long)size);
+ return -ENOMEM;
+ }
+
+ kbase_carveout_start_pfn = PFN_DOWN(mem);
+ kbase_carveout_end_pfn = PFN_DOWN(mem + size - 1);
+ kbase_carveout_pages = kbase_carveout_end_pfn - kbase_carveout_start_pfn + 1;
+
+ return 0;
+}
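+
+/*
+ * Illustrative usage (not part of the original driver): as an __init
+ * function built on memblock, this must run during early boot, before the
+ * page allocator takes over, e.g. from platform setup code. The size here
+ * is an arbitrary example reserving 256 MB:
+ *
+ *	kbase_carveout_mem_reserve(256 << 20);
+ */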
+
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev)
+{
+ return kbase_carveout_init(kbdev->dev);
+}
+
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev)
+{
+}
+
+static int kbase_mem_allocator_shrink(struct shrinker *s, struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+ int i;
+ int freed;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+
+ if (sc->nr_to_scan == 0)
+ return atomic_read(&allocator->free_list_size);
+
+ might_sleep();
+
+ mutex_lock(&allocator->free_list_lock);
+ i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan);
+ freed = i;
+
+ atomic_sub(i, &allocator->free_list_size);
+
+ while (i--) {
+ struct page *p;
+
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_del(&p->lru);
+ kbase_carveout_put_page(p, allocator);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ return atomic_read(&allocator->free_list_size);
+}
+
+int kbase_mem_allocator_init(struct kbase_mem_allocator * const allocator,
+ unsigned int max_size, struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ INIT_LIST_HEAD(&allocator->free_list_head);
+
+ allocator->kbdev = kbdev;
+
+ mutex_init(&allocator->free_list_lock);
+
+ atomic_set(&allocator->free_list_size, 0);
+
+ allocator->free_list_max_size = max_size;
+ allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink;
+ allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) /* Kernel versions prior to 3.1 : struct shrinker does not define batch */
+ allocator->free_list_reclaimer.batch = 0;
+#endif
+
+ register_shrinker(&allocator->free_list_reclaimer);
+
+ return 0;
+}
+
+void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ unregister_shrinker(&allocator->free_list_reclaimer);
+
+ while (!list_empty(&allocator->free_list_head)) {
+ struct page *p;
+
+ p = list_first_entry(&allocator->free_list_head, struct page,
+ lru);
+ list_del(&p->lru);
+
+ kbase_carveout_put_page(p, allocator);
+ }
+ mutex_destroy(&allocator->free_list_lock);
+}
+
+
+int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages)
+{
+ struct page *p;
+ int i;
+ int num_from_free_list;
+ struct list_head from_free_list = LIST_HEAD_INIT(from_free_list);
+
+ might_sleep();
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ /* take from the free list first */
+ mutex_lock(&allocator->free_list_lock);
+ num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size));
+ atomic_sub(num_from_free_list, &allocator->free_list_size);
+ for (i = 0; i < num_from_free_list; i++) {
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_move(&p->lru, &from_free_list);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ i = 0;
+
+	/* Allocate as many pages as possible from the pool of already allocated pages. */
+ list_for_each_entry(p, &from_free_list, lru) {
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ i++;
+ }
+
+ if (i == nr_pages)
+ return 0;
+
+ /* If not all pages were sourced from the pool, request new ones. */
+ for (; i < nr_pages; i++) {
+ p = kbase_carveout_get_page(allocator);
+ if (NULL == p)
+ goto err_out_roll_back;
+
+ kbase_sync_single_for_device(allocator->kbdev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ }
+
+ return 0;
+
+err_out_roll_back:
+ while (i--) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ kbase_carveout_put_page(p, allocator);
+ }
+
+ return -ENOMEM;
+}
+
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back)
+{
+ int i = 0;
+ int page_count = 0;
+ int tofree;
+
+ LIST_HEAD(new_free_list_items);
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ might_sleep();
+
+	/* Start by freeing the overspill.
+	 * As we do this outside of the lock we might free too many pages
+	 * or leave too many on the free list, but max_size is just a ballpark
+	 * figure, so this is fine provided tofree doesn't exceed nr_pages.
+	 */
+ tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0);
+ tofree = nr_pages - MIN(tofree, nr_pages);
+ for (; i < tofree; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ kbase_carveout_put_page(p, allocator);
+ }
+ }
+
+ for (; i < nr_pages; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ /* Sync back the memory to ensure that future cache
+ * invalidations don't trample on memory.
+ */
+ if (sync_back)
+ kbase_sync_single_for_cpu(allocator->kbdev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ list_add(&p->lru, &new_free_list_items);
+ page_count++;
+ }
+ }
+ mutex_lock(&allocator->free_list_lock);
+ list_splice(&new_free_list_items, &allocator->free_list_head);
+ atomic_add(page_count, &allocator->free_list_size);
+ mutex_unlock(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_free);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100644
index 00000000000..1ddfe5c9737
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,1909 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ #include <linux/dma-attrs.h>
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_time.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+static const struct vm_operations_struct kbase_vm_ops;
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
+{
+ int zone;
+ int gpu_pc_bits;
+ int cpu_va_bits;
+ struct kbase_va_region *reg;
+ struct device *dev;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(flags);
+ KBASE_DEBUG_ASSERT(gpu_va);
+ KBASE_DEBUG_ASSERT(va_alignment);
+
+ dev = kctx->kbdev->dev;
+ *va_alignment = 0; /* no alignment by default */
+ *gpu_va = 0; /* return 0 on failure */
+
+ gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+ cpu_va_bits = BITS_PER_LONG;
+
+ if (0 == va_pages) {
+ dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+ goto bad_size;
+ }
+
+ if (va_pages > (U64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+#if defined(CONFIG_64BIT)
+ if (kctx->is_compat)
+ cpu_va_bits = 32;
+ else
+ /* force SAME_VA if a 64-bit client */
+ *flags |= BASE_MEM_SAME_VA;
+#endif
+
+ if (!kbase_check_alloc_flags(*flags)) {
+ dev_warn(dev,
+ "kbase_mem_alloc called with bad flags (%llx)",
+ (unsigned long long)*flags);
+ goto bad_flags;
+ }
+
+ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+ kctx->kbdev->system_coherency != COHERENCY_ACE) {
+ dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+ goto bad_flags;
+ }
+ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+ kctx->kbdev->system_coherency != COHERENCY_ACE) {
+ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+ *flags &= ~BASE_MEM_COHERENT_SYSTEM;
+ }
+
+ /* Limit GPU executable allocs to GPU PC size */
+ if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+ (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+ goto bad_ex_size;
+
+ /* find out which VA zone to use */
+ if (*flags & BASE_MEM_SAME_VA)
+ zone = KBASE_REG_ZONE_SAME_VA;
+ else if (*flags & BASE_MEM_PROT_GPU_EX)
+ zone = KBASE_REG_ZONE_EXEC;
+ else
+ zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+ reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+ if (!reg) {
+ dev_err(dev, "Failed to allocate free region");
+ goto no_region;
+ }
+
+ kbase_update_region_flags(reg, *flags);
+
+ if (kbase_reg_prepare_native(reg, kctx) != 0) {
+ dev_err(dev, "Failed to prepare region");
+ goto prepare_failed;
+ }
+
+ if (*flags & BASE_MEM_GROW_ON_GPF)
+ reg->extent = extent;
+ else
+ reg->extent = 0;
+
+ if (kbase_alloc_phy_pages(reg, va_pages, commit_pages)) {
+ dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+ (unsigned long long)commit_pages,
+ (unsigned long long)va_pages);
+ goto no_mem;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* mmap needed to setup VA? */
+ if (*flags & BASE_MEM_SAME_VA) {
+ /* Bind to a cookie */
+ if (!kctx->cookies) {
+ dev_err(dev, "No cookies available for allocation!");
+ goto no_cookie;
+ }
+ /* return a cookie */
+ *gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << *gpu_va);
+ BUG_ON(kctx->pending_regions[*gpu_va]);
+ kctx->pending_regions[*gpu_va] = reg;
+
+ /* relocate to correct base */
+ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ *gpu_va <<= PAGE_SHIFT;
+
+ /* See if we must align memory due to GPU PC bits vs CPU VA */
+ if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+ (cpu_va_bits > gpu_pc_bits)) {
+ *va_alignment = gpu_pc_bits;
+ reg->flags |= KBASE_REG_ALIGNED;
+ }
+ } else /* we control the VA */ {
+ if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+ dev_warn(dev, "Failed to map memory on GPU");
+ goto no_mmap;
+ }
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+ return reg;
+
+no_mmap:
+no_cookie:
+ kbase_gpu_vm_unlock(kctx);
+no_mem:
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+prepare_failed:
+ kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+ return NULL;
+}
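
The SAME_VA path above does not hand back a GPU VA directly; it returns a cookie encoding a slot in kctx->cookies, which the later mmap() call resolves into a real mapping. A hypothetical standalone sketch of the bitmask bookkeeping, with __builtin_ctzl playing the role of the kernel's __ffs():

    #include <stdio.h>

    static unsigned long cookies = ~0UL; /* every set bit is a free slot */

    static int cookie_alloc(void)
    {
        int c;

        if (!cookies)
            return -1;               /* no cookies available */
        c = __builtin_ctzl(cookies); /* index of first set bit */
        cookies &= ~(1UL << c);      /* mark the slot as used */
        return c;
    }

    static void cookie_free(int c)
    {
        cookies |= 1UL << c;         /* return the slot to the pool */
    }

    int main(void)
    {
        int c = cookie_alloc();

        printf("allocated cookie %d\n", c); /* prints 0 on the first call */
        cookie_free(c);
        return 0;
    }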
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+ struct kbase_va_region *reg;
+ int ret = -EINVAL;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(out);
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ switch (query) {
+ case KBASE_MEM_QUERY_COMMIT_SIZE:
+ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+ *out = kbase_reg_current_backed_size(reg);
+ } else {
+ size_t i;
+ struct kbase_aliased *aliased;
+ *out = 0;
+ aliased = reg->cpu_alloc->imported.alias.aliased;
+ for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+ *out += aliased[i].length;
+ }
+ break;
+ case KBASE_MEM_QUERY_VA_SIZE:
+ *out = reg->nr_pages;
+ break;
+ case KBASE_MEM_QUERY_FLAGS:
+ {
+ *out = 0;
+ if (KBASE_REG_CPU_WR & reg->flags)
+ *out |= BASE_MEM_PROT_CPU_WR;
+ if (KBASE_REG_CPU_RD & reg->flags)
+ *out |= BASE_MEM_PROT_CPU_RD;
+ if (KBASE_REG_CPU_CACHED & reg->flags)
+ *out |= BASE_MEM_CACHED_CPU;
+ if (KBASE_REG_GPU_WR & reg->flags)
+ *out |= BASE_MEM_PROT_GPU_WR;
+ if (KBASE_REG_GPU_RD & reg->flags)
+ *out |= BASE_MEM_PROT_GPU_RD;
+ if (!(KBASE_REG_GPU_NX & reg->flags))
+ *out |= BASE_MEM_PROT_GPU_EX;
+ if (KBASE_REG_SHARE_BOTH & reg->flags)
+ *out |= BASE_MEM_COHERENT_SYSTEM;
+ if (KBASE_REG_SHARE_IN & reg->flags)
+ *out |= BASE_MEM_COHERENT_LOCAL;
+ break;
+ }
+ default:
+ *out = 0;
+ goto out_unlock;
+ }
+
+ ret = 0;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return ret;
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+ struct kbase_va_region *reg;
+ int ret = -EINVAL;
+ unsigned int real_flags = 0;
+ unsigned int prev_flags = 0;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ if (!gpu_addr)
+ return -EINVAL;
+
+ /* nuke other bits */
+ flags &= mask;
+
+ /* check for only supported flags */
+ if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+ goto out;
+
+ /* mask covers bits we don't support? */
+ if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+ goto out;
+
+ /* convert flags */
+ if (BASE_MEM_COHERENT_SYSTEM & flags)
+ real_flags |= KBASE_REG_SHARE_BOTH;
+ else if (BASE_MEM_COHERENT_LOCAL & flags)
+ real_flags |= KBASE_REG_SHARE_IN;
+
+ /* now we can lock down the context, and find the region */
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ /* limit to imported memory */
+ if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+ (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+ goto out_unlock;
+
+ /* no change? */
+ if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+ ret = 0;
+ goto out_unlock;
+ }
+
+ /* save for roll back */
+ prev_flags = reg->flags;
+ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+ reg->flags |= real_flags;
+
+ /* Currently supporting only imported memory */
+ switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags);
+ break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case KBASE_MEM_TYPE_IMPORTED_UMM:
+		/* Future mappings will use the new flags; the existing mapping will NOT
+		 * be updated, as the memory should not be in use by the GPU when the
+		 * flags are updated.
+		 */
+ ret = 0;
+ WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+ break;
+#endif
+ default:
+ break;
+ }
+
+	/* roll back on error, e.g. an unsupported type or a failed MMU update */
+ if (ret)
+ reg->flags = prev_flags;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+out:
+ return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+ ump_dd_handle umph;
+ u64 block_count;
+ const ump_dd_physical_block_64 *block_array;
+ u64 i, j;
+ int page = 0;
+ ump_alloc_flags ump_flags;
+ ump_alloc_flags cpu_flags;
+ ump_alloc_flags gpu_flags;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(va_pages);
+ KBASE_DEBUG_ASSERT(flags);
+
+ if (*flags & BASE_MEM_SECURE)
+ goto bad_flags;
+
+ umph = ump_dd_from_secure_id(id);
+ if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+ goto bad_id;
+
+ ump_flags = ump_dd_allocation_flags_get(umph);
+ cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+ gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+ UMP_DEVICE_MASK;
+
+ *va_pages = ump_dd_size_get_64(umph);
+ *va_pages >>= PAGE_SHIFT;
+
+ if (!*va_pages)
+ goto bad_size;
+
+ if (*va_pages > (U64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ if (*flags & BASE_MEM_SAME_VA)
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ else
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+ if (!reg)
+ goto no_region;
+
+ /* we've got pages to map now, and support SAME_VA */
+ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+ reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+ if (IS_ERR_OR_NULL(reg->gpu_alloc))
+ goto no_alloc_obj;
+
+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ reg->gpu_alloc->imported.ump_handle = umph;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags |= KBASE_REG_GPU_NX; /* UMP is always No eXecute */
+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMP cannot be grown */
+
+ /* Override import flags based on UMP flags */
+ *flags &= ~(BASE_MEM_CACHED_CPU);
+ *flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+ *flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+ if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+ reg->flags |= KBASE_REG_CPU_CACHED;
+ *flags |= BASE_MEM_CACHED_CPU;
+ }
+
+ if (cpu_flags & UMP_PROT_CPU_WR) {
+ reg->flags |= KBASE_REG_CPU_WR;
+ *flags |= BASE_MEM_PROT_CPU_WR;
+ }
+
+ if (cpu_flags & UMP_PROT_CPU_RD) {
+ reg->flags |= KBASE_REG_CPU_RD;
+ *flags |= BASE_MEM_PROT_CPU_RD;
+ }
+
+ if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+ reg->flags |= KBASE_REG_GPU_CACHED;
+
+ if (gpu_flags & UMP_PROT_DEVICE_WR) {
+ reg->flags |= KBASE_REG_GPU_WR;
+ *flags |= BASE_MEM_PROT_GPU_WR;
+ }
+
+ if (gpu_flags & UMP_PROT_DEVICE_RD) {
+ reg->flags |= KBASE_REG_GPU_RD;
+ *flags |= BASE_MEM_PROT_GPU_RD;
+ }
+
+ /* ump phys block query */
+ ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+ for (i = 0; i < block_count; i++) {
+ for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+ reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+ page++;
+ }
+ }
+ reg->gpu_alloc->nents = *va_pages;
+ reg->extent = 0;
+
+ return reg;
+
+no_alloc_obj:
+ kfree(reg);
+no_region:
+bad_size:
+ ump_dd_release(umph);
+bad_id:
+bad_flags:
+ return NULL;
+}
+#endif /* CONFIG_UMP */
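
The nested loop near the end flattens UMP's variable-sized physical blocks into a flat one-entry-per-page address array. A standalone sketch of that flattening, with made-up block addresses and sizes:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    struct block { unsigned long long addr, size; };

    int main(void)
    {
        struct block blocks[] = { { 0x80000000ULL, 0x3000 },   /* 3 pages */
                                  { 0x90000000ULL, 0x1000 } }; /* 1 page  */
        unsigned long long pages[4];
        unsigned int i, j, page = 0;

        for (i = 0; i < 2; i++)
            for (j = 0; j < (blocks[i].size >> PAGE_SHIFT); j++)
                pages[page++] = blocks[i].addr +
                                ((unsigned long long)j << PAGE_SHIFT);

        for (i = 0; i < page; i++)
            printf("page %u -> 0x%llx\n", i, pages[i]);
        return 0;
    }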
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+ struct dma_buf *dma_buf;
+ struct dma_buf_attachment *dma_attachment;
+
+ dma_buf = dma_buf_get(fd);
+ if (IS_ERR_OR_NULL(dma_buf))
+ goto no_buf;
+
+ dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+ if (!dma_attachment)
+ goto no_attachment;
+
+ *va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+ if (!*va_pages)
+ goto bad_size;
+
+ if (*va_pages > (U64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ /* ignore SAME_VA */
+ *flags &= ~BASE_MEM_SAME_VA;
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+		/* 64-bit tasks must mmap anyway, so don't expose this address to clients */
+ *flags |= BASE_MEM_NEED_MMAP;
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ } else {
+#else
+ if (1) {
+#endif
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ }
+
+ if (!reg)
+ goto no_region;
+
+ reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+ if (IS_ERR_OR_NULL(reg->gpu_alloc))
+ goto no_alloc_obj;
+
+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ /* No pages to map yet */
+ reg->gpu_alloc->nents = 0;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */
+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */
+ reg->flags |= KBASE_REG_GPU_CACHED;
+
+ if (*flags & BASE_MEM_PROT_CPU_WR)
+ reg->flags |= KBASE_REG_CPU_WR;
+
+ if (*flags & BASE_MEM_PROT_CPU_RD)
+ reg->flags |= KBASE_REG_CPU_RD;
+
+ if (*flags & BASE_MEM_PROT_GPU_WR)
+ reg->flags |= KBASE_REG_GPU_WR;
+
+ if (*flags & BASE_MEM_PROT_GPU_RD)
+ reg->flags |= KBASE_REG_GPU_RD;
+
+ if (*flags & BASE_MEM_SECURE)
+ reg->flags |= KBASE_REG_SECURE;
+
+	/* no read or write permission given on import; the right permissions are only granted at run time */
+
+ reg->gpu_alloc->type = BASE_TMEM_IMPORT_TYPE_UMM;
+ reg->gpu_alloc->imported.umm.sgt = NULL;
+ reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+ reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+ reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+ reg->extent = 0;
+
+ return reg;
+
+no_alloc_obj:
+ kfree(reg);
+no_region:
+bad_size:
+ dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+ dma_buf_put(dma_buf);
+no_buf:
+ return NULL;
+}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+ u64 nents, struct base_mem_aliasing_info *ai,
+ u64 *num_pages)
+{
+ struct kbase_va_region *reg;
+ u64 gpu_va;
+ size_t i;
+ bool coherent;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(flags);
+ KBASE_DEBUG_ASSERT(ai);
+ KBASE_DEBUG_ASSERT(num_pages);
+
+ /* mask to only allowed flags */
+ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+ BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+ BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+ if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mem_alias called with bad flags (%llx)",
+ (unsigned long long)*flags);
+ goto bad_flags;
+ }
+ coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+ if (!stride)
+ goto bad_stride;
+
+ if (!nents)
+ goto bad_nents;
+
+ if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ /* calculate the number of pages this alias will cover */
+ *num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+		/* 64-bit tasks must mmap anyway, so don't expose this address
+		 * to clients */
+ *flags |= BASE_MEM_NEED_MMAP;
+ reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+ KBASE_REG_ZONE_SAME_VA);
+ } else {
+#else
+ if (1) {
+#endif
+ reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
+ }
+
+ if (!reg)
+ goto no_reg;
+
+	/* zero-sized page array, as we don't need one and can't support one */
+ reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+ if (IS_ERR_OR_NULL(reg->gpu_alloc))
+ goto no_alloc_obj;
+
+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ kbase_update_region_flags(reg, *flags);
+
+ reg->gpu_alloc->imported.alias.nents = nents;
+ reg->gpu_alloc->imported.alias.stride = stride;
+ reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+ if (!reg->gpu_alloc->imported.alias.aliased)
+ goto no_aliased_array;
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* validate and add src handles */
+ for (i = 0; i < nents; i++) {
+ if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+ if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE)
+ goto bad_handle; /* unsupported magic handle */
+ if (!ai[i].length)
+ goto bad_handle; /* must be > 0 */
+ if (ai[i].length > stride)
+ goto bad_handle; /* can't be larger than the
+ stride */
+ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+ } else {
+ struct kbase_va_region *aliasing_reg;
+ struct kbase_mem_phy_alloc *alloc;
+
+ aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+ /* validate found region */
+ if (!aliasing_reg)
+ goto bad_handle; /* Not found */
+ if (aliasing_reg->flags & KBASE_REG_FREE)
+ goto bad_handle; /* Free region */
+ if (!aliasing_reg->gpu_alloc)
+ goto bad_handle; /* No alloc */
+ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+ goto bad_handle; /* Not a native alloc */
+ if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+ goto bad_handle;
+			/* Non-coherent memory cannot alias
+			   coherent memory, and vice versa. */
+
+ /* check size against stride */
+ if (!ai[i].length)
+ goto bad_handle; /* must be > 0 */
+ if (ai[i].length > stride)
+ goto bad_handle; /* can't be larger than the
+ stride */
+
+ alloc = aliasing_reg->gpu_alloc;
+
+ /* check against the alloc's size */
+ if (ai[i].offset > alloc->nents)
+ goto bad_handle; /* beyond end */
+ if (ai[i].offset + ai[i].length > alloc->nents)
+ goto bad_handle; /* beyond end */
+
+ reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+ reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+ }
+ }
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+ /* Bind to a cookie */
+ if (!kctx->cookies) {
+ dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+ goto no_cookie;
+ }
+ /* return a cookie */
+ gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << gpu_va);
+ BUG_ON(kctx->pending_regions[gpu_va]);
+ kctx->pending_regions[gpu_va] = reg;
+
+ /* relocate to correct base */
+ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ gpu_va <<= PAGE_SHIFT;
+ } else /* we control the VA */ {
+#else
+ if (1) {
+#endif
+ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+ dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+ goto no_mmap;
+ }
+ /* return real GPU VA */
+ gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags &= ~KBASE_REG_GROWABLE;
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+ kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+ kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+ return 0;
+}
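
Each aliased entry occupies a stride-sized window of the new region, and only the first length pages of each window are backed, which is why length may not exceed stride. A small sketch of the resulting layout with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long stride = 16, nents = 3, length = 10;
        unsigned long long num_pages = nents * stride; /* VA pages covered */
        unsigned long long i;

        for (i = 0; i < nents; i++)
            printf("entry %llu backs pages [%llu, %llu) of %llu\n",
                   i, i * stride, i * stride + length, num_pages);
        return 0;
    }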
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(gpu_va);
+ KBASE_DEBUG_ASSERT(va_pages);
+ KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat)
+ *flags |= BASE_MEM_SAME_VA;
+#endif
+
+ if (!kbase_check_import_flags(*flags)) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mem_import called with bad flags (%llx)",
+ (unsigned long long)*flags);
+ goto bad_flags;
+ }
+
+ switch (type) {
+#ifdef CONFIG_UMP
+ case BASE_MEM_IMPORT_TYPE_UMP:
+ reg = kbase_mem_from_ump(kctx, (ump_secure_id)handle, va_pages, flags);
+ break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case BASE_MEM_IMPORT_TYPE_UMM:
+ reg = kbase_mem_from_umm(kctx, handle, va_pages, flags);
+ break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ default:
+ reg = NULL;
+ break;
+ }
+
+ if (!reg)
+ goto no_reg;
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* mmap needed to setup VA? */
+ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+ /* Bind to a cookie */
+ if (!kctx->cookies)
+ goto no_cookie;
+ /* return a cookie */
+ *gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << *gpu_va);
+ BUG_ON(kctx->pending_regions[*gpu_va]);
+ kctx->pending_regions[*gpu_va] = reg;
+
+ /* relocate to correct base */
+ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ *gpu_va <<= PAGE_SHIFT;
+
+ } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) {
+ /* we control the VA, mmap now to the GPU */
+ if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+ goto no_gpu_va;
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ } else {
+ /* we control the VA, but nothing to mmap yet */
+ if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+ goto no_gpu_va;
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ /* clear out private flags */
+ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return 0;
+
+no_gpu_va:
+no_cookie:
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+ kfree(reg);
+no_reg:
+bad_flags:
+ *gpu_va = 0;
+ *va_pages = 0;
+ *flags = 0;
+ return -ENOMEM;
+}
+
+
+static int zap_range_nolock(struct mm_struct *mm,
+ const struct vm_operations_struct *vm_ops,
+ unsigned long start, unsigned long end)
+{
+ struct vm_area_struct *vma;
+ int err = -EINVAL; /* in case end < start */
+
+ while (start < end) {
+ unsigned long local_end;
+
+ vma = find_vma_intersection(mm, start, end);
+ if (!vma)
+ break;
+
+ /* is it ours? */
+ if (vma->vm_ops != vm_ops)
+ goto try_next;
+
+ local_end = vma->vm_end;
+
+ if (end < local_end)
+ local_end = end;
+
+ err = zap_vma_ptes(vma, start, local_end - start);
+ if (unlikely(err))
+ break;
+
+try_next:
+ /* go to next vma, if any */
+ start = vma->vm_end;
+ }
+
+ return err;
+}
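
zap_range_nolock() walks every VMA intersecting [start, end), clamps the zap to the overlap, and advances past each VMA it visits. A userspace sketch of the same interval walk over a sorted region list; the regions and addresses are made up:

    #include <stdio.h>

    struct region { unsigned long start, end; };

    int main(void)
    {
        struct region vmas[] = { { 0x1000, 0x4000 }, { 0x6000, 0x9000 } };
        unsigned long start = 0x2000, end = 0x8000;
        unsigned int i = 0, n = 2;

        while (start < end && i < n) {
            struct region *vma = &vmas[i];
            unsigned long from, local_end;

            if (vma->end <= start) { /* no intersection yet */
                i++;
                continue;
            }
            if (vma->start >= end)   /* past the range */
                break;
            from = start > vma->start ? start : vma->start;
            local_end = vma->end < end ? vma->end : end;
            printf("zap [0x%lx, 0x%lx)\n", from, local_end);
            start = vma->end;        /* go to next vma */
            i++;
        }
        return 0;
    }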
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+ u64 old_pages;
+ u64 delta;
+ int res = -EINVAL;
+ struct kbase_va_region *reg;
+ phys_addr_t *phy_pages;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(failure_reason);
+ KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+ down_read(&current->mm->mmap_sem);
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE)) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+ goto out_unlock;
+ }
+
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (new_pages > reg->nr_pages) {
+ /* Would overflow the VA region */
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+ goto out_unlock;
+ }
+
+ /* can't be mapped more than once on the GPU */
+ if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (new_pages == reg->gpu_alloc->nents) {
+ /* no change */
+ res = 0;
+ goto out_unlock;
+ }
+
+ phy_pages = kbase_get_gpu_phy_pages(reg);
+ old_pages = kbase_reg_current_backed_size(reg);
+
+ if (new_pages > old_pages) {
+ /* growing */
+ int err;
+
+ delta = new_pages - old_pages;
+ /* Allocate some more pages */
+ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+ if (reg->cpu_alloc != reg->gpu_alloc) {
+ if (kbase_alloc_phy_pages_helper(
+ reg->gpu_alloc, delta) != 0) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ kbase_free_phy_pages_helper(reg->cpu_alloc,
+ delta);
+ goto out_unlock;
+ }
+ }
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+ phy_pages + old_pages, delta, reg->flags);
+ if (err) {
+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ kbase_free_phy_pages_helper(reg->gpu_alloc,
+ delta);
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+ } else {
+ /* shrinking */
+ struct kbase_cpu_mapping *mapping;
+ int err;
+
+ /* first, unmap from any mappings affected */
+ list_for_each_entry(mapping, &reg->cpu_alloc->mappings, mappings_list) {
+ unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT;
+
+			/* is this mapping affected? */
+ if ((mapping->page_off + mapping_size) > new_pages) {
+ unsigned long first_bad = 0;
+ int zap_res;
+
+ if (new_pages > mapping->page_off)
+ first_bad = new_pages - mapping->page_off;
+
+ zap_res = zap_range_nolock(current->mm,
+ &kbase_vm_ops,
+ mapping->vm_start +
+ (first_bad << PAGE_SHIFT),
+ mapping->vm_end);
+ WARN(zap_res,
+ "Failed to zap VA range (0x%lx - 0x%lx);\n",
+ mapping->vm_start +
+ (first_bad << PAGE_SHIFT),
+ mapping->vm_end
+ );
+ }
+ }
+
+ /* Free some pages */
+ delta = old_pages - new_pages;
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages,
+ delta);
+ if (err) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+#ifndef CONFIG_MALI_NO_MALI
+ if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+ /* Wait for GPU to flush write buffer before freeing physical pages */
+ kbase_wait_write_flush(kctx);
+ }
+#endif
+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+ }
+
+ res = 0;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ up_read(&current->mm->mmap_sem);
+
+ return res;
+}
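
On the shrink path the first_bad computation picks out the part of each CPU mapping that loses its backing: pages before new_pages stay mapped, everything from first_bad onwards is zapped. A standalone sketch of that arithmetic, all values illustrative:

    #include <stdio.h>

    int main(void)
    {
        unsigned long page_off = 4, mapping_pages = 8; /* mapping covers [4, 12) */
        unsigned long new_pages = 7;                   /* backing now ends at 7 */
        unsigned long first_bad = 0;

        if (page_off + mapping_pages > new_pages) {    /* mapping affected? */
            if (new_pages > page_off)
                first_bad = new_pages - page_off;
            /* zap pages [first_bad, mapping_pages) within the mapping */
            printf("zap mapping pages [%lu, %lu)\n",
                   first_bad, mapping_pages);
        }
        return 0;
    }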
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux's mm lock */
+ map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux's mm lock */
+ if (--map->count)
+ return;
+
+ KBASE_DEBUG_ASSERT(map->kctx);
+ KBASE_DEBUG_ASSERT(map->alloc);
+
+ kbase_gpu_vm_lock(map->kctx);
+
+ if (map->region) {
+ KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+ KBASE_REG_ZONE_SAME_VA);
+ /* Avoid freeing memory on the process death which results in
+ * GPU Page Fault. Memory will be freed in kbase_destroy_context
+ */
+ if (!(current->flags & PF_EXITING))
+ kbase_mem_free_region(map->kctx, map->region);
+ }
+
+ list_del(&map->mappings_list);
+
+ kbase_gpu_vm_unlock(map->kctx);
+
+ kbase_mem_phy_alloc_put(map->alloc);
+ kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+ pgoff_t rel_pgoff;
+ size_t i;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+ KBASE_DEBUG_ASSERT(map->kctx);
+ KBASE_DEBUG_ASSERT(map->alloc);
+
+ /* we don't use vmf->pgoff as it's affected by our mmap with
+ * offset being a GPU VA or a cookie */
+ rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start)
+ >> PAGE_SHIFT;
+
+ kbase_gpu_vm_lock(map->kctx);
+ if (map->page_off + rel_pgoff >= map->alloc->nents)
+ goto locked_bad_fault;
+
+ /* insert all valid pages from the fault location */
+ for (i = rel_pgoff;
+ i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+ map->alloc->nents - map->page_off); i++) {
+ int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT),
+ PFN_DOWN(map->alloc->pages[map->page_off + i]));
+ if (ret < 0 && ret != -EBUSY)
+ goto locked_bad_fault;
+ }
+
+ kbase_gpu_vm_unlock(map->kctx);
+ /* we resolved it, nothing for VM to do */
+ return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+ kbase_gpu_vm_unlock(map->kctx);
+ send_sig(SIGSEGV, current, 1);
+ return VM_FAULT_NOPAGE;
+}
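
The fault handler derives the faulting page from the VMA-relative address rather than vmf->pgoff, then inserts every backed page from there to the end of the mapping or the end of the backing, whichever comes first. A sketch of that index arithmetic with made-up addresses:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        unsigned long vm_start = 0x700000000000UL;
        unsigned long vm_end   = 0x700000010000UL;    /* 16-page mapping */
        unsigned long fault_addr = 0x700000003123UL;
        unsigned long page_off = 0, nents = 12;       /* 12 pages backed */

        unsigned long rel_pgoff = (fault_addr - vm_start) >> PAGE_SHIFT; /* 3 */
        unsigned long last = MIN((vm_end - vm_start) >> PAGE_SHIFT,
                                 nents - page_off);                      /* 12 */

        printf("insert pages [%lu, %lu) of the mapping\n", rel_pgoff, last);
        return 0;
    }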
+
+static const struct vm_operations_struct kbase_vm_ops = {
+ .open = kbase_cpu_vm_open,
+ .close = kbase_cpu_vm_close,
+ .fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+ struct kbase_cpu_mapping *map;
+ u64 start_off = vma->vm_pgoff - reg->start_pfn;
+ phys_addr_t *page_array;
+ int err = 0;
+ int i;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+ if (!map) {
+ WARN_ON(1);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+ * VM_DONTEXPAND - disable mremap on this region
+ * VM_IO - disables paging
+ * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+ * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+ * This is needed to support using the dedicated and
+ * the OS based memory backends together.
+ */
+ /*
+ * This will need updating to propagate coherency flags
+ * See MIDBASE-1057
+ */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+ vma->vm_ops = &kbase_vm_ops;
+ vma->vm_private_data = map;
+
+ page_array = kbase_get_cpu_phy_pages(reg);
+
+ if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+ (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+ /* We can't map vmalloc'd memory uncached.
+ * Other memory will have been returned from
+ * kbase_mem_allocator_alloc which would be
+ * suitable for mapping uncached.
+ */
+ BUG_ON(kaddr);
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ }
+
+ if (!kaddr) {
+ vma->vm_flags |= VM_PFNMAP;
+ for (i = 0; i < nr_pages; i++) {
+ err = vm_insert_pfn(vma, vma->vm_start + (i << PAGE_SHIFT), page_array[i + start_off] >> PAGE_SHIFT);
+ if (WARN_ON(err))
+ break;
+ }
+ } else {
+ /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+ vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remapping is easy... */
+ err = remap_vmalloc_range(vma, kaddr, 0);
+ WARN_ON(err);
+ }
+
+ if (err) {
+ kfree(map);
+ goto out;
+ }
+
+ map->page_off = start_off;
+ map->region = free_on_close ? reg : NULL;
+ map->kctx = reg->kctx;
+ map->vm_start = vma->vm_start + aligned_offset;
+ if (aligned_offset) {
+ KBASE_DEBUG_ASSERT(!start_off);
+ map->vm_end = map->vm_start + (reg->nr_pages << PAGE_SHIFT);
+ } else {
+ map->vm_end = vma->vm_end;
+ }
+ map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ map->count = 1; /* start with one ref */
+
+ if (reg->flags & KBASE_REG_CPU_CACHED)
+ map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+ list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+ return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+ struct kbase_va_region *new_reg;
+ u32 nr_pages;
+ size_t size;
+ int err = 0;
+ u32 *tb;
+ int owns_tb = 1;
+
+ dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+ size = (vma->vm_end - vma->vm_start);
+ nr_pages = size >> PAGE_SHIFT;
+
+ if (!kctx->jctx.tb) {
+ KBASE_DEBUG_ASSERT(0 != size);
+ tb = vmalloc_user(size);
+
+ if (NULL == tb) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ kbase_device_trace_buffer_install(kctx, tb, size);
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+
+ *kaddr = kctx->jctx.tb;
+
+ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+ if (!new_reg) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_no_region;
+ }
+
+ new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+ err = -ENOMEM;
+ new_reg->cpu_alloc = NULL;
+ WARN_ON(1);
+ goto out_no_alloc;
+ }
+
+ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+ new_reg->cpu_alloc->imported.kctx = kctx;
+ new_reg->flags &= ~KBASE_REG_FREE;
+ new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+ /* alloc now owns the tb */
+ owns_tb = 0;
+
+ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_no_va_region;
+ }
+
+ *reg = new_reg;
+
+ /* map read only, noexec */
+ vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+	/* the rest of the flags are added by the cpu_mmap handler */
+
+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+ return 0;
+
+out_no_va_region:
+out_no_alloc:
+ kbase_free_alloced_region(new_reg);
+out_no_region:
+ if (owns_tb) {
+ kbase_device_trace_buffer_uninstall(kctx);
+ vfree(tb);
+ }
+out:
+ return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+ struct kbase_va_region *new_reg;
+ void *kaddr;
+ u32 nr_pages;
+ size_t size;
+ int err = 0;
+
+ dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+ size = (vma->vm_end - vma->vm_start);
+ nr_pages = size >> PAGE_SHIFT;
+
+ kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+ if (!kaddr) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+ if (!new_reg) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out;
+ }
+
+ new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+ err = -ENOMEM;
+ new_reg->cpu_alloc = NULL;
+ WARN_ON(1);
+ goto out_no_alloc;
+ }
+
+ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+ new_reg->flags &= ~KBASE_REG_FREE;
+ new_reg->flags |= KBASE_REG_CPU_CACHED;
+ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_va_region;
+ }
+
+ *kmap_addr = kaddr;
+ *reg = new_reg;
+
+ dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+ return 0;
+
+out_no_alloc:
+out_va_region:
+ kbase_free_alloced_region(new_reg);
+out:
+ return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+ struct mm_struct *mm = current->mm;
+ (void)kctx;
+ down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+ struct mm_struct *mm = current->mm;
+ (void)kctx;
+ up_read(&mm->mmap_sem);
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct kbase_context *kctx = file->private_data;
+ struct kbase_va_region *reg;
+ void *kaddr = NULL;
+ size_t nr_pages;
+ int err = 0;
+ int free_on_close = 0;
+ struct device *dev = kctx->kbdev->dev;
+ size_t aligned_offset = 0;
+
+ dev_dbg(dev, "kbase_mmap\n");
+ nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+ /* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+ vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+ if (0 == nr_pages) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!(vma->vm_flags & VM_SHARED)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+
+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+ /* The non-mapped tracking helper page */
+ err = kbase_tracking_page_setup(kctx, vma);
+ goto out_unlock;
+ }
+
+ /* if not the MTP, verify that the MTP has been mapped */
+ rcu_read_lock();
+	/* catches both when the special page isn't present and
+	 * when we've forked */
+ if (rcu_dereference(kctx->process_mm) != current->mm) {
+ err = -EINVAL;
+ rcu_read_unlock();
+ goto out_unlock;
+ }
+ rcu_read_unlock();
+
+ switch (vma->vm_pgoff) {
+ case PFN_DOWN(BASE_MEM_INVALID_HANDLE):
+ case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE):
+ /* Illegal handle for direct map */
+ err = -EINVAL;
+ goto out_unlock;
+ case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+ err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+ if (0 != err)
+ goto out_unlock;
+ dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+ /* MMU dump */
+ err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+ if (0 != err)
+ goto out_unlock;
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+ /* SAME_VA stuff, fetch the right region */
+ int gpu_pc_bits;
+ int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+ gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+ reg = kctx->pending_regions[cookie];
+ if (NULL != reg) {
+ if (reg->flags & KBASE_REG_ALIGNED) {
+ /* nr_pages must be able to hold alignment pages
+ * plus actual pages */
+ if (nr_pages != ((1UL << gpu_pc_bits >>
+ PAGE_SHIFT) +
+ reg->nr_pages)) {
+ /* incorrect mmap size */
+ /* leave the cookie for a potential
+ * later mapping, or to be reclaimed
+ * later when the context is freed */
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ aligned_offset = (vma->vm_start +
+ (1UL << gpu_pc_bits) - 1) &
+ ~((1UL << gpu_pc_bits) - 1);
+ aligned_offset -= vma->vm_start;
+ } else if (reg->nr_pages != nr_pages) {
+ /* incorrect mmap size */
+ /* leave the cookie for a potential later
+ * mapping, or to be reclaimed later when the
+ * context is freed */
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ if ((vma->vm_flags & VM_READ &&
+ !(reg->flags & KBASE_REG_CPU_RD)) ||
+ (vma->vm_flags & VM_WRITE &&
+ !(reg->flags & KBASE_REG_CPU_WR))) {
+ /* VM flags inconsistent with region flags */
+ err = -EPERM;
+ dev_err(dev, "%s:%d inconsistent VM flags\n",
+ __FILE__, __LINE__);
+ goto out_unlock;
+ }
+
+ /* adjust down nr_pages to what we have physically */
+ nr_pages = kbase_reg_current_backed_size(reg);
+
+ if (kbase_gpu_mmap(kctx, reg,
+ vma->vm_start + aligned_offset,
+ reg->nr_pages, 1) != 0) {
+ dev_err(dev, "%s:%d\n", __FILE__, __LINE__);
+ /* Unable to map in GPU space. */
+ WARN_ON(1);
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /* no need for the cookie anymore */
+ kctx->pending_regions[cookie] = NULL;
+ kctx->cookies |= (1UL << cookie);
+
+ /*
+ * Overwrite the offset with the
+ * region start_pfn, so we effectively
+ * map from offset 0 in the region.
+ */
+ vma->vm_pgoff = reg->start_pfn;
+
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ }
+
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ default: {
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT);
+
+ if (reg && !(reg->flags & KBASE_REG_FREE)) {
+ /* will this mapping overflow the size of the region? */
+ if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn)))
+ goto overflow;
+
+ if ((vma->vm_flags & VM_READ &&
+ !(reg->flags & KBASE_REG_CPU_RD)) ||
+ (vma->vm_flags & VM_WRITE &&
+ !(reg->flags & KBASE_REG_CPU_WR))) {
+ /* VM flags inconsistent with region flags */
+ err = -EPERM;
+ dev_err(dev, "%s:%d inconsistent VM flags\n",
+ __FILE__, __LINE__);
+ goto out_unlock;
+ }
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (reg->cpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM)
+ goto dma_map;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+ /* limit what we map to the amount currently backed */
+ if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+ nr_pages = 0;
+ else
+ nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+ }
+
+ goto map;
+ }
+
+overflow:
+ err = -ENOMEM;
+ goto out_unlock;
+ } /* default */
+ } /* switch */
+map:
+ err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+ /* MMU dump - userspace should now have a reference on
+ * the pages, so we can now free the kernel mapping */
+ vfree(kaddr);
+ }
+ goto out_unlock;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+dma_map:
+ err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn);
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+out:
+ if (err)
+ dev_err(dev, "mmap failed %d\n", err);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
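
For KBASE_REG_ALIGNED regions the cookie case above over-allocates the VMA and then rounds the mapping start up to a (1 << gpu_pc_bits) boundary, recording the slack as aligned_offset. A standalone sketch of that round-up; the address and PC size are illustrative:

    #include <stdio.h>

    int main(void)
    {
        unsigned long gpu_pc_bits = 24;           /* 16 MiB program counter range */
        unsigned long vm_start = 0x7f1234567000UL;
        unsigned long align = 1UL << gpu_pc_bits;
        unsigned long aligned = (vm_start + align - 1) & ~(align - 1);

        printf("map at 0x%lx, aligned_offset = %lu bytes\n",
               aligned, aligned - vm_start);
        return 0;
    }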
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+ struct kbase_vmap_struct *map)
+{
+ struct kbase_va_region *reg;
+ unsigned long page_index;
+ unsigned int offset = gpu_addr & ~PAGE_MASK;
+ size_t page_count = PFN_UP(offset + size);
+ phys_addr_t *page_array;
+ struct page **pages;
+ void *cpu_addr = NULL;
+ pgprot_t prot;
+ size_t i;
+ bool sync_needed;
+
+ if (!size || !map)
+ return NULL;
+
+ /* check if page_count calculation will wrap */
+ if (size > ((size_t)-1 / PAGE_SIZE))
+ return NULL;
+
+ kbase_gpu_vm_lock(kctx);
+
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+ /* check if page_index + page_count will wrap */
+ if (-1UL - page_count < page_index)
+ goto out_unlock;
+
+ if (page_index + page_count > kbase_reg_current_backed_size(reg))
+ goto out_unlock;
+
+ page_array = kbase_get_cpu_phy_pages(reg);
+ if (!page_array)
+ goto out_unlock;
+
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto out_unlock;
+
+ for (i = 0; i < page_count; i++)
+ pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+ prot = PAGE_KERNEL;
+ if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+ /* Map uncached */
+ prot = pgprot_writecombine(prot);
+ }
+
+ cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+ kfree(pages);
+
+ if (!cpu_addr)
+ goto out_unlock;
+
+ map->gpu_addr = gpu_addr;
+ map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+ map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+ map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+ map->addr = (void *)((uintptr_t)cpu_addr + offset);
+ map->size = size;
+ map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+ sync_needed = map->is_cached;
+
+ if (sync_needed) {
+ /* Sync first page */
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+ phys_addr_t cpu_pa = map->cpu_pages[0];
+ phys_addr_t gpu_pa = map->gpu_pages[0];
+
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+ KBASE_SYNC_TO_CPU);
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ cpu_pa = map->cpu_pages[i];
+ gpu_pa = map->gpu_pages[i];
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+ KBASE_SYNC_TO_CPU);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1) {
+ cpu_pa = map->cpu_pages[page_count - 1];
+ gpu_pa = map->gpu_pages[page_count - 1];
+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+ KBASE_SYNC_TO_CPU);
+ }
+ }
+ kbase_gpu_vm_unlock(kctx);
+
+ return map->addr;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return NULL;
+}
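
For cached mappings the sync loop has to handle partial pages at both ends: the first page is synced from offset to the page boundary (or to size, if smaller), whole pages in the middle are synced in full, and the last page is synced up to the final byte. A sketch of the three size computations with made-up numbers:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        unsigned long offset = 300, size = 9000;
        unsigned long page_count =
            (offset + size + PAGE_SIZE - 1) / PAGE_SIZE;       /* PFN_UP -> 3 */
        unsigned long first = MIN(PAGE_SIZE - offset, size);   /* 3796 */
        unsigned long last = ((offset + size - 1) & ~PAGE_MASK) + 1; /* 1108 */

        /* 3796 + 4096 + 1108 == 9000: every byte synced exactly once */
        printf("%lu pages: first %lu bytes, last %lu bytes\n",
               page_count, first, last);
        return 0;
    }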
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+ void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+ bool sync_needed = map->is_cached;
+ vunmap(addr);
+ if (sync_needed) {
+ off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+ size_t size = map->size;
+ size_t page_count = PFN_UP(offset + size);
+ size_t i;
+
+ /* Sync first page */
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+ phys_addr_t cpu_pa = map->cpu_pages[0];
+ phys_addr_t gpu_pa = map->gpu_pages[0];
+
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+ KBASE_SYNC_TO_DEVICE);
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ cpu_pa = map->cpu_pages[i];
+ gpu_pa = map->gpu_pages[i];
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+ KBASE_SYNC_TO_DEVICE);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1) {
+ cpu_pa = map->cpu_pages[page_count - 1];
+ gpu_pa = map->gpu_pages[page_count - 1];
+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+ KBASE_SYNC_TO_DEVICE);
+ }
+ }
+ map->gpu_addr = 0;
+ map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+ map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+ map->cpu_pages = NULL;
+ map->gpu_pages = NULL;
+ map->addr = NULL;
+ map->size = 0;
+ map->is_cached = false;
+}
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+ struct mm_struct *mm;
+
+ rcu_read_lock();
+ mm = rcu_dereference(kctx->process_mm);
+ if (mm) {
+ atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+ add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+ spin_lock(&mm->page_table_lock);
+ add_mm_counter(mm, MM_FILEPAGES, pages);
+ spin_unlock(&mm->page_table_lock);
+#endif
+ }
+ rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+ int pages;
+ struct mm_struct *mm;
+
+ spin_lock(&kctx->mm_update_lock);
+ mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+ if (!mm) {
+ spin_unlock(&kctx->mm_update_lock);
+ return;
+ }
+
+ rcu_assign_pointer(kctx->process_mm, NULL);
+ spin_unlock(&kctx->mm_update_lock);
+ synchronize_rcu();
+
+ pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+ add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+ spin_lock(&mm->page_table_lock);
+ add_mm_counter(mm, MM_FILEPAGES, -pages);
+ spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+ struct kbase_context *kctx;
+
+ kctx = vma->vm_private_data;
+ kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+ .close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+ /* check that this is the only tracking page */
+ spin_lock(&kctx->mm_update_lock);
+ if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+ spin_unlock(&kctx->mm_update_lock);
+ return -EFAULT;
+ }
+
+ rcu_assign_pointer(kctx->process_mm, current->mm);
+
+ spin_unlock(&kctx->mm_update_lock);
+
+ /* no real access */
+ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+ vma->vm_ops = &kbase_vm_special_ops;
+ vma->vm_private_data = kctx;
+
+ return 0;
+}
+
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+ int i;
+ int res;
+ void *va;
+ dma_addr_t dma_pa;
+ struct kbase_va_region *reg;
+ phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+#endif
+
+ u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+ u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+ BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(0 != size);
+ KBASE_DEBUG_ASSERT(0 != pages);
+
+ if (size == 0)
+ goto err;
+
+ /* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs);
+#else
+ va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+ if (!va)
+ goto err;
+
+ /* Store the state so we can free it later. */
+ handle->cpu_va = va;
+ handle->dma_pa = dma_pa;
+ handle->size = size;
+
+
+ reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+ if (!reg)
+ goto no_reg;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ kbase_update_region_flags(reg, flags);
+
+ reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+ if (IS_ERR_OR_NULL(reg->cpu_alloc))
+ goto no_alloc;
+
+ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+ page_array = kbase_get_cpu_phy_pages(reg);
+
+ for (i = 0; i < pages; i++)
+ page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+ reg->cpu_alloc->nents = pages;
+
+ kbase_gpu_vm_lock(kctx);
+ res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+ kbase_gpu_vm_unlock(kctx);
+ if (res)
+ goto no_mmap;
+
+ return va;
+
+no_mmap:
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+ kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+ dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+ return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+ struct kbase_va_region *reg;
+ int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+#endif
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+ kbase_gpu_vm_lock(kctx);
+ reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+ KBASE_DEBUG_ASSERT(reg);
+ err = kbase_gpu_munmap(kctx, reg);
+ kbase_gpu_vm_unlock(kctx);
+ KBASE_DEBUG_ASSERT(!err);
+
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+ kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(kctx->kbdev->dev, handle->size,
+ handle->cpu_va, handle->dma_pa, &attrs);
+#else
+ dma_free_writecombine(kctx->kbdev->dev, handle->size,
+ handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
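
kbase_va_alloc() backs the whole allocation with a single contiguous write-combined DMA buffer, so the physical page array is simply the DMA base plus a page-sized step per entry. A standalone sketch of the page-count and page-array derivation, values made up:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned long size = 10000;          /* bytes requested */
        unsigned long dma_pa = 0x80000000UL; /* DMA base address */
        unsigned long pages = ((size - 1) >> PAGE_SHIFT) + 1; /* 3 pages */
        unsigned long i;

        for (i = 0; i < pages; i++)
            printf("page %lu -> 0x%lx\n", i, dma_pa + (i << PAGE_SHIFT));
        return 0;
    }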
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100644
index 00000000000..1d854152704
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+ void *cpu_va;
+ dma_addr_t dma_pa;
+ size_t size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+struct kbase_vmap_struct {
+ u64 gpu_addr;
+ struct kbase_mem_phy_alloc *cpu_alloc;
+ struct kbase_mem_phy_alloc *gpu_alloc;
+ phys_addr_t *cpu_pages;
+ phys_addr_t *gpu_pages;
+ void *addr;
+ size_t size;
+ bool is_cached;
+};
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+ struct kbase_vmap_struct *map);
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx The context used for the allocation/mapping
+ * @param size The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+#endif /* _KBASE_MEM_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100644
index 00000000000..441180b64d5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/**< Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/**< Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/**< Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /**< Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+ KBASE_SYNC_TO_CPU,
+ KBASE_SYNC_TO_DEVICE
+};
+
+#endif /* _KBASE_MEM_LOWLEVEL_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100644
index 00000000000..bf60c192029
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/* mem_profile file name max length 22 based on format <int>_<int>\0 */
+#define KBASEP_DEBUGFS_FNAME_SIZE_MAX (10+1+10+1)
+
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size)
+{
+ spin_lock(&kctx->mem_profile_lock);
+ kfree(kctx->mem_profile_data);
+ kctx->mem_profile_data = data;
+ kctx->mem_profile_size = size;
+ spin_unlock(&kctx->mem_profile_lock);
+}
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if the data was successfully printed to the debugfs entry file,
+ *         or -1 if an error was encountered
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_context *kctx = sfile->private;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock(&kctx->mem_profile_lock);
+ seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+ seq_putc(sfile, '\n');
+ spin_unlock(&kctx->mem_profile_lock);
+
+ return 0;
+}
+
+/*
+ * File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+ .open = kbasep_mem_profile_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock_init(&kctx->mem_profile_lock);
+
+ debugfs_create_file("mem_profile", S_IRUGO, kctx->kctx_dentry, kctx,
+ &kbasep_mem_profile_debugfs_fops);
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock(&kctx->mem_profile_lock);
+ kfree(kctx->mem_profile_data);
+ kctx->mem_profile_data = NULL;
+ spin_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+/**
+ * @brief Stub function for when debugfs is disabled
+ */
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size)
+{
+ kfree(data);
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100644
index 00000000000..205bd378c8e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Add new entry to Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx);
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert data into the debugfs file, so it can be read by userspace
+ *
+ * The function takes ownership of @c data and frees it later when new data
+ * is inserted.
+ *
+ * @param kctx Context to which file data should be inserted
+ * @param data NULL-terminated string to be inserted into the mem_profile
+ * file, without a trailing newline character
+ * @param size Length of @c data
+ */
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size);
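+
+/* Typical usage, as a sketch (the report contents and 'len' are entirely up
+ * to the caller; only the ownership transfer is mandated by this interface):
+ *
+ *   char *buf = kmalloc(len, GFP_KERNEL);
+ *   ... fill buf with a NUL-terminated report ...
+ *   kbasep_mem_profile_debugfs_insert(kctx, buf, len);
+ *
+ * After the call 'buf' belongs to the context: it is freed on the next
+ * insert or when the entry is removed.
+ */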
+
+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100644
index 00000000000..82f0702974c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer used to accumulate the histogram report text
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer used to accumulate the histogram report text
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
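+
+/* For reference, the expression evaluates to
+ * 64 + (80 + 56 * 64) * 15 + 56 = 64 + 3664 * 15 + 56 = 55080 bytes.
+ * The factors presumably correspond to a report header, 15 zones of 64
+ * histogram buckets each with a per-zone header, and a footer; only the
+ * total size is relied upon by users of this macro.
+ */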
+
+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100644
index 00000000000..325b79082eb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,1653 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG 1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @dev: Device pointer.
+ * @handle: Address of DMA region.
+ * @size: Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+static void kbase_mmu_sync_pgd(struct device *dev,
+ dma_addr_t handle, size_t size)
+{
+ dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64-bit value pointing to the next
+ * level of translation
+ * - ATE: Address Translation Entry. A 64-bit value pointing to
+ * a 4kB physical page.
+ */
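+
+/*
+ * With 512 (2^9) entries per table, each level of translation consumes
+ * 9 bits of the virtual PFN; the index into a level-N table is
+ *
+ *     (vpfn >> ((3 - N) * 9)) & 0x1FF
+ *
+ * as computed in mmu_get_next_pgd() below. For example, vpfn 0x40201
+ * selects entry 0 at level 0, then entry 1 at each of levels 1, 2 and 3.
+ */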
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+ struct kbase_as *as, const char *reason_str);
+
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+ size_t remainder = minimum % multiple;
+
+ if (remainder == 0)
+ return minimum;
+
+ return minimum + multiple - remainder;
+}
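+
+/* For example, make_multiple(5, 4) and make_multiple(8, 4) both return 8;
+ * this is how the page fault handler below rounds region growth up to a
+ * whole number of 'extent' units.
+ */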
+
+void page_fault_worker(struct work_struct *data)
+{
+ u64 fault_pfn;
+ u32 fault_status;
+ size_t new_pages;
+ size_t fault_rel_pfn;
+ struct kbase_as *faulting_as;
+ int as_no;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+ struct kbase_va_region *region;
+ int err;
+ bool grown = false;
+
+ faulting_as = container_of(data, struct kbase_as, work_pagefault);
+ fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+ as_no = faulting_as->number;
+
+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+ /* Grab the context that was already refcounted in kbase_mmu_interrupt().
+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+ *
+ * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+
+ if (kctx == NULL) {
+ /* Only handle this if not already suspended */
+ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ /* Address space has no context, terminate the work */
+
+ /* AS transaction begin */
+ mutex_lock(&faulting_as->transaction_mutex);
+
+ kbase_mmu_disable_as(kbdev, as_no);
+
+ mutex_unlock(&faulting_as->transaction_mutex);
+ /* AS transaction end */
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_pm_context_idle(kbdev);
+ }
+ atomic_dec(&kbdev->faults_pending);
+ return;
+ }
+
+ KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+ fault_status = faulting_as->fault_status;
+ switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+ /* need to check against the region to handle this one */
+ break;
+
+ case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Permission failure");
+ goto fault_done;
+
+ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Tranlation table bus fault");
+ goto fault_done;
+
+ case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+ /* nothing to do, but we don't expect this fault currently */
+ dev_warn(kbdev->dev, "Access flag unexpectedly set");
+ goto fault_done;
+
+ default:
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Unknown fault code");
+ goto fault_done;
+ }
+
+ /* so we have a translation fault, let's see if it is for growable
+ * memory */
+ kbase_gpu_vm_lock(kctx);
+
+ region = kbase_region_tracker_find_region_enclosing_address(kctx,
+ faulting_as->fault_addr);
+ if (!region || region->flags & KBASE_REG_FREE) {
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Memory is not mapped on the GPU");
+ goto fault_done;
+ }
+
+ if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+ != GROWABLE_FLAGS_REQUIRED) {
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Memory is not growable");
+ goto fault_done;
+ }
+
+ /* find the size we need to grow it by */
+ /* we know the result fits in a size_t due to kbase_region_tracker_find_region_enclosing_address
+ * validating the fault_address to be within a size_t from the start_pfn */
+ fault_rel_pfn = fault_pfn - region->start_pfn;
+
+ if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+ dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+ faulting_as->fault_addr, region->start_pfn,
+ region->start_pfn +
+ kbase_reg_current_backed_size(region));
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ /* [1] in case another page fault occurred while we were
+ * handling the (duplicate) page fault we need to ensure we
+ * don't lose the other page fault as a result of us clearing
+ * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+ * an UNLOCK command that will retry any stalled memory
+ * transaction (which should cause the other page fault to be
+ * raised again).
+ */
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+ AS_COMMAND_UNLOCK, 1);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+
+ goto fault_done;
+ }
+
+ new_pages = make_multiple(fault_rel_pfn -
+ kbase_reg_current_backed_size(region) + 1,
+ region->extent);
+
+ /* cap to max vsize */
+ if (new_pages + kbase_reg_current_backed_size(region) >
+ region->nr_pages)
+ new_pages = region->nr_pages -
+ kbase_reg_current_backed_size(region);
+
+ if (0 == new_pages) {
+ /* Duplicate of a fault we've already handled, nothing to do */
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ /* See comment [1] about UNLOCK usage */
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+ AS_COMMAND_UNLOCK, 1);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+ goto fault_done;
+ }
+
+ if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+ if (region->gpu_alloc != region->cpu_alloc) {
+ if (kbase_alloc_phy_pages_helper(
+ region->cpu_alloc, new_pages) == 0) {
+ grown = true;
+ } else {
+ kbase_free_phy_pages_helper(region->gpu_alloc,
+ new_pages);
+ }
+ } else {
+ grown = true;
+ }
+ }
+
+ if (grown) {
+ u32 op;
+
+ /* alloc success */
+ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+ /* AS transaction begin */
+ mutex_lock(&faulting_as->transaction_mutex);
+
+ /* set up the new pages */
+ err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
+ if (err) {
+ /* failed to insert pages, handle as a normal PF */
+ mutex_unlock(&faulting_as->transaction_mutex);
+ kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+ if (region->gpu_alloc != region->cpu_alloc)
+ kbase_free_phy_pages_helper(region->cpu_alloc,
+ new_pages);
+ kbase_gpu_vm_unlock(kctx);
+ /* The locked VA region will be unlocked and the cache invalidated in here */
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Page table update failure");
+ goto fault_done;
+ }
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_pagefault(
+ as_no,
+ atomic_read(&kctx->used_pages));
+#endif
+
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+ op = AS_COMMAND_FLUSH;
+ else
+ op = AS_COMMAND_FLUSH_PT;
+
+ /* clear MMU interrupt - this needs to be done after updating
+ * the page tables but before issuing a FLUSH command. The
+ * FLUSH cmd has a side effect that it restarts stalled memory
+ * transactions in other address spaces which may cause
+ * another fault to occur. If we didn't clear the interrupt at
+ * this stage a new IRQ might not be raised when the GPU finds
+ * a MMU IRQ is already pending.
+ */
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+ faulting_as->fault_addr >> PAGE_SHIFT,
+ new_pages,
+ op, 1);
+
+ mutex_unlock(&faulting_as->transaction_mutex);
+ /* AS transaction end */
+
+ /* reenable this in the mask */
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+ } else {
+ /* failed to extend, handle as a normal PF */
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Page allocation failure");
+ }
+
+fault_done:
+ /*
+ * By this point, the fault was handled in some way,
+ * so release the ctx refcount
+ */
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+ atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+ phys_addr_t pgd;
+ u64 *page;
+ int i;
+ struct page *p;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ kbase_atomic_add_pages(1, &kctx->used_pages);
+ kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+ if (kbase_mem_allocator_alloc(kctx->pgd_allocator, 1, &pgd) != 0)
+ goto sub_pages;
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ page = kmap(p);
+ if (NULL == page)
+ goto alloc_free;
+
+ kbase_process_page_usage_inc(kctx, 1);
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+ kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE);
+
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ return pgd;
+
+alloc_free:
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &pgd, false);
+sub_pages:
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given the level-N PGD physical address, return the physical address of the
+ * level-(N+1) PGD that translates vpfn */
+static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+ u64 *page;
+ phys_addr_t target_pgd;
+ struct page *p;
+
+ KBASE_DEBUG_ASSERT(pgd);
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /*
+ * Architecture spec defines level-0 as being the top-most.
+ * This is a bit unfortunate here, but we keep the same convention.
+ */
+ vpfn >>= (3 - level) * 9;
+ vpfn &= 0x1FF;
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ page = kmap(p);
+ if (NULL == page) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+ return 0;
+ }
+
+ target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+ if (!target_pgd) {
+ target_pgd = kbase_mmu_alloc_pgd(kctx);
+ if (!target_pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+ kunmap(p);
+ return 0;
+ }
+
+ kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p), PAGE_SIZE);
+ /* Rely on the caller to update the address space flags. */
+ }
+
+ kunmap(p);
+ return target_pgd;
+}
+
+static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+ phys_addr_t pgd;
+ int l;
+
+ pgd = kctx->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
+ pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
+ /* Handle failure condition */
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+ return 0;
+ }
+ }
+
+ return pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+ u64 *page;
+ phys_addr_t target_pgd;
+
+ KBASE_DEBUG_ASSERT(pgd);
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /*
+ * Architecture spec defines level-0 as being the top-most.
+ * This is a bit unfortunate here, but we keep the same convention.
+ */
+ vpfn >>= (3 - level) * 9;
+ vpfn &= 0x1FF;
+
+ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail */
+ KBASE_DEBUG_ASSERT(NULL != page);
+
+ target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+ /* As we are recovering from what has already been set up, we should have a target_pgd */
+ KBASE_DEBUG_ASSERT(0 != target_pgd);
+ kunmap_atomic(page);
+ return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+ phys_addr_t pgd;
+ int l;
+
+ pgd = kctx->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
+ pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+ /* Should never fail */
+ KBASE_DEBUG_ASSERT(0 != pgd);
+ }
+
+ return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+ size_t nr)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_mode = kctx->kbdev->mmu_mode;
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+ KBASE_DEBUG_ASSERT(0 != pgd);
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+
+ pgd_page = kmap_atomic(p);
+ KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+ /* Invalidate the entries we added */
+ for (i = 0; i < count; i++)
+ mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE);
+
+ kunmap_atomic(pgd_page);
+ }
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t phys, size_t nr,
+ unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ /* In case the insert_single_page only partially completes we need to be
+ * able to recover */
+ bool recover_required = false;
+ u64 recover_vpfn = vpfn;
+ size_t recover_count = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ /*
+ * Repeatedly calling mmu_get_bottom_pgd() is clearly
+ * suboptimal. We don't have to re-parse the whole tree
+ * each time (just cache the l0-l2 sequence).
+ * On the other hand, it's only a gain when we map more than
+ * 256 pages at once (on average). Do we really care?
+ */
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return -EINVAL;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned int ofs = index + i;
+
+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+ kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+ phys, flags);
+ }
+
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64));
+
+ kunmap(p);
+ /* We have started modifying the page table.
+ * If further pages need inserting and fail we need to undo what
+ * has already taken place */
+ recover_required = true;
+ recover_count += count;
+ }
+ return 0;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t *phys, size_t nr,
+ unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ /* In case the insert_pages only partially completes we need to be able
+ * to recover */
+ bool recover_required = false;
+ u64 recover_vpfn = vpfn;
+ size_t recover_count = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ /*
+ * Repeatedly calling mmu_get_bottom_pgd() is clearly
+ * suboptimal. We don't have to re-parse the whole tree
+ * each time (just cache the l0-l2 sequence).
+ * On the other hand, it's only a gain when we map more than
+ * 256 pages at once (on average). Do we really care?
+ */
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return -EINVAL;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned int ofs = index + i;
+
+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+ kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+ phys[i], flags);
+ }
+
+ phys += count;
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64));
+
+ kunmap(p);
+ /* We have started modifying the page table. If further pages
+ * need inserting and fail we need to undo what has already
+ * taken place */
+ recover_required = true;
+ recover_count += count;
+ }
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * This function is responsible for validating the MMU PTs and
+ * triggering any required flushes.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+ struct kbase_device *kbdev;
+ bool ctx_is_in_runpool;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ kbdev = kctx->kbdev;
+
+ /* We must flush if we're currently running jobs. At the very least, we need to retain the
+ * context to ensure it doesn't schedule out whilst we're trying to flush it */
+ ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+
+ if (ctx_is_in_runpool) {
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ /* Second level check is to try to only do this when jobs are running. The refcount is
+ * a heuristic for this. */
+ if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) {
+ if (!kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ int ret;
+ u32 op;
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->as[
+ kctx->as_nr].transaction_mutex);
+
+ if (kbase_hw_has_issue(kbdev,
+ BASE_HW_ISSUE_6367))
+ op = AS_COMMAND_FLUSH;
+ else
+ op = AS_COMMAND_FLUSH_MEM;
+
+ ret = kbase_mmu_hw_do_operation(kbdev,
+ &kbdev->as[kctx->as_nr],
+ kctx, vpfn, nr,
+ op, 0);
+#if KBASE_GPU_RESET_EN
+ if (ret) {
+ /* Flush failed to complete, assume the
+ * GPU has hung and perform a reset to
+ * recover */
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+
+ mutex_unlock(&kbdev->as[
+ kctx->as_nr].transaction_mutex);
+ /* AS transaction end */
+
+ kbase_pm_context_idle(kbdev);
+ }
+ }
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+}
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused. Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ struct kbase_device *kbdev;
+ size_t requested_nr = nr;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (0 == nr) {
+ /* early out if nothing to do */
+ return 0;
+ }
+
+ kbdev = kctx->kbdev;
+ mmu_mode = kbdev->mmu_mode;
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+ return -EINVAL;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++)
+ mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64));
+
+ kunmap(p);
+ }
+
+ kbase_mmu_flush(kctx, vpfn, requested_nr);
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for the specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is triggered in response to changes of the memory attributes.
+ *
+ * @pre The caller is responsible for validating the memory attributes.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ size_t requested_nr = nr;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_mode = kctx->kbdev->mmu_mode;
+
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+ vpfn, phys, nr);
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+ return -EINVAL;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kmap failure\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++)
+ mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+ flags);
+
+ phys += count;
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64));
+
+ kunmap(p);
+ }
+
+ kbase_mmu_flush(kctx, vpfn, requested_nr);
+
+ return 0;
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+ u64 *page;
+ int i;
+
+ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail. */
+ KBASE_DEBUG_ASSERT(NULL != page);
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
+ beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+ }
+ kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+ phys_addr_t target_pgd;
+ u64 *pgd_page;
+ int i;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail. */
+ KBASE_DEBUG_ASSERT(NULL != pgd_page);
+ /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+ kunmap_atomic(pgd_page);
+ pgd_page = pgd_page_buffer;
+
+ mmu_mode = kctx->kbdev->mmu_mode;
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+ if (target_pgd) {
+ if (level < 2) {
+ mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+ } else {
+ /*
+ * So target_pte is a level-3 page.
+ * As a leaf, it is safe to free it.
+ * Unless we have live pages attached to it!
+ */
+ mmu_check_unused(kctx, target_pgd);
+ }
+
+ beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+ if (zap) {
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &target_pgd, true);
+ kbase_process_page_usage_dec(kctx, 1);
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+ }
+ }
+ }
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+ /* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+ if (NULL == kctx->mmu_teardown_pages)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+ kfree(kctx->mmu_teardown_pages);
+ kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+
+ beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &kctx->pgd, true);
+ kbase_process_page_usage_dec(kctx, 1);
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+ phys_addr_t target_pgd;
+ u64 *pgd_page;
+ int i;
+ size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+ size_t dump_size;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_mode = kctx->kbdev->mmu_mode;
+
+ pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+ return 0;
+ }
+
+ if (*size_left >= size) {
+ /* A modified physical address that contains the page table level */
+ u64 m_pgd = pgd | level;
+
+ /* Put the modified physical address in the output buffer */
+ memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+ *buffer += sizeof(m_pgd);
+
+ /* Followed by the page table itself */
+ memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+ *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+ *size_left -= size;
+ }
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ if (mmu_mode->pte_is_valid(pgd_page[i])) {
+ target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+ dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left);
+ if (!dump_size) {
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ return 0;
+ }
+ size += dump_size;
+ }
+ }
+
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+ return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+ void *kaddr;
+ size_t size_left;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (0 == nr_pages) {
+ /* can't find in a 0 sized buffer, early out */
+ return NULL;
+ }
+
+ size_left = nr_pages * PAGE_SIZE;
+
+ KBASE_DEBUG_ASSERT(0 != size_left);
+ kaddr = vmalloc_user(size_left);
+
+ if (kaddr) {
+ u64 end_marker = 0xFFULL;
+ char *buffer = (char *)kaddr;
+ size_t size = kbasep_mmu_dump_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, &buffer, &size_left);
+
+ if (!size) {
+ vfree(kaddr);
+ return NULL;
+ }
+
+ /* Add on the size for the end marker */
+ size += sizeof(u64);
+
+ if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+ /* The buffer isn't big enough - free the memory and return failure */
+ vfree(kaddr);
+ return NULL;
+ }
+
+ /* Add the end marker */
+ memcpy(buffer, &end_marker, sizeof(u64));
+ }
+
+ return kaddr;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
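+
+/* Layout of the dump produced by kbase_mmu_dump(), as implied by
+ * kbasep_mmu_dump_level() above: a sequence of records, each made of one
+ * u64 tag (the PGD physical address OR'd with its table level) followed by
+ * that table's 512 u64 entries, with the stream terminated by an end
+ * marker:
+ *
+ *   [ pgd|level ][ 512 x u64 ] [ pgd|level ][ 512 x u64 ] ... [ 0xFFULL ]
+ */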
+
+void bus_fault_worker(struct work_struct *data)
+{
+ struct kbase_as *faulting_as;
+ int as_no;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+ bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+ faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+ as_no = faulting_as->number;
+
+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+ /* Grab the context that was already refcounted in kbase_mmu_interrupt().
+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+ *
+ * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+ * are evicted from the GPU before the switch.
+ */
+ dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+ reset_status = kbase_prepare_to_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+
+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+ /* AS transaction begin */
+ mutex_lock(&kbdev->as[as_no].transaction_mutex);
+
+ /* Set the MMU into unmapped mode */
+ kbase_mmu_disable_as(kbdev, as_no);
+
+ mutex_unlock(&kbdev->as[as_no].transaction_mutex);
+ /* AS transaction end */
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_BUS);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_BUS);
+
+ kbase_pm_context_idle(kbdev);
+ }
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+ kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+ /* By this point, the fault was handled in some way, so release the ctx refcount */
+ if (kctx != NULL)
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+ atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+ const char *e;
+
+ switch (exception_code) {
+ /* Non-Fault Status code */
+ case 0x00:
+ e = "NOT_STARTED/IDLE/OK";
+ break;
+ case 0x01:
+ e = "DONE";
+ break;
+ case 0x02:
+ e = "INTERRUPTED";
+ break;
+ case 0x03:
+ e = "STOPPED";
+ break;
+ case 0x04:
+ e = "TERMINATED";
+ break;
+ case 0x08:
+ e = "ACTIVE";
+ break;
+ /* Job exceptions */
+ case 0x40:
+ e = "JOB_CONFIG_FAULT";
+ break;
+ case 0x41:
+ e = "JOB_POWER_FAULT";
+ break;
+ case 0x42:
+ e = "JOB_READ_FAULT";
+ break;
+ case 0x43:
+ e = "JOB_WRITE_FAULT";
+ break;
+ case 0x44:
+ e = "JOB_AFFINITY_FAULT";
+ break;
+ case 0x48:
+ e = "JOB_BUS_FAULT";
+ break;
+ case 0x50:
+ e = "INSTR_INVALID_PC";
+ break;
+ case 0x51:
+ e = "INSTR_INVALID_ENC";
+ break;
+ case 0x52:
+ e = "INSTR_TYPE_MISMATCH";
+ break;
+ case 0x53:
+ e = "INSTR_OPERAND_FAULT";
+ break;
+ case 0x54:
+ e = "INSTR_TLS_FAULT";
+ break;
+ case 0x55:
+ e = "INSTR_BARRIER_FAULT";
+ break;
+ case 0x56:
+ e = "INSTR_ALIGN_FAULT";
+ break;
+ case 0x58:
+ e = "DATA_INVALID_FAULT";
+ break;
+ case 0x59:
+ e = "TILE_RANGE_FAULT";
+ break;
+ case 0x5A:
+ e = "ADDR_RANGE_FAULT";
+ break;
+ case 0x60:
+ e = "OUT_OF_MEMORY";
+ break;
+ /* GPU exceptions */
+ case 0x80:
+ e = "DELAYED_BUS_FAULT";
+ break;
+ case 0x88:
+ e = "SHAREABILITY_FAULT";
+ break;
+ /* MMU exceptions */
+ case 0xC0:
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC4:
+ case 0xC5:
+ case 0xC6:
+ case 0xC7:
+ e = "TRANSLATION_FAULT";
+ break;
+ case 0xC8:
+ e = "PERMISSION_FAULT";
+ break;
+ case 0xD0:
+ case 0xD1:
+ case 0xD2:
+ case 0xD3:
+ case 0xD4:
+ case 0xD5:
+ case 0xD6:
+ case 0xD7:
+ e = "TRANSTAB_BUS_FAULT";
+ break;
+ case 0xD8:
+ e = "ACCESS_FLAG";
+ break;
+ default:
+ e = "UNKNOWN";
+ break;
+ }
+
+ return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+ u32 fault_status)
+{
+ switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+ return "UNKNOWN";
+ case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+ return "READ";
+ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+ return "WRITE";
+ case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+ return "EXECUTE";
+ default:
+ KBASE_DEBUG_ASSERT(0);
+ return NULL;
+ }
+}
+
+/**
+ * The caller must ensure it has retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+ struct kbase_as *as, const char *reason_str)
+{
+ unsigned long flags;
+ int exception_type;
+ int access_type;
+ int source_id;
+ int as_no;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+ bool reset_status = false;
+#endif
+
+ as_no = as->number;
+ kbdev = kctx->kbdev;
+ js_devdata = &kbdev->js_data;
+
+ /* ASSERT that the context won't leave the runpool */
+ KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+ /* decode the fault status */
+ exception_type = as->fault_status & 0xFF;
+ access_type = (as->fault_status >> 8) & 0x3;
+ source_id = (as->fault_status >> 16);
+
+ /* terminal fault, print info about the fault */
+ dev_err(kbdev->dev,
+ "Unhandled Page fault in AS%d at VA 0x%016llX\n"
+ "Reason: %s\n"
+ "raw fault status 0x%X\n"
+ "decoded fault status: %s\n"
+ "exception type 0x%X: %s\n"
+ "access type 0x%X: %s\n"
+ "source id 0x%X\n"
+ "pid: %d\n",
+ as_no, as->fault_addr,
+ reason_str,
+ as->fault_status,
+ (as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+ exception_type, kbase_exception_name(kbdev, exception_type),
+ access_type, access_type_name(kbdev, as->fault_status),
+ source_id,
+ kctx->pid);
+
+ /* hardware counters dump fault handling */
+ if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+ (kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_DUMPING)) {
+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+ if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+ (as->fault_addr < (kbdev->hwcnt.addr +
+ (num_core_groups * 2048))))
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+ }
+
+ /* Stop the kctx from submitting more jobs and cause it to be scheduled
+ * out/rescheduled - this will occur on releasing the context's refcount */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+ * context can appear in the job slots from this point on */
+ kbase_backend_jm_kill_jobs_from_kctx(kctx);
+ /* AS transaction begin */
+ mutex_lock(&as->transaction_mutex);
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+ * are evicted from the GPU before the switch.
+ */
+ dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+ reset_status = kbase_prepare_to_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+ kbase_mmu_disable_as(kbdev, as_no);
+
+ mutex_unlock(&as->transaction_mutex);
+ /* AS transaction end */
+ /* Clear down the fault */
+ kbase_mmu_hw_clear_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_mmu_hw_enable_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
+
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+ kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+ struct kbase_as *as;
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(work);
+ as = container_of(work, struct kbase_as, poke_work);
+ kbdev = container_of(as, struct kbase_device, as[as->number]);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ /* GPU power will already be active by virtue of the caller holding a JS
+ * reference on the address space, and will not release it until this worker
+ * has finished */
+
+ /* Further to the comment above, we know that while this function is running
+ * the AS will not be released as before the atom is released this workqueue
+ * is flushed (in kbase_as_poking_timer_release_atom)
+ */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+ /* AS transaction begin */
+ mutex_lock(&as->transaction_mutex);
+ /* Force a uTLB invalidate */
+ kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+ AS_COMMAND_UNLOCK, 0);
+ mutex_unlock(&as->transaction_mutex);
+ /* AS transaction end */
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ if (as->poke_refcount &&
+ !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+ /* Only queue up the timer if we need it, and we're not trying to kill it */
+ hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+ }
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+ struct kbase_as *as;
+ int queue_work_ret;
+
+ KBASE_DEBUG_ASSERT(NULL != timer);
+ as = container_of(timer, struct kbase_as, poke_timer);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+ KBASE_DEBUG_ASSERT(queue_work_ret);
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct kbase_as *as;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (katom->poking)
+ return;
+
+ katom->poking = 1;
+
+ /* It's safe to work on the as/as_nr without an explicit reference,
+ * because the caller holds the runpool_irq lock, and the atom itself
+ * was also running and had already taken a reference */
+ as = &kbdev->as[kctx->as_nr];
+
+ if (++(as->poke_refcount) == 1) {
+ /* First refcount for poke needed: check if not already in flight */
+ if (!as->poke_state) {
+ /* need to start poking */
+ as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+ queue_work(as->poke_wq, &as->poke_work);
+ }
+ }
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct kbase_as *as;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ if (!katom->poking)
+ return;
+
+ as = &kbdev->as[kctx->as_nr];
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ if (--(as->poke_refcount) == 0) {
+ as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+ hrtimer_cancel(&as->poke_timer);
+ flush_workqueue(as->poke_wq);
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+
+ /* Re-check whether it's still needed */
+ if (as->poke_refcount) {
+ int queue_work_ret;
+ /* Poking still needed:
+ * - Another retain will not be starting the timer or queueing work,
+ * because it's still marked as in-flight
+ * - The hrtimer has finished, and has not started a new timer or
+ * queued work because it's been marked as killing
+ *
+ * So whatever happens now, just queue the work again */
+ as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+ queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+ KBASE_DEBUG_ASSERT(queue_work_ret);
+ } else {
+ /* It isn't - so mark it as not in flight, and not killing */
+ as->poke_state = 0u;
+
+ /* The poke associated with the atom has now finished. If this is
+ * also the last atom on the context, then we can guarantee no more
+ * pokes (and thus no more poking register accesses) will occur on
+ * the context until new atoms are run */
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+ katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (kctx == NULL) {
+ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+ kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+ as->number, as->fault_addr);
+ }
+
+ if (kbase_as_has_bus_fault(as)) {
+ if (kctx) {
+ /*
+ * hw counters dumping in progress, signal the
+ * other thread that it failed
+ */
+ if ((kbdev->hwcnt.kctx == kctx) &&
+ (kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_DUMPING))
+ kbdev->hwcnt.backend.state =
+ KBASE_INSTR_STATE_FAULT;
+
+ /*
+ * Stop the kctx from submitting more jobs and cause it
+ * to be scheduled out/rescheduled when all references
+ * to it are released
+ */
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+ dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+ as->number, as->fault_addr);
+
+ }
+
+ /*
+ * We need to switch to UNMAPPED mode - but we do this in a
+ * worker so that we can sleep
+ */
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+ WARN_ON(work_pending(&as->work_busfault));
+ INIT_WORK(&as->work_busfault, bus_fault_worker);
+ queue_work(as->pf_wq, &as->work_busfault);
+ atomic_inc(&kbdev->faults_pending);
+ } else {
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+ WARN_ON(work_pending(&as->work_pagefault));
+ INIT_WORK(&as->work_pagefault, page_fault_worker);
+ queue_work(as->pf_wq, &as->work_pagefault);
+ atomic_inc(&kbdev->faults_pending);
+ }
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+ int i;
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbase_as *as = &kbdev->as[i];
+
+ flush_workqueue(as->pf_wq);
+ }
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100644
index 00000000000..c6478b2e0a6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the Midgard MMU. It allows all MMU HW access to be contained within
+ * one common place and allows different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+ KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+ KBASE_MMU_FAULT_TYPE_PAGE,
+ KBASE_MMU_FAULT_TYPE_BUS
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details set up in the
+ * @ref kbase_context structure.
+ *
+ * @param[in] kbdev kbase device to configure.
+ * @param[in] as address space to configure.
+ * @param[in] kctx kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+ struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context, over the specified range.
+ *
+ * @param[in] kbdev kbase device to issue the MMU operation on.
+ * @param[in] as address space to issue the MMU operation on.
+ * @param[in] kctx kbase context to issue the MMU operation on.
+ * @param[in] vpfn MMU Virtual Page Frame Number to start the
+ * operation on.
+ * @param[in] nr Number of pages to work on.
+ * @param[in] type Operation type (written to ASn_COMMAND).
+ * @param[in] handling_irq Is this operation being called during the handling
+ * of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+ unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in] kbdev kbase device to clear the fault from.
+ * @param[in] as address space to clear the fault from.
+ * @param[in] kctx kbase context to clear the fault from or NULL.
+ * @param[in] type The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Re-enable a fault that has previously been reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU, further
+ * reporting of that fault is disabled. Once the fault has been handled,
+ * this function needs to be called to enable page fault or bus error
+ * reporting again.
+ *
+ * @param[in] kbdev kbase device on which to re-enable the fault.
+ * @param[in] as address space on which to re-enable the fault.
+ * @param[in] kctx kbase context on which to re-enable the fault.
+ * @param[in] type The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif /* _MALI_KBASE_MMU_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
new file mode 100644
index 00000000000..26e683e2960
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALI_KBASE_MMU_MODE_
+#define _MALI_KBASE_MMU_MODE_
+
+#include <linux/types.h>
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+
+struct kbase_mmu_mode {
+ void (*update)(struct kbase_context *kctx);
+ void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+ phys_addr_t (*pte_to_phy_addr)(u64 entry);
+ int (*ate_is_valid)(u64 ate);
+ int (*pte_is_valid)(u64 pte);
+ void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags);
+ void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+ void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
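+
+/* Sketch of how a backend is expected to be selected (the actual wiring
+ * happens in device setup code outside this file):
+ *
+ *   kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+ *
+ * after which all page table manipulation goes through the function
+ * pointers, e.g. kbdev->mmu_mode->entry_invalidate(&pte) as done in
+ * mali_kbase_mmu.c.
+ */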
+
+#endif /* _MALI_KBASE_MMU_MODE_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100644
index 00000000000..2211a1dcf01
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,188 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK 3ULL
+#define ENTRY_IS_ATE 1ULL
+#define ENTRY_IS_INVAL 2ULL
+#define ENTRY_IS_PTE 3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+ ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper function to perform assignment of page table entries, ensuring
+ * the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+ *pte = phy;
+#elif defined(CONFIG_ARM)
+	/*
+	 * In order to prevent the compiler from keeping cached copies of
+	 * memory, we have to explicitly say that we have updated memory.
+	 *
+	 * Note: We could move the data into R0 and R1 ourselves by using
+	 * register variables with explicit register assignments, but the
+	 * downside of this is that we would have to assume CPU endianness.
+	 * To avoid this we use ldrd to read the data from memory into R0
+	 * and R1, which respects the CPU endianness, and then use strd to
+	 * make the 64-bit assignment to the page table entry.
+	 */
+ asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+ "strd r0, r1, [%[pte]]\n\t"
+ : "=m" (*pte)
+ : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+ : "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+ struct kbase_device * const kbdev = kctx->kbdev;
+ struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+ struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+ /* Set up the required caching policies at the correct indices
+ * in the memattr register. */
+ current_setup->memattr =
+ (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+ (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL <<
+ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+ (AS_MEMATTR_LPAE_WRITE_ALLOC <<
+ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+ 0; /* The other indices are unused for now */
+
+ current_setup->transtab = (u64)kctx->pgd &
+ ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK);
+
+ current_setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE;
+ current_setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER;
+
+
+ /* Apply the address space setting */
+ kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+ struct kbase_as * const as = &kbdev->as[as_nr];
+ struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+ current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+
+ /* Apply the address space setting */
+ kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+ if (!(entry & 1))
+ return 0;
+
+ return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+ return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+ u64 mmu_flags;
+
+	/* store mem_attr index at bits 4:2 (the macro called ensures the value fits in 3 bits) */
+ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+ /* write perm if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+ /* read perm if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+ /* nx if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+ if (flags & KBASE_REG_SHARE_BOTH) {
+ /* inner and outer shareable */
+ mmu_flags |= SHARE_BOTH_BITS;
+ } else if (flags & KBASE_REG_SHARE_IN) {
+ /* inner shareable coherency */
+ mmu_flags |= SHARE_INNER_BITS;
+ }
+
+ return mmu_flags;
+}
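+/*
+ * Worked example (illustrative only): for a region created with
+ * KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | KBASE_REG_SHARE_IN,
+ * get_mmu_flags() returns the memattr index in bits 4:2 combined with
+ * ENTRY_RD_BIT | ENTRY_WR_BIT | ENTRY_NX_BIT | SHARE_INNER_BITS.
+ */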
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+ page_table_entry_set(entry, (phy & ~0xFFF) |
+ get_mmu_flags(flags) |
+ ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+ page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+ page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+ .update = mmu_update,
+ .disable_as = mmu_disable_as,
+ .pte_to_phy_addr = pte_to_phy_addr,
+ .ate_is_valid = ate_is_valid,
+ .pte_is_valid = pte_is_valid,
+ .entry_set_ate = entry_set_ate,
+ .entry_set_pte = entry_set_pte,
+ .entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+ return &lpae_mode;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100644
index 00000000000..5bbd6d48563
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,126 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+#ifdef CONFIG_MACH_MANTA
+#include <plat/devs.h>
+#endif
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT 3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in a struct kbase_io_resources to Linux-specific resources
+ *
+ * Converts the data in a struct kbase_io_resources to an array of Linux resource structures. Note that the function
+ * assumes the linux_resources array holds at least PLATFORM_CONFIG_RESOURCE_COUNT entries.
+ * Resources are placed in a fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resources       Input IO resource data
+ * @param[out] linux_resources    Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+ if (!io_resources || !linux_resources) {
+ pr_err("%s: couldn't find proper resources\n", __func__);
+ return;
+ }
+
+ memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+ linux_resources[0].start = io_resources->io_memory_region.start;
+ linux_resources[0].end = io_resources->io_memory_region.end;
+ linux_resources[0].flags = IORESOURCE_MEM;
+ linux_resources[1].start = io_resources->job_irq_number;
+ linux_resources[1].end = io_resources->job_irq_number;
+ linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+ linux_resources[2].start = io_resources->mmu_irq_number;
+ linux_resources[2].end = io_resources->mmu_irq_number;
+ linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+ linux_resources[3].start = io_resources->gpu_irq_number;
+ linux_resources[3].end = io_resources->gpu_irq_number;
+ linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+ struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+ struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+ int err;
+
+ config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+ if (config == NULL) {
+ pr_err("%s: couldn't get platform config\n", __func__);
+ return -ENODEV;
+ }
+
+ mali_device = platform_device_alloc("mali", 0);
+ if (mali_device == NULL)
+ return -ENOMEM;
+
+#ifndef CONFIG_OF
+ kbasep_config_parse_io_resources(config->io_resources, resources);
+ err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+ if (err) {
+ platform_device_put(mali_device);
+ mali_device = NULL;
+ return err;
+ }
+#endif /* CONFIG_OF */
+
+ err = platform_device_add(mali_device);
+ if (err) {
+ platform_device_unregister(mali_device);
+ mali_device = NULL;
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+ if (mali_device)
+ platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
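+/*
+ * Illustrative sketch (hypothetical module glue, not part of this file): a
+ * platform integration would pair the two exports above in its module
+ * init/exit paths.
+ *
+ *	static int __init my_platform_init(void)
+ *	{
+ *		return kbase_platform_fake_register();
+ *	}
+ *
+ *	static void __exit my_platform_exit(void)
+ *	{
+ *		kbase_platform_fake_unregister();
+ *	}
+ */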
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100644
index 00000000000..261441fa145
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_instr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+ return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+ kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+ (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ int c;
+ int old_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the
+	 * decision to power up. Sometimes the event might be missed due to
+	 * reading the count outside of the mutex, but this is necessary to
+	 * get the trace timing correct. */
+ old_count = kbdev->pm.active_count;
+ if (old_count == 0)
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+ if (kbase_pm_is_suspending(kbdev)) {
+ switch (suspend_handler) {
+ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+ if (kbdev->pm.active_count != 0)
+ break;
+ /* FALLTHROUGH */
+ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+ return 1;
+
+ case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+ /* FALLTHROUGH */
+ default:
+ KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+ break;
+ }
+ }
+ c = ++kbdev->pm.active_count;
+ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+ /* Trace the event being handled */
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+ if (c == 1)
+ /* First context active: Power on the GPU and any cores requested by
+ * the policy */
+ kbase_hwaccess_pm_gpu_active(kbdev);
+
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ int c;
+ int old_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the
+	 * decision to power down. Sometimes the event might be missed due to
+	 * reading the count outside of the mutex, but this is necessary to
+	 * get the trace timing correct. */
+ old_count = kbdev->pm.active_count;
+ if (old_count == 0)
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+
+ c = --kbdev->pm.active_count;
+ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+ KBASE_DEBUG_ASSERT(c >= 0);
+
+ /* Trace the event being handled */
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+ if (c == 0) {
+ /* Last context has gone idle */
+ kbase_hwaccess_pm_gpu_idle(kbdev);
+
+ /* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+ * waiters must synchronize with us by locking the pm.lock after
+ * waiting */
+ wake_up(&kbdev->pm.zero_active_count_wait);
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ mutex_lock(&kbdev->pm.lock);
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+ kbdev->pm.suspending = true;
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* From now on, the active count will drop towards zero. Sometimes, it'll
+ * go up briefly before going down again. However, once it reaches zero it
+ * will stay there - guaranteeing that we've idled all pm references */
+
+ /* Suspend job scheduler and associated components, so that it releases all
+ * the PM active count references */
+ kbasep_js_suspend(kbdev);
+
+ /* Suspend any counter collection that might be happening */
+ kbase_instr_hwcnt_suspend(kbdev);
+
+ /* Wait for the active count to reach zero. This is not the same as
+ * waiting for a power down, since not all policies power down when this
+ * reaches zero. */
+ wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+ /* NOTE: We synchronize with anything that was just finishing a
+ * kbase_pm_context_idle() call by locking the pm.lock below */
+
+ kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+ /* MUST happen before any pm_context_active calls occur */
+ kbase_hwaccess_pm_resume(kbdev);
+
+ /* Initial active call, to power on the GPU/cores if needed */
+ kbase_pm_context_active(kbdev);
+
+ /* Re-enable instrumentation, if it was previously disabled */
+ kbase_instr_hwcnt_resume(kbdev);
+
+ /* Resume any blocked atoms (which may cause contexts to be scheduled in
+ * and dependent atoms to run) */
+ kbase_resume_suspended_soft_jobs(kbdev);
+
+ /* Resume the Job Scheduler and associated components, and start running
+ * atoms */
+ kbasep_js_resume(kbdev);
+
+ /* Matching idle call, to power off the GPU/cores if we didn't actually
+ * need it and the policy doesn't want it on */
+ kbase_pm_context_idle(kbdev);
+}
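+/*
+ * Illustrative sketch (assumed integration, not taken from this file): the
+ * platform driver's OS suspend/resume callbacks are expected to forward to
+ * the two functions above, e.g. from a struct dev_pm_ops:
+ *
+ *	static int kbase_device_suspend(struct device *dev)
+ *	{
+ *		struct kbase_device *kbdev = dev_get_drvdata(dev);
+ *
+ *		kbase_pm_suspend(kbdev);
+ *		return 0;
+ *	}
+ *
+ *	static int kbase_device_resume(struct device *dev)
+ *	{
+ *		struct kbase_device *kbdev = dev_get_drvdata(dev);
+ *
+ *		kbase_pm_resume(kbdev);
+ *		return 0;
+ *	}
+ */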
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100644
index 00000000000..37fa2479df7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS 0x01
+#define PM_HW_ISSUES_DETECT 0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occurring/has occurred whilst this is
+ * in use. Use kbase_pm_context_active_handle_suspend() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+ /** A suspend is not expected/not possible - this is the same as
+ * kbase_pm_context_active() */
+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+ /** If we're suspending, fail and don't increase the active count */
+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+ /** If we're suspending, succeed and allow the active count to increase iff
+ * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+ *
+ * This should only be used when there is a bounded time on the activation
+ * (e.g. guarantee it's going to be idled very soon after) */
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * This returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero Indicates success
+ * @return non-zero Indicates failure because the system is suspending or suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
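+/*
+ * Illustrative usage sketch (hypothetical caller): code that must touch the
+ * GPU but can defer its work across a suspend would use the handler codes
+ * as follows.
+ *
+ *	if (kbase_pm_context_active_handle_suspend(kbdev,
+ *			KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+ *		(system is suspending: queue the work for after resume)
+ *	} else {
+ *		(safe to access GPU registers)
+ *		kbase_pm_context_idle(kbdev);
+ *	}
+ */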
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif /* _KBASE_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100644
index 00000000000..7fb674eded3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
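+/*
+ * Illustrative usage (hypothetical gator-side call; the values are examples
+ * only): enable frame buffer dumping at a given rate using the action codes
+ * above.
+ *
+ *	_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);
+ *	_mali_profiling_control(FBDUMP_CONTROL_RATE, 30);
+ */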
+
+#endif /* _KBASE_PROFILING_GATOR_API_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100644
index 00000000000..b5c7758e1bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1134 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_MASK 0xfe
+#define JOB_TYPE_NULL (1 << 1)
+#define JOB_TYPE_VERTEX (5 << 1)
+#define JOB_TYPE_TILER (7 << 1)
+#define JOB_TYPE_FUSED (8 << 1)
+#define JOB_TYPE_FRAGMENT (9 << 1)
+
+#define JOB_FLAG_DESC_SIZE (1 << 0)
+#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX 0xffff
+
+#define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST (0x03)
+
+struct job_head {
+ u32 status;
+ u32 not_complete_index;
+ u64 fault_addr;
+ u16 flags;
+ u16 index;
+ u16 dependencies[2];
+ union {
+ u64 _64;
+ u32 _32;
+ } next;
+ u32 x[2];
+ union {
+ u64 _64;
+ u32 _32;
+ } fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+ struct job_head *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+ dev_dbg(kctx->kbdev->dev, "addr = %p\n"
+ "status = %x\n"
+ "not_complete_index = %x\n"
+ "fault_addr = %llx\n"
+ "flags = %x\n"
+ "index = %x\n"
+ "dependencies = %x,%x\n",
+ job, job->status, job->not_complete_index,
+ job->fault_addr, job->flags, job->index,
+ job->dependencies[0],
+ job->dependencies[1]);
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ dev_dbg(kctx->kbdev->dev, "next = %llx\n",
+ job->next._64);
+ else
+ dev_dbg(kctx->kbdev->dev, "next = %x\n",
+ job->next._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+ u64 fbd_address, u64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight)
+{
+ struct {
+ u32 padding_1[1];
+ u32 flags;
+ u64 padding_2[2];
+ u64 heap_free_address;
+ u32 padding[8];
+ u32 weights[FBD_HIERARCHY_WEIGHTS];
+ } *fbd_tiler;
+ struct kbase_vmap_struct map;
+
+ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+ fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+ sizeof(*fbd_tiler), &map);
+ if (!fbd_tiler) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+ return -EINVAL;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev,
+ "FBD tiler:\n"
+ "flags = %x\n"
+ "heap_free_address = %llx\n",
+ fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+ if (hierarchy_mask) {
+ u32 weights[HIERARCHY_WEIGHTS];
+ u16 old_hierarchy_mask = fbd_tiler->flags &
+ FBD_HIERARCHY_MASK_MASK;
+ int i, j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (old_hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+ weights[i] = fbd_tiler->weights[j++];
+ } else {
+ weights[i] = default_weight;
+ }
+ }
+
+
+ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n",
+ old_hierarchy_mask, hierarchy_mask);
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+ i, weights[i]);
+
+ j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+ dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+ i, weights[i], j);
+
+ fbd_tiler->weights[j++] = weights[i];
+ }
+ }
+
+ for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+ fbd_tiler->weights[j] = 0;
+
+ fbd_tiler->flags = hierarchy_mask | (1 << 16);
+ }
+
+ fbd_tiler->heap_free_address = tiler_heap_free;
+
+ dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+ fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+ kbase_vunmap(kctx, &map);
+
+ return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+ u64 fbd_address, u64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight)
+{
+ struct kbase_vmap_struct map;
+ struct {
+ u32 padding_0;
+ u32 flags;
+ u64 padding_1[2];
+ u64 heap_free_address;
+ u64 padding_2;
+ u32 weights[FBD_HIERARCHY_WEIGHTS];
+ } *fbd_tiler;
+
+ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+ fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+ sizeof(*fbd_tiler), &map);
+ if (!fbd_tiler) {
+ dev_err(kctx->kbdev->dev,
+ "kbasep_replay_reset_fbd: failed to map fbd\n");
+ return -EINVAL;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+ "flags = %x\n"
+ "heap_free_address = %llx\n",
+ fbd_tiler->flags,
+ fbd_tiler->heap_free_address);
+#endif
+ if (hierarchy_mask) {
+ u32 weights[HIERARCHY_WEIGHTS];
+ u16 old_hierarchy_mask = (fbd_tiler->flags) &
+ FBD_HIERARCHY_MASK_MASK;
+ int i, j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (old_hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+ weights[i] = fbd_tiler->weights[j++];
+ } else {
+ weights[i] = default_weight;
+ }
+ }
+
+
+ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n",
+ old_hierarchy_mask, hierarchy_mask);
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+ i, weights[i]);
+
+ j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+ dev_dbg(kctx->kbdev->dev,
+ " Writing hierarchy level %02d (%08x) to %d\n",
+ i, weights[i], j);
+
+ fbd_tiler->weights[j++] = weights[i];
+ }
+ }
+
+ for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+ fbd_tiler->weights[j] = 0;
+
+ fbd_tiler->flags = hierarchy_mask | (1 << 16);
+ }
+
+ fbd_tiler->heap_free_address = tiler_heap_free;
+
+ kbase_vunmap(kctx, &map);
+
+ return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] job_header Address of job header to reset.
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] job_64 true if this job is using 64-bit
+ * descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+ u64 job_header, u64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight, bool job_64)
+{
+ struct kbase_vmap_struct map;
+ u64 fbd_address;
+
+ if (job_64) {
+ u64 *job_ext;
+
+ job_ext = kbase_vmap(kctx,
+ job_header + JOB_HEADER_64_FBD_OFFSET,
+ sizeof(*job_ext), &map);
+
+ if (!job_ext) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+ return -EINVAL;
+ }
+
+ fbd_address = *job_ext;
+
+ kbase_vunmap(kctx, &map);
+ } else {
+ u32 *job_ext;
+
+ job_ext = kbase_vmap(kctx,
+ job_header + JOB_HEADER_32_FBD_OFFSET,
+ sizeof(*job_ext), &map);
+
+ if (!job_ext) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+ return -EINVAL;
+ }
+
+ fbd_address = *job_ext;
+
+ kbase_vunmap(kctx, &map);
+ }
+
+ if (fbd_address & FBD_TYPE) {
+ return kbasep_replay_reset_mfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight);
+ } else {
+ return kbasep_replay_reset_sfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight);
+ }
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ * the tiler_heap_free parameter, and set the hierarchy level mask to the
+ * hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx Context pointer
+ * @param[in,out] job_header Address of job header to reset. Set to address
+ * of next job header on exit.
+ * @param[in] prev_jc Previous job chain to link to, if this job is
+ * the last in the chain.
+ * @param[in] hw_job_id_offset Offset for HW job IDs
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] first_in_chain true if this job is the first in the chain
+ * @param[in] fragment_chain true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+ u64 *job_header, u64 prev_jc,
+ u64 tiler_heap_free, u16 hierarchy_mask,
+ u32 default_weight, u16 hw_job_id_offset,
+ bool first_in_chain, bool fragment_chain)
+{
+ struct job_head *job;
+ u64 new_job_header;
+ struct kbase_vmap_struct map;
+
+ job = kbase_vmap(kctx, *job_header, sizeof(*job), &map);
+ if (!job) {
+ dev_err(kctx->kbdev->dev,
+ "kbasep_replay_parse_jc: failed to map jc\n");
+ return -EINVAL;
+ }
+
+ dump_job_head(kctx, "Job header:", job);
+
+ if (job->status == JOB_NOT_STARTED && !fragment_chain) {
+ dev_err(kctx->kbdev->dev, "Job already not started\n");
+ goto out_unmap;
+ }
+ job->status = JOB_NOT_STARTED;
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX)
+ job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL;
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) {
+ dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+ goto out_unmap;
+ }
+
+ if (first_in_chain)
+ job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER;
+
+ if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
+ (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
+ (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+ dev_err(kctx->kbdev->dev,
+ "Job indicies/dependencies out of valid range\n");
+ goto out_unmap;
+ }
+
+ if (job->dependencies[0])
+ job->dependencies[0] += hw_job_id_offset;
+ if (job->dependencies[1])
+ job->dependencies[1] += hw_job_id_offset;
+
+ job->index += hw_job_id_offset;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE) {
+ new_job_header = job->next._64;
+ if (!job->next._64)
+ job->next._64 = prev_jc;
+ } else {
+ new_job_header = job->next._32;
+ if (!job->next._32)
+ job->next._32 = prev_jc;
+ }
+ dump_job_head(kctx, "Updated to:", job);
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) {
+ bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0;
+
+ if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+ tiler_heap_free, hierarchy_mask,
+ default_weight, job_64) != 0)
+ goto out_unmap;
+
+ } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) {
+ u64 fbd_address;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ fbd_address = job->fragment_fbd._64;
+ else
+ fbd_address = (u64)job->fragment_fbd._32;
+
+ if (fbd_address & FBD_TYPE) {
+ if (kbasep_replay_reset_mfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight) != 0)
+ goto out_unmap;
+ } else {
+ if (kbasep_replay_reset_sfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight) != 0)
+ goto out_unmap;
+ }
+ }
+
+ kbase_vunmap(kctx, &map);
+
+ *job_header = new_job_header;
+
+ return 0;
+
+out_unmap:
+ kbase_vunmap(kctx, &map);
+ return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] jc Job chain start address
+ * @param[out] hw_job_id Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+ u64 jc, u16 *hw_job_id)
+{
+ while (jc) {
+ struct job_head *job;
+ struct kbase_vmap_struct map;
+
+ dev_dbg(kctx->kbdev->dev,
+ "kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+ job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+ if (!job) {
+ dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+ return -EINVAL;
+ }
+
+ if (job->index > *hw_job_id)
+ *hw_job_id = job->index;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ jc = job->next._64;
+ else
+ jc = job->next._32;
+
+ kbase_vunmap(kctx, &map);
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed in already has a status
+ * of NOT_STARTED.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] jc Job chain to be processed
+ * @param[in] prev_jc Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] hw_job_id_offset Offset for HW job IDs
+ * @param[in] fragment_chain true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+ u64 jc, u64 prev_jc,
+ u64 tiler_heap_free, u16 hierarchy_mask,
+ u32 default_weight, u16 hw_job_id_offset,
+ bool fragment_chain)
+{
+ bool first_in_chain = true;
+ int nr_jobs = 0;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+ jc, hw_job_id_offset);
+
+ while (jc) {
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+ if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+ tiler_heap_free, hierarchy_mask,
+ default_weight, hw_job_id_offset,
+ first_in_chain, fragment_chain) != 0)
+ return -EINVAL;
+
+ first_in_chain = false;
+
+ nr_jobs++;
+ if (fragment_chain &&
+ nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+ dev_err(kctx->kbdev->dev,
+ "Exceeded maximum number of jobs in fragment chain\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom The atom to be reset
+ * @param[in] dep_atom The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+ struct kbase_jd_atom *dep_atom)
+{
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+ kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+ list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ int i;
+
+ for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+ if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+ jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+ dev_dbg(kctx->kbdev->dev,
+ "kbasep_allocate_katom: Allocated atom %d\n",
+ i);
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] atom_id ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+ atom_id);
+
+ while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+ list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+ while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+ list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+ jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+ struct base_jd_atom_v2 *atom,
+ int atom_nr,
+ base_jd_prio prio)
+{
+ atom->nr_extres = 0;
+ atom->extres_list.value = NULL;
+ atom->device_nr = 0;
+ atom->prio = prio;
+ atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0, BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
+
+ atom->udata.blob[0] = 0;
+ atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ * replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom Atom to use for tiler jobs
+ * @param[out] f_atom Atom to use for fragment jobs
+ * @param[in] prio Priority of new atom (inherited from replay soft
+ * job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+ struct base_jd_atom_v2 *t_atom,
+ struct base_jd_atom_v2 *f_atom,
+ base_jd_prio prio)
+{
+ int t_atom_nr, f_atom_nr;
+
+ t_atom_nr = kbasep_allocate_katom(kctx);
+ if (t_atom_nr < 0) {
+ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+ return -EINVAL;
+ }
+
+ f_atom_nr = kbasep_allocate_katom(kctx);
+ if (f_atom_nr < 0) {
+ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+ kbasep_release_katom(kctx, t_atom_nr);
+ return -EINVAL;
+ }
+
+ kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+ kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr, BASE_JD_DEP_TYPE_DATA);
+
+ return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+ u64 next;
+
+ dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+ next = payload->tiler_jc_list;
+
+ while (next) {
+ struct kbase_vmap_struct map;
+ base_jd_replay_jc *jc_struct;
+
+ jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+ if (!jc_struct)
+ return;
+
+ dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+ jc_struct, jc_struct->jc, jc_struct->next);
+
+ next = jc_struct->next;
+
+ kbase_vunmap(kctx, &map);
+ }
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] replay_atom Replay soft job atom
+ * @param[in] t_atom Atom to use for tiler jobs
+ * @param[in] f_atom Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+ struct kbase_jd_atom *replay_atom,
+ struct base_jd_atom_v2 *t_atom,
+ struct base_jd_atom_v2 *f_atom)
+{
+ base_jd_replay_payload *payload;
+ u64 next;
+ u64 prev_jc = 0;
+ u16 hw_job_id_offset = 0;
+ int ret = -EINVAL;
+ struct kbase_vmap_struct map;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+ replay_atom->jc, sizeof(payload));
+
+ payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+
+ if (!payload) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+ return -EINVAL;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+ dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+ "tiler_jc_list = %llx\n"
+ "fragment_jc = %llx\n"
+ "tiler_heap_free = %llx\n"
+ "fragment_hierarchy_mask = %x\n"
+ "tiler_hierarchy_mask = %x\n"
+ "hierarchy_default_weight = %x\n"
+ "tiler_core_req = %x\n"
+ "fragment_core_req = %x\n",
+ payload->tiler_jc_list,
+ payload->fragment_jc,
+ payload->tiler_heap_free,
+ payload->fragment_hierarchy_mask,
+ payload->tiler_hierarchy_mask,
+ payload->hierarchy_default_weight,
+ payload->tiler_core_req,
+ payload->fragment_core_req);
+ payload_dump(kctx, payload);
+#endif
+
+ t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+ f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements */
+ if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+ ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T ||
+ (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+ ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS ||
+ t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+ f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+ dev_err(kctx->kbdev->dev, "Invalid core requirements\n");
+ goto out;
+ }
+
+ /* Process tiler job chains */
+ next = payload->tiler_jc_list;
+ if (!next) {
+ dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+ goto out;
+ }
+
+ while (next) {
+ base_jd_replay_jc *jc_struct;
+ struct kbase_vmap_struct jc_map;
+ u64 jc;
+
+ jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+ if (!jc_struct) {
+ dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+ goto out;
+ }
+
+ jc = jc_struct->jc;
+ next = jc_struct->next;
+ if (next)
+ jc_struct->jc = 0;
+
+ kbase_vunmap(kctx, &jc_map);
+
+ if (jc) {
+ u16 max_hw_job_id = 0;
+
+ if (kbasep_replay_find_hw_job_id(kctx, jc,
+ &max_hw_job_id) != 0)
+ goto out;
+
+ if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+ payload->tiler_heap_free,
+ payload->tiler_hierarchy_mask,
+ payload->hierarchy_default_weight,
+ hw_job_id_offset, false) != 0) {
+ goto out;
+ }
+
+ hw_job_id_offset += max_hw_job_id;
+
+ prev_jc = jc;
+ }
+ }
+ t_atom->jc = prev_jc;
+
+ /* Process fragment job chain */
+ f_atom->jc = payload->fragment_jc;
+ if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+ payload->tiler_heap_free,
+ payload->fragment_hierarchy_mask,
+ payload->hierarchy_default_weight, 0,
+ true) != 0) {
+ goto out;
+ }
+
+ if (!t_atom->jc || !f_atom->jc) {
+ dev_err(kctx->kbdev->dev, "Invalid payload\n");
+ goto out;
+ }
+
+ dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+ t_atom->jc, f_atom->jc);
+ ret = 0;
+
+out:
+ kbase_vunmap(kctx, &map);
+
+ return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_context *kctx;
+ struct kbase_jd_context *jctx;
+ bool need_to_try_schedule_context = false;
+
+ struct base_jd_atom_v2 t_atom, f_atom;
+ struct kbase_jd_atom *t_katom, *f_katom;
+ base_jd_prio atom_prio;
+
+ katom = container_of(data, struct kbase_jd_atom, work);
+ kctx = katom->kctx;
+ jctx = &kctx->jctx;
+
+ mutex_lock(&jctx->lock);
+
+ atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+ if (kbasep_replay_create_atoms(
+ kctx, &t_atom, &f_atom, atom_prio) != 0) {
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ t_katom = &jctx->atoms[t_atom.atom_number];
+ f_katom = &jctx->atoms[f_atom.atom_number];
+
+ if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+ kbasep_release_katom(kctx, t_atom.atom_number);
+ kbasep_release_katom(kctx, f_atom.atom_number);
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ kbasep_replay_reset_softjob(katom, f_katom);
+
+ need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+ if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+ kbasep_release_katom(kctx, f_atom.atom_number);
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+ need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+ if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+ if (katom->event_code != BASE_JD_EVENT_DONE) {
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ need_to_try_schedule_context |= jd_done_nolock(katom);
+ }
+
+ if (need_to_try_schedule_context)
+ kbase_js_sched_all(kctx->kbdev);
+
+ mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This reads the job payload, checks the fault type and source, then decides
+ * whether a replay is required.
+ *
+ * @param[in] katom The atom to be processed
+ * @return true if a replay is required, false otherwise.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct device *dev = kctx->kbdev->dev;
+ base_jd_replay_payload *payload;
+ u64 job_header;
+ u64 job_loop_detect;
+ struct job_head *job;
+ struct kbase_vmap_struct job_map;
+ struct kbase_vmap_struct map;
+ bool err = false;
+
+ /* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+ * if force_replay is enabled.
+ */
+ if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+ return false;
+ } else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+ return true;
+ } else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+ katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+ return true;
+ } else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+ /* No replay for faults of type other than
+ * BASE_JD_EVENT_DATA_INVALID_FAULT.
+ */
+ return false;
+ }
+
+ /* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+ * to find out whether the source of exception is POLYGON_LIST. Replay
+ * is required if the source of fault is POLYGON_LIST.
+ */
+ payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+ if (!payload) {
+ dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+ return false;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+ dev_dbg(dev, "\nPayload structure:\n"
+ "fragment_jc = 0x%llx\n"
+ "fragment_hierarchy_mask = 0x%x\n"
+ "fragment_core_req = 0x%x\n",
+ payload->fragment_jc,
+ payload->fragment_hierarchy_mask,
+ payload->fragment_core_req);
+#endif
+ /* Process fragment job chain */
+ job_header = (u64) payload->fragment_jc;
+ job_loop_detect = job_header;
+ while (job_header) {
+ job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+ if (!job) {
+ dev_err(dev, "failed to map jc\n");
+			/* unmap the payload */
+ kbase_vunmap(kctx, &map);
+ return false;
+ }
+
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(dev, "\njob_head structure:\n"
+ "Source ID:0x%x Access:0x%x Exception:0x%x\n"
+ "at job addr = %p\n"
+ "not_complete_index = 0x%x\n"
+ "fault_addr = 0x%llx\n"
+ "flags = 0x%x\n"
+ "index = 0x%x\n"
+ "dependencies = 0x%x,0x%x\n",
+ JOB_SOURCE_ID(job->status),
+ ((job->status >> 8) & 0x3),
+ (job->status & 0xFF),
+ job,
+ job->not_complete_index,
+ job->fault_addr,
+ job->flags,
+ job->index,
+ job->dependencies[0],
+ job->dependencies[1]);
+#endif
+
+ /* Replay only when the polygon list reader caused the
+ * DATA_INVALID_FAULT */
+ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+ (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) {
+ err = true;
+ kbase_vunmap(kctx, &job_map);
+ break;
+ }
+
+ /* Move on to next fragment job in the list */
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ job_header = job->next._64;
+ else
+ job_header = job->next._32;
+
+ kbase_vunmap(kctx, &job_map);
+
+ /* Job chain loop detected */
+ if (job_header == job_loop_detect)
+ break;
+ }
+
+	/* unmap the payload */
+ kbase_vunmap(kctx, &map);
+
+ return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom The atom to be processed
+ * @return false if the atom has completed
+ * true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_jd_context *jctx = &kctx->jctx;
+
+ if (katom->event_code == BASE_JD_EVENT_DONE) {
+ dev_dbg(kctx->kbdev->dev, "Previous job succeeded - not replaying\n");
+
+ if (katom->retry_count)
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ return false;
+ }
+
+ if (jctx->sched_info.ctx.is_dying) {
+ dev_dbg(kctx->kbdev->dev, "Not replaying; context is dying\n");
+
+ if (katom->retry_count)
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ return false;
+ }
+
+ /* Check job exception type and source before replaying. */
+ if (!kbase_replay_fault_check(katom)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Replay cancelled on event %x\n", katom->event_code);
+ /* katom->event_code is already set to the failure code of the
+ * previous job.
+ */
+ return false;
+ }
+
+ dev_warn(kctx->kbdev->dev, "Replaying jobs retry=%d\n",
+ katom->retry_count);
+
+ katom->retry_count++;
+
+ if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+ dev_err(kctx->kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+ kbase_disjoint_state_down(kctx->kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		 * previous job */
+ return false;
+ }
+
+	/* Only enter the disjoint state once for the duration of the replay */
+ if (katom->retry_count == 1)
+ kbase_disjoint_state_up(kctx->kbdev);
+
+ INIT_WORK(&katom->work, kbase_replay_process_worker);
+ queue_work(kctx->event_workq, &katom->work);
+
+ return true;
+}
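+
+/*
+ * Illustrative caller sketch (assumed, not shown in this file): the soft job
+ * dispatcher is expected to invoke the function above for replay atoms, e.g.:
+ *
+ *	case BASE_JD_REQ_SOFT_REPLAY:
+ *		return kbase_replay_process(katom);
+ */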
diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.c b/drivers/gpu/arm/midgard/mali_kbase_security.c
new file mode 100644
index 00000000000..a0bb3529baf
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_security.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_security.c
+ * Base kernel security capability API
+ */
+
+#include <mali_kbase.h>
+
+static inline bool kbasep_am_i_root(void)
+{
+#if KBASE_HWCNT_DUMP_BYPASS_ROOT
+ return true;
+#else
+ /* Check if root */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+ if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
+ return true;
+#else
+ if (current_euid() == 0)
+ return true;
+#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)*/
+ return false;
+#endif /*KBASE_HWCNT_DUMP_BYPASS_ROOT*/
+}
+
+/**
+ * kbase_security_has_capability - see mali_kbase_security.h for description.
+ */
+
+bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags)
+{
+ /* Assume failure */
+ bool access_allowed = false;
+ bool audit = KBASE_SEC_FLAG_AUDIT & flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ CSTD_UNUSED(kctx);
+
+ /* Detect unsupported flags */
+ KBASE_DEBUG_ASSERT(((~KBASE_SEC_FLAG_MASK) & flags) == 0);
+
+ /* Determine if access is allowed for the given cap */
+ switch (cap) {
+ case KBASE_SEC_MODIFY_PRIORITY:
+ case KBASE_SEC_INSTR_HW_COUNTERS_COLLECT:
+ /* Access is granted only if the caller is privileged */
+ access_allowed = kbasep_am_i_root();
+ break;
+ }
+
+ /* Report problem if requested */
+ if (!access_allowed && audit)
+ dev_warn(kctx->kbdev->dev, "Security capability failure: %d, %p", cap, (void *)kctx);
+
+ return access_allowed;
+}
+
+KBASE_EXPORT_TEST_API(kbase_security_has_capability);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.h b/drivers/gpu/arm/midgard/mali_kbase_security.h
new file mode 100644
index 00000000000..024a7ee1aab
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_security.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_security.h
+ * Base kernel security capability APIs
+ */
+
+#ifndef _KBASE_SECURITY_H_
+#define _KBASE_SECURITY_H_
+
+/* Security flags */
+#define KBASE_SEC_FLAG_NOAUDIT (0u << 0) /* Silently handle privilege failure */
+#define KBASE_SEC_FLAG_AUDIT (1u << 0) /* Write audit message on privilege failure */
+#define KBASE_SEC_FLAG_MASK (KBASE_SEC_FLAG_AUDIT) /* Mask of all valid flag bits */
+
+/* List of unique capabilities that have security access privileges */
+enum kbase_security_capability {
+ /* Instrumentation Counters access privilege */
+ KBASE_SEC_INSTR_HW_COUNTERS_COLLECT = 1,
+ KBASE_SEC_MODIFY_PRIORITY
+ /* Add additional access privileges here */
+};
+
+/**
+ * kbase_security_has_capability - determine whether a task has a particular effective capability
+ * @param[in] kctx The task context.
+ * @param[in] cap The capability to check for.
+ * @param[in] flags Additional configuration information
+ * Such as whether to write an audit message or not.
+ * @return true if success (capability is allowed), false otherwise.
+ */
+
+bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags);
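+
+/*
+ * Illustrative usage sketch (not part of the driver): check for HW counter
+ * access before a dump, writing an audit message on failure.
+ *
+ *   if (!kbase_security_has_capability(kctx,
+ *                   KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
+ *                   KBASE_SEC_FLAG_AUDIT))
+ *           return -EPERM;
+ */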
+
+#endif /* _KBASE_SECURITY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100644
index 00000000000..aa2da3ab7e9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+
+static noinline u32 invoke_smc_fid(u32 function_id, u64 arg0, u64 arg1,
+ u64 arg2, u64 *res0, u64 *res1, u64 *res2)
+{
+ /* 3 args and 3 returns are chosen arbitrarily;
+ * see the SMC Calling Convention for the limits. */
+ asm volatile(
+ "mov x0, %[fid]\n"
+ "mov x1, %[a0]\n"
+ "mov x2, %[a1]\n"
+ "mov x3, %[a2]\n"
+ "smc #0\n"
+ "str x0, [%[re0]]\n"
+ "str x1, [%[re1]]\n"
+ "str x2, [%[re2]]\n"
+ : [fid] "+r" (function_id), [a0] "+r" (arg0),
+ [a1] "+r" (arg1), [a2] "+r" (arg2)
+ : [re0] "r" (res0), [re1] "r" (res1), [re2] "r" (res2)
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13",
+ "x14", "x15", "x16", "x17");
+ return function_id;
+}
+
+void kbase_invoke_smc_fid(u32 fid)
+{
+ u64 res0, res1, res2;
+
+ /* Must be a fast call (bit 31 set) */
+ KBASE_DEBUG_ASSERT(fid & SMC_FAST_CALL);
+ /* bits 16-23 must be zero for fast calls */
+ KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+ invoke_smc_fid(fid, 0, 0, 0, &res0, &res1, &res2);
+}
+
+void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1,
+ u64 arg2, u64 *res0, u64 *res1, u64 *res2)
+{
+ u32 fid = 0;
+
+ /* Only the six OEN bits may be set. */
+ KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+ fid |= SMC_FAST_CALL; /* Bit 31: fast call */
+ /* Bit 30: 1=SMC64, 0=SMC32; left clear, so this is an SMC32 call */
+ fid |= oen; /* Bits 29:24: OEN */
+ /* Bits 23:16: must be zero for fast calls */
+ fid |= function_number; /* Bits 15:0: function number */
+
+ invoke_smc_fid(fid, arg0, arg1, arg2, res0, res1, res2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.h b/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100644
index 00000000000..2d0f5086f75
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1u << 31)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
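+
+/*
+ * Resulting fast-call function ID layout (per the SMC Calling Convention),
+ * as assembled by kbase_invoke_smc():
+ *
+ *   fid = SMC_FAST_CALL | oen | function_number
+ *
+ * e.g. OEN = SIP (2) with function number 0x10 gives fid = 0x82000010
+ * (the function number here is purely illustrative).
+ */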
+
+
+/**
+ * kbase_invoke_smc_fid - Does a secure monitor call with the given function_id
+ * @function_id: The SMC function to call, see SMC Calling convention.
+ */
+void kbase_invoke_smc_fid(u32 function_id);
+
+/**
+ * kbase_invoke_smc - Does a secure monitor call with the given parameters.
+ * See the SMC Calling Convention for details.
+ * @oen: Owning Entity Number (SIP, STD etc).
+ * @function_number: ID specifying which function within the OEN.
+ * @arg0: argument 0 to pass in the SMC call.
+ * @arg1: argument 1 to pass in the SMC call.
+ * @arg2: argument 2 to pass in the SMC call.
+ * @res0: result 0 returned from the SMC call.
+ * @res1: result 1 returned from the SMC call.
+ * @res2: result 2 returned from the SMC call.
+ */
+void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1,
+ u64 arg2, u64 *res0, u64 *res1, u64 *res2);
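+
+/*
+ * Illustrative call (the function number is hypothetical):
+ *
+ *   u64 res0, res1, res2;
+ *
+ *   kbase_invoke_smc(SMC_OEN_SIP, 0x10, 0, 0, 0, &res0, &res1, &res2);
+ */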
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100644
index 00000000000..36a61308dc8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,442 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+#include <mali_kbase_hwaccess_time.h>
+#include <linux/version.h>
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK ((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+ struct kbase_va_region *reg;
+ phys_addr_t addr = 0;
+ u64 pfn;
+ u32 offset;
+ char *page;
+ struct timespec ts;
+ struct base_dump_cpu_gpu_counters data;
+ u64 system_time;
+ u64 cycle_counter;
+ u64 jc = katom->jc;
+ struct kbase_context *kctx = katom->kctx;
+ int pm_active_err;
+
+ memset(&data, 0, sizeof(data));
+
+ /* Take the PM active reference as late as possible - otherwise, it could
+ * delay suspend until we process the atom (which may be at the end of a
+ * long chain of dependencies). */
+ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+ if (pm_active_err) {
+ struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+ /* We're suspended - queue this on the list of suspended jobs
+ * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */
+ mutex_lock(&js_devdata->runpool_mutex);
+ list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ return pm_active_err;
+ }
+
+ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+ &ts);
+
+ kbase_pm_context_idle(kctx->kbdev);
+
+ data.sec = ts.tv_sec;
+ data.usec = ts.tv_nsec / 1000;
+ data.system_time = system_time;
+ data.cycle_counter = cycle_counter;
+
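+ /* jc holds the address of the dump target: split it into a page
+ * frame number and an offset within that page. */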
+ pfn = jc >> PAGE_SHIFT;
+ offset = jc & ~PAGE_MASK;
+
+ /* Assume this atom will be cancelled until we know otherwise */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ if (offset > 0x1000 - sizeof(data)) {
+ /* Wouldn't fit in the page */
+ return 0;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc);
+ if (reg &&
+ (reg->flags & KBASE_REG_GPU_WR) &&
+ reg->cpu_alloc && reg->cpu_alloc->pages)
+ addr = reg->cpu_alloc->pages[pfn - reg->start_pfn];
+
+ kbase_gpu_vm_unlock(kctx);
+ if (!addr)
+ return 0;
+
+ page = kmap(pfn_to_page(PFN_DOWN(addr)));
+ if (!page)
+ return 0;
+
+ kbase_sync_single_for_cpu(katom->kctx->kbdev,
+ kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+ offset, sizeof(data),
+ DMA_BIDIRECTIONAL);
+
+ memcpy(page + offset, &data, sizeof(data));
+
+ kbase_sync_single_for_device(katom->kctx->kbdev,
+ kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+ offset, sizeof(data),
+ DMA_BIDIRECTIONAL);
+
+ kunmap(pfn_to_page(PFN_DOWN(addr)));
+
+ /* Atom was fine - mark it as done */
+ katom->event_code = BASE_JD_EVENT_DONE;
+
+ return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited)
+ *
+ * @param katom The atom to complete
+ */
+static void complete_soft_job(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+
+ mutex_lock(&kctx->jctx.lock);
+ list_del(&katom->dep_item[0]);
+ kbase_finish_soft_job(katom);
+ if (jd_done_nolock(katom))
+ kbase_js_sched_all(kctx->kbdev);
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+ struct sync_pt *pt;
+ struct sync_timeline *timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+ if (katom->fence->num_fences != 1) {
+#endif
+ /* Not exactly one item in the list - so it didn't (directly) come from us */
+ return BASE_JD_EVENT_JOB_CANCELLED;
+ }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+#else
+ pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+ timeline = sync_pt_parent(pt);
+
+ if (!kbase_sync_timeline_is_ours(timeline)) {
+ /* Fence has a sync_pt which isn't ours! */
+ return BASE_JD_EVENT_JOB_CANCELLED;
+ }
+
+ kbase_sync_signal_pt(pt, result);
+
+ sync_timeline_signal(timeline);
+
+ return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_context *kctx;
+
+ katom = container_of(data, struct kbase_jd_atom, work);
+ kctx = katom->kctx;
+
+ complete_soft_job(katom);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+ struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(NULL != katom);
+
+ kctx = katom->kctx;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ /* Propagate the fence status to the atom.
+ * If negative then cancel this atom and its dependencies.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ if (fence->status < 0)
+#else
+ if (atomic_read(&fence->status) < 0)
+#endif
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ /* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+ *
+ * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+ * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+ */
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, kbase_fence_wait_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+ int ret;
+
+ KBASE_DEBUG_ASSERT(NULL != katom);
+ KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+ sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+ ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+ if (ret == 1) {
+ /* Already signalled */
+ return 0;
+ } else if (ret < 0) {
+ goto cancel_atom;
+ }
+ return 1;
+
+ cancel_atom:
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ /* We should cause the dependent jobs in the bag to be failed;
+ * to do this we schedule the work queue to complete this job */
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, kbase_fence_wait_worker);
+ queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+ return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+ if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+ /* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+ return;
+ }
+
+ /* Wait was cancelled - zap the atoms */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ kbase_finish_soft_job(katom);
+
+ if (jd_done_nolock(katom))
+ kbase_js_sched_all(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ KBASE_DEBUG_ASSERT(katom->fence != NULL);
+ katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+ /* Release the reference as we don't need it any more */
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ return kbase_fence_wait(katom);
+#endif /* CONFIG_SYNC */
+ case BASE_JD_REQ_SOFT_REPLAY:
+ return kbase_replay_process(katom);
+ }
+
+ /* Atom is complete */
+ return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ kbase_fence_cancel_wait(katom);
+ break;
+#endif
+ default:
+ /* This soft-job doesn't support cancellation! */
+ KBASE_DEBUG_ASSERT(0);
+ }
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ {
+ if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+ return -EINVAL;
+ }
+ break;
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ {
+ struct base_fence fence;
+ int fd;
+
+ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+ return -EINVAL;
+
+ fd = kbase_stream_create_fence(fence.basep.stream_fd);
+ if (fd < 0)
+ return -EINVAL;
+
+ katom->fence = sync_fence_fdget(fd);
+
+ if (katom->fence == NULL) {
+ /* The only way the fence can be NULL is if userspace closed it for us.
+ * So we don't need to clean it up. */
+ return -EINVAL;
+ }
+ fence.basep.fd = fd;
+ if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+ katom->fence = NULL;
+ sys_close(fd);
+ return -EINVAL;
+ }
+ }
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ {
+ struct base_fence fence;
+
+ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+ return -EINVAL;
+
+ /* Get a reference to the fence object */
+ katom->fence = sync_fence_fdget(fence.basep.fd);
+ if (katom->fence == NULL)
+ return -EINVAL;
+ }
+ break;
+#endif /* CONFIG_SYNC */
+ case BASE_JD_REQ_SOFT_REPLAY:
+ break;
+ default:
+ /* Unsupported soft-job */
+ return -EINVAL;
+ }
+ return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ /* Nothing to do */
+ break;
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ /* If fence has not yet been signalled, do it now */
+ if (katom->fence) {
+ kbase_fence_trigger(katom, katom->event_code ==
+ BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ }
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ /* Release the reference to the fence object */
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ break;
+#endif /* CONFIG_SYNC */
+ }
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+ LIST_HEAD(local_suspended_soft_jobs);
+ struct kbase_jd_atom *tmp_iter;
+ struct kbase_jd_atom *katom_iter;
+ struct kbasep_js_device_data *js_devdata;
+ bool resched = false;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ js_devdata = &kbdev->js_data;
+
+ /* Move out the entire list */
+ mutex_lock(&js_devdata->runpool_mutex);
+ list_splice_init(&js_devdata->suspended_soft_jobs_list,
+ &local_suspended_soft_jobs);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ /*
+ * Each atom must be detached from the list and run separately -
+ * it could be re-added to the old list, but this is unlikely
+ */
+ list_for_each_entry_safe(katom_iter, tmp_iter,
+ &local_suspended_soft_jobs, dep_item[1]) {
+ struct kbase_context *kctx = katom_iter->kctx;
+
+ mutex_lock(&kctx->jctx.lock);
+
+ /* Remove from the global list */
+ list_del(&katom_iter->dep_item[1]);
+ /* Remove from the context's list of waiting soft jobs */
+ list_del(&katom_iter->dep_item[0]);
+
+ if (kbase_process_soft_job(katom_iter) == 0) {
+ kbase_finish_soft_job(katom_iter);
+ resched |= jd_done_nolock(katom_iter);
+ } else {
+ /* The job has not completed */
+ KBASE_DEBUG_ASSERT((katom_iter->core_req &
+ BASEP_JD_REQ_ATOM_TYPE)
+ != BASE_JD_REQ_SOFT_REPLAY);
+ list_add_tail(&katom_iter->dep_item[0],
+ &kctx->waiting_soft_jobs);
+ }
+
+ mutex_unlock(&kctx->jctx.lock);
+ }
+
+ if (resched)
+ kbase_js_sched_all(kbdev);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.c b/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100644
index 00000000000..c5fb9815255
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_SYNC
+
+#include <linux/seq_file.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+ struct sync_timeline timeline;
+ atomic_t counter;
+ atomic_t signalled;
+};
+
+struct mali_sync_pt {
+ struct sync_pt pt;
+ int order;
+ int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+ return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+ return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_pt *new_mpt;
+ struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt));
+
+ if (!new_pt)
+ return NULL;
+
+ new_mpt = to_mali_sync_pt(new_pt);
+ new_mpt->order = mpt->order;
+ new_mpt->result = mpt->result;
+
+ return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+ int result = mpt->result;
+
+ int diff = atomic_read(&mtl->signalled) - mpt->order;
+
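+ /* A non-negative diff means the timeline has already signalled up
+ * to (at least) this point's order. */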
+ if (diff >= 0)
+ return (result < 0) ? result : 1;
+
+ return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+ struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+ struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+ int diff = ma->order - mb->order;
+
+ if (diff == 0)
+ return 0;
+
+ return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+ int size)
+{
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+ snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+ snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+ .driver_name = "Mali",
+ .dup = timeline_dup,
+ .has_signaled = timeline_has_signaled,
+ .compare = timeline_compare,
+ .timeline_value_str = timeline_value_str,
+ .pt_value_str = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+ return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+ struct sync_timeline *tl;
+ struct mali_sync_timeline *mtl;
+
+ tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+ if (!tl)
+ return NULL;
+
+ /* Set the counter in our private struct */
+ mtl = to_mali_sync_timeline(tl);
+ atomic_set(&mtl->counter, 0);
+ atomic_set(&mtl->signalled, 0);
+
+ return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+ struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+ struct mali_sync_pt *mpt;
+
+ if (!pt)
+ return NULL;
+
+ mpt = to_mali_sync_pt(pt);
+ mpt->order = atomic_inc_return(&mtl->counter);
+ mpt->result = 0;
+
+ return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+ int signalled;
+ int diff;
+
+ mpt->result = result;
+
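+ /* Advance the timeline's 'signalled' counter to this point's order
+ * with a cmpxchg loop, so that concurrent signallers can never move
+ * it backwards. */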
+ do {
+ signalled = atomic_read(&mtl->signalled);
+
+ diff = signalled - mpt->order;
+
+ if (diff > 0) {
+ /* The timeline is already at or ahead of this point.
+ * This should not happen unless userspace has been
+ * signalling fences out of order, so warn but don't
+ * violate the sync_pt API.
+ * The warning is only in debug builds to prevent
+ * a malicious user being able to spam dmesg.
+ */
+#ifdef CONFIG_MALI_DEBUG
+ pr_err("Fences were triggered in a different order to allocation!");
+#endif /* CONFIG_MALI_DEBUG */
+ return;
+ }
+ } while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+#endif /* CONFIG_SYNC */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100644
index 00000000000..6d8e34d3c3a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ * Sync timeline and fence helper API for the kbase driver.
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatibility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+ return pt->parent;
+}
+#endif
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+int kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate that a fd refers to a valid fence.
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+int kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in
+ * debug builds and the out-of-order signal attempts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
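+
+/*
+ * Illustrative lifetime of these objects (error handling omitted; the
+ * timeline name is made up):
+ *
+ *   struct sync_timeline *tl = kbase_sync_timeline_alloc("mali-ctx");
+ *   struct sync_pt *pt = kbase_sync_pt_alloc(tl);
+ *   ...
+ *   kbase_sync_signal_pt(pt, 0);
+ *   sync_timeline_signal(tl);
+ */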
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100644
index 00000000000..ddd0847a69c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ * User-space facing interface to the stream and fence helpers.
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+#include <mali_base_kernel_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+ struct sync_timeline *tl;
+
+ tl = (struct sync_timeline *)file->private_data;
+ BUG_ON(!tl);
+ sync_timeline_destroy(tl);
+ return 0;
+}
+
+static const struct file_operations stream_fops = {
+ .owner = THIS_MODULE,
+ .release = kbase_stream_close,
+};
+
+int kbase_stream_create(const char *name, int *const out_fd)
+{
+ struct sync_timeline *tl;
+
+ BUG_ON(!out_fd);
+
+ tl = kbase_sync_timeline_alloc(name);
+ if (!tl)
+ return -EINVAL;
+
+ *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+ if (*out_fd < 0) {
+ sync_timeline_destroy(tl);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int kbase_stream_create_fence(int tl_fd)
+{
+ struct sync_timeline *tl;
+ struct sync_pt *pt;
+ struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+ struct files_struct *files;
+ struct fdtable *fdt;
+#endif
+ int fd;
+ struct file *tl_file;
+
+ tl_file = fget(tl_fd);
+ if (tl_file == NULL)
+ return -EBADF;
+
+ if (tl_file->f_op != &stream_fops) {
+ fd = -EBADF;
+ goto out;
+ }
+
+ tl = tl_file->private_data;
+
+ pt = kbase_sync_pt_alloc(tl);
+ if (!pt) {
+ fd = -EFAULT;
+ goto out;
+ }
+
+ fence = sync_fence_create("mali_fence", pt);
+ if (!fence) {
+ sync_pt_free(pt);
+ fd = -EFAULT;
+ goto out;
+ }
+
+ /* from here the fence owns the sync_pt */
+
+ /* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ sync_fence_put(fence);
+ goto out;
+ }
+#else
+ fd = get_unused_fd();
+ if (fd < 0) {
+ sync_fence_put(fence);
+ goto out;
+ }
+
+ files = current->files;
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ __set_close_on_exec(fd, fdt);
+#else
+ FD_SET(fd, fdt->close_on_exec);
+#endif
+ spin_unlock(&files->file_lock);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+ /* bind fence to the new fd */
+ sync_fence_install(fence, fd);
+
+ out:
+ fput(tl_file);
+
+ return fd;
+}
+
+int kbase_fence_validate(int fd)
+{
+ struct sync_fence *fence;
+
+ fence = sync_fence_fdget(fd);
+ if (!fence)
+ return -EINVAL;
+
+ sync_fence_put(fence);
+ return 0;
+}
+
+#endif /* CONFIG_SYNC */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100644
index 00000000000..871dc316c99
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,1695 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of timeline stream. */
+#define KBASEP_TLSTREAM_VERSION 1
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX 64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC 1000000000ull /* ns */
+
+/* The number of nanoseconds to wait before autoflushing the stream. */
+#define AUTOFLUSH_TIMEOUT (2ull * NSECS_IN_SEC) /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE 2048 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT 16
+
+/* The number of bytes reserved for packet header.
+ * This value must be defined according to the MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * This value must be defined according to the MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS 0
+#define PACKET_STREAMID_LEN 8
+#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN 8
+#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN 3
+#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN 7
+#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN 6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS 0
+#define PACKET_LENGTH_LEN 24
+#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN 1
+#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN 7
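+
+/* Resulting header word layouts (most significant bit on the left):
+ * word0: [family:6][class:7][type:3][rsvd1:8][stream_id:8]
+ * word1: [rsvd2:7][seqbit:1][length:24] */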
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+ TL_STREAM_TYPE_OBJ_HEADER,
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ TL_STREAM_TYPE_OBJ,
+ TL_STREAM_TYPE_AUX_HEADER,
+ TL_STREAM_TYPE_AUX,
+
+ TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+ TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+ TL_PACKET_FAMILY_TL = 1, /* timeline packets */
+
+ TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+ TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+ TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+ TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */
+ TL_PACKET_TYPE_BODY = 1, /* stream's body */
+ TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id {
+ /* Timeline object events. */
+ KBASE_TL_NEW_CTX,
+ KBASE_TL_NEW_GPU,
+ KBASE_TL_NEW_LPU,
+ KBASE_TL_NEW_ATOM,
+ KBASE_TL_DEL_CTX,
+ KBASE_TL_DEL_ATOM,
+ KBASE_TL_LIFELINK_LPU_GPU,
+ KBASE_TL_RET_GPU_CTX,
+ KBASE_TL_RET_ATOM_CTX,
+ KBASE_TL_RET_ATOM_LPU,
+ KBASE_TL_NRET_GPU_CTX,
+ KBASE_TL_NRET_ATOM_CTX,
+ KBASE_TL_NRET_ATOM_LPU,
+
+ /* Timeline non-object events. */
+ KBASE_AUX_PM_STATE,
+ KBASE_AUX_JOB_SOFTSTOP,
+ KBASE_AUX_PAGEFAULT,
+ KBASE_AUX_PAGESALLOC
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero, the stream's packets are sequentially numbered
+ * @last_write_time: timestamp indicating last write
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in the stream must bear a timestamp that is
+ * greater than the one in the previous message in the same stream. For this
+ * reason the lock is held throughout the process of message creation. Each
+ * stream contains a set of buffers, each of which holds one MIPE packet. If
+ * there is not enough free space to store an incoming message, the oldest
+ * buffer is discarded. Each packet in a timeline body stream has a sequence
+ * number embedded (this value must increment monotonically and is used by
+ * the packet receiver to discover buffer overflows).
+ */
+struct tl_stream {
+ spinlock_t lock;
+
+ struct {
+ atomic_t size; /* number of bytes in buffer */
+ char data[PACKET_SIZE]; /* buffer's data */
+ } buffer[PACKET_COUNT];
+
+ atomic_t wbi;
+ atomic_t rbi;
+
+ int numbered;
+ u64 last_write_time;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id: tracepoint ID identifying message in stream
+ * @id_str: human readable version of tracepoint ID
+ * @name: tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+ u32 id;
+ const char *id_str;
+ const char *name;
+ const char *arg_types;
+ const char *arg_names;
+};
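+
+/* Note: arg_types appears to encode one character per argument after the
+ * leading '@' (p = pointer, I = u32, L = u64, l = s64), to be interpreted
+ * against the MIPE documentation; inferred from the descriptors below. */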
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by the kernel.
+ * The kernel emits only streams containing either timeline object events or
+ * auxiliary events. All streams have a stream id value of 1 (as opposed to
+ * user space streams, which have a value of 0). */
+static const struct {
+ enum tl_packet_family pkt_family;
+ enum tl_packet_class pkt_class;
+ enum tl_packet_type pkt_type;
+ unsigned int stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER, 1},
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1},
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER, 1},
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Indicator of whether the timeline stream file descriptor is already used. */
+static atomic_t tlstream_busy = {0};
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+ struct file *filp,
+ char __user *buffer,
+ size_t size,
+ loff_t *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+ .release = kbasep_tlstream_release,
+ .read = kbasep_tlstream_read,
+ .poll = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+ {
+ KBASE_TL_NEW_CTX,
+ __stringify(KBASE_TL_NEW_CTX),
+ "object ctx is created",
+ "@pI",
+ "ctx,ctx_nr"
+ },
+ {
+ KBASE_TL_NEW_GPU,
+ __stringify(KBASE_TL_NEW_GPU),
+ "object gpu is created",
+ "@pII",
+ "gpu,gpu_id,core_count"
+ },
+ {
+ KBASE_TL_NEW_LPU,
+ __stringify(KBASE_TL_NEW_LPU),
+ "object lpu is created",
+ "@pII",
+ "lpu,lpu_nr,lpu_fn"
+ },
+ {
+ KBASE_TL_NEW_ATOM,
+ __stringify(KBASE_TL_NEW_ATOM),
+ "object atom is created",
+ "@pI",
+ "atom,atom_nr"
+ },
+ {
+ KBASE_TL_DEL_CTX,
+ __stringify(KBASE_TL_DEL_CTX),
+ "context is destroyed",
+ "@p",
+ "context"
+ },
+ {
+ KBASE_TL_DEL_ATOM,
+ __stringify(KBASE_TL_DEL_ATOM),
+ "atom is destroyed",
+ "@p",
+ "atom"
+ },
+ {
+ KBASE_TL_LIFELINK_LPU_GPU,
+ __stringify(KBASE_TL_LIFELINK_LPU_GPU),
+ "lpu is deleted with gpu",
+ "@pp",
+ "lpu,gpu"
+ },
+ {
+ KBASE_TL_RET_GPU_CTX,
+ __stringify(KBASE_TL_RET_GPU_CTX),
+ "gpu is retained by context",
+ "@pp",
+ "gpu,ctx"
+ },
+ {
+ KBASE_TL_RET_ATOM_CTX,
+ __stringify(KBASE_TL_RET_ATOM_CTX),
+ "atom is retained by context",
+ "@pp",
+ "atom,ctx"
+ },
+ {
+ KBASE_TL_RET_ATOM_LPU,
+ __stringify(KBASE_TL_RET_ATOM_LPU),
+ "atom is retained by lpu",
+ "@pp",
+ "atom,lpu"
+ },
+ {
+ KBASE_TL_NRET_GPU_CTX,
+ __stringify(KBASE_TL_NRET_GPU_CTX),
+ "gpu is released by context",
+ "@pp",
+ "gpu,ctx"
+ },
+ {
+ KBASE_TL_NRET_ATOM_CTX,
+ __stringify(KBASE_TL_NRET_ATOM_CTX),
+ "atom is released by context",
+ "@pp",
+ "atom,context"
+ },
+ {
+ KBASE_TL_NRET_ATOM_LPU,
+ __stringify(KBASE_TL_NRET_ATOM_LPU),
+ "atom is released by lpu",
+ "@pp",
+ "atom,lpu"
+ },
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+ {
+ KBASE_AUX_PM_STATE,
+ __stringify(KBASE_AUX_PM_STATE),
+ "PM state",
+ "@IL",
+ "core_type,core_state_bitset"
+ },
+ {
+ KBASE_AUX_JOB_SOFTSTOP,
+ __stringify(KBASE_AUX_JOB_SOFTSTOP),
+ "Job soft stop",
+ "@I",
+ "tag_id"
+ },
+ {
+ KBASE_AUX_PAGEFAULT,
+ __stringify(KBASE_AUX_PAGEFAULT),
+ "Page fault",
+ "@II",
+ "as_id,page_cnt"
+ },
+ {
+ KBASE_AUX_PAGESALLOC,
+ __stringify(KBASE_AUX_PAGESALLOC),
+ "Total alloc pages change",
+ "@l",
+ "page_cnt_change"
+ }
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns a timestamp value based on the raw monotonic timer. The
+ * value will wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+ struct timespec ts;
+ u64 timestamp;
+
+ getrawmonotonic(&ts);
+ timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+ return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos: position in the buffer where to place data
+ * @bytes: pointer to buffer holding data
+ * @len: length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+ char *buffer,
+ size_t pos,
+ const void *bytes,
+ size_t len)
+{
+ KBASE_DEBUG_ASSERT(buffer);
+ KBASE_DEBUG_ASSERT(bytes);
+
+ memcpy(&buffer[pos], bytes, len);
+
+ return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer: buffer where data will be written
+ * @pos: position in the buffer where to place data
+ * @string: pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+ char *buffer,
+ size_t pos,
+ const char *string,
+ size_t max_write_size)
+{
+ u32 string_len;
+
+ KBASE_DEBUG_ASSERT(buffer);
+ KBASE_DEBUG_ASSERT(string);
+ /* A timeline string consists of at least the string length and a
+ * NUL terminator. */
+ KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+ max_write_size -= sizeof(string_len);
+
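+ /* The string is stored as a u32 byte count followed by the
+ * NUL-terminated characters themselves. */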
+ string_len = strlcpy(
+ &buffer[pos + sizeof(string_len)],
+ string,
+ max_write_size);
+ string_len += sizeof(char);
+
+ /* Make sure that the source string fits into the buffer. */
+ KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+ /* Update string length. */
+ memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+ return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos: position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+ u64 timestamp = kbasep_tlstream_get_timestamp();
+
+ return kbasep_tlstream_write_bytes(
+ buffer, pos,
+ &timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word: pointer to the words being modified
+ * @value: value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+ u32 *word,
+ u32 value,
+ unsigned int bitpos,
+ unsigned int bitlen)
+{
+ const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+ KBASE_DEBUG_ASSERT(word);
+ KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+ KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+ *word &= ~mask;
+ *word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer: pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type: packet's type
+ * @pkt_class: packet's class
+ * @stream_id: stream id
+ * @numbered: non-zero if this stream is numbered
+ *
+ * Function sets up the immutable part of the packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+ char *buffer,
+ enum tl_packet_family pkt_family,
+ enum tl_packet_class pkt_class,
+ enum tl_packet_type pkt_type,
+ unsigned int stream_id,
+ int numbered)
+{
+ u32 word0 = 0;
+ u32 word1 = 0;
+
+ KBASE_DEBUG_ASSERT(buffer);
+ KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+ KBASE_DEBUG_ASSERT(
+ (pkt_type == TL_PACKET_TYPE_HEADER) ||
+ (pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+ (pkt_type == TL_PACKET_TYPE_BODY));
+ KBASE_DEBUG_ASSERT(
+ (pkt_class == TL_PACKET_CLASS_OBJ) ||
+ (pkt_class == TL_PACKET_CLASS_AUX));
+
+ kbasep_tlstream_put_bits(
+ &word0, pkt_family,
+ PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+ kbasep_tlstream_put_bits(
+ &word0, pkt_class,
+ PACKET_CLASS_POS, PACKET_CLASS_LEN);
+ kbasep_tlstream_put_bits(
+ &word0, pkt_type,
+ PACKET_TYPE_POS, PACKET_TYPE_LEN);
+ kbasep_tlstream_put_bits(
+ &word0, stream_id,
+ PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+ if (numbered)
+ kbasep_tlstream_put_bits(
+ &word1, 1,
+ PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+ memcpy(&buffer[0], &word0, sizeof(word0));
+ memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer: pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates the mutable part of the packet header in the given buffer.
+ * Note that the value of data_size must not include the size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+ char *buffer,
+ size_t data_size)
+{
+ u32 word0;
+ u32 word1;
+
+ KBASE_DEBUG_ASSERT(buffer);
+ CSTD_UNUSED(word0);
+
+ memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+ kbasep_tlstream_put_bits(
+ &word1, data_size,
+ PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+ memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer: pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+ KBASE_DEBUG_ASSERT(buffer);
+
+ memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream: pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+ unsigned int i;
+
+ for (i = 0; i < PACKET_COUNT; i++) {
+ if (stream->numbered)
+ atomic_set(
+ &stream->buffer[i].size,
+ PACKET_HEADER_SIZE +
+ PACKET_NUMBER_SIZE);
+ else
+ atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+ }
+
+ atomic_set(&stream->wbi, 0);
+ atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream: pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+ struct tl_stream *stream,
+ enum tl_stream_type stream_type)
+{
+ unsigned int i;
+
+ KBASE_DEBUG_ASSERT(stream);
+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+ spin_lock_init(&stream->lock);
+
+ /* All packets carrying tracepoints shall be numbered. */
+ if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+ stream->numbered = 1;
+ else
+ stream->numbered = 0;
+
+ for (i = 0; i < PACKET_COUNT; i++)
+ kbasep_tlstream_packet_header_setup(
+ stream->buffer[i].data,
+ tl_stream_cfg[stream_type].pkt_family,
+ tl_stream_cfg[stream_type].pkt_class,
+ tl_stream_cfg[stream_type].pkt_type,
+ tl_stream_cfg[stream_type].stream_id,
+ stream->numbered);
+
+ kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+ KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream: pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size: length of data stored in current buffer
+ *
+ * Function updates the currently written buffer with the packet header. Then
+ * the write index is incremented and the buffer is handed over to user space.
+ * The size of the new, empty buffer is returned.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning: User must update the stream structure with the returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+ struct tl_stream *stream,
+ unsigned int wb_idx_raw,
+ unsigned int wb_size)
+{
+ unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+ unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+ kbasep_tlstream_packet_header_update(
+ stream->buffer[wb_idx].data,
+ wb_size - PACKET_HEADER_SIZE);
+
+ if (stream->numbered)
+ kbasep_tlstream_packet_number_update(
+ stream->buffer[wb_idx].data,
+ wb_idx_raw);
+
+ /* Increasing write buffer index will expose this packet to the reader.
+ * As stream->lock is not taken on reader side we must make sure memory
+ * is updated correctly before this will happen. */
+ smp_wmb();
+ wb_idx_raw++;
+ atomic_set(&stream->wbi, wb_idx_raw);
+
+ /* Inform user that packets are ready for reading. */
+ wake_up_interruptible(&tl_event_queue);
+
+ /* Detect and mark overflow in this stream. */
+ if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+ /* Reader side depends on this increment to correctly handle
+ * overflows. The value shall be updated only if it was not
+ * modified by the reader. The data holding buffer will not be
+ * updated before stream->lock is released, however size of the
+ * buffer will. Make sure this increment is globally visible
+ * before information about selected write buffer size. */
+ atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+ }
+
+ wb_size = PACKET_HEADER_SIZE;
+ if (stream->numbered)
+ wb_size += PACKET_NUMBER_SIZE;
+
+ return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserve buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size: message size
+ * @flags: pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ * Only atomic operations are allowed while stream is locked
+ * (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+ enum tl_stream_type stream_type,
+ size_t msg_size,
+ unsigned long *flags)
+{
+ struct tl_stream *stream;
+ unsigned int wb_idx_raw;
+ unsigned int wb_idx;
+ size_t wb_size;
+
+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+ KBASE_DEBUG_ASSERT(
+ PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+ msg_size);
+
+ stream = tl_stream[stream_type];
+
+ spin_lock_irqsave(&stream->lock, *flags);
+
+ wb_idx_raw = atomic_read(&stream->wbi);
+ wb_idx = wb_idx_raw % PACKET_COUNT;
+ wb_size = atomic_read(&stream->buffer[wb_idx].size);
+
+ /* Select next buffer if data will not fit into current one. */
+ if (PACKET_SIZE < wb_size + msg_size) {
+ wb_size = kbasep_tlstream_msgbuf_submit(
+ stream, wb_idx_raw, wb_size);
+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+ }
+
+ /* Reserve space in selected buffer. */
+ atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+ atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+ return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type: type of the stream that shall be released
+ * @flags: value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+ enum tl_stream_type stream_type,
+ unsigned long flags)
+{
+ struct tl_stream *stream;
+
+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+ stream = tl_stream[stream_type];
+ stream->last_write_time = kbasep_tlstream_get_timestamp();
+
+ spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype: type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+ struct tl_stream *stream = tl_stream[stype];
+ unsigned long flags;
+ unsigned int wb_idx_raw;
+ unsigned int wb_idx;
+ size_t wb_size;
+ size_t min_size = PACKET_HEADER_SIZE;
+
+ if (stream->numbered)
+ min_size += PACKET_NUMBER_SIZE;
+
+ spin_lock_irqsave(&stream->lock, flags);
+
+ wb_idx_raw = atomic_read(&stream->wbi);
+ wb_idx = wb_idx_raw % PACKET_COUNT;
+ wb_size = atomic_read(&stream->buffer[wb_idx].size);
+
+ if (wb_size > min_size) {
+ wb_size = kbasep_tlstream_msgbuf_submit(
+ stream, wb_idx_raw, wb_size);
+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+ atomic_set(&stream->buffer[wb_idx].size, wb_size);
+ }
+ spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data: unused
+ *
+ * Timer is executed periodically to check if any of the streams contains a
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+ u64 timestamp = kbasep_tlstream_get_timestamp();
+ enum tl_stream_type stype;
+ int rcode;
+
+ CSTD_UNUSED(data);
+
+ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+ struct tl_stream *stream = tl_stream[stype];
+ unsigned long flags;
+ unsigned int wb_idx_raw;
+ unsigned int wb_idx;
+ size_t wb_size;
+ size_t min_size = PACKET_HEADER_SIZE;
+
+ if (stream->numbered)
+ min_size += PACKET_NUMBER_SIZE;
+
+ spin_lock_irqsave(&stream->lock, flags);
+
+ wb_idx_raw = atomic_read(&stream->wbi);
+ wb_idx = wb_idx_raw % PACKET_COUNT;
+ wb_size = atomic_read(&stream->buffer[wb_idx].size);
+
+ if ((wb_size > min_size) &&
+ (timestamp - stream->last_write_time >
+ AUTOFLUSH_TIMEOUT)) {
+
+ wb_size = kbasep_tlstream_msgbuf_submit(
+ stream, wb_idx_raw, wb_size);
+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+ atomic_set(&stream->buffer[wb_idx].size, wb_size);
+ }
+ spin_unlock_irqrestore(&stream->lock, flags);
+ }
+
+ if (atomic_read(&autoflush_timer_active))
+ rcode = mod_timer(
+ &autoflush_timer,
+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+ CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype: pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as a
+ * packet ready to be submitted to user space is detected. The variables
+ * pointed to by the parameters of this function will be updated with values
+ * identifying the right stream and buffer.
+ *
+ * Return: non-zero if any of the timeline streams has at least one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+ enum tl_stream_type *stype,
+ unsigned int *rb_idx_raw)
+{
+ int pending = 0;
+
+ KBASE_DEBUG_ASSERT(stype);
+ KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+ for (
+ *stype = 0;
+ (*stype < TL_STREAM_TYPE_COUNT) && !pending;
+ (*stype)++) {
+ if (NULL != tl_stream[*stype]) {
+ *rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+ /* Read buffer index may be updated by writer in case of
+ * overflow. Read and write buffer indexes must be
+ * loaded in correct order. */
+ smp_rmb();
+ if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+ pending = 1;
+ }
+ }
+ (*stype)--;
+
+ return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp: pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size: maximum amount of data that can be stored in the buffer
+ * @f_pos: pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer, or a negative error code on
+ * failure
+ */
+static ssize_t kbasep_tlstream_read(
+ struct file *filp,
+ char __user *buffer,
+ size_t size,
+ loff_t *f_pos)
+{
+ ssize_t copy_len = 0;
+
+ KBASE_DEBUG_ASSERT(filp);
+ KBASE_DEBUG_ASSERT(buffer);
+ KBASE_DEBUG_ASSERT(f_pos);
+ CSTD_UNUSED(filp);
+
+ if ((0 > *f_pos) || (PACKET_SIZE > size))
+ return -EINVAL;
+
+ mutex_lock(&tl_reader_lock);
+
+ while (copy_len < size) {
+ enum tl_stream_type stype;
+ unsigned int rb_idx_raw;
+ unsigned int rb_idx;
+ size_t rb_size;
+
+		/* If we don't have any data yet, wait for a packet to be
+		 * submitted. If we have already read some packets and there is
+		 * no packet pending, return to the user. */
+ if (0 < copy_len) {
+ if (!kbasep_tlstream_packet_pending(
+ &stype,
+ &rb_idx_raw))
+ break;
+ } else {
+ if (wait_event_interruptible(
+ tl_event_queue,
+ kbasep_tlstream_packet_pending(
+ &stype,
+ &rb_idx_raw))) {
+ copy_len = -ERESTARTSYS;
+ break;
+ }
+ }
+
+		/* Check if this packet fits into the user buffer.
+		 * If so, copy its contents. */
+ rb_idx = rb_idx_raw % PACKET_COUNT;
+ rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+ if (rb_size > size - copy_len)
+ break;
+ if (copy_to_user(
+ &buffer[copy_len],
+ tl_stream[stype]->buffer[rb_idx].data,
+ rb_size)) {
+ copy_len = -EFAULT;
+ break;
+ }
+
+		/* Verify that there was no overflow in the selected stream. This
+		 * ensures that if a stale packet size was used we will detect it. */
+ smp_rmb();
+ if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+ copy_len += rb_size;
+ atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+ atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+ }
+ }
+
+ mutex_unlock(&tl_reader_lock);
+
+ return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+ enum tl_stream_type stream_type;
+ unsigned int rb_idx;
+
+ KBASE_DEBUG_ASSERT(filp);
+ KBASE_DEBUG_ASSERT(wait);
+
+ poll_wait(filp, &tl_event_queue, wait);
+ if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+ return POLLIN;
+ return 0;
+}
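+
+/* A minimal user-space sketch of how the descriptor returned by
+ * kbase_tlstream_acquire() is expected to be consumed, pairing poll() with
+ * the read() implemented above. This is an illustration only; how the fd is
+ * obtained and the parse_packets() helper are assumptions. Note the buffer
+ * must be at least PACKET_SIZE bytes, or read() returns -EINVAL:
+ *
+ *   #include <poll.h>
+ *   #include <unistd.h>
+ *
+ *   static void drain_tlstream(int fd, char *buf, size_t buf_size)
+ *   {
+ *           struct pollfd pfd = { .fd = fd, .events = POLLIN };
+ *
+ *           while (poll(&pfd, 1, -1) > 0) {
+ *                   ssize_t n = read(fd, buf, buf_size);
+ *
+ *                   if (n <= 0)
+ *                           break;
+ *                   parse_packets(buf, n);
+ *           }
+ *   }
+ */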
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp: pointer to file structure
+ *
+ * Return: always returns zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+ KBASE_DEBUG_ASSERT(inode);
+ KBASE_DEBUG_ASSERT(filp);
+ CSTD_UNUSED(inode);
+ CSTD_UNUSED(filp);
+ atomic_set(&tlstream_busy, 0);
+ return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc: pointer to array with tracepoint descriptors
+ * @tp_count: number of descriptors in the given array
+ *
+ * Function fills in information about the tracepoints stored in the body
+ * stream associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+ enum tl_stream_type stream_type,
+ const struct tp_desc *tp_desc,
+ u32 tp_count)
+{
+ const u8 tv = KBASEP_TLSTREAM_VERSION; /* tlstream version */
+ const u8 ps = sizeof(void *); /* pointer size */
+ size_t msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+ char *buffer;
+ size_t pos = 0;
+ unsigned long flags;
+ unsigned int i;
+
+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+ KBASE_DEBUG_ASSERT(tp_desc);
+
+ /* Calculate the size of the timeline message. */
+ for (i = 0; i < tp_count; i++) {
+ msg_size += sizeof(tp_desc[i].id);
+ msg_size +=
+ strnlen(tp_desc[i].id_str, STRLEN_MAX) +
+ sizeof(char) + sizeof(u32);
+ msg_size +=
+ strnlen(tp_desc[i].name, STRLEN_MAX) +
+ sizeof(char) + sizeof(u32);
+ msg_size +=
+ strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+ sizeof(char) + sizeof(u32);
+ msg_size +=
+ strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+ sizeof(char) + sizeof(u32);
+ }
+
+ KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+ buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &tp_count, sizeof(tp_count));
+
+ for (i = 0; i < tp_count; i++) {
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos,
+ &tp_desc[i].id, sizeof(tp_desc[i].id));
+ pos = kbasep_tlstream_write_string(
+ buffer, pos,
+ tp_desc[i].id_str, msg_size - pos);
+ pos = kbasep_tlstream_write_string(
+ buffer, pos,
+ tp_desc[i].name, msg_size - pos);
+ pos = kbasep_tlstream_write_string(
+ buffer, pos,
+ tp_desc[i].arg_types, msg_size - pos);
+ pos = kbasep_tlstream_write_string(
+ buffer, pos,
+ tp_desc[i].arg_names, msg_size - pos);
+ }
+
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* No more data is expected in this stream. As the header stream
+	 * must be read before its associated body stream, make this packet
+	 * visible to the user straightaway. */
+ kbasep_tlstream_flush_stream(stream_type);
+}
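+
+/* For reference, a sketch of the header packet payload built above, as a
+ * user-space reader would see it after the packet header (and the packet
+ * number, if the stream is numbered). The field widths are read off the
+ * msg_size computation above; the exact string encoding (u32 size followed
+ * by the characters including the terminating NUL) and the width of the id
+ * field are inferences, not a documented contract:
+ *
+ *   u8  version;        KBASEP_TLSTREAM_VERSION
+ *   u8  pointer_size;   sizeof(void *) on the kernel side
+ *   u32 tp_count;       number of tracepoint descriptors
+ *   then, repeated tp_count times:
+ *           id          sizeof(tp_desc->id) bytes
+ *           id_str      u32 size + size bytes
+ *           name        u32 size + size bytes
+ *           arg_types   u32 size + size bytes
+ *           arg_names   u32 size + size bytes
+ */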
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+ enum tl_stream_type i;
+ int rcode;
+
+ /* Prepare stream structures. */
+ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+ tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+ if (!tl_stream[i])
+ break;
+ kbasep_timeline_stream_init(tl_stream[i], i);
+ }
+ if (TL_STREAM_TYPE_COUNT > i) {
+ for (; i > 0; i--) {
+ kbasep_timeline_stream_term(tl_stream[i - 1]);
+ kfree(tl_stream[i - 1]);
+ }
+ return -ENOMEM;
+ }
+
+ /* Initialize autoflush timer. */
+ atomic_set(&autoflush_timer_active, 1);
+ setup_timer(&autoflush_timer,
+ kbasep_tlstream_autoflush_timer_callback,
+ 0);
+ rcode = mod_timer(
+ &autoflush_timer,
+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+ CSTD_UNUSED(rcode);
+
+ return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+ enum tl_stream_type i;
+
+ atomic_set(&autoflush_timer_active, 0);
+ del_timer_sync(&autoflush_timer);
+
+ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+ kbasep_timeline_stream_term(tl_stream[i]);
+ kfree(tl_stream[i]);
+ }
+}
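+
+/* A sketch of the expected lifecycle of the functions above and below,
+ * assuming a single device (all tlstream state is global):
+ *
+ *   kbase_tlstream_init();               once, at driver initialization
+ *   ...
+ *   kbase_tlstream_acquire(kctx, &fd);   per reader, one owner at a time
+ *   ...
+ *   kbase_tlstream_term();               at driver shutdown; there is no
+ *                                        reference counting here, so the
+ *                                        caller must make sure no reader is
+ *                                        still using the streams
+ */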
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
+{
+ if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) {
+ *fd = anon_inode_getfd(
+ "[mali_tlstream]",
+ &kbasep_tlstream_fops,
+ kctx,
+ O_RDONLY | O_CLOEXEC);
+ if (0 > *fd) {
+ atomic_set(&tlstream_busy, 0);
+ return *fd;
+ }
+
+ /* Reset and initialize header streams. */
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+ kbasep_tlstream_timeline_header(
+ TL_STREAM_TYPE_OBJ_HEADER,
+ tp_desc_obj,
+ ARRAY_SIZE(tp_desc_obj));
+ kbasep_tlstream_timeline_header(
+ TL_STREAM_TYPE_AUX_HEADER,
+ tp_desc_aux,
+ ARRAY_SIZE(tp_desc_aux));
+ } else {
+ *fd = -EBUSY;
+ }
+
+ return 0;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+ enum tl_stream_type stype;
+
+ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+ kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_OBJ]);
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+ KBASE_DEBUG_ASSERT(bytes_collected);
+ KBASE_DEBUG_ASSERT(bytes_generated);
+ *bytes_collected = atomic_read(&tlstream_bytes_collected);
+ *bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr)
+{
+ const u32 msg_id = KBASE_TL_NEW_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &nr, sizeof(nr));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
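+
+/* Each emitter below follows the same wire format, read directly off the
+ * kbasep_tlstream_write_* calls: a u32 message id, a u64 timestamp, then the
+ * raw argument bytes in call order. For the message emitted above that is:
+ *
+ *   u32 msg_id;      KBASE_TL_NEW_CTX
+ *   u64 timestamp;
+ *   ptr context;     sizeof(void *) bytes, as advertised in the header packet
+ *   u32 nr;          context number
+ *
+ * The matching decoder lives on the user-space side and is not part of this
+ * file.
+ */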
+
+void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+ const u32 msg_id = KBASE_TL_NEW_GPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+ sizeof(core_count);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &gpu, sizeof(gpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &id, sizeof(id));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &core_count, sizeof(core_count));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+ const u32 msg_id = KBASE_TL_NEW_LPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+ sizeof(fn);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &lpu, sizeof(lpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &nr, sizeof(nr));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &fn, sizeof(fn));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+ const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &lpu, sizeof(lpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &gpu, sizeof(gpu));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void kbase_tlstream_tl_new_ctx(void *context, u32 nr)
+{
+ const u32 msg_id = KBASE_TL_NEW_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &nr, sizeof(nr));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+ const u32 msg_id = KBASE_TL_NEW_ATOM;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &nr, sizeof(nr));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_del_ctx(void *context)
+{
+ const u32 msg_id = KBASE_TL_DEL_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_del_atom(void *atom)
+{
+ const u32 msg_id = KBASE_TL_DEL_ATOM;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context)
+{
+ const u32 msg_id = KBASE_TL_RET_GPU_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &gpu, sizeof(gpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+ const u32 msg_id = KBASE_TL_RET_ATOM_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu)
+{
+ const u32 msg_id = KBASE_TL_RET_ATOM_LPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &lpu, sizeof(lpu));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context)
+{
+ const u32 msg_id = KBASE_TL_NRET_GPU_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &gpu, sizeof(gpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+ const u32 msg_id = KBASE_TL_NRET_ATOM_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+ const u32 msg_id = KBASE_TL_NRET_ATOM_LPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &lpu, sizeof(lpu));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+ const u32 msg_id = KBASE_AUX_PM_STATE;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+ sizeof(state);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &core_type, sizeof(core_type));
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void kbase_tlstream_aux_job_softstop(u32 js_id)
+{
+ const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(js_id);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count)
+{
+ const u32 msg_id = KBASE_AUX_PAGEFAULT;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(mmu_as) +
+ sizeof(page_count);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX, msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &mmu_as, sizeof(mmu_as));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &page_count, sizeof(page_count));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void kbase_tlstream_aux_pagesalloc(s64 page_count_change)
+{
+ const u32 msg_id = KBASE_AUX_PAGESALLOC;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(page_count_change);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX, msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos,
+ &page_count_change, sizeof(page_count_change));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100644
index 00000000000..494f5c784f6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,284 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline must have been previously initialized with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx: kernel common context
+ * @fd: timeline stream file descriptor
+ *
+ * This descriptor is meant to be used by the userspace timeline client to
+ * gain access to the kernel timeline stream. This stream is later broadcast
+ * by user space to the timeline client.
+ * Only one entity can own the descriptor at any given time; the descriptor
+ * shall be closed when unused. If the descriptor cannot be obtained (i.e. it
+ * is already in use), the fd argument will contain a negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that stream
+ * descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd);
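+
+/* A short sketch of the intended calling pattern (the caller code here is
+ * hypothetical):
+ *
+ *   int fd;
+ *
+ *   if (kbase_tlstream_acquire(kctx, &fd))
+ *           goto internal_error;
+ *   if (fd < 0)
+ *           return fd;   stream already owned (-EBUSY)
+ *   hand fd over to the user-space timeline client
+ */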
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ * writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg: if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer generates the required number of test
+ * tracepoints (tracepoints with embedded information about the writer, which
+ * should be verified by the user space reader). Tracepoints are emitted in
+ * all timeline body streams. If aux_msg is non-zero each writer also
+ * generates non-testable tracepoints (tracepoints without information about
+ * the writer); these tracepoints are used to check the correctness of the
+ * remaining timeline message generating functions. Each writer waits the
+ * requested time between generating sets of messages. This call blocks until
+ * all writers finish.
+ */
+void kbase_tlstream_test(
+ unsigned int tpw_count,
+ unsigned int msg_delay,
+ unsigned int msg_count,
+ int aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
+ * summary
+ * @context: name of the context object
+ * @nr: context number
+ *
+ * Function emits a timeline message informing about context creation. The
+ * context is created with a context number attribute that can be used to
+ * link the kbase context with the userspace context.
+ * This message is directed to timeline summary stream.
+ */
+void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr);
+
+/**
+ * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
+ * @gpu: name of the GPU object
+ * @id: id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+
+/**
+ * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr: sequential number assigned to this LPU
+ * @fn: property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. The LPU is
+ * created with two attributes: a number linking this LPU with the GPU's job
+ * slot, and a function property describing this LPU's abilities.
+ * This message is directed to timeline summary stream.
+ */
+void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+
+/**
+ * kbase_tlstream_tl_new_ctx - create context object in timeline
+ * @context: name of the context object
+ * @nr: context number
+ *
+ * Function emits a timeline message informing about context creation. The
+ * context is created with a context number attribute that can be used to
+ * link the kbase context with the userspace context.
+ */
+void kbase_tlstream_tl_new_ctx(void *context, u32 nr);
+
+/**
+ * kbase_tlstream_tl_new_atom - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr: sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. The atom
+ * is created with an atom number attribute that links it with the actual
+ * work bucket id understood by the hardware.
+ */
+void kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+
+/**
+ * kbase_tlstream_tl_del_ctx - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+void kbase_tlstream_tl_del_ctx(void *context);
+
+/**
+ * kbase_tlstream_tl_del_atom - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+void kbase_tlstream_tl_del_atom(void *atom);
+
+/**
+ * kbase_tlstream_tl_ret_gpu_ctx - retain GPU by context
+ * @gpu: name of the GPU object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that the GPU object is being
+ * held by the context and must not be deleted until it is released.
+ */
+void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context);
+
+/**
+ * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
+ * @atom: name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that the atom object is being
+ * held by the context and must not be deleted until it is released.
+ */
+void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+
+/**
+ * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
+ * @atom: name of the atom object
+ * @lpu: name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that the atom object is being
+ * held by the LPU and must not be deleted until it is released.
+ */
+void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu);
+
+/**
+ * kbase_tlstream_tl_nret_gpu_ctx - release GPU by context
+ * @gpu: name of the GPU object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that GPU object is being released
+ * by context.
+ */
+void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context);
+
+/**
+ * kbase_tlstream_tl_nret_atom_ctx - release atom by context
+ * @atom: name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+
+/**
+ * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu: name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+
+/**
+ * kbase_tlstream_aux_pm_state - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state: 64-bit bitmask reporting power state of the cores (1 = ON, 0 = OFF)
+ */
+void kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+
+/**
+ * kbase_tlstream_aux_job_softstop - job soft stop occurred
+ * @js_id: job slot id
+ */
+void kbase_tlstream_aux_job_softstop(u32 js_id);
+
+/**
+ * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
+ * resulting in new pages being mapped
+ * @mmu_as: MMU address space number
+ * @page_count: number of currently used pages
+ */
+void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count);
+
+/**
+ * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
+ * pages has changed
+ * @page_count_change: number of pages to be added or subtracted (according to
+ * the sign)
+ */
+void kbase_tlstream_aux_pagesalloc(s64 page_count_change);
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100644
index 00000000000..e2e0544208c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE *****
+ * ***** DO NOT INCLUDE DIRECTLY *****
+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code identifiers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #undef KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ * - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
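+
+/*
+ * Similarly, the string form can be used to build a matching array of names
+ * (a sketch of the second intended use; the array name is illustrative):
+ *
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) #X
+ * static const char * const kbase_trace_code_string[] =
+ * {
+ * #include "mali_kbase_trace_defs.h"
+ * #undef KBASE_TRACE_CODE_MAKE_CODE
+ * };
+ */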
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+ /* no info_val, no gpu_addr, no atom */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+ /* no info_val, no gpu_addr, no atom */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+ /* info_val == GPU_IRQ_STATUS register */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+ /* info_val == bits cleared */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+ /* info_val == GPU_IRQ_STATUS register */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+ /* GPU addr==dump address */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+ /* info_val==irq rawstat at start */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+ /* info_val==jobs processed */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job is found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+ /* info_val==exit code; gpu_addr==chain gpuaddr */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+ /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+ /* gpu_addr is as follows:
+ * - If JS_STATUS active after soft-stop, val==gpu addr written to
+ * JS_HEAD on submit
+ * - otherwise gpu_addr==0 */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+ /* gpu_addr==JS_TAIL read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+ /* info_val == is_scheduled */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+ /* info_val == is_scheduled */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+ /* info_val == nr jobs submitted */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+ /* gpu_addr==JS_HEAD_NEXT last written */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+ /* gpu_addr==0, info_val==0, uatom==0 */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+ /* gpu_addr==last value written/would be written to JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+ /* kctx is the one being evicted, info_val == kctx to put in */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+ /* info_val == lower 32 bits of rechecked affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+ /* info_val == lower 32 bits of rechecked affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+ /* info_val == the ctx attribute now on ctx */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+ /* info_val == the ctx attribute now on runpool */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+ /* info_val == the ctx attribute now off ctx */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+ /* info_val == the ctx attribute now off runpool */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+ /* info_val == whether it was evicted */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+ /* gpu_addr==JS_HEAD to write if the job were run */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+ /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+ /* info_val == policy number, or -1 for "Already changing" */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+ /* info_val == policy number */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+ /* info_val == policy number */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+ KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100644
index 00000000000..aac9858875a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,232 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+ char *enum_str;
+ char *desc;
+ char *format;
+ char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+ #include "mali_kbase_trace_timeline_defs.h"
+ #undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+ if (*pos >= KBASE_NR_TRACE_CODES)
+ return NULL;
+
+ return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+ (*pos)++;
+
+ if (*pos == KBASE_NR_TRACE_CODES)
+ return NULL;
+
+ return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+ struct kbase_trace_timeline_desc *trace_desc = data;
+
+ seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+ return 0;
+}
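+
+/* Each line of the resulting debugfs file therefore has the form
+ * enum_str#desc#format#format_desc, with '#' as the field separator. A
+ * hypothetical shape only (real entries come from
+ * mali_kbase_trace_timeline_defs.h, which is not shown here):
+ *
+ *   SW_SET_GPU_SLOT_ACTIVE#Atoms on slot#%d#count of active atoms
+ */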
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+ .start = kbasep_trace_timeline_seq_start,
+ .next = kbasep_trace_timeline_seq_next,
+ .stop = kbasep_trace_timeline_seq_stop,
+ .show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+ .open = kbasep_trace_timeline_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+ debugfs_create_file("mali_timeline_defs",
+ S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+ &kbasep_trace_timeline_debugfs_fops);
+}
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+ } else {
+ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+ KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+ }
+ ++kbdev->timeline.slot_atoms_submitted[js];
+
+ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+ } else {
+ /* Job finished in JS_HEAD */
+ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+ KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+ /* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+ if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+ struct kbase_jd_atom *next_katom;
+ struct kbase_context *next_kctx;
+
+ /* Peek the next atom - note that the atom in JS_HEAD will already
+ * have been dequeued */
+ next_katom = kbase_backend_inspect_head(kbdev, js);
+ WARN_ON(!next_katom);
+ next_kctx = next_katom->kctx;
+ KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+ KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+ KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+ }
+ }
+
+ --kbdev->timeline.slot_atoms_submitted[js];
+
+ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+ int uid = 0;
+ int old_uid;
+
+ /* If a producer already exists for the event, try to use their UID (multiple-producers) */
+ uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+ old_uid = uid;
+
+ /* Get a new non-zero UID if we don't have one yet */
+ while (!uid)
+ uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+ /* Try to use this UID */
+ if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+ /* If it changed, raced with another producer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
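+
+/* The UID handshake above is lock-free: the producer publishes a non-zero UID
+ * in pm_event_uid[event] and traces SEND_EVENT with it; a consumer
+ * (kbase_timeline_pm_handle_event below) claims the UID by cmpxchg-ing the
+ * slot back to zero and traces HANDLE_EVENT with the same value, so the two
+ * trace records can be correlated. A UID of zero in either record means that
+ * cmpxchg race was lost and the record cannot be matched:
+ *
+ *   producer                          consumer
+ *   kbase_timeline_pm_send_event()    kbase_timeline_pm_handle_event()
+ *     SEND_EVENT(event, uid = U)        HANDLE_EVENT(event, uid = U)
+ */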
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+ if (uid != 0) {
+ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+ /* If it changed, raced with another consumer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+ }
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+ /* If it changed, raced with another consumer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+ /* Simply log the start of the transition */
+ kbdev->timeline.l2_transitioning = true;
+ KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+ /* Simply log the end of the transition */
+ if (kbdev->timeline.l2_transitioning) {
+ kbdev->timeline.l2_transitioning = false;
+ KBASE_TIMELINE_POWERED_L2(kbdev);
+ }
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100644
index 00000000000..6a3cd407e4a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,356 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+ #include "mali_kbase_trace_timeline_defs.h"
+ #undef KBASE_TIMELINE_TRACE_CODE
+};
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+ * process of being returned to user) */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \
+ (int)kctx->timeline.owner_tgid, \
+ count); \
+ } while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \
+ CTX_FLOW_ATOM_READY, \
+ (int)kctx->timeline.owner_tgid, \
+ atom_id); \
+ } while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this one.
+ *
+ * This is more useful because it lets us calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_ACTIVE, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_NEXT, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_HEAD, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_STOPPING, \
+ (kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+ js, count); \
+ } while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_ACTIVE, active); \
+ } while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_TILER_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_SHADER_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_L2_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace state of L2 cache */
+#define KBASE_TIMELINE_POWERING_L2(kbdev) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_GPU_POWER_L2_POWERING, \
+ 1); \
+ } while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_GPU_POWER_L2_ACTIVE, \
+ 1); \
+ } while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_PM_SEND_EVENT, \
+ event_type, pm_event_id); \
+ } while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_PM_HANDLE_EVENT, \
+ event_type, pm_event_id); \
+ } while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \
+ HW_START_GPU_JOB_CHAIN_SW_APPROX, \
+ (int)kctx->timeline.owner_tgid, \
+ js, _consumerof_atom_number); \
+ } while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \
+ HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \
+ (int)kctx->timeline.owner_tgid, \
+ js, _producerof_atom_number_completed); \
+ } while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certain caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \
+ trace_code, 1); \
+ } while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, \
+ count); \
+ } while (0)
+
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom is 'done' on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+ enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif /* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100644
index 00000000000..a7dc3faf4c7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE *****
+ * ***** DO NOT INCLUDE DIRECTLY *****
+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ * displayed, and is split up into various parts, separated by underscores:
+ * - 'SW' and 'HW' as the first part will be used to determine whether a
+ * timeline is to do with Software or Hardware - effectively, separate
+ * 'channels' for Software and Hardware
+ * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ * signify related pairs of events - these are optional.
+ * - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ * - 'SW_ENTER_FOO'
+ * - 'SW_LEAVE_FOO'
+ * - 'SW_FLOW_BAR_1'
+ * - 'SW_FLOW_BAR_2'
+ * - 'HW_START_BAZ'
+ * - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ * - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instance.
+ * - underscore-prefixed parameters will be hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ * (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context;
+ *   that would be confusing - there are really only 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context; that would be confusing - all shader cores (whether there are
+ *   4, 8, etc.) are shared in the system.
+ */
+
+ /*
+ * CTX events
+ */
+ /* Separate timelines for each context 'instance' */
+ KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"),
+ KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+ /*
+ * SW Events
+ */
+ /* Separate timelines for each slot 'instance' */
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+ /* Shader and overall power are shared - we can't have separate instances
+ * of them, so we just tag them with the context */
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"),
+
+ /* SW Power event messaging. _event_type is a value from the kbase_pm_event enum */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+ /* SW L2 power events */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE, "SW: Context Active", "%d,%d", "_tgid,_value_active"),
+
+ /*
+ * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+ */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+ /*
+ * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+ */
+ /* kbase_pm_request_cores */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+ /* kbase_pm_release_cores */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+ /*
+ * END: SW Functions that call kbase_pm_check_transitions_nolock()
+ */
+
+ /*
+ * HW Events
+ */
+ KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain start (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_consumerof_atom_number_ready"),
+ KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain stop (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100644
index 00000000000..5139014ec2c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,561 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON
+#define SUPPORT_MALI_ERROR_INJECT
+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+#define BASE_UK_VERSION_MAJOR 9
+#define BASE_UK_VERSION_MINOR 0
+
+struct kbase_uk_mem_alloc {
+ union uk_header header;
+ /* IN */
+ u64 va_pages;
+ u64 commit_pages;
+ u64 extent;
+ /* IN/OUT */
+ u64 flags;
+ /* OUT */
+ u64 gpu_va;
+ u16 va_alignment;
+ u8 padding[6];
+};
+
+struct kbase_uk_mem_free {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+ /* OUT */
+};
+
+struct kbase_uk_mem_alias {
+ union uk_header header;
+ /* IN/OUT */
+ u64 flags;
+ /* IN */
+ u64 stride;
+ u64 nents;
+ union kbase_pointer ai;
+ /* OUT */
+ u64 gpu_va;
+ u64 va_pages;
+};
+
+struct kbase_uk_mem_import {
+ union uk_header header;
+ /* IN */
+ union kbase_pointer phandle;
+ u32 type;
+ u32 padding;
+ /* IN/OUT */
+ u64 flags;
+ /* OUT */
+ u64 gpu_va;
+ u64 va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+ union uk_header header;
+ /* IN */
+ u64 gpu_va;
+ u64 flags;
+ u64 mask;
+};
+
+struct kbase_uk_job_submit {
+ union uk_header header;
+ /* IN */
+ union kbase_pointer addr;
+ u32 nr_atoms;
+ u32 stride; /* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+ /* OUT */
+};
+
+struct kbase_uk_post_term {
+ union uk_header header;
+};
+
+struct kbase_uk_dump_fault_term {
+ union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+ union uk_header header;
+
+ /* IN */
+ struct base_syncset sset;
+
+ /* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+ union uk_header header;
+
+ /* IN */
+ u64 dump_buffer;
+ u32 jm_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 unused_1; /* keep for backwards compatibility */
+ u32 mmu_l2_bm;
+ u32 padding;
+ /* OUT */
+};
+
+struct kbase_uk_hwcnt_dump {
+ union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+ union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+ union uk_header header;
+ /* IN */
+ s32 fd;
+ u32 padding;
+ /* OUT */
+};
+
+struct kbase_uk_stream_create {
+ union uk_header header;
+ /* IN */
+ char name[32];
+ /* OUT */
+ s32 fd;
+ u32 padding;
+};
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+/**
+ * This structure is kept for backward-compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+#define BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN ((u32)0x00000001)
+struct base_cpu_id_props {
+ /**
+ * CPU ID
+ */
+ u32 id;
+
+ /**
+ * CPU Part number
+ */
+ u16 part;
+ /**
+ * ASCII code of implementer trademark
+ */
+ u8 implementer;
+
+ /**
+ * CPU Variant
+ */
+ u8 variant;
+ /**
+ * CPU Architecture
+ */
+ u8 arch;
+
+ /**
+ * CPU revision
+ */
+ u8 rev;
+
+ /**
+ * Validity of CPU id where 0-invalid and
+ * 1-valid only if ALL the cpu_id props are valid
+ */
+ u8 valid;
+
+ u8 padding[1];
+};
+
+/**
+ * This structure is kept for backward-compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+struct base_cpu_props {
+ u32 nr_cores; /**< Number of CPU cores */
+
+ /**
+ * CPU page size as a Logarithm to Base 2. The compile-time
+ * equivalent is @ref OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+ */
+ u32 cpu_page_size_log2;
+
+ /**
+ * CPU L1 Data cache line size as a Logarithm to Base 2. The compile-time
+ * equivalent is @ref OSU_CONFIG_CPU_L1_DCACHE_LINE_SIZE_LOG2.
+ */
+ u32 cpu_l1_dcache_line_size_log2;
+
+ /**
+ * CPU L1 Data cache size, in bytes. The compile-time equivalent is
+ * @ref OSU_CONFIG_CPU_L1_DCACHE_SIZE.
+ *
+ * This CPU Property is mainly provided to implement OpenCL's
+ * clGetDeviceInfo(), which allows the CL_DEVICE_GLOBAL_MEM_CACHE_SIZE
+ * hint to be queried.
+ */
+ u32 cpu_l1_dcache_size;
+
+ /**
+ * CPU Property Flags bitpattern.
+ *
+ * This is a combination of bits as specified by the macros prefixed with
+ * 'BASE_CPU_PROPERTY_FLAG_'.
+ */
+ u32 cpu_flags;
+
+ /**
+ * Maximum clock speed in MHz.
+ * @usecase 'Maximum' CPU Clock Speed information is required by OpenCL's
+ * clGetDeviceInfo() function for the CL_DEVICE_MAX_CLOCK_FREQUENCY hint.
+ */
+ u32 max_cpu_clock_speed_mhz;
+
+ /**
+ * @brief Total memory, in bytes.
+ *
+ * This is the theoretical maximum memory available to the CPU. It is
+ * unlikely that a client will be able to allocate all of this memory for
+ * their own purposes, but this at least provides an upper bound on the
+ * memory available to the CPU.
+ *
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL CPU devices.
+ */
+ u64 available_memory_size;
+
+ /**
+ * CPU ID detailed info
+ */
+ struct base_cpu_id_props cpu_id;
+
+ u32 padding;
+};
+
+/**
+ * This structure is kept for backward-compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+struct kbase_uk_cpuprops {
+ union uk_header header;
+
+ /* IN */
+ struct base_cpu_props props;
+ /* OUT */
+};
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+
+struct kbase_uk_gpuprops {
+ union uk_header header;
+
+ /* IN */
+ struct mali_base_gpu_props props;
+ /* OUT */
+};
+
+struct kbase_uk_mem_query {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE 1
+#define KBASE_MEM_QUERY_VA_SIZE 2
+#define KBASE_MEM_QUERY_FLAGS 3
+ u64 query;
+ /* OUT */
+ u64 value;
+};
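+
+/* Illustrative sketch (not part of the ABI; the dispatch plumbing is assumed
+ * and 'gpu_va' is a hypothetical address): a caller would fill the struct as
+ * below before issuing KBASE_FUNC_MEM_QUERY through the UK call layer:
+ *
+ *   struct kbase_uk_mem_query q;
+ *
+ *   memset(&q, 0, sizeof(q));
+ *   q.gpu_addr = gpu_va;
+ *   q.query = KBASE_MEM_QUERY_COMMIT_SIZE;
+ *   // ...dispatch the call, then read q.value on success
+ */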
+
+struct kbase_uk_mem_commit {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+ u64 pages;
+ /* OUT */
+ u32 result_subcode;
+ u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+ u64 cpu_addr;
+ u64 size;
+ /* OUT */
+ u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+ union uk_header header;
+ /* OUT */
+ char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+ u32 version_string_size;
+ u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+ union uk_header header;
+ /* OUT */
+ u32 counter;
+ u32 padding;
+};
+
+struct kbase_uk_set_flags {
+ union uk_header header;
+ /* IN */
+ u32 create_flags;
+ u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+ u64 test_addr[TEST_ADDR_COUNT]; /**< memory address */
+ u32 test_addr_pages[TEST_ADDR_COUNT]; /**< memory size in pages */
+ union kbase_pointer kctx; /**< base context created by process */
+ union kbase_pointer mm; /**< pointer to process address space */
+ u8 buffer1[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */
+ u8 buffer2[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+ union uk_header header;
+ /* IN */
+ struct kbase_exported_test_data test_data;
+};
+
+#endif /* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+ union uk_header header;
+ /* IN */
+ struct kbase_error_params params;
+};
+#endif /* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+ union uk_header header;
+ /* IN */
+ struct kbase_model_control_params params;
+};
+#endif /* SUPPORT_MALI_NO_MALI */
+
+#define KBASE_MAXIMUM_EXT_RESOURCES 255
+
+struct kbase_uk_ext_buff_kds_data {
+ union uk_header header;
+ union kbase_pointer external_resource;
+ union kbase_pointer file_descriptor;
+ u32 num_res; /* limited to KBASE_MAXIMUM_EXT_RESOURCES */
+ u32 padding;
+};
+
+struct kbase_uk_profiling_controls {
+ union uk_header header;
+ u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+ union uk_header header;
+ u32 len;
+ union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+ union uk_header header;
+ /* OUT */
+ int id;
+};
+
+#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
+ defined(CONFIG_MALI_MIPE_ENABLED)
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @fd: timeline stream file descriptor
+ *
+ * This structure is used when performing a call to acquire a kernel-side
+ * timeline stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+ union uk_header header;
+ /* IN */
+ /* OUT */
+ s32 fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+ union uk_header header;
+ /* IN */
+ /* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg: if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+ union uk_header header;
+ /* IN */
+ u32 tpw_count;
+ u32 msg_delay;
+ u32 msg_count;
+ u32 aux_msg;
+ /* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+ union uk_header header;
+ /* IN */
+ /* OUT */
+ u32 bytes_collected;
+ u32 bytes_generated;
+};
+#endif /* MALI_UNIT_TEST */
+#endif /* MALI_KTLSTREAM_ENABLED || CONFIG_MALI_MIPE_ENABLED */
+
+enum kbase_uk_function_id {
+ KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+ KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+ KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+ KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+ KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+ KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+ KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+ KBASE_FUNC_SYNC = (UK_FUNC_ID + 8),
+
+ KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+ KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+ KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+ KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE = (UK_FUNC_ID + 13),
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+ KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+ KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+ KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+ KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17),
+ KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+ KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+ KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+ KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+ KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+ KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+ KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+ KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+ /* to be used only for testing
+ * purposes; otherwise these controls
+ * are set through the gator API */
+
+ KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+ KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+ KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29),
+
+ KBASE_FUNC_DUMP_FAULT_TERM = (UK_FUNC_ID + 30),
+
+ KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
+
+#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
+ defined(CONFIG_MALI_MIPE_ENABLED)
+ KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32),
+#if MALI_UNIT_TEST
+ KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
+ KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
+#endif /* MALI_UNIT_TEST */
+ KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
+#endif /* MALI_KTLSTREAM_ENABLED || CONFIG_MALI_MIPE_ENABLED */
+
+ KBASE_FUNC_MAX
+};
+
+#endif /* _KBASE_UKU_H_ */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100644
index 00000000000..be474ff8740
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+ struct list_head *pos = base->next;
+
+ while (pos != base) {
+ if (pos == entry)
+ return true;
+
+ pos = pos->next;
+ }
+ return false;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100644
index 00000000000..fd7252dab0d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base The head of the list to be tested
+ * @param entry The list entry to be tested
+ *
+ * @return true if entry is a member of base
+ * false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
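+
+/* Example (illustrative only; 'dev_list' and 'entry' are hypothetical):
+ *
+ *   if (kbasep_list_member_of(&dev_list, &entry->link))
+ *           list_del(&entry->link);
+ */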
+
+#endif /* _KBASE_UTILITY_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100644
index 00000000000..d1c68702e9c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,485 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK 64
+#define NR_BYTES_PER_CNT 4
+#define NR_BYTES_PER_HDR 16
+#define PRFCNT_EN_MASK_OFFSET 0x8
+
+struct kbase_vinstr_context {
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ struct kbase_vmap_struct vmap;
+ struct mutex lock;
+ u64 gpu_va;
+ void *cpu_va;
+ size_t dump_size;
+ u32 nclients;
+ struct list_head clients;
+};
+
+struct kbase_vinstr_client {
+ bool kernel;
+ void *dump_buffer;
+ u32 bitmap[4];
+ void *accum_buffer;
+ size_t dump_size;
+ struct list_head list;
+};
+
+static int map_kernel_dump_buffer(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_va_region *reg;
+ struct kbase_context *kctx = ctx->kctx;
+ u64 flags, nr_pages;
+ u16 va_align = 0;
+
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+ ctx->dump_size = kbase_vinstr_dump_size(ctx);
+ nr_pages = PFN_UP(ctx->dump_size);
+
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+ &ctx->gpu_va, &va_align);
+ if (!reg)
+ return -ENOMEM;
+
+ ctx->cpu_va = kbase_vmap(kctx, ctx->gpu_va, ctx->dump_size, &ctx->vmap);
+ if (!ctx->cpu_va) {
+ kbase_mem_free(kctx, ctx->gpu_va);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void unmap_kernel_dump_buffer(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_context *kctx = ctx->kctx;
+
+ kbase_vunmap(kctx, &ctx->vmap);
+ kbase_mem_free(kctx, ctx->gpu_va);
+}
+
+static int map_client_accum_buffer(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ cli->dump_size = kbase_vinstr_dump_size(ctx);
+ cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+ return !cli->accum_buffer ? -ENOMEM : 0;
+}
+
+static void unmap_client_accum_buffer(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ kfree(cli->accum_buffer);
+}
+
+static int create_vinstr_kctx(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_uk_hwcnt_setup setup;
+ int err;
+
+ ctx->kctx = kbase_create_context(ctx->kbdev, true);
+ if (!ctx->kctx)
+ return -ENOMEM;
+
+ /* Map the master kernel dump buffer. The HW dumps the counters
+ * into this memory region. */
+ err = map_kernel_dump_buffer(ctx);
+ if (err) {
+ kbase_destroy_context(ctx->kctx);
+ return err;
+ }
+
+ setup.dump_buffer = ctx->gpu_va;
+ /* The GPU requires the prfcnt collection block to be disabled before
+ * it can be reprogrammed, which introduces jitter and disrupts
+ * existing clients. Therefore we enable all counters up front. */
+ setup.jm_bm = 0xffffffff;
+ setup.tiler_bm = 0xffffffff;
+ setup.shader_bm = 0xffffffff;
+ setup.mmu_l2_bm = 0xffffffff;
+
+ err = kbase_instr_hwcnt_enable(ctx->kctx, &setup);
+ if (err) {
+ unmap_kernel_dump_buffer(ctx);
+ kbase_destroy_context(ctx->kctx);
+ return err;
+ }
+
+ return 0;
+}
+
+static void destroy_vinstr_kctx(struct kbase_vinstr_context *ctx)
+{
+ kbase_instr_hwcnt_disable(ctx->kctx);
+ unmap_kernel_dump_buffer(ctx);
+ kbase_destroy_context(ctx->kctx);
+ ctx->kctx = NULL;
+}
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+ struct kbase_vinstr_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+ INIT_LIST_HEAD(&ctx->clients);
+ mutex_init(&ctx->lock);
+ ctx->kbdev = kbdev;
+ return ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_vinstr_client *cli;
+
+ while (!list_empty(&ctx->clients)) {
+ cli = list_first_entry(&ctx->clients,
+ struct kbase_vinstr_client, list);
+ list_del(&cli->list);
+ unmap_client_accum_buffer(ctx, cli);
+ kfree(cli);
+ ctx->nclients--;
+ }
+ if (ctx->kctx)
+ destroy_vinstr_kctx(ctx);
+ kfree(ctx);
+}
+
+struct kbase_vinstr_client *kbase_vinstr_attach_client(struct kbase_vinstr_context *ctx,
+ bool kernel, u64 dump_buffer, u32 bitmap[4])
+{
+ struct kbase_vinstr_client *cli;
+
+ cli = kmalloc(sizeof(*cli), GFP_KERNEL);
+ if (!cli)
+ return NULL;
+
+ cli->kernel = kernel;
+ cli->dump_buffer = (void *)(uintptr_t)dump_buffer;
+ cli->bitmap[SHADER_HWCNT_BM] = bitmap[SHADER_HWCNT_BM];
+ cli->bitmap[TILER_HWCNT_BM] = bitmap[TILER_HWCNT_BM];
+ cli->bitmap[MMU_L2_HWCNT_BM] = bitmap[MMU_L2_HWCNT_BM];
+ cli->bitmap[JM_HWCNT_BM] = bitmap[JM_HWCNT_BM];
+
+ mutex_lock(&ctx->lock);
+ /* If this is the first client, create the vinstr kbase
+ * context. This context remains resident until the
+ * last client detaches. */
+ if (!ctx->nclients) {
+ if (create_vinstr_kctx(ctx) < 0) {
+ kfree(cli);
+ mutex_unlock(&ctx->lock);
+ return NULL;
+ }
+ }
+
+ /* The GPU resets the counter block every time there is a request
+ * to dump it. We need a per-client kernel buffer for accumulating
+ * the counters. */
+ if (map_client_accum_buffer(ctx, cli) < 0) {
+ kfree(cli);
+ if (!ctx->nclients)
+ destroy_vinstr_kctx(ctx);
+ mutex_unlock(&ctx->lock);
+ return NULL;
+ }
+
+ ctx->nclients++;
+ list_add(&cli->list, &ctx->clients);
+ mutex_unlock(&ctx->lock);
+
+ return cli;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ struct kbase_vinstr_client *iter, *tmp;
+
+ mutex_lock(&ctx->lock);
+ list_for_each_entry_safe(iter, tmp, &ctx->clients, list) {
+ if (iter == cli) {
+ list_del(&iter->list);
+ unmap_client_accum_buffer(ctx, cli);
+ kfree(iter);
+ ctx->nclients--;
+ if (!ctx->nclients)
+ destroy_vinstr_kctx(ctx);
+ break;
+ }
+ }
+ mutex_unlock(&ctx->lock);
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_device *kbdev = ctx->kctx->kbdev;
+ size_t dump_size;
+
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+ u32 nr_cg;
+
+ nr_cg = kbdev->gpu_props.num_core_groups;
+ dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+ NR_CNT_PER_BLOCK *
+ NR_BYTES_PER_CNT;
+ } else {
+ /* assume v5 for now */
+ u32 nr_l2, nr_sc;
+
+ nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+ nr_sc = kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+ /* JM and tiler counter blocks are always present */
+ dump_size = (2 + nr_l2 + nr_sc) *
+ NR_CNT_PER_BLOCK *
+ NR_BYTES_PER_CNT;
+ }
+ return dump_size;
+}
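+
+/* Worked example (assumed core counts, for illustration only): on a v5 GPU
+ * with one L2 slice and four shader cores,
+ * dump_size = (2 + 1 + 4) * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+ *           = 7 * 64 * 4 = 1792 bytes.
+ */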
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+ u32 *d = dst;
+ u32 *s = src;
+ size_t i, j;
+
+ for (i = 0; i < dump_size; i += block_size) {
+ /* skip over the header block */
+ d += NR_BYTES_PER_HDR / sizeof(u32);
+ s += NR_BYTES_PER_HDR / sizeof(u32);
+ for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+ /* saturate result if addition would result in wraparound */
+ if (U32_MAX - *d < *s)
+ *d = U32_MAX;
+ else
+ *d += *s;
+ d++;
+ s++;
+ }
+ }
+}
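+
+/* Worked example of the saturation above: if *d == 0xfffffff0 and
+ * *s == 0x20, then U32_MAX - *d == 0xf, which is less than *s, so the
+ * result is clamped to U32_MAX instead of wrapping around to 0x10. */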
+
+/* This is the Midgard v4 patch function. It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ u32 *mask;
+ u8 *dst = cli->accum_buffer;
+ u8 *src = ctx->cpu_va;
+ u32 nr_cg = ctx->kctx->kbdev->gpu_props.num_core_groups;
+ size_t i, group_size, group;
+ enum {
+ SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ SC1_BASE = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ SC2_BASE = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ SC3_BASE = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ TILER_BASE = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ JM_BASE = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+ };
+
+ group_size = NR_CNT_BLOCKS_PER_GROUP *
+ NR_CNT_PER_BLOCK *
+ NR_BYTES_PER_CNT;
+ for (i = 0; i < nr_cg; i++) {
+ group = i * group_size;
+ /* copy shader core headers */
+ memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+ NR_BYTES_PER_HDR);
+ memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+ NR_BYTES_PER_HDR);
+ memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+ NR_BYTES_PER_HDR);
+ memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+ NR_BYTES_PER_HDR);
+
+ /* copy tiler header */
+ memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+ NR_BYTES_PER_HDR);
+
+ /* copy mmu header */
+ memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+ NR_BYTES_PER_HDR);
+
+ /* copy job manager header */
+ memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+ NR_BYTES_PER_HDR);
+
+ /* patch the shader core enable mask */
+ mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+ /* patch the tiler core enable mask */
+ mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[TILER_HWCNT_BM];
+
+ /* patch the mmu core enable mask */
+ mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+ /* patch the job manager enable mask */
+ mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[JM_HWCNT_BM];
+ }
+}
+
+/* This is the Midgard v5 patch function. It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ struct kbase_device *kbdev = ctx->kctx->kbdev;
+ u32 i, nr_l2, nr_sc;
+ u32 *mask;
+ u8 *dst = cli->accum_buffer;
+ u8 *src = ctx->cpu_va;
+ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+ /* copy and patch job manager header */
+ memcpy(dst, src, NR_BYTES_PER_HDR);
+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[JM_HWCNT_BM];
+ dst += block_size;
+ src += block_size;
+
+ /* copy and patch tiler header */
+ memcpy(dst, src, NR_BYTES_PER_HDR);
+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[TILER_HWCNT_BM];
+ dst += block_size;
+ src += block_size;
+
+ /* copy and patch MMU/L2C headers */
+ nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+ for (i = 0; i < nr_l2; i++) {
+ memcpy(dst, src, NR_BYTES_PER_HDR);
+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+ dst += block_size;
+ src += block_size;
+ }
+
+ /* copy and patch shader core headers */
+ nr_sc = kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+ for (i = 0; i < nr_sc; i++) {
+ memcpy(dst, src, NR_BYTES_PER_HDR);
+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ dst += block_size;
+ src += block_size;
+ }
+}
+
+static void accum_clients(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_vinstr_client *iter;
+ int v4;
+
+ v4 = kbase_hw_has_feature(ctx->kbdev, BASE_HW_FEATURE_V4);
+ list_for_each_entry(iter, &ctx->clients, list) {
+ if (v4)
+ patch_dump_buffer_hdr_v4(ctx, iter);
+ else
+ patch_dump_buffer_hdr_v5(ctx, iter);
+ accum_dump_buffer(iter->accum_buffer, ctx->cpu_va,
+ iter->dump_size);
+ }
+}
+
+int kbase_vinstr_dump(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ int err = 0;
+
+ if (!cli)
+ return -EINVAL;
+
+ mutex_lock(&ctx->lock);
+ err = kbase_instr_hwcnt_request_dump(ctx->kctx);
+ if (err)
+ goto out;
+
+ err = kbase_instr_hwcnt_wait_for_dump(ctx->kctx);
+ if (err)
+ goto out;
+
+ accum_clients(ctx);
+
+ if (!cli->kernel) {
+ if (copy_to_user((void __user *)cli->dump_buffer,
+ cli->accum_buffer, cli->dump_size)) {
+ err = -EFAULT;
+ goto out;
+ }
+ } else {
+ memcpy(cli->dump_buffer, cli->accum_buffer, cli->dump_size);
+ }
+
+ memset(cli->accum_buffer, 0, cli->dump_size);
+out:
+ mutex_unlock(&ctx->lock);
+ return err;
+}
+
+int kbase_vinstr_clear(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ int err = 0;
+
+ if (!cli)
+ return -EINVAL;
+
+ mutex_lock(&ctx->lock);
+ err = kbase_instr_hwcnt_request_dump(ctx->kctx);
+ if (err)
+ goto out;
+
+ err = kbase_instr_hwcnt_wait_for_dump(ctx->kctx);
+ if (err)
+ goto out;
+
+ err = kbase_instr_hwcnt_clear(ctx->kctx);
+ if (err)
+ goto out;
+
+ accum_clients(ctx);
+
+ memset(cli->accum_buffer, 0, cli->dump_size);
+out:
+ mutex_unlock(&ctx->lock);
+ return err;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100644
index 00000000000..2e846e81a80
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+enum {
+ SHADER_HWCNT_BM,
+ TILER_HWCNT_BM,
+ MMU_L2_HWCNT_BM,
+ JM_HWCNT_BM
+};
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/**
+ * kbase_vinstr_init() - Initialize the vinstr core
+ * @kbdev: Kbase device
+ *
+ * Return: A pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - Terminate the vinstr core
+ * @ctx: Vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *ctx);
+
+/**
+ * kbase_vinstr_attach_client - Attach a client to the vinstr core
+ * @ctx: Vinstr context
+ * @kernel: True if this client is a kernel-side client, false
+ * otherwise
+ * @dump_buffer: Client's dump buffer
+ * @bitmap: Bitmaps describing which counters should be enabled
+ *
+ * Return: A vinstr opaque client handle or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_attach_client(struct kbase_vinstr_context *ctx,
+ bool kernel,
+ u64 dump_buffer,
+ u32 bitmap[4]);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_dump_size - Get the size of the dump buffer
+ * @ctx: Vinstr context
+ *
+ * This is only useful for kernel-side clients to know how much
+ * memory they need to allocate to receive the performance counter
+ * memory block.
+ *
+ * Return: The size of the client-side dump buffer
+ */
+size_t kbase_vinstr_dump_size(struct kbase_vinstr_context *ctx);
+
+/**
+ * kbase_vinstr_dump - Performs a synchronous hardware counter dump for a given
+ * kbase context
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ *
+ * Return: 0 on success
+ */
+int kbase_vinstr_dump(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_clear - Performs a reset of the hardware counters for a given
+ * kbase context
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ *
+ * Return: 0 on success
+ */
+int kbase_vinstr_clear(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
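+/* Illustrative kernel-side flow (a sketch only; error handling is trimmed,
+ * and 'my_buf' and 'bitmap' are hypothetical - my_buf must be large enough
+ * to hold the dump):
+ *
+ *   struct kbase_vinstr_context *vctx = kbase_vinstr_init(kbdev);
+ *   struct kbase_vinstr_client *cli;
+ *
+ *   cli = kbase_vinstr_attach_client(vctx, true,
+ *                                    (u64)(uintptr_t)my_buf, bitmap);
+ *   if (cli) {
+ *           kbase_vinstr_dump(vctx, cli);   // counters land in my_buf
+ *           kbase_vinstr_detach_client(vctx, cli);
+ *   }
+ *   kbase_vinstr_term(vctx);
+ */
+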
+#endif /* _KBASE_VINSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100644
index 00000000000..5d6b4021d62
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+ TP_PROTO(int jobslot, unsigned int info_val),
+ TP_ARGS(jobslot, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, jobslot)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->jobslot = jobslot;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+ TP_PROTO(int jobslot, unsigned int info_val), \
+ TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+ TP_PROTO(int refcount, unsigned int info_val),
+ TP_ARGS(refcount, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, refcount)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->refcount = refcount;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+ TP_PROTO(int refcount, unsigned int info_val), \
+ TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+ TP_PROTO(int gpu_addr, unsigned int info_val),
+ TP_ARGS(gpu_addr, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, gpu_addr)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->gpu_addr = gpu_addr;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+ TP_PROTO(int gpu_addr, unsigned int info_val), \
+ TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100644
index 00000000000..fc3cf32ba4d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,211 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+ TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+ unsigned char job_id),
+ TP_ARGS(event_id, tgid, pid, job_id),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned int, tgid)
+ __field(unsigned int, pid)
+ __field(unsigned char, job_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->tgid = tgid;
+ __entry->pid = pid;
+ __entry->job_id = job_id;
+ ),
+ TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+ __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
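+/*
+ * Editor's sketch, not part of the original patch: the TRACE_EVENT()
+ * definition above generates a trace_mali_job_slots_event() helper that
+ * callers invoke to emit the event. A hypothetical call site might look
+ * like:
+ *
+ *	trace_mali_job_slots_event(GATOR_MAKE_EVENT(state, slot),
+ *				   task_tgid_nr(current),
+ *				   task_pid_nr(current), job_id);
+ */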
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64-bit bitmask reporting the power status of the cores (1=ON, 0=OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64-bit bitmask of the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64-bit bitmask of the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker();
+ * reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+ TP_PROTO(int event_id, unsigned long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(int, event_id)
+ __field(unsigned long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space();
+ * reports that a certain MMU address space is now in use.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+ TP_PROTO(int event_id),
+ TP_ARGS(event_id),
+ TP_STRUCT__entry(
+ __field(int, event_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ ),
+ TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal();
+ * reports that a certain MMU address space has now been released.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+ TP_PROTO(int event_id),
+ TP_ARGS(event_id),
+ TP_STRUCT__entry(
+ __field(int, event_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ ),
+ TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ * and kbase_atomic_sub_pages(); reports that the total number of
+ * allocated pages has changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+ TP_PROTO(long long int event_id),
+ TP_ARGS(event_id),
+ TP_STRUCT__entry(
+ __field(long long int, event_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ ),
+ TP_printk("event=%lld", __entry->event_id)
+);
+
+/**
+ * mali_sw_counter - not currently used
+ * @event_id: counter id
+ */
+TRACE_EVENT(mali_sw_counter,
+ TP_PROTO(unsigned int event_id, signed long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(int, event_id)
+ __field(long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %d = %lld", __entry->event_id, __entry->value)
+);
+
+#endif /* _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_malisw.h b/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100644
index 00000000000..99452933eab
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX ((u8)~0U)
+#define S8_MAX ((s8)(U8_MAX>>1))
+#define S8_MIN ((s8)(-S8_MAX - 1))
+#define U16_MAX ((u16)~0U)
+#define S16_MAX ((s16)(U16_MAX>>1))
+#define S16_MIN ((s16)(-S16_MAX - 1))
+#define U32_MAX ((u32)~0U)
+#define S32_MAX ((s32)(U32_MAX>>1))
+#define S32_MIN ((s32)(-S32_MAX - 1))
+#define U64_MAX ((u64)~0ULL)
+#define S64_MAX ((s64)(U64_MAX>>1))
+#define S64_MIN ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+/**
+ * MAX - Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
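+/*
+ * Editor's illustration of the multiple-evaluation caveat above: because
+ * MIN()/MAX() are plain macros, MIN(i++, j) expands to
+ * ((i++) < (j) ? (i++) : (j)) and may increment i twice. Pass only
+ * side-effect-free expressions.
+ */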
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * must support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x) ((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro into a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...) ((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer into a u64 for storing in
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered signed.
+ */
+#define PTR_TO_U64(x) ((uint64_t)((uintptr_t)(x)))
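+/*
+ * Editor's sketch with a hypothetical descriptor field: going through
+ * uintptr_t first avoids sign extension of the pointer value on 32-bit
+ * kernels.
+ *
+ *	desc->gpu_va = PTR_TO_U64(cpu_ptr);
+ */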
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x) #x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x) CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE void [module]_compile_time_assertions( void )
+ * {
+ * CSTD_COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+ do { switch (0) { case 0: case (expr):; } } while (false)
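+/*
+ * Editor's note on the mechanism: if expr evaluates to 0 at compile
+ * time, the switch gains two identical "case 0:" labels and the
+ * compiler rejects the duplicate, turning a failed assertion into a
+ * build error.
+ */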
+
+#endif /* _MALISW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100644
index 00000000000..180fea1dfd1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,542 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE 0x0000
+#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
+#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
+#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
+#define SUSPEND_SIZE 0x008 /* (RO) Fixed-function suspend buffer size */
+#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
+#define MEM_FEATURES 0x010 /* (RO) Memory system features */
+#define MMU_FEATURES 0x014 /* (RO) MMU features */
+#define AS_PRESENT 0x018 /* (RO) Address space slots present */
+#define JS_PRESENT 0x01C /* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
+#define GPU_IRQ_CLEAR 0x024 /* (WO) */
+#define GPU_IRQ_MASK 0x028 /* (RW) */
+#define GPU_IRQ_STATUS 0x02C /* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended for use with SOFT_RESET
+ commands, which may take time. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down
+ and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND 0x030 /* (WO) */
+#define GPU_STATUS 0x034 /* (RO) */
+
+
+#define GROUPS_L2_COHERENT (1 << 0) /* Core groups are L2 coherent */
+
+#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY 0x050 /* (WO) Power manager key register */
+#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
+#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
+#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
+#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
+#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
+#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
+#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
+#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
+#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
+#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
+#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
+#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
+#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
+#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
+#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
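+/* Editor's worked example: (n) << 2 steps through the 4-byte register
+ * stride, so JS_FEATURES_REG(2) = GPU_CONTROL_REG(0x0C0 + 8)
+ * = GPU_CONTROL_REG(0x0C8), matching JS2_FEATURES above. */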
+
+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
+
+
+#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
+
+#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */
+
+
+#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE 0x1000
+
+#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
+#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
+#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
+#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
+
+#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
+#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
+#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
+#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
+#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */
+#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */
+#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */
+#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */
+#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */
+#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */
+#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */
+#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */
+#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */
+#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */
+#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
+#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
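+/* Editor's worked example: each job slot occupies a 0x80-byte window
+ * ((n) << 7), so JOB_SLOT_REG(1, 0x00) = 0x1000 + 0x800 + 0x80 + 0x00
+ * = 0x1880, the JS_HEAD_LO register of slot 1 (see below). */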
+
+#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
+
+#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
+#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
+
+#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
+
+
+#define MEMORY_MANAGEMENT_BASE 0x2000
+#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
+
+#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
+#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
+#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
+#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
+#define MMU_AS4 0x500 /* Configuration registers for address space 4 */
+#define MMU_AS5 0x540 /* Configuration registers for address space 5 */
+#define MMU_AS6 0x580 /* Configuration registers for address space 6 */
+#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
+#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
+#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
+#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
+#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
+#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
+#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
+#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
+#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
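+/* Editor's worked example: each address space occupies a 0x40-byte
+ * window ((n) << 6), so MMU_AS_REG(3, 0x18) = 0x2000 + 0x400 + 0xC0
+ * + 0x18 = 0x24D8, the AS_COMMAND register of address space 3 (see
+ * below). */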
+
+#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
+
+
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. These values also apply to the
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK and MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS 16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n) (1UL << (n))
+#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
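+/* Editor's illustration: address space n raises its page fault flag in
+ * bit n and its bus error flag in bit n + 16, so MMU_PAGE_FAULT(2)
+ * == (1UL << 2) and MMU_BUS_ERROR(2) == (1UL << 18). */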
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003
+
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3)
+
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8)
+
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP 0x00 /* NOP Operation */
+#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs
+ (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1 caches, then
+ flush all L2 caches, then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
+#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status instead of a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE 0x00
+#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE 0x40
+#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
+#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE 0x60
+#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_AS_SHIFT 4 /* address space bitmap starts from bit 4 of the register */
+#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull
+
+/* Symbol for default MEMATTR to use */
+#define AS_MEMATTR_INDEX_DEFAULT 0
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA 4
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT 0
+#define GPU_ID_VERSION_MINOR_SHIFT 4
+#define GPU_ID_VERSION_MAJOR_SHIFT 12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16
+#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X 0x6956
+#define GPU_ID_PI_T62X 0x0620
+#define GPU_ID_PI_T76X 0x0750
+#define GPU_ID_PI_T72X 0x0720
+#define GPU_ID_PI_TFRX 0x0880
+#define GPU_ID_PI_T86X 0x0860
+#define GPU_ID_PI_T82X 0x0820
+#define GPU_ID_PI_T83X 0x0830
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0 0x1
+#define GPU_ID_S_EAC 0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+ (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+ ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \
+ ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \
+ ((status) << GPU_ID_VERSION_STATUS_SHIFT))
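+/* Editor's worked example using the values defined above:
+ * GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, GPU_ID_S_EAC)
+ * = (0x6956 << 16) | (0x0 << 12) | (0x1 << 4) | 0x2 = 0x69560012. */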
+
+/* End GPU_ID register */
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB (1u << 4)
+#define JS_FEATURE_VERTEX_JOB (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB (1u << 6)
+#define JS_FEATURE_TILER_JOB (1u << 7)
+#define JS_FEATURE_FUSED_JOB (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED 0
+#define IMPLEMENTATION_SILICON 1
+#define IMPLEMENTATION_FPGA 2
+#define IMPLEMENTATION_MODEL 3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT 256
+#define THREAD_MWS_DEFAULT 256
+#define THREAD_MBS_DEFAULT 256
+#define THREAD_MR_DEFAULT 1024
+#define THREAD_MTQ_DEFAULT 4
+#define THREAD_MTGS_DEFAULT 10
+
+/* End THREAD_* registers */
+
+/* COHERENCY_* values*/
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE 1
+#define COHERENCY_NONE 0xFFFF
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+/* End COHERENCY_* values */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6)
+#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16)
+#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100644
index 00000000000..d8286a36009
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int tgid,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ tgid,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, tgid)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->tgid = tgid;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int atom_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ atom_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, atom_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->atom_id = atom_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->atom_id,
+ __entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int active),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ active),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, active)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->active = active;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int state),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ state),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->state = state;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->state)
+
+);
+
+TRACE_EVENT(mali_timeline_pm_event,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int pm_event_type,
+ unsigned int pm_event_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ pm_event_type,
+ pm_event_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, pm_event_type)
+ __field(unsigned int, pm_event_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->pm_event_type = pm_event_type;
+ __entry->pm_event_id = pm_event_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int atom_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ atom_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, atom_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->atom_id = atom_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int trans_code,
+ int trans_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ trans_code,
+ trans_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, trans_code)
+ __field(int, trans_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->trans_code = trans_code;
+ __entry->trans_id = trans_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->count)
+);
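+/*
+ * Editor's sketch, with hypothetical local names: callers of these
+ * timeline events typically split a monotonic timestamp into the
+ * ts_sec/ts_nsec pair before emitting, along the lines of:
+ *
+ *	getrawmonotonic(&ts);
+ *	trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, count);
+ */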
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 00000000000..841d03fb587
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes function calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Pass this
+ * identifier to the uku_open() function to select a UKK client.
+ *
+ * When a new UKK client driver is created, a new identifier needs to be added to the uk_client_id
+ * enumeration, and the uku_open() implementation for the various OS ports needs to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+ /**
+ * Value used to identify the Base driver UK client.
+ */
+ UK_CLIENT_MALI_T600_BASE,
+
+ /** The number of uk clients supported. This must be the last member of the enum */
+ UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+ UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */
+ /**
+ * Each UK client numbers the functions they provide starting from
+ * number UK_FUNC_ID. This number is then eventually assigned to the
+ * id field of the union uk_header structure when preparing to make a
+ * UK call. See your UK client for a list of their function numbers.
+ */
+ UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and has executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of an
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on both 32 and 64-bit kernels, and the structure has the same size on
+ * either. The uk_kernel_size_type type should be defined accordingly in the
+ * OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+ /**
+ * 32-bit number identifying the UK function to be called.
+ * Also see uk_func.
+ */
+ u32 id;
+ /**
+ * The int return code returned by the called UK function.
+ * See the specification of the particular UK function you are
+ * calling for the meaning of the error codes returned. All
+ * UK functions return 0 on success.
+ */
+ u32 ret;
+ /*
+ * Used to ensure 64-bit alignment of this union. Do not remove.
+ * This field is used for padding and does not need to be initialized.
+ */
+ u64 sizer;
+};
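+/*
+ * Editor's sketch of the calling convention described above, with
+ * hypothetical structure and function names: the header is embedded
+ * first, id is set before the call, and ret is read afterwards.
+ *
+ *	struct my_uk_args {
+ *		union uk_header header;
+ *		u64 payload;
+ *	} args;
+ *
+ *	args.header.id = UK_FUNC_ID + MY_FUNC_NUMBER;
+ *	// ... dispatch to the UKK client ...
+ *	if (args.header.ret != 0)
+ *		// the UK function failed
+ */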
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+ union uk_header header;
+ /**< UK call header */
+ u16 major;
+ /**< This field carries the user-side major version on input and the kernel-side major version on output */
+ u16 minor;
+ /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+ u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _UK_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/Kbuild b/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100644
index 00000000000..558657bbced
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+ platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+ obj-y += $(platform_name)/
+endif
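+
+# Editor's example: with CONFIG_MALI_PLATFORM_THIRDPARTY_NAME set to the
+# hypothetical value "myplatform", the obj-y line above expands to:
+#   obj-y += myplatform/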
diff --git a/drivers/gpu/arm/midgard/platform/Kconfig b/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100644
index 00000000000..8fb4e917c4f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
+
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100644
index 00000000000..679945bec52
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100644
index 00000000000..046302e8e41
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..f92961c53a0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,86 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (5000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (5000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * The CPU clock speed of the platform, in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * The GPU clock speed of the platform, in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#define POWER_MODEL_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100644
index 00000000000..6f8a72555e3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ int ret;
+
+ dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+ (void *)kbdev->dev->pm_domain);
+
+ ret = pm_runtime_get_sync(kbdev->dev);
+
+ dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret);
+
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+
+ pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+ pm_runtime_enable(kbdev->dev);
+#ifdef CONFIG_MALI_MIDGARD_DEBUG_SYS
+	{
+		int err = kbase_platform_create_sysfs_file(kbdev->dev);
+
+		if (err) {
+			/* Leave runtime PM in a consistent state on failure */
+			pm_runtime_disable(kbdev->dev);
+			return err;
+		}
+	}
+#endif /* CONFIG_MALI_MIDGARD_DEBUG_SYS */
+ return 0;
+}
+
+void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+ pm_runtime_disable(kbdev->dev);
+}
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+ return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+ int ret = pm_callback_runtime_on(kbdev);
+
+ WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+ pm_callback_runtime_off(kbdev);
+}
+
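+/* When CONFIG_PM_RUNTIME is not set, the runtime hooks below are left NULL;
+ * the assumption is that kbase then skips runtime-PM handling and relies on
+ * the system suspend/resume callbacks alone.
+ */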
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = pm_callback_suspend,
+ .power_resume_callback = pm_callback_resume,
+#ifdef CONFIG_PM_RUNTIME
+ .power_runtime_init_callback = kbase_device_runtime_init,
+ .power_runtime_term_callback = kbase_device_runtime_disable,
+ .power_runtime_on_callback = pm_callback_runtime_on,
+ .power_runtime_off_callback = pm_callback_runtime_off,
+#else /* CONFIG_PM_RUNTIME */
+ .power_runtime_init_callback = NULL,
+ .power_runtime_term_callback = NULL,
+ .power_runtime_on_callback = NULL,
+ .power_runtime_off_callback = NULL,
+#endif /* CONFIG_PM_RUNTIME */
+};
+
+
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100644
index 00000000000..5b6ef37bb71
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+
+obj-m += juno_mali_opp.o
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100644
index 00000000000..ccfd8ccb012
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux < 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. On older kernels, use the old filename for the include and
+ * define the new names to the old ones.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp_add opp_add
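+/* Only dev_pm_opp_add() is aliased here, as it is the only renamed OPP
+ * function this file uses.
+ */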
+#endif /* Linux >= 3.13 */
+
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+ struct scpi_opp *sopp;
+ int i;
+
+ /* Hard coded for Juno. 2 is GPU domain */
+ sopp = scpi_dvfs_get_opps(2);
+	if (IS_ERR_OR_NULL(sopp))
+		return sopp ? PTR_ERR(sopp) : -ENODEV;
+
+ for (i = 0; i < sopp->count; i++) {
+ struct scpi_opp_entry *e = &sopp->opp[i];
+
+ dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+ e->freq_hz, e->volt_mv);
+
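+		/* dev_pm_opp_add() expects the voltage in microvolts, hence
+		 * the mV to uV conversion below.
+		 */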
+ dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000);
+ }
+
+ return 0;
+}
+
+static int juno_setup_opps(void)
+{
+ struct device_node *np;
+ struct platform_device *pdev;
+ int err;
+
+ np = of_find_node_by_name(NULL, "gpu");
+ if (!np) {
+ pr_err("Failed to find DT entry for Mali\n");
+ return -EFAULT;
+ }
+
+ pdev = of_find_device_by_node(np);
+ if (!pdev) {
+ pr_err("Failed to find device for Mali\n");
+ of_node_put(np);
+ return -EFAULT;
+ }
+
+ err = init_juno_opps_from_scpi(&pdev->dev);
+
+ of_node_put(np);
+
+ return err;
+}
+
+module_init(juno_setup_opps);
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100644
index 00000000000..86f2bb50caf
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,172 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 65,
+ .mmu_irq_number = 66,
+ .gpu_irq_number = 64,
+ .io_memory_region = {
+ .start = 0x2D000000,
+ .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state
+	 * (if the below HARD_RESET code is active)
+	 */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the
+	 * GPU and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working
+	 * correctly it should be completely safe, as the GPU should not be
+	 * active at this point.
+	 * However, this is normally disabled because it will most likely
+	 * interfere with bus logging etc.
+	 */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long juno_model_static_power(unsigned long voltage)
+{
+ struct thermal_zone_device *tz;
+ unsigned long temperature, temp;
+ unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+ const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+ const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+ tz = thermal_zone_get_zone_by_name("gpu");
+ if (IS_ERR(tz)) {
+ pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+ PTR_ERR(tz));
+ temperature = FALLBACK_STATIC_TEMPERATURE;
+ } else {
+ int ret;
+
+ ret = tz->ops->get_temp(tz, &temperature);
+ if (ret) {
+ pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+ ret);
+ temperature = FALLBACK_STATIC_TEMPERATURE;
+ }
+ }
+
+ /* Calculate the temperature scaling factor. To be applied to the
+ * voltage scaled power.
+ */
+ temp = temperature / 1000;
+ temp_squared = temp * temp;
+ temp_cubed = temp_squared * temp;
+ temp_scaling_factor =
+ (2 * temp_cubed)
+ - (80 * temp_squared)
+ + (4700 * temp)
+ + 32000;
+
+ return (((coefficient * voltage_cubed) >> 20)
+ * temp_scaling_factor)
+ / 1000000;
+}
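+
+/* Illustrative sanity check (the 820 mV input is hypothetical, not from
+ * this patch): at 820 mV and the 55 degC fallback temperature,
+ *   coefficient         = (410 << 20) / (729000000 >> 10)           = 603
+ *   voltage_cubed       = (820 * 820 * 820) >> 10                   = 538445
+ *   temp_scaling_factor = 2*55^3 - 80*55^2 + 4700*55 + 32000        = 381250
+ *   static power        = (((603 * 538445) >> 20) * 381250) / 10^6 ~= 117 mW
+ */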
+
+static unsigned long juno_model_dynamic_power(unsigned long freq,
+ unsigned long voltage)
+{
+	/* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+	 * The coefficient (c) is in mW/(MHz*mV*mV).
+	 *
+	 * This function calculates the dynamic power using the formula:
+	 * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+	 */
+ const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+ const unsigned long f_mhz = freq / 1000000; /* MHz */
+ const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+ return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
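+
+/* Illustrative sanity check (600 MHz and 820 mV are hypothetical inputs):
+ *   v2    = (820 * 820) / 1000           = 672
+ *   f_mhz = 600000000 / 1000000          = 600
+ *   Pdyn  = (3600 * 672 * 600) / 1000000 = 1451 mW
+ */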
+
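+/* These hooks back the POWER_MODEL_CALLBACKS definition in this platform's
+ * mali_kbase_config_platform.h, letting the devfreq cooling device query
+ * static and dynamic power estimates for IPA.
+ */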
+struct devfreq_cooling_ops juno_model_ops = {
+ .get_static_power = juno_model_static_power,
+ .get_dynamic_power = juno_model_dynamic_power,
+};
+
+#endif /* CONFIG_DEVFREQ_THERMAL */
+
+static int juno_secure_mode_enable(struct kbase_device *kbdev)
+{
+ /* TODO: enable secure mode */
+ /*dev_err(kbdev->dev, "SWITCHING TO SECURE\n");*/
+ return 0; /* all ok */
+}
+
+static int juno_secure_mode_disable(struct kbase_device *kbdev)
+{
+ /* TODO: Turn off secure mode and reset GPU */
+ /*dev_err(kbdev->dev, "SWITCHING TO NON-SECURE\n");*/
+ return 0; /* all ok */
+}
+
+struct kbase_secure_ops juno_secure_ops = {
+ .secure_mode_enable = juno_secure_mode_enable,
+ .secure_mode_disable = juno_secure_mode_disable,
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..fa5e9e9a5b1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&juno_model_ops)
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (&juno_secure_ops)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+#ifdef CONFIG_DEVFREQ_THERMAL
+extern struct devfreq_cooling_ops juno_model_ops;
+#endif
+extern struct kbase_secure_ops juno_secure_ops;
diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100644
index 00000000000..7cb3be7f78c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success, non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
new file mode 100644
index 00000000000..01f9dfce93c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+/**
+ * kbase_platform_fake_register - Entry point for fake platform registration
+ *
+ * This function is called early on in the initialization during execution of
+ * kbase_driver_init.
+ *
+ * Return: 0 to indicate success, non-zero for failure.
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration
+ *
+ * This function is called in the termination during execution of
+ * kbase_driver_exit.
+ */
+void kbase_platform_fake_unregister(void);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100644
index 00000000000..084a1561343
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..ac5060af6a7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (5000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (5000)
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#define POWER_MODEL_CALLBACKS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100644
index 00000000000..687b1a8c043
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 68,
+ .mmu_irq_number = 69,
+ .gpu_irq_number = 70,
+ .io_memory_region = {
+ .start = 0xFC010000,
+ .end = 0xFC010000 + (4096 * 4) - 1
+ }
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state
+	 * (if the below HARD_RESET code is active)
+	 */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the
+	 * GPU and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working
+	 * correctly it should be completely safe, as the GPU should not be
+	 * active at this point.
+	 * However, this is normally disabled because it will most likely
+	 * interfere with bus logging etc.
+	 */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100644
index 00000000000..9bc51f1e2da
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START (0x10000000)
+#define SYS_CFGDATA_OFFSET (0x000000A0)
+#define SYS_CFGCTRL_OFFSET (0x000000A4)
+#define SYS_CFGSTAT_OFFSET (0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE (1U << 31)
+#define READ_REG_BIT_VALUE (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0)
+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0)
+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1)
+
+#define FEED_REG_BIT_MASK (0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13)
+
+/* The following three values are used for reading
+ * the HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) ((val) & (1 << (pos)))
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vexpress_cpu_clock_speed - Retrieves the CPU clock speed
+ * @cpu_clock: the value of CPU clock speed in MHz
+ *
+ * Return: 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+ int err = 0;
+ u32 reg_val = 0;
+ u32 osc2_value = 0;
+ u32 pa_divide = 0;
+ u32 pb_divide = 0;
+ u32 pc_divide = 0;
+ void __iomem *syscfg_reg = NULL;
+ void __iomem *scc_reg = NULL;
+
+	if (cpu_clock_speed != CPU_CLOCK_SPEED_UNDEFINED) {
+ *cpu_clock = cpu_clock_speed;
+ return 0;
+ }
+
+ /* Init the value in case something goes wrong */
+ *cpu_clock = 0;
+
+ /* Map CPU register into virtual memory */
+ syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+ if (syscfg_reg == NULL) {
+ err = -EIO;
+ goto syscfg_reg_map_failed;
+ }
+
+ scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+ if (scc_reg == NULL) {
+ err = -EIO;
+ goto scc_reg_map_failed;
+ }
+
+ raw_spin_lock(&syscfg_lock);
+
+ /* Read SYS regs - OSC2 */
+ reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+	/* Check if there is any other ongoing request */
+ if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+ err = -EBUSY;
+ goto ongoing_request;
+ }
+	/* Reset the CFGSTAT reg */
+ writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
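+	/* A Versatile Express config transaction: program SYS_CFGCTRL with
+	 * the function (OSC), site, position and device, poll SYS_CFGSTAT
+	 * until it completes, then read the result from SYS_CFGDATA.
+	 */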
+ writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+ DCC_DEFAULT_BIT_VALUE |
+ SYS_CFG_OSC_FUNC_BIT_VALUE |
+ SITE_DEFAULT_BIT_VALUE |
+ BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+ DEVICE_DEFAULT_BIT_VALUE,
+ (syscfg_reg + SYS_CFGCTRL_OFFSET));
+ /* Wait for the transaction to complete */
+ while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+ SYS_CFG_COMPLETE_BIT_VALUE))
+ ;
+ /* Read SYS_CFGSTAT Register to get the status of submitted
+ * transaction */
+ reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+ if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+ /* Error while setting register */
+ err = -EIO;
+ goto set_reg_error;
+ }
+
+ osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+ /* Read the SCC CFGRW0 register */
+ reg_val = readl(scc_reg);
+
+ /*
+ * Select the appropriate feed:
+ * CFGRW0[0] - CLKOB
+ * CFGRW0[1] - CLKOC
+	 * CFGRW0[2] - FACLK (CLKB FROM AXICLK PLL)
+ */
+ /* Calculate the FCLK */
+ if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+ /* CFGRW0[0] - CLKOB */
+ /* CFGRW0[6:3] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+ FCLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[10:7] */
+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+ FCLK_PB_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+ } else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+ /* CFGRW0[1] - CLKOC */
+ /* CFGRW0[6:3] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+ FCLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[14:11] */
+ pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+ FCLK_PC_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+ } else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+ /* CFGRW0[2] - FACLK */
+ /* CFGRW0[18:15] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+ AXICLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[22:19] */
+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+ AXICLK_PB_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+ } else {
+ err = -EIO;
+ }
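+
+	/* Illustrative example (register values are hypothetical): with
+	 * OSC2 = 50000000 Hz and both divider fields read as 0, this gives
+	 * *cpu_clock = 50000000 * (0 + 1) / (0 + 1) = 50000000 Hz, which
+	 * the division below converts to 50 MHz.
+	 */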
+
+set_reg_error:
+ongoing_request:
+ raw_spin_unlock(&syscfg_lock);
+ *cpu_clock /= HZ_IN_MHZ;
+
+ if (!err)
+ cpu_clock_speed = *cpu_clock;
+
+ iounmap(scc_reg);
+
+scc_reg_map_failed:
+ iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+ return err;
+}
+
+u32 kbase_get_platform_logic_tile_type(void)
+{
+	void __iomem *syscfg_reg = NULL;
+	u32 sys_procid1 = 0;
+
+	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 +
+			VE_SYS_PROC_ID1_OFFSET, 4);
+	if (syscfg_reg != NULL) {
+		sys_procid1 = readl(syscfg_reg);
+		/* Unmap again so the short-lived mapping is not leaked */
+		iounmap(syscfg_reg);
+	}
+
+	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100644
index 00000000000..ef9bfd72161
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * kbase_get_platform_logic_tile_type - determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When the platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, the GPU frequency may vary depending on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function is called by kbase_common_device_init to determine
+ * which LogicTile type is used by the platform by reading the HBI value
+ * of the daughterboard which holds the LogicTile:
+ *
+ * 0x192 HBI0192 Virtex-5
+ * 0x217 HBI0217 Virtex-6
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+u32 kbase_get_platform_logic_tile_type(void);
+
+#endif /* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100644
index 00000000000..d9bfabc28b6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..11c320ecc1f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,86 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#define POWER_MODEL_CALLBACKS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100644
index 00000000000..3ff0930fb4a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 68,
+ .mmu_irq_number = 69,
+ .gpu_irq_number = 70,
+ .io_memory_region = {
+ .start = 0x2f010000,
+ .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state
+	 * (if the below HARD_RESET code is active)
+	 */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the
+	 * GPU and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working
+	 * correctly it should be completely safe, as the GPU should not be
+	 * active at this point.
+	 * However, this is normally disabled because it will most likely
+	 * interfere with bus logging etc.
+	 */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100644
index 00000000000..0cb41ce8952
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..9bc2985fef0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,88 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#define POWER_MODEL_CALLBACKS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100644
index 00000000000..76ffe4a1e59
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 75,
+ .mmu_irq_number = 76,
+ .gpu_irq_number = 77,
+ .io_memory_region = {
+ .start = 0x2F000000,
+ .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state
+	 * (if the below HARD_RESET code is active)
+	 */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+	/* Cause a GPU hard reset to test whether we have actually idled the
+	 * GPU and that we properly reconfigure the GPU on power up.
+	 * Usually this would be dangerous, but if the GPU is working
+	 * correctly it should be completely safe, as the GPU should not be
+	 * active at this point.
+	 * However, this is normally disabled because it will most likely
+	 * interfere with bus logging etc.
+	 */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100644
index 00000000000..816dff49835
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START (0x10000000)
+#define SYS_CFGDATA_OFFSET (0x000000A0)
+#define SYS_CFGCTRL_OFFSET (0x000000A4)
+#define SYS_CFGSTAT_OFFSET (0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE (1U << 31)
+#define READ_REG_BIT_VALUE (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0)
+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0)
+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1)
+
+#define FEED_REG_BIT_MASK (0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos) ((val) & (1 << (pos)))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vexpress_cpu_clock_speed - Retrieves the CPU clock speed
+ * @cpu_clock: the value of CPU clock speed in MHz
+ *
+ * The implementation is platform specific.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+ /* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+ *cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100644
index 00000000000..23647ccb087
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif /* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100644
index 00000000000..5fa9b39c4bc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions common to Linux for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+ int dummy; /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H_ */
diff --git a/drivers/gpu/drm/pl111/Kbuild b/drivers/gpu/drm/pl111/Kbuild
new file mode 100644
index 00000000000..f10d58c70df
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+pl111_drm-y += pl111_drm_device.o \
+ pl111_drm_connector.o \
+ pl111_drm_crtc.o \
+ pl111_drm_cursor.o \
+ pl111_drm_dma_buf.o \
+ pl111_drm_encoder.o \
+ pl111_drm_fb.o \
+ pl111_drm_gem.o \
+ pl111_drm_pl111.o \
+ pl111_drm_platform.o \
+ pl111_drm_suspend.o \
+ pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig
new file mode 100644
index 00000000000..60b465c56c5
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+config DRM_PL111
+ tristate "DRM Support for PL111 CLCD Controller"
+ depends on DRM
+ select DRM_KMS_HELPER
+ select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+ help
+ Choose this option for DRM support for the PL111 CLCD controller.
+ If M is selected the module will be called pl111_drm.
+
diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile
new file mode 100644
index 00000000000..2869f587266
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
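+
+# Example invocation (the kernel tree path is illustrative):
+#   make KDIR=/lib/modules/$(uname -r)/build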
+
+all: pl111_drm
+
+pl111_drm:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100644
index 00000000000..d3e0086ff02
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/*
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ *
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE 0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS 256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE 4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD 16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK 0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND 0x0
+#define CLCD_CRSR_LBBP_FOREGROUND 0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT 0x2
+#define CLCD_CRSR_LBBP_INVERSE 0x3
+
+
+#define CLCD_CRSR_CTRL 0x00000c00
+#define CLCD_CRSR_CONFIG 0x00000c04
+#define CLCD_CRSR_PALETTE_0 0x00000c08
+#define CLCD_CRSR_PALETTE_1 0x00000c0c
+#define CLCD_CRSR_XY 0x00000c10
+#define CLCD_CRSR_CLIP 0x00000c14
+#define CLCD_CRSR_IMSC 0x00000c20
+#define CLCD_CRSR_ICR 0x00000c24
+#define CLCD_CRSR_RIS 0x00000c28
+#define CLCD_MIS 0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON (1 << 0)
+#define CRSR_CTRL_CRSR_MAX 3
+#define CRSR_CTRL_CRSR_NUM_SHIFT 4
+#define CRSR_CTRL_CRSR_NUM_MASK \
+ (CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0 0
+#define CRSR_CTRL_CURSOR_1 1
+#define CRSR_CTRL_CURSOR_2 2
+#define CRSR_CTRL_CURSOR_3 3
+
+#define CRSR_CONFIG_CRSR_SIZE (1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC (1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT 0
+#define CRSR_PALETTE_GREEN_SHIFT 8
+#define CRSR_PALETTE_BLUE_SHIFT 16
+
+#define CRSR_PALETTE_RED_MASK 0x000000ff
+#define CRSR_PALETTE_GREEN_MASK 0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK 0x00ff0000
+#define CRSR_PALETTE_MASK (~0xff000000)
+
+#define CRSR_XY_MASK 0x000003ff
+#define CRSR_XY_X_SHIFT 0
+#define CRSR_XY_Y_SHIFT 16
+
+#define CRSR_XY_X_MASK CRSR_XY_MASK
+#define CRSR_XY_Y_MASK (CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
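+
+/* Example (illustrative): a cursor position of (100, 50) is packed as
+ *   ((100 << CRSR_XY_X_SHIFT) & CRSR_XY_X_MASK) |
+ *   ((50 << CRSR_XY_Y_SHIFT) & CRSR_XY_Y_MASK)
+ */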
+
+#define CRSR_CLIP_MASK 0x3f
+#define CRSR_CLIP_X_SHIFT 0
+#define CRSR_CLIP_Y_SHIFT 8
+
+#define CRSR_CLIP_X_MASK CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK (CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM (1<<0)
+#define CRSR_ICR_CRSR_IC (1<<0)
+#define CRSR_RIS_CRSR_RIS (1<<0)
+#define CRSR_MIS_CRSR_MIS (1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100644
index 00000000000..64d87b60ff6
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR "ARM Ltd."
+#define DRIVER_NAME "pl111_drm"
+#define DRIVER_DESC "DRM module for PL111"
+#define DRIVER_LICENCE "GPL"
+#define DRIVER_ALIAS "platform:pl111_drm"
+#define DRIVER_DATE "20101111"
+#define DRIVER_VERSION "0.2"
+#define DRIVER_MAJOR 2
+#define DRIVER_MINOR 1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+ dma_addr_t fb_dev_addr;
+ void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+ struct page **pages;
+ dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+ struct drm_gem_object gem_object;
+ u32 type;
+ union {
+ struct pl111_gem_bo_dma dma;
+ struct pl111_gem_bo_shm shm;
+ } backing_data;
+ struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+ struct drm_framebuffer fb;
+ struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /* This is the kds set associated with the dma_buf we want to flip */
+ struct kds_resource_set *kds_res_set;
+#endif
+ struct drm_framebuffer *fb;
+ struct drm_crtc *crtc;
+ struct list_head link;
+ bool page_flip;
+ struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+ struct drm_crtc crtc;
+ int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /* This protects "old_kds_res_set" and "displaying_fb" */
+ spinlock_t current_displaying_lock;
+ /*
+ * When a buffer is displayed its associated kds resource
+ * will be obtained and stored here. Every time a buffer
+ * flip completes, the old kds set is released and replaced
+ * with the kds set of the new buffer.
+ */
+ struct kds_resource_set *old_kds_res_set;
+ /*
+ * Stores which frame buffer is currently being displayed by
+ * this CRTC or NULL if nothing is being displayed. It is used
+ * to tell whether we need to obtain a set of kds resources for
+ * exported buffer objects.
+ */
+ struct drm_framebuffer *displaying_fb;
+#endif
+ struct drm_display_mode *new_mode;
+ struct drm_display_mode *current_mode;
+ int last_bpp;
+
+ /*
+ * This spinlock protects "update_queue", "current_update_res"
+ * and calls to do_flip_to_res() which updates the CLCD base
+ * registers.
+ */
+ spinlock_t base_update_lock;
+ /*
+ * The resource that caused a base address update. Only one can be
+ * pending, hence it's != NULL if there's a pending update
+ */
+ struct pl111_drm_flip_resource *current_update_res;
+ /* Queue of things waiting to update the base address */
+ struct list_head update_queue;
+
+ void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+ struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+ struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+ struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+ struct pl111_drm_crtc *pl111_crtc;
+
+ struct amba_device *amba_dev;
+ unsigned long mmio_start;
+ __u32 mmio_len;
+ void *regs;
+ struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct kds_callback kds_cb;
+ struct kds_callback kds_obtain_current_cb;
+#endif
+ /*
+ * Number of flips that were started in show_framebuffer_on_crtc(),
+ * but haven't completed yet - because we do deferred flipping
+ */
+ atomic_t nr_flips_in_flight;
+ wait_queue_head_t wait_for_flips;
+
+ /*
+ * Used to prevent race between pl111_dma_buf_release and
+ * drm_gem_prime_handle_to_fd
+ */
+ struct mutex export_dma_buf_lock;
+
+ uint32_t number_crtcs;
+
+ /* Cache for flip resources used to avoid kmalloc on each page flip */
+ struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+ CURSOR_32X32,
+ CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+ CURSOR_SYNC_NONE,
+ CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create();
+ * it allows passing driver-specific flags to control the kind of buffer
+ * to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+
+/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/
+
+/*
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ * (0 -> SHM)
+ * (1 -> DMA)
+ * bit [2:1]: kind of mapping
+ * (0x0 -> uncached)
+ * (0x1 -> write combine)
+ * (0x2 -> cached)
+ */
+#define PL111_BOT_MASK (0x7)
+#define PL111_BOT_SHM (0x0 << 0)
+#define PL111_BOT_DMA (0x1 << 0)
+#define PL111_BOT_UNCACHED (0x0 << 1)
+#define PL111_BOT_WC (0x1 << 1)
+#define PL111_BOT_CACHED (0x2 << 1)
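+
+/*
+ * Illustrative sketch (not part of the original header): composing the
+ * flags above, a write-combined DMA-backed buffer would be requested as
+ *
+ *     u32 flags = PL111_BOT_DMA | PL111_BOT_WC;    (0x1 | 0x2 == 0x3)
+ *
+ * and decoded against PL111_BOT_MASK (0x7), with bit 0 giving the
+ * backing storage and bits [2:1] the mapping kind.
+ */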
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @height: user-desired buffer height in pixels.
+ * @width: user-desired buffer width in pixels.
+ * @bpp: user-desired bits per pixel.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ * - PL111_BOT_DMA / PL111_BOT_SHM
+ * - PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned handle to the created gem object,
+ * set by the gem module on the kernel side.
+ * @pitch: returned pitch of the allocated buffer in bytes.
+ * @size: returned size of the allocation; the user-requested size is
+ * page-aligned internally.
+ */
+struct drm_pl111_gem_create {
+ uint32_t height;
+ uint32_t width;
+ uint32_t bpp;
+ uint32_t flags;
+ /* handle, pitch, size will be returned */
+ uint32_t handle;
+ uint32_t pitch;
+ uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE 0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \
+ DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
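+
+/*
+ * Illustrative userspace sketch (an assumption, not part of the original
+ * header): allocating a 640x480 32bpp buffer through the custom ioctl.
+ *
+ *     struct drm_pl111_gem_create req = {
+ *             .height = 480,
+ *             .width  = 640,
+ *             .bpp    = 32,
+ *             .flags  = PL111_BOT_DMA | PL111_BOT_WC,
+ *     };
+ *     if (ioctl(drm_fd, DRM_IOCTL_PL111_GEM_CREATE, &req) == 0) {
+ *             handle, pitch and size have now been filled in by the
+ *             kernel and req.handle can be used like a dumb-buffer handle.
+ *     }
+ */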
+/****************************************************************************/
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+ (container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+ (container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+ container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+ container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+ container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm_connector.c b/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100644
index 00000000000..c7c3a226a86
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+ int w, h, type;
+} pl111_drm_modes[] = {
+ { 640, 480, DRM_MODE_TYPE_PREFERRED},
+ { 800, 600, 0},
+ {1024, 768, 0},
+ { -1, -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+ struct pl111_drm_connector *pl111_connector =
+ PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ drm_sysfs_connector_remove(connector);
+ drm_connector_cleanup(connector);
+ kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+ *connector, bool force)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+ return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ if (connector->encoder != NULL) {
+ return connector->encoder; /* Return attached encoder */
+ } else {
+ /*
+ * If there is no attached encoder we choose the best candidate
+ * from the list.
+ * For PL111 there is only one encoder so we return the first
+ * one we find.
+ * Other h/w would require a suitable criterion below.
+ */
+ struct drm_encoder *encoder = NULL;
+ struct drm_device *dev = connector->dev;
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+ head) {
+ if (1) { /* criterion ? */
+ break;
+ }
+ }
+ return encoder; /* return best candidate encoder */
+ }
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+ int i = 0;
+ int count = 0;
+
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ while (pl111_drm_modes[i].w != -1) {
+ struct drm_display_mode *mode =
+ drm_mode_find_dmt(connector->dev,
+ pl111_drm_modes[i].w,
+ pl111_drm_modes[i].h,
+ 60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ , false
+#endif
+ );
+
+ if (mode != NULL) {
+ mode->type |= pl111_drm_modes[i].type;
+ drm_mode_probed_add(connector, mode);
+ count++;
+ }
+
+ i++;
+ }
+
+ DRM_DEBUG_KMS("found %d modes\n", count);
+
+ return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+ return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = pl111_connector_destroy,
+ .detect = pl111_connector_detect,
+ .dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+ .get_modes = pl111_connector_helper_get_modes,
+ .mode_valid = pl111_connector_helper_mode_valid,
+ .best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+ struct pl111_drm_connector *pl111_connector;
+
+ pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+ GFP_KERNEL);
+
+ if (pl111_connector == NULL) {
+ pr_err("Failed to allocated pl111_drm_connector\n");
+ return NULL;
+ }
+
+ drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+ DRM_MODE_CONNECTOR_DVII);
+
+ drm_connector_helper_add(&pl111_connector->connector,
+ &connector_helper_funcs);
+
+ drm_sysfs_connector_add(&pl111_connector->connector);
+
+ return pl111_connector;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100644
index 00000000000..ede07ff9fc0
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
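+/*
+ * The GEM object's exported dma_buf pointer was renamed from
+ * export_dma_buf to dma_buf in v3.12, so alias the name here to keep
+ * the rest of this file uniform across kernel versions.
+ */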
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+ struct drm_device *dev = pl111_crtc->crtc.dev;
+ struct pl111_drm_flip_resource *old_flip_res;
+ struct pl111_gem_bo *bo;
+ unsigned long irq_flags;
+ int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ unsigned long flags;
+#endif
+
+ spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+ /*
+ * Cache the flip resource that caused the IRQ since it will be
+ * dispatched later. Early return if the IRQ isn't associated to
+ * a base register update.
+ *
+ * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+ */
+ old_flip_res = pl111_crtc->current_update_res;
+ if (!old_flip_res) {
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+ return;
+ }
+ pl111_crtc->current_update_res = NULL;
+
+ /* Prepare the next flip (if any) of the queue as soon as possible. */
+ if (!list_empty(&pl111_crtc->update_queue)) {
+ struct pl111_drm_flip_resource *flip_res;
+ /* Remove the head of the list */
+ flip_res = list_first_entry(&pl111_crtc->update_queue,
+ struct pl111_drm_flip_resource, link);
+ list_del(&flip_res->link);
+ do_flip_to_res(flip_res);
+ /*
+ * current_update_res will be set, which guarantees that
+ * another flip_res coming in gets queued instead of
+ * handled immediately
+ */
+ }
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+ /* Finalize properly the flip that caused the IRQ */
+ DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+ bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ release_kds_resource_and_display(old_flip_res);
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+ /* Release DMA buffer on this flip */
+
+ if (bo->gem_object.export_dma_buf != NULL)
+ dma_buf_put(bo->gem_object.export_dma_buf);
+
+ drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+ /* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ if (old_flip_res->event) {
+ spin_lock_bh(&dev->event_lock);
+ drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+ old_flip_res->event);
+ spin_unlock_bh(&dev->event_lock);
+ }
+#else
+ if (old_flip_res->event) {
+ struct drm_pending_vblank_event *e = old_flip_res->event;
+ struct timeval now;
+ unsigned int seq;
+
+ DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+ old_flip_res->event);
+
+ spin_lock_bh(&dev->event_lock);
+ seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+ &now);
+ e->pipe = pl111_crtc->crtc_index;
+ e->event.sequence = seq;
+ e->event.tv_sec = now.tv_sec;
+ e->event.tv_usec = now.tv_usec;
+
+ list_add_tail(&e->base.link,
+ &e->base.file_priv->event_list);
+
+ wake_up_interruptible(&e->base.file_priv->event_wait);
+ spin_unlock_bh(&dev->event_lock);
+ }
+#endif
+
+ drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+ /*
+ * workqueue.c:process_one_work():
+ * "It is permissible to free the struct work_struct from
+ * inside the function that is called from it"
+ */
+ kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+ flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+ if (flips_in_flight == 0 ||
+ flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+ wake_up(&priv.wait_for_flips);
+
+ DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+ struct pl111_drm_flip_resource *flip_res = cb1;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+ pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb, bool page_flip,
+ struct drm_pending_vblank_event *event)
+{
+ struct pl111_gem_bo *bo;
+ struct pl111_drm_flip_resource *flip_res;
+ int flips_in_flight;
+ int old_flips_in_flight;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+ crtc->fb = fb;
+#else
+ crtc->primary->fb = fb;
+#endif
+
+ bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+ if (bo == NULL) {
+ DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+ return -EINVAL;
+ }
+
+ /* If this is a full modeset, wait for all outstanding flips to complete
+ * before continuing. This avoids unnecessary complication from being
+ * able to queue up multiple modesets and queues of mixed modesets and
+ * page flips.
+ *
+ * Modesets should be uncommon and will not be performant anyway, so
+ * making them synchronous should have negligible performance impact.
+ */
+ if (!page_flip) {
+ int ret = wait_event_killable(priv.wait_for_flips,
+ atomic_read(&priv.nr_flips_in_flight) == 0);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * There can be more 'early display' flips in flight than there are
+ * buffers, and there is (currently) no explicit bound on the number of
+ * flips. Hence, we need a new allocation for each one.
+ *
+ * Note: this could be optimized if we knew a bound on the flips, since
+ * an application can only keep a limited number of buffers in flight
+ * before it stops being useful and simply hogs memory.
+ */
+ flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+ if (flip_res == NULL) {
+ pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * increment flips in flight, whilst blocking when we reach
+ * NR_FLIPS_IN_FLIGHT_THRESHOLD
+ */
+ do {
+ /*
+ * Note: use of assign-and-then-compare in the condition to set
+ * flips_in_flight
+ */
+ int ret = wait_event_killable(priv.wait_for_flips,
+ (flips_in_flight =
+ atomic_read(&priv.nr_flips_in_flight))
+ < NR_FLIPS_IN_FLIGHT_THRESHOLD);
+ if (ret != 0) {
+ kmem_cache_free(priv.page_flip_slab, flip_res);
+ return ret;
+ }
+
+ old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+ flips_in_flight, flips_in_flight + 1);
+ } while (old_flips_in_flight != flips_in_flight);
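+
+ /*
+ * The cmpxchg above only succeeds if no other flip raced in between
+ * our atomic_read() and the increment; on a race we simply re-read
+ * the counter and retry, re-checking the threshold as well.
+ */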
+
+ flip_res->fb = fb;
+ flip_res->crtc = crtc;
+ flip_res->page_flip = page_flip;
+ flip_res->event = event;
+ INIT_LIST_HEAD(&flip_res->link);
+ DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ if (bo->gem_object.export_dma_buf != NULL) {
+ struct dma_buf *buf = bo->gem_object.export_dma_buf;
+ unsigned long shared[1] = { 0 };
+ struct kds_resource *resource_list[1] = {
+ get_dma_buf_kds_resource(buf) };
+ int err;
+
+ get_dma_buf(buf);
+ DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+ /* Wait for the KDS resource associated with this buffer */
+ err = kds_async_waitall(&flip_res->kds_res_set,
+ &priv.kds_cb, flip_res, fb, 1, shared,
+ resource_list);
+ BUG_ON(err);
+ } else {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+ DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+ /* No dma-buf attached so just call the callback directly */
+ flip_res->kds_res_set = NULL;
+ pl111_crtc->show_framebuffer_cb(flip_res, fb);
+ }
+#else
+ if (bo->gem_object.export_dma_buf != NULL) {
+ struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+ get_dma_buf(buf);
+ DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+ } else {
+ DRM_DEBUG_KMS("No dma_buf for this flip\n");
+ }
+
+ /* No dma-buf attached to this so just call the callback directly */
+ {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ pl111_crtc->show_framebuffer_cb(flip_res, fb);
+ }
+#endif
+
+ /* For the same reasons as the wait at the start of this function,
+ * wait for the modeset to complete before continuing.
+ */
+ if (!page_flip) {
+ int ret = wait_event_killable(priv.wait_for_flips,
+ atomic_read(&priv.nr_flips_in_flight) == 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ struct drm_pending_vblank_event *event)
+#else
+ struct drm_pending_vblank_event *event,
+ uint32_t flags)
+#endif
+{
+ DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+ __func__, crtc, fb, event);
+ return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ int ret;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ struct drm_display_mode *duplicated_mode;
+
+ DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+ adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+ crtc->fb->bits_per_pixel);
+#else
+ crtc->primary->fb->bits_per_pixel);
+#endif
+
+ duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+ if (!duplicated_mode)
+ return -ENOMEM;
+
+ pl111_crtc->new_mode = duplicated_mode;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+ ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+#else
+ ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL);
+#endif
+ if (ret != 0) {
+ pl111_crtc->new_mode = pl111_crtc->current_mode;
+ drm_mode_destroy(crtc->dev, duplicated_mode);
+ }
+
+ return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+ const struct drm_display_mode *mode,
+#else
+ struct drm_display_mode *mode,
+#endif
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+ /*
+ * 1024x768 with more than 16 bits per pixel may not work
+ * correctly on Versatile Express due to bandwidth issues
+ */
+ if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+ crtc->fb->bits_per_pixel > 16) {
+#else
+ crtc->primary->fb->bits_per_pixel > 16) {
+#endif
+ DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+ }
+#endif
+
+ return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+ int ret;
+
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+ /*
+ * Don't disable the CRTC until no flips are in flight, as the IRQ
+ * will be disabled once it is.
+ */
+ ret = wait_event_killable(priv.wait_for_flips,
+ atomic_read(&priv.nr_flips_in_flight) == 0);
+ if (ret) {
+ pr_err("pl111_crtc_helper_disable failed\n");
+ return;
+ }
+ clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+ .cursor_set = pl111_crtc_cursor_set,
+ .cursor_move = pl111_crtc_cursor_move,
+ .set_config = drm_crtc_helper_set_config,
+ .page_flip = pl111_crtc_page_flip,
+ .destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+ .mode_set = pl111_crtc_helper_mode_set,
+ .prepare = pl111_crtc_helper_prepare,
+ .commit = pl111_crtc_helper_commit,
+ .mode_fixup = pl111_crtc_helper_mode_fixup,
+ .disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+ struct pl111_drm_crtc *pl111_crtc;
+
+ pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+ if (pl111_crtc == NULL) {
+ pr_err("Failed to allocated pl111_drm_crtc\n");
+ return NULL;
+ }
+
+ drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+ drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+ pl111_crtc->crtc_index = pl111_crtc_num;
+ pl111_crtc_num++;
+ pl111_crtc->crtc.enabled = 0;
+ pl111_crtc->last_bpp = 0;
+ pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ pl111_crtc->displaying_fb = NULL;
+ pl111_crtc->old_kds_res_set = NULL;
+ spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+ pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+ INIT_LIST_HEAD(&pl111_crtc->update_queue);
+ spin_lock_init(&pl111_crtc->base_update_lock);
+
+ return pl111_crtc;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100644
index 00000000000..4bf20fec246
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD (1 << 7)
+#define ARGB_ALPHA_SHIFT 24
+#define ARGB_ALPHA_MASK (0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT 16
+#define ARGB_RED_MASK (0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT 8
+#define ARGB_GREEN_MASK (0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT 0
+#define ARGB_BLUE_MASK (0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+ if (size == CURSOR_64X64)
+ reg_data |= CRSR_CONFIG_CRSR_SIZE;
+ else
+ reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+ if (sync == CURSOR_SYNC_VSYNC)
+ reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+ else
+ reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+ reg_data &= ~CRSR_CTRL_CRSR_NUM_MASK;
+ reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+ if (enable)
+ reg_data |= CRSR_CTRL_CRSR_ON;
+ else
+ reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+ u32 reg_data = (x & CRSR_XY_MASK) |
+ ((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+ writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+ u32 reg_data;
+
+ /*
+ * Do not allow setting clipping values larger than
+ * the cursor size since the cursor is already fully hidden
+ * when x,y = PL111_MAX_CURSOR_WIDTH.
+ */
+ if (x > PL111_MAX_CURSOR_WIDTH)
+ x = PL111_MAX_CURSOR_WIDTH;
+ if (y > PL111_MAX_CURSOR_HEIGHT)
+ y = PL111_MAX_CURSOR_HEIGHT;
+
+ reg_data = (x & CRSR_CLIP_MASK) |
+ ((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+ writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+ writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+ pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+ pl111_set_cursor_size(CURSOR_64X64);
+ pl111_set_cursor_palette(0x0, 0x00ffffff);
+ pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+ pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+ 6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
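+
+/*
+ * Illustrative example (not from the original source): for pixel x = 5,
+ * x % 16 = 5 selects shift 12 from the table above, so that pixel's
+ * 2-bit LBBP value lands in bits [13:12] of the 32-bit cursor word.
+ */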
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+ uint32_t value)
+{
+ u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+ uint32_t shift;
+ uint32_t data;
+
+ shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+ /* Get the word containing this pixel */
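+ /*
+ * (Assuming a 64-pixel-wide cursor at 16 pixels per 32-bit word:
+ * x >> 4 selects the word within a line, and y << 2 advances by
+ * y * 4 words per line.)
+ */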
+ cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+ /* Update pixel in cursor RAM */
+ data = readl(cursor_ram);
+ data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+ data |= value << shift;
+ writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+ u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+ u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+ u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+ u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+ u32 blue = (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+ /*
+ * Converting from 8-bit alpha to binary transparency is the best
+ * we can achieve.
+ */
+ if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+ u32 gray, max, min;
+
+ /*
+ * Convert to gray using the lightness method:
+ * gray = [max(R,G,B) + min(R,G,B)]/2
+ */
+ min = min(red, green);
+ min = min(min, blue);
+ max = max(red, green);
+ max = max(max, blue);
+ gray = (min + max) >> 1; /* divide by 2 */
+ /* Apply binary threshold to the gray value calculated */
+ if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+ lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+ else
+ lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+ }
+
+ return lbbp_pix;
+}
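+
+/*
+ * Worked example (not from the original source): 0xFF808080 has alpha
+ * 0xff (bit 7 set) and lightness gray (0x80 + 0x80) / 2 = 0x80, so it
+ * maps to CLCD_CRSR_LBBP_FOREGROUND; 0x40FFFFFF has alpha 0x40 (bit 7
+ * clear) and stays CLCD_CRSR_LBBP_TRANSPARENT.
+ */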
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ * to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ /* Add 1 to width for the trailing '\0' terminator */
+ char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ unsigned int x;
+ unsigned int y;
+
+ for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+ for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+ u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+ string_cursor[x] = 'T';
+ else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+ string_cursor[x] = 'F';
+ else if (value == CLCD_CRSR_LBBP_INVERSE)
+ string_cursor[x] = 'I';
+ else
+ string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ set_lbbp_pixel(data, x, y, value);
+ ++data;
+ }
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+ DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ }
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height)
+{
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+
+ DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+ handle, width, height);
+
+ if (!handle) {
+ pl111_cursor_disable();
+ return 0;
+ }
+
+ if ((width != PL111_MAX_CURSOR_WIDTH) ||
+ (height != PL111_MAX_CURSOR_HEIGHT))
+ return -EINVAL;
+
+ obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object for handle = %d\n",
+ handle);
+ return -ENOENT;
+ }
+
+ /*
+ * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+ * ARGB8888 buffer object as input.
+ */
+ if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+ DRM_ERROR("Cannot set cursor with an obj size = %d\n",
+ obj->size);
+ drm_gem_object_unreference_unlocked(obj);
+ return -EINVAL;
+ }
+
+ bo = PL111_BO_FROM_GEM(obj);
+ if (!(bo->type & PL111_BOT_DMA)) {
+ DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+ obj);
+ drm_gem_object_unreference_unlocked(obj);
+ return -EINVAL;
+ }
+
+ pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+ /*
+ * Since we copy the contents of the buffer to the HW cursor internal
+ * memory this GEM object is not needed anymore.
+ */
+ drm_gem_object_unreference_unlocked(obj);
+
+ pl111_cursor_enable();
+
+ return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int x_clip = 0;
+ int y_clip = 0;
+
+ DRM_DEBUG("x %d y %d\n", x, y);
+
+ /*
+ * The cursor image is clipped automatically at the screen limits when
+ * it extends beyond the screen image to the right or bottom but
+ * we must clip it using pl111 HW features for negative values.
+ */
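+ /* e.g. x = -10, y = 3 becomes x_clip = 10, x = 0, y_clip = 0, y = 3 */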
+ if (x < 0) {
+ x_clip = -x;
+ x = 0;
+ }
+ if (y < 0) {
+ y_clip = -y;
+ y = 0;
+ }
+
+ pl111_set_cursor_clipping(x_clip, y_clip);
+ pl111_set_cursor_position(x, y);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_device.c b/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100644
index 00000000000..af92498784f
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,336 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+ wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+ bool *cb_has_called = (bool *) cb2;
+
+ *cb_has_called = true;
+ wake_up(wait);
+}
+
+/* Must be called with the current_displaying_lock spinlock held */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+ pl111_crtc->displaying_fb = flip_res->fb;
+
+ /* Release the previous buffer */
+ if (pl111_crtc->old_kds_res_set != NULL) {
+ /*
+ * Can flip to the same buffer, but must not release the current
+ * resource set
+ */
+ BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+ }
+ /* Record the current buffer, to release on the next buffer flip */
+ pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+ DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+ DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * The PL111 does not have a proper HW counter for vblank IRQs, so
+ * enable_vblank and disable_vblank are just no-op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+ DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+ return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+ DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+ .fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+ struct drm_mode_config *mode_config;
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ struct pl111_drm_connector *pl111_connector;
+ struct pl111_drm_encoder *pl111_encoder;
+ int ret = 0;
+
+ if (priv == NULL)
+ return -EINVAL;
+
+ drm_mode_config_init(dev);
+ mode_config = &dev->mode_config;
+ mode_config->funcs = &mode_config_funcs;
+ mode_config->min_width = 1;
+ mode_config->max_width = 1024;
+ mode_config->min_height = 1;
+ mode_config->max_height = 768;
+
+ priv->pl111_crtc = pl111_crtc_create(dev);
+ if (priv->pl111_crtc == NULL) {
+ pr_err("Failed to create pl111_drm_crtc\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ priv->number_crtcs = 1;
+
+ pl111_connector = pl111_connector_create(dev);
+ if (pl111_connector == NULL) {
+ pr_err("Failed to create pl111_drm_connector\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ pl111_encoder = pl111_encoder_create(dev, 1);
+ if (pl111_encoder == NULL) {
+ pr_err("Failed to create pl111_drm_encoder\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+ &pl111_encoder->encoder);
+ if (ret != 0) {
+ DRM_ERROR("Failed to attach encoder\n");
+ goto out_config;
+ }
+
+ pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+ goto finish;
+
+out_config:
+ drm_mode_config_cleanup(dev);
+finish:
+ DRM_DEBUG("%s returned %d\n", __func__, ret);
+ return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+ drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+ int ret = 0;
+
+ pr_info("DRM %s\n", __func__);
+
+ mutex_init(&priv.export_dma_buf_lock);
+ atomic_set(&priv.nr_flips_in_flight, 0);
+ init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+ if (ret != 0) {
+ pr_err("Failed to initialise KDS callback\n");
+ goto finish;
+ }
+
+ ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+ initial_kds_obtained);
+ if (ret != 0) {
+ pr_err("Failed to init KDS obtain callback\n");
+ kds_callback_term(&priv.kds_cb);
+ goto finish;
+ }
+#endif
+
+ /* Create a cache for page flips */
+ priv.page_flip_slab = kmem_cache_create("page flip slab",
+ sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+ if (priv.page_flip_slab == NULL) {
+ DRM_ERROR("Failed to create slab\n");
+ ret = -ENOMEM;
+ goto out_kds_callbacks;
+ }
+
+ dev->dev_private = &priv;
+
+ ret = pl111_modeset_init(dev);
+ if (ret != 0) {
+ pr_err("Failed to init modeset\n");
+ goto out_slab;
+ }
+
+ ret = pl111_device_init(dev);
+ if (ret != 0) {
+ DRM_ERROR("Failed to init MMIO and IRQ\n");
+ goto out_modeset;
+ }
+
+ ret = drm_vblank_init(dev, 1);
+ if (ret != 0) {
+ DRM_ERROR("Failed to init vblank\n");
+ goto out_vblank;
+ }
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0))
+ platform_set_drvdata(dev->platformdev, dev);
+#endif
+
+ goto finish;
+
+out_vblank:
+ pl111_device_fini(dev);
+out_modeset:
+ pl111_modeset_fini(dev);
+out_slab:
+ kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ kds_callback_term(&priv.kds_obtain_current_cb);
+ kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+ DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+ return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+
+ kmem_cache_destroy(priv.page_flip_slab);
+
+ drm_vblank_cleanup(dev);
+ pl111_modeset_fini(dev);
+ pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ kds_callback_term(&priv.kds_obtain_current_cb);
+ kds_callback_term(&priv.kds_cb);
+#endif
+ return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+ .fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+#else
+ .open = pl111_gem_vm_open,
+ .close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = pl111_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+ DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+ .driver_features =
+ DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+ .load = pl111_drm_load,
+ .unload = pl111_drm_unload,
+ .context_dtor = NULL,
+ .preclose = pl111_drm_preclose,
+ .lastclose = pl111_drm_lastclose,
+ .suspend = pl111_drm_suspend,
+ .resume = pl111_drm_resume,
+ .get_vblank_counter = drm_vblank_count,
+ .enable_vblank = pl111_enable_vblank,
+ .disable_vblank = pl111_disable_vblank,
+ .ioctls = pl111_ioctls,
+ .fops = &drm_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+ .dumb_create = pl111_dumb_create,
+ .dumb_destroy = pl111_dumb_destroy,
+ .dumb_map_offset = pl111_dumb_map_offset,
+ .gem_free_object = pl111_gem_free_object,
+ .gem_vm_ops = &pl111_gem_vm_ops,
+ .prime_handle_to_fd = &pl111_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_export = &pl111_gem_prime_export,
+ .gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+ int ret;
+ pr_info("DRM %s\n", __func__);
+ pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+ DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+ driver.num_ioctls = ARRAY_SIZE(pl111_ioctls);
+ ret = 0;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0))
+ driver.kdriver.platform_device = dev;
+#endif
+ return drm_platform_init(&driver, dev);
+
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+ drm_platform_exit(&driver, dev);
+#else
+ drm_put_dev(platform_get_drvdata(dev));
+#endif
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100644
index 00000000000..1131f46b27d
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+ struct pl111_gem_bo *bo,
+ struct dma_buf *dma_buf)
+{
+ unsigned long shared[1] = { 0 };
+ struct kds_resource *resource_list[1];
+ struct kds_resource_set *kds_res_set;
+ struct drm_crtc *crtc;
+ bool cb_has_called = false;
+ unsigned long flags;
+ int err;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+ DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+ bo, dma_buf);
+
+ resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+ get_dma_buf(dma_buf);
+
+ /*
+ * Can't use kds_waitall() because kbase would be let through
+ * due to the "locked ignore" mode.
+ */
+ err = kds_async_waitall(&kds_res_set,
+ &priv.kds_obtain_current_cb, &wake,
+ &cb_has_called, 1, shared, resource_list);
+ BUG_ON(err);
+ wait_event(wake, cb_has_called == true);
+
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb) {
+ struct pl111_drm_framebuffer *pl111_fb;
+ struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+ if (pl111_fb->bo == bo) {
+ DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+ DRM_DEBUG_KMS(" is being displayed, keeping\n");
+ /* There shouldn't be a previous buffer to release */
+ BUG_ON(pl111_crtc->old_kds_res_set);
+
+ if (kds_res_set == NULL) {
+ err = kds_async_waitall(&kds_res_set,
+ &priv.kds_obtain_current_cb,
+ &wake, &cb_has_called,
+ 1, shared, resource_list);
+ BUG_ON(err);
+ wait_event(wake, cb_has_called == true);
+ }
+
+ /* Current buffer will need releasing on next flip */
+ pl111_crtc->old_kds_res_set = kds_res_set;
+
+ /*
+ * Clear kds_res_set, so a new kds_res_set is allocated
+ * for additional CRTCs
+ */
+ kds_res_set = NULL;
+ }
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+ }
+
+ /* kds_res_set will be NULL here if any CRTC is displaying this fb */
+ if (kds_res_set != NULL) {
+ DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+ DRM_DEBUG_KMS(" not being displayed, discarding\n");
+ /* They're not being displayed, release them */
+ kds_resource_set_release(&kds_res_set);
+ }
+
+ dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+ struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = buffer->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int ret;
+
+ DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+ mutex_lock(&dev->struct_mutex);
+ ret = drm_gem_mmap_obj(obj, obj->size, vma);
+ mutex_unlock(&dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+ struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = buffer->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+
+ DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+ mutex_lock(&dev->struct_mutex);
+
+ /* Check for valid size. */
+ if (obj->size < vma->vm_end - vma->vm_start) {
+ mutex_unlock(&dev->struct_mutex);
+ return -EINVAL;
+ }
+
+ BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+ vma->vm_ops = dev->driver->gem_vm_ops;
+ vma->vm_private_data = obj;
+
+ /* Take a ref for this mapping of the object, so that the fault
+ * handler can dereference the mmap offset's pointer to the object.
+ * This reference is cleaned up by the corresponding vm_close
+ * (which should happen whether the vma was created by this call, or
+ * by a vm_open due to mremap or partial unmap or whatever).
+ */
+ drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+ pl111_drm_vm_open_locked(dev, vma);
+#else
+ drm_vm_open_locked(dev, vma);
+#endif
+
+ mutex_unlock(&dev->struct_mutex);
+
+ return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+ /*
+ * Need to release the dma_buf's reference on the gem object it was
+ * exported from, and also clear the gem object's export_dma_buf
+ * pointer to this dma_buf as it no longer exists
+ */
+ struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+ struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct drm_crtc *crtc;
+ unsigned long flags;
+#endif
+ bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+ head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb) {
+ struct pl111_drm_framebuffer *pl111_fb;
+ struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+ if (pl111_fb->bo == bo) {
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+ pl111_crtc->old_kds_res_set = NULL;
+ }
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+ }
+#endif
+ mutex_lock(&priv.export_dma_buf_lock);
+
+ obj->export_dma_buf = NULL;
+ drm_gem_object_unreference_unlocked(obj);
+
+ mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+ struct dma_buf_attachment *attach)
+{
+ DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+ dev, attach);
+
+ attach->priv = dev;
+
+ return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+ struct dma_buf_attachment *attach)
+{
+ DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+ attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+ *attach,
+ enum dma_data_direction
+ direction)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int size, n_pages, nents;
+ struct scatterlist *s, *sg;
+ struct sg_table *sgt;
+ int ret, i;
+
+ DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+ attach, bo);
+
+ /*
+ * Nothing to do if we are trying to map a dma-buf that has been
+ * imported; just return the existing sgt.
+ */
+ if (obj->import_attach) {
+ BUG_ON(!bo->sgt);
+ return bo->sgt;
+ }
+
+ size = obj->size;
+ n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (bo->type & PL111_BOT_DMA) {
+ sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ DRM_ERROR("Failed to allocate sg_table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (ret < 0) {
+ DRM_ERROR("Failed to allocate page table\n");
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
+ }
+ sg_dma_len(sgt->sgl) = size;
+ /* We use DMA coherent mappings for PL111_BOT_DMA so we must
+ * use the virtual address returned at buffer allocation */
+ sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+ sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+ } else { /* PL111_BOT_SHM */
+ struct page **pages;
+ int pg = 0;
+
+ mutex_lock(&dev->struct_mutex);
+ pages = get_pages(obj);
+ if (IS_ERR(pages)) {
+ dev_err(obj->dev->dev, "could not get pages: %ld\n",
+ PTR_ERR(pages));
+ mutex_unlock(&dev->struct_mutex);
+ return ERR_CAST(pages);
+ }
+ sgt = drm_prime_pages_to_sg(pages, n_pages);
+ if (sgt == NULL) {
+ mutex_unlock(&dev->struct_mutex);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ pl111_gem_sync_to_dma(bo);
+
+ /*
+ * At this point the pages have been dma-mapped by either
+ * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+ * for cached. So the physical addresses can be assigned
+ * to the sg entries.
+ * drm_prime_pages_to_sg() may have combined contiguous pages
+ * into chunks so we assign the physical address of the first
+ * page of a chunk to the chunk and check that the physical
+ * addresses of the rest of the pages in that chunk are also
+ * contiguous.
+ */
+ sg = sgt->sgl;
+ nents = sgt->nents;
+
+ for_each_sg(sg, s, nents, i) {
+ int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+ sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+ for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+ BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+ bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+ }
+
+ pg += n_pages_in_chunk;
+ }
+
+ mutex_unlock(&dev->struct_mutex);
+ }
+ bo->sgt = sgt;
+ return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction direction)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+ attach, bo);
+
+ sg_free_table(sgt);
+ kfree(sgt);
+ bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ void *vaddr = NULL;
+
+ /* Make sure we cannot access outside the memory range */
+ if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+ return NULL;
+
+ if (bo->type & PL111_BOT_DMA) {
+ vaddr = (bo->backing_data.dma.fb_cpu_addr +
+ (pageno << PAGE_SHIFT));
+ } else {
+ vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+ }
+
+ return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+ size_t len)
+{
+ struct scatterlist *sg;
+ unsigned int count;
+ size_t size = 0;
+ dma_addr_t dma_handle = 0;
+
+ /* Find a scatterlist that starts in "start" and has "len"
+ * or return error */
+ for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ if ((size == start) && (len == sg_dma_len(sg))) {
+ dma_handle = sg_dma_address(sg);
+ break;
+ }
+ size += sg_dma_len(sg);
+ }
+ return dma_handle;
+}
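+
+/*
+ * Illustrative example (not from the original source): for an sg table
+ * with entries of length 4096 and 8192 bytes, a call with start = 4096
+ * and len = 8192 walks past the first entry (accumulated size becomes
+ * 4096) and returns the DMA address of the second; any (start, len)
+ * pair that does not line up exactly with one entry yields 0.
+ */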
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+ size_t start, size_t len,
+ enum dma_data_direction dir)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ struct sg_table *sgt = bo->sgt;
+ dma_addr_t dma_handle;
+
+ if ((start + len) > bo->gem_object.size)
+ return -EINVAL;
+
+ /* Match pl111_dma_buf_end_cpu(): only non-DMA (SHM) objects need syncing */
+ if (!(bo->type & PL111_BOT_DMA)) {
+ struct device *dev = bo->gem_object.dev->dev;
+
+ dma_handle = pl111_find_matching_sg(sgt, start, len);
+ if (!dma_handle)
+ return -EINVAL;
+
+ dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+ }
+ /* PL111_BOT_DMA uses coherent mappings, no need to sync */
+ return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+ size_t start, size_t len,
+ enum dma_data_direction dir)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ struct sg_table *sgt = bo->sgt;
+ dma_addr_t dma_handle;
+
+ if ((start + len) > bo->gem_object.size)
+ return;
+
+ if (!(bo->type & PL111_BOT_DMA)) {
+ struct device *dev = bo->gem_object.dev->dev;
+
+ dma_handle = pl111_find_matching_sg(sgt, start, len);
+ if (!dma_handle)
+ return;
+
+ dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+ }
+ /* PL111_BOT_DMA uses coherent mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+ .release = &pl111_dma_buf_release,
+ .attach = &pl111_dma_buf_attach,
+ .detach = &pl111_dma_buf_detach,
+ .map_dma_buf = &pl111_dma_buf_map_dma_buf,
+ .unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+ .kmap_atomic = &pl111_dma_buf_kmap,
+ .kmap = &pl111_dma_buf_kmap,
+ .begin_cpu_access = &pl111_dma_buf_begin_cpu,
+ .end_cpu_access = &pl111_dma_buf_end_cpu,
+ .mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ struct dma_buf_attachment *attachment;
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+ struct scatterlist *sgl;
+ struct sg_table *sgt;
+ dma_addr_t cont_phys;
+ int ret = 0;
+ int i;
+
+ DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+ /* is this one of own objects? */
+ if (dma_buf->ops == &pl111_dma_buf_ops) {
+ obj = dma_buf->priv;
+ /* is it from our device? */
+ if (obj->dev == dev) {
+ /*
+ * Importing dmabuf exported from our own gem increases
+ * refcount on gem itself instead of f_count of dmabuf.
+ */
+ drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+ /* Before v3.10.0 we assume the caller has taken a ref on the
+ * dma_buf; we don't want it for self-imported buffers, so drop
+ * it here. */
+ dma_buf_put(dma_buf);
+#endif
+
+ return obj;
+ }
+ }
+
+ attachment = dma_buf_attach(dma_buf, dev->dev);
+ if (IS_ERR(attachment))
+ return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+ /* From v3.10.0 we assume the caller has not taken a ref, so we take one here */
+ get_dma_buf(dma_buf);
+#endif
+
+ sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+ if (IS_ERR_OR_NULL(sgt)) {
+ ret = sgt ? PTR_ERR(sgt) : -ENOMEM;
+ goto err_buf_detach;
+ }
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo) {
+ DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+ ret = -ENOMEM;
+ goto err_unmap_attach;
+ }
+
+ /* Find out whether the buffer is contiguous or not */
+ sgl = sgt->sgl;
+ cont_phys = sg_phys(sgl);
+ bo->type |= PL111_BOT_DMA;
+ for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+ dma_addr_t real_phys = sg_phys(sgl);
+ if (real_phys != cont_phys) {
+ bo->type &= ~PL111_BOT_DMA;
+ break;
+ }
+ cont_phys += (PAGE_SIZE - sgl->offset);
+ }
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ ret = drm_gem_private_object_init(dev, &bo->gem_object,
+ dma_buf->size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not import DMA GEM obj\n");
+ goto err_free_buffer;
+ }
+#else
+ drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size);
+#endif
+
+ if (bo->type & PL111_BOT_DMA) {
+ bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+ bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+ DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+ } else { /* PL111_BOT_SHM */
+		DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non-contiguous import\n", __func__, bo);
+ }
+
+ bo->gem_object.import_attach = attachment;
+ bo->sgt = sgt;
+
+ return &bo->gem_object;
+
+err_free_buffer:
+ kfree(bo);
+ bo = NULL;
+err_unmap_attach:
+ dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+ dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+ /* from 3.10.0 we will have taken a ref so drop it here */
+ dma_buf_put(dma_buf);
+#endif
+ return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *obj, int flags)
+{
+ struct dma_buf *new_buf;
+ struct pl111_gem_bo *bo;
+ size_t size;
+
+ DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+ size = obj->size;
+
+	new_buf = dma_buf_export(obj /* priv */, &pl111_dma_buf_ops, size,
+				flags | O_RDWR);
+	if (IS_ERR(new_buf))
+		return new_buf;
+
+	bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+ /*
+ * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+ * finishes
+ */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /*
+ * Ensure that we hold the kds resource if it's the currently
+ * displayed buffer.
+ */
+ obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+ DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+ return new_buf;
+}
+
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+ uint32_t handle, uint32_t flags, int *prime_fd)
+{
+ int result;
+ /*
+ * This will re-use any existing exports, and calls
+ * driver->gem_prime_export to do the first export when needed
+ */
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+ file_priv, handle);
+
+ mutex_lock(&priv.export_dma_buf_lock);
+ result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+ prime_fd);
+ mutex_unlock(&priv.export_dma_buf_lock);
+
+ return result;
+}
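+
+/*
+ * Usage sketch (hypothetical userspace client, illustrative only): the
+ * corresponding PRIME ioctl turns a GEM handle into a shareable dma_buf fd:
+ *
+ *	struct drm_prime_handle args = {
+ *		.handle = handle,
+ *		.flags = DRM_CLOEXEC,
+ *	};
+ *	if (ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args) == 0)
+ *		share_fd = args.fd;
+ */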
diff --git a/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100644
index 00000000000..78c91c08106
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+ return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct pl111_drm_encoder *pl111_encoder =
+ PL111_ENCODER_FROM_ENCODER(encoder);
+
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+ drm_encoder_cleanup(encoder);
+ kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+ .destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+ .mode_fixup = pl111_encoder_helper_mode_fixup,
+ .prepare = pl111_encoder_helper_prepare,
+ .commit = pl111_encoder_helper_commit,
+ .mode_set = pl111_encoder_helper_mode_set,
+ .disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+ int possible_crtcs)
+{
+ struct pl111_drm_encoder *pl111_encoder;
+
+ pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+ if (pl111_encoder == NULL) {
+		pr_err("Failed to allocate pl111_drm_encoder\n");
+ return NULL;
+ }
+
+ drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+ DRM_MODE_ENCODER_DAC);
+
+ drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+ pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+ return pl111_encoder;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_fb.c b/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100644
index 00000000000..f575c9e5f7c
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+ struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct drm_crtc *crtc;
+ unsigned long flags;
+#endif
+ DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+ /*
+ * Because flips are deferred, wait for all previous flips to complete
+ */
+ wait_event(priv.wait_for_flips,
+ atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /*
+	 * Release the KDS resources if this framebuffer is currently being
+	 * displayed; this only happens when the last framebuffer is destroyed.
+ */
+ list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+ head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb == framebuffer) {
+ /* Release the current buffers */
+ if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for displayed 0x%p\n",
+					      framebuffer);
+ kds_resource_set_release(
+ &pl111_crtc->old_kds_res_set);
+ }
+ pl111_crtc->old_kds_res_set = NULL;
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+ flags);
+ }
+#endif
+ drm_framebuffer_cleanup(framebuffer);
+
+	if (pl111_fb->bo != NULL)
+ drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+ kfree(pl111_fb);
+
+ DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+ struct drm_file *file_priv,
+ unsigned int *handle)
+{
+ struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+ DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+ if (bo == NULL)
+ return -EINVAL;
+
+ return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+ .destroy = pl111_fb_destroy,
+ .create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct pl111_drm_framebuffer *pl111_fb = NULL;
+ struct drm_framebuffer *fb = NULL;
+ struct drm_gem_object *gem_obj;
+ struct pl111_gem_bo *bo;
+ int err = 0;
+ size_t min_size;
+ int bpp;
+ int depth;
+
+ pr_info("DRM %s\n", __func__);
+ gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+ if (gem_obj == NULL) {
+ DRM_ERROR("Could not get gem obj from handle to create fb\n");
+ err = -ENOENT;
+ goto error;
+ }
+
+ bo = PL111_BO_FROM_GEM(gem_obj);
+ drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+ if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+ DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+ err = -EINVAL;
+ goto error;
+ }
+
+ min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+ + mode_cmd->width * (bpp >> 3);
+
+ if (bo->gem_object.size < min_size) {
+ DRM_ERROR("gem obj size < min size\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ /* We can't scan out SHM so we can't create an fb for it */
+ if (!(bo->type & PL111_BOT_DMA)) {
+ DRM_ERROR("Can't create FB for non-scanout buffer\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+ case 'Y':
+ case 'U':
+ case 'V':
+ case 'N':
+ case 'T':
+ DRM_ERROR("YUV formats not supported\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+ if (pl111_fb == NULL) {
+ DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+ err = -ENOMEM;
+ goto error;
+ }
+ fb = &pl111_fb->fb;
+
+ err = drm_framebuffer_init(dev, fb, &fb_funcs);
+ if (err) {
+ DRM_ERROR("drm_framebuffer_init failed\n");
+ kfree(fb);
+ fb = NULL;
+ goto error;
+ }
+
+ drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+ /* The only framebuffer formats supported by pl111
+ * are 16 bpp or 32 bpp with 24 bit depth.
+ * See clcd_enable()
+ */
+ if (!((fb->bits_per_pixel == 16) ||
+ (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+ DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+ drm_framebuffer_cleanup(fb);
+ kfree(fb);
+ fb = NULL;
+ err = -EINVAL;
+ goto error;
+ }
+
+ pl111_fb->bo = bo;
+
+ DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+ fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+ return fb;
+
+error:
+ if (gem_obj != NULL)
+ drm_gem_object_unreference_unlocked(gem_obj);
+
+ return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100644
index 00000000000..494baa0d057
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+ struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb, bool page_flip,
+ struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+ *dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+ int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+ int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+ struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle,
+ uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+ struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+ uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+ *flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+ struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+ struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm_gem.c b/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100644
index 00000000000..13fb2560569
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0))
+#include <drm/drm_vma_manager.h>
+#endif
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+ struct pl111_gem_bo *bo;
+ struct drm_device *dev = obj->dev;
+ DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+ bo = PL111_BO_FROM_GEM(obj);
+
+ if (obj->import_attach)
+ drm_prime_gem_destroy(obj, bo->sgt);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ if (obj->map_list.map != NULL)
+ drm_gem_free_mmap_offset(obj);
+#else
+ drm_gem_free_mmap_offset(obj);
+#endif
+ /*
+ * Only free the backing memory if the object has not been imported.
+	 * If it has been imported, the exporter is in charge of freeing it
+	 * once the dmabuf's refcount drops to 0.
+ */
+ if (obj->import_attach)
+ goto imported_out;
+
+ if (bo->type & PL111_BOT_DMA) {
+ dma_free_writecombine(dev->dev, obj->size,
+ bo->backing_data.dma.fb_cpu_addr,
+ bo->backing_data.dma.fb_dev_addr);
+ } else if (bo->backing_data.shm.pages != NULL) {
+ put_pages(obj, bo->backing_data.shm.pages);
+ }
+
+imported_out:
+ drm_gem_object_release(obj);
+
+ kfree(bo);
+
+ DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+ u32 flags, struct drm_file *file_priv,
+ u32 *handle)
+{
+ int ret = 0;
+ struct pl111_gem_bo *bo = NULL;
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (bo == NULL) {
+ ret = -ENOMEM;
+ goto finish;
+ }
+
+ bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+ /*
+ * If the ARCH can't chain we can't have non-contiguous allocs larger
+ * than a single sg can hold.
+ * In this case we fall back to using contiguous memory
+ */
+ if (!(bo->type & PL111_BOT_DMA)) {
+ long unsigned int n_pages =
+ PAGE_ALIGN(size) >> PAGE_SHIFT;
+ if (n_pages > SG_MAX_SINGLE_ALLOC) {
+ bo->type |= PL111_BOT_DMA;
+ /*
+ * Non-contiguous allocation request changed to
+			 * contiguous
+ */
+ DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+ n_pages, SG_MAX_SINGLE_ALLOC);
+ }
+ }
+#endif
+ if (bo->type & PL111_BOT_DMA) {
+ /* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ bo->backing_data.dma.fb_cpu_addr =
+ dma_alloc_attrs(dev->dev, size,
+ &bo->backing_data.dma.fb_dev_addr,
+ GFP_KERNEL,
+ &attrs);
+ if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+ DRM_ERROR("dma_alloc_attrs failed\n");
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+#else
+ bo->backing_data.dma.fb_cpu_addr =
+ dma_alloc_writecombine(dev->dev, size,
+ &bo->backing_data.dma.fb_dev_addr,
+ GFP_KERNEL);
+ if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+ DRM_ERROR("dma_alloc_writecombine failed\n");
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ ret = drm_gem_private_object_init(dev, &bo->gem_object,
+ size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not initialise GEM object\n");
+ goto free_dma;
+ }
+#else
+ drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+ } else { /* PL111_BOT_SHM */
+ /* not scanout compatible - use SHM backed object */
+ ret = drm_gem_object_init(dev, &bo->gem_object, size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+		DRM_DEBUG_KMS("Num bytes: %zd\n", bo->gem_object.size);
+ }
+
+ DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+ size, flags,
+ (bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+ (bo->type & PL111_BOT_DMA) ?
+ (unsigned long)bo->backing_data.dma.fb_dev_addr:
+ (unsigned long)bo->backing_data.shm.pages,
+ bo->type);
+
+ ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+ if (ret != 0) {
+ DRM_ERROR("DRM failed to create GEM handle\n");
+ goto obj_release;
+ }
+
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_unreference_unlocked(&bo->gem_object);
+
+ return 0;
+
+obj_release:
+ drm_gem_object_release(&bo->gem_object);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+free_dma:
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ if (bo->type & PL111_BOT_DMA) {
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(dev->dev, size,
+ bo->backing_data.dma.fb_cpu_addr,
+ bo->backing_data.dma.fb_dev_addr,
+ &attrs);
+ }
+#else
+ if (bo->type & PL111_BOT_DMA)
+ dma_free_writecombine(dev->dev, size,
+ bo->backing_data.dma.fb_cpu_addr,
+ bo->backing_data.dma.fb_dev_addr);
+#endif
+free_bo:
+ kfree(bo);
+finish:
+ return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_pl111_gem_create *args = data;
+ uint32_t bytes_pp;
+
+	/* Round up to whole bytes, to allow for the case where bpp < 8 */
+ bytes_pp = args->bpp >> 3;
+ if (args->bpp & ((1 << 3) - 1))
+ bytes_pp++;
+
+ if (args->flags & ~PL111_BOT_MASK) {
+ DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+ return -EINVAL;
+ }
+
+ args->pitch = ALIGN(args->width * bytes_pp, 64);
+ args->size = PAGE_ALIGN(args->pitch * args->height);
+
+ DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+ args->width, args->height, args->pitch, args->bpp,
+ bytes_pp, args->size, args->flags);
+
+ return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+ &args->handle);
+}
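+
+/*
+ * Worked example of the pitch/size computation above (illustrative only):
+ * width=1022 and bpp=16 give bytes_pp=2 and pitch=ALIGN(2044, 64)=2048;
+ * with height=768, size=PAGE_ALIGN(2048*768)=1572864 bytes. A sub-byte
+ * bpp such as 1 rounds bytes_pp up to 1, so the pitch is never zero.
+ */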
+
+int pl111_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+ uint32_t bytes_pp;
+
+	/* Round up to whole bytes, to allow for the case where bpp < 8 */
+ bytes_pp = args->bpp >> 3;
+ if (args->bpp & ((1 << 3) - 1))
+ bytes_pp++;
+
+ if (args->flags) {
+ DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+ return -EINVAL;
+ }
+
+ args->pitch = ALIGN(args->width * bytes_pp, 64);
+ args->size = PAGE_ALIGN(args->pitch * args->height);
+
+ DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+ args->width, args->height, args->pitch, args->bpp,
+ bytes_pp, args->size, args->flags);
+
+ return pl111_gem_object_create(dev, args->size,
+ PL111_BOT_DMA | PL111_BOT_UNCACHED,
+ file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+ uint32_t handle)
+{
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+ file_priv, handle);
+ return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle,
+ uint64_t *offset)
+{
+ struct drm_gem_object *obj;
+ int ret = 0;
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+ file_priv, handle);
+
+ /* GEM does all our handle to object mapping */
+ obj = drm_gem_object_lookup(dev, file_priv, handle);
+ if (obj == NULL) {
+ ret = -ENOENT;
+ goto fail;
+ }
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ if (obj->map_list.map == NULL) {
+ ret = drm_gem_create_mmap_offset(obj);
+ if (ret != 0) {
+ drm_gem_object_unreference_unlocked(obj);
+ goto fail;
+ }
+ }
+#else
+ ret = drm_gem_create_mmap_offset(obj);
+ if (ret != 0) {
+ drm_gem_object_unreference_unlocked(obj);
+ goto fail;
+ }
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ *offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+ *offset = drm_vma_node_offset_addr(&obj->vma_node);
+ DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+ drm_gem_object_unreference_unlocked(obj);
+fail:
+ return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+ struct drm_device *dev = bo->gem_object.dev;
+
+ if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+ int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+ struct page **pages = bo->backing_data.shm.pages;
+ bool dirty = false;
+
+ for (i = 0; i < npages; i++) {
+ if (!bo->backing_data.shm.dma_addrs[i]) {
+ DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+ bo->backing_data.shm.dma_addrs[i] =
+ dma_map_page(dev->dev, pages[i], 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dirty = true;
+ }
+ }
+
+ if (dirty) {
+ DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+ /*
+ * TODO MIDEGL-1813
+ *
+ * Use flush_cache_page() and outer_flush_range() to
+ * flush only the user space mappings of the dirty pages
+ */
+ flush_cache_all();
+ outer_flush_all();
+ unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+ bo->gem_object.size, 1);
+ }
+ }
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+ struct drm_device *dev = bo->gem_object.dev;
+
+ /*
+ * TODO MIDEGL-1808
+ *
+ * The following check was meant to detect if the CPU is trying to access
+ * a buffer that is currently mapped for DMA accesses, which is illegal
+ * as described by the DMA-API.
+ *
+	 * However, some of our tests do exactly that. With the check enabled
+	 * they trigger the message below and the pages are never dma-unmapped
+	 * (so as not to disturb the DMA device), which makes the tests fail
+	 * because the caches are not properly flushed.
+ */
+
+ /*
+ if (bo->sgt) {
+ DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n", __func__);
+ return;
+ }
+ */
+
+ if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+ bo->backing_data.shm.dma_addrs[pgoff]) {
+		DRM_DEBUG("%s: unmap bo=%p (s=%zd), paddr=%08x\n",
+ __func__, bo, bo->gem_object.size,
+ bo->backing_data.shm.dma_addrs[pgoff]);
+ dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ bo->backing_data.shm.dma_addrs[pgoff] = 0;
+ }
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+ struct vm_area_struct *vma, size_t size)
+{
+ DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+ __func__, obj, bo, bo->type);
+
+ vma->vm_flags &= ~VM_PFNMAP;
+ vma->vm_flags |= VM_MIXEDMAP;
+
+ if (bo->type & PL111_BOT_WC) {
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ } else if (bo->type & PL111_BOT_CACHED) {
+ /*
+ * Objects that do not have a filp (DMA backed) can't be
+ * mapped as cached now. Write-combine should be enough.
+ */
+ if (WARN_ON(!obj->filp))
+ return -EINVAL;
+
+ /*
+ * As explained in Documentation/dma-buf-sharing.txt
+ * we need this trick so that we can manually zap ptes
+ * in order to fake coherency.
+ */
+ fput(vma->vm_file);
+ get_file(obj->filp);
+ vma->vm_file = obj->filp;
+
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ } else { /* PL111_BOT_UNCACHED */
+ vma->vm_page_prot =
+ pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+ }
+ return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+ int ret;
+ struct drm_file *priv = file_priv->private_data;
+ struct drm_device *dev = priv->minor->dev;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+ struct drm_gem_mm *mm = dev->mm_private;
+ struct drm_hash_item *hash;
+ struct drm_local_map *map = NULL;
+#else
+ struct drm_vma_offset_node *node;
+#endif
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+
+ DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+ drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+ map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+ obj = map->handle;
+#else
+ node = drm_vma_offset_exact_lookup(dev->vma_offset_manager,
+ vma->vm_pgoff,
+ vma_pages(vma));
+ obj = container_of(node, struct drm_gem_object, vma_node);
+#endif
+ bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+ /* for an imported buffer we let the exporter handle the mmap */
+ if (obj->import_attach)
+ return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+ ret = drm_gem_mmap(file_priv, vma);
+ if (ret < 0) {
+ DRM_ERROR("failed to mmap\n");
+ return ret;
+ }
+
+ /* Our page fault handler uses the page offset calculated from the vma,
+ * so we need to remove the gem cookie offset specified in the call.
+ */
+ vma->vm_pgoff = 0;
+
+ return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100644
index 00000000000..1d613d0592a
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_get() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+ __u32 cntl;
+ struct clcd_board *board;
+
+ pr_info("DRM %s\n", __func__);
+
+ clk_prepare_enable(priv.clk);
+
+ /* Enable and Power Up */
+ cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+ DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+ if (fb->bits_per_pixel == 16)
+ cntl |= CNTL_LCDBPP16_565;
+ else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+ cntl |= CNTL_LCDBPP24;
+ else
+ BUG_ON(1);
+
+ cntl |= CNTL_BGR;
+
+ writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+ if (priv.amba_dev->dev.platform_data) {
+ board = priv.amba_dev->dev.platform_data;
+
+ if (board->enable)
+ board->enable(NULL);
+ }
+
+ /* Enable Interrupts */
+ writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+ return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+ struct clcd_board *board;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ unsigned long flags;
+#endif
+
+ pr_info("DRM %s\n", __func__);
+
+ /* Disable Interrupts */
+ writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+ if (priv.amba_dev->dev.platform_data) {
+ board = priv.amba_dev->dev.platform_data;
+
+ if (board->disable)
+ board->disable(NULL);
+ }
+
+ /* Disable and Power Down */
+ writel(0, priv.regs + CLCD_PL111_CNTL);
+
+ /* Disable clock */
+ clk_disable_unprepare(priv.clk);
+
+ pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ /* Release the previous buffers */
+ if (pl111_crtc->old_kds_res_set != NULL)
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+ pl111_crtc->old_kds_res_set = NULL;
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+ return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+ struct drm_framebuffer *fb;
+ struct pl111_gem_bo *bo;
+ size_t min_size;
+ fb = flip_res->fb;
+ bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+ min_size = (fb->height - 1) * fb->pitches[0]
+ + fb->width * (fb->bits_per_pixel >> 3);
+
+ BUG_ON(bo->gem_object.size < min_size);
+
+ /* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+ BUG_ON(bo->type != PL111_BOT_DMA);
+
+ /*
+ * Note the buffer for releasing after IRQ, and don't allow any more
+ * updates until then.
+ *
+ * This clcd controller latches the new address on next vsync. Address
+ * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+ * wait for that before releasing the previous buffer's kds
+ * resources. Otherwise, we'll allow writers to write to the old buffer
+ * whilst it is still being displayed
+ */
+ pl111_crtc->current_update_res = flip_res;
+
+ DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+ fb, bo, bo->backing_data.dma.fb_dev_addr);
+
+ if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+ DRM_ERROR("Could not get vblank reference for crtc %d\n",
+ pl111_crtc->crtc_index);
+
+ /* Set the scanout buffer */
+ writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+ writel(bo->backing_data.dma.fb_dev_addr +
+ ((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+ struct drm_framebuffer *fb)
+{
+ unsigned long irq_flags;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+ spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+ if (list_empty(&pl111_crtc->update_queue) &&
+ !pl111_crtc->current_update_res) {
+ do_flip_to_res(flip_res);
+ } else {
+ /*
+ * Enqueue the update to occur on a future IRQ
+ * This only happens on triple-or-greater buffering
+ */
+ DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+ fb);
+ list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+ }
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+ if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+ pl111_crtc->last_bpp != fb->bits_per_pixel ||
+ !drm_mode_equal(pl111_crtc->new_mode,
+ pl111_crtc->current_mode))) {
+ struct clcd_regs timing;
+
+ pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+ DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+ timing.tim0, timing.tim1, timing.tim2,
+ timing.tim3, timing.pixclock);
+
+ /* This is the actual mode setting part */
+ clk_set_rate(priv.clk, timing.pixclock);
+
+ writel(timing.tim0, priv.regs + CLCD_TIM0);
+ writel(timing.tim1, priv.regs + CLCD_TIM1);
+ writel(timing.tim2, priv.regs + CLCD_TIM2);
+ writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+ clcd_enable(fb);
+ pl111_crtc->last_bpp = fb->bits_per_pixel;
+ }
+
+ if (!flip_res->page_flip) {
+ drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+ pl111_crtc->current_mode = pl111_crtc->new_mode;
+ pl111_crtc->new_mode = NULL;
+ }
+
+ BUG_ON(!pl111_crtc->current_mode);
+
+ /*
+ * If IRQs weren't enabled before, they are now. This will eventually
+ * cause flip_res to be released via pl111_common_irq, which updates
+ * every time the Base Address is latched (i.e. every frame, regardless
+ * of whether we update the base address or not)
+ */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+ u32 irq_stat;
+ struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+ irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+ if (!irq_stat)
+ return IRQ_NONE;
+
+ if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+ pl111_common_irq(pl111_crtc);
+
+ /* Clear the interrupt once done */
+ writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+ return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ int ret;
+
+ if (priv == NULL) {
+ pr_err("%s no private data\n", __func__);
+ return -EINVAL;
+ }
+
+ if (priv->amba_dev == NULL) {
+ pr_err("%s no amba device found\n", __func__);
+ return -EINVAL;
+ }
+
+ /* set up MMIO for register access */
+ priv->mmio_start = priv->amba_dev->res.start;
+ priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+ DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+ priv->mmio_len);
+
+ priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+ if (priv->regs == NULL) {
+ pr_err("%s failed mmio\n", __func__);
+ return -EINVAL;
+ }
+
+ /* turn off interrupts */
+ writel(0, priv->regs + CLCD_PL111_IENB);
+
+ ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+ "pl111_irq_handler", NULL);
+ if (ret != 0) {
+ pr_err("%s failed %d\n", __func__, ret);
+ goto out_mmio;
+ }
+
+ goto finish;
+
+out_mmio:
+ iounmap(priv->regs);
+finish:
+ DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+ return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ u32 cntl;
+
+ if (priv == NULL || priv->regs == NULL)
+ return;
+
+ free_irq(priv->amba_dev->irq[0], NULL);
+
+ cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+ cntl &= ~CNTL_LCDEN;
+ writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+ cntl &= ~CNTL_LCDPWR;
+ writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+ iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+ struct clcd_board *board = dev->dev.platform_data;
+ int ret;
+ pr_info("DRM %s\n", __func__);
+
+ if (!board)
+ dev_warn(&dev->dev, "board data not available\n");
+
+ ret = amba_request_regions(dev, NULL);
+ if (ret != 0) {
+ DRM_ERROR("CLCD: unable to reserve regs region\n");
+ goto out;
+ }
+
+ priv.amba_dev = dev;
+
+ priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+ if (IS_ERR(priv.clk)) {
+ DRM_ERROR("CLCD: unable to get clk.\n");
+ ret = PTR_ERR(priv.clk);
+ goto clk_err;
+ }
+
+ return 0;
+
+clk_err:
+ amba_release_regions(dev);
+out:
+ return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+ DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+ clk_put(priv.clk);
+
+ amba_release_regions(dev);
+
+ priv.amba_dev = NULL;
+
+ return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+ struct clcd_regs *timing)
+{
+ unsigned int ppl, hsw, hfp, hbp;
+ unsigned int lpp, vsw, vfp, vbp;
+ unsigned int cpl;
+
+ memset(timing, 0, sizeof(struct clcd_regs));
+
+ ppl = (mode->hdisplay / 16) - 1;
+ hsw = mode->hsync_end - mode->hsync_start - 1;
+ hfp = mode->hsync_start - mode->hdisplay - 1;
+ hbp = mode->htotal - mode->hsync_end - 1;
+
+ lpp = mode->vdisplay - 1;
+ vsw = mode->vsync_end - mode->vsync_start - 1;
+ vfp = mode->vsync_start - mode->vdisplay;
+ vbp = mode->vtotal - mode->vsync_end;
+
+ cpl = mode->hdisplay - 1;
+
+ timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+ timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+ timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+ timing->tim3 = 0;
+
+ timing->pixclock = mode->clock * 1000;
+}
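+
+/*
+ * Worked example (illustrative only): the standard 640x480@60 mode
+ * (hdisplay 640, hsync 656-752, htotal 800; vdisplay 480, vsync 490-492,
+ * vtotal 525; clock 25175) yields ppl=39, hsw=95, hfp=15, hbp=47,
+ * lpp=479, vsw=1, vfp=10, vbp=33, cpl=639 and pixclock=25175000Hz.
+ */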
+
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+ struct drm_display_mode *mode)
+{
+ unsigned int ppl, hsw, hfp, hbp;
+ unsigned int lpp, vsw, vfp, vbp;
+
+ ppl = (timing->tim0 >> 2) & 0x3f;
+ hsw = (timing->tim0 >> 8) & 0xff;
+ hfp = (timing->tim0 >> 16) & 0xff;
+ hbp = (timing->tim0 >> 24) & 0xff;
+
+ lpp = timing->tim1 & 0x3ff;
+ vsw = (timing->tim1 >> 10) & 0x3f;
+ vfp = (timing->tim1 >> 16) & 0xff;
+ vbp = (timing->tim1 >> 24) & 0xff;
+
+ mode->hdisplay = (ppl + 1) * 16;
+ mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+ mode->hsync_end = ((ppl + 1) * 16) + hfp + hsw + 2;
+ mode->htotal = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+ mode->hskew = 0;
+
+ mode->vdisplay = lpp + 1;
+ mode->vsync_start = lpp + vfp + 1;
+ mode->vsync_end = lpp + vfp + vsw + 2;
+ mode->vtotal = lpp + vfp + vsw + vbp + 2;
+
+ mode->flags = 0;
+
+ mode->width_mm = 0;
+ mode->height_mm = 0;
+
+ mode->clock = timing->pixclock / 1000;
+ mode->hsync = timing->pixclock / mode->htotal;
+ mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_platform.c b/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100644
index 00000000000..9d5ec0cb175
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+ pm_message_t state)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ pl111_drm_exit(dev);
+
+ return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+ {
+ .id = 0x00041110,
+ .mask = 0x000ffffe,
+ },
+ {0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+ .drv = {
+ .name = "clcd-pl11x",
+ },
+ .probe = pl111_amba_probe,
+ .remove = pl111_amba_remove,
+ .id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+ .probe = pl111_platform_drm_probe,
+ .remove = pl111_platform_drm_remove,
+ .suspend = pl111_platform_drm_suspend,
+ .resume = pl111_platform_drm_resume,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ },
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+ .name = DRIVER_NAME,
+ .id = -1,
+ .dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+ int ret;
+
+ pr_info("DRM %s\n", __func__);
+
+ pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+ if (pl111_drm_device == NULL) {
+ pr_err("DRM platform_device_register_full() failed\n");
+ return -ENOMEM;
+ }
+
+ ret = amba_driver_register(&pl111_amba_driver);
+ if (ret != 0) {
+ pr_err("DRM amba_driver_register() failed %d\n", ret);
+ goto err_amba_reg;
+ }
+
+ ret = platform_driver_register(&platform_drm_driver);
+ if (ret != 0) {
+ pr_err("DRM platform_driver_register() failed %d\n", ret);
+ goto err_pdrv_reg;
+ }
+
+ return 0;
+
+err_pdrv_reg:
+ amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+ platform_device_unregister(pl111_drm_device);
+
+ return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+ pr_info("DRM %s\n", __func__);
+
+	platform_driver_unregister(&platform_drm_driver);
+	amba_driver_unregister(&pl111_amba_driver);
+	platform_device_unregister(pl111_drm_device);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100644
index 00000000000..d033566a579
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_vma.c b/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100644
index 00000000000..ff602efd6d6
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+ bool dirty, bool accessed)
+{
+ int i, npages;
+ struct pl111_gem_bo *bo;
+ npages = obj->size >> PAGE_SHIFT;
+ bo = PL111_BO_FROM_GEM(obj);
+ for (i = 0; i < npages; i++) {
+ if (dirty)
+ set_page_dirty(pages[i]);
+ if (accessed)
+ mark_page_accessed(pages[i]);
+ /* Undo the reference we took when populating the table */
+ page_cache_release(pages[i]);
+ }
+ drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+ int i, npages;
+ struct pl111_gem_bo *bo;
+ npages = obj->size >> PAGE_SHIFT;
+ bo = PL111_BO_FROM_GEM(obj);
+ _drm_gem_put_pages(obj, pages, true, true);
+ if (bo->backing_data.shm.dma_addrs) {
+ for (i = 0; i < npages; i++) {
+ /* Filter pages unmapped because of CPU accesses */
+ if (!bo->backing_data.shm.dma_addrs[i])
+ continue;
+ if (!dma_mapping_error(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i])) {
+ dma_unmap_page(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i],
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+ kfree(bo->backing_data.shm.dma_addrs);
+ bo->backing_data.shm.dma_addrs = NULL;
+ }
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+ gfp_t gfpmask)
+{
+ struct inode *inode;
+ struct address_space *mapping;
+ struct page *p, **pages;
+ int i, npages;
+
+ /* This is the shared memory object that backs the GEM resource */
+ inode = obj->filp->f_path.dentry->d_inode;
+ mapping = inode->i_mapping;
+
+ npages = obj->size >> PAGE_SHIFT;
+
+ pages = drm_malloc_ab(npages, sizeof(struct page *));
+ if (pages == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ gfpmask |= mapping_gfp_mask(mapping);
+
+ for (i = 0; i < npages; i++) {
+ p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+ if (IS_ERR(p))
+ goto fail;
+ pages[i] = p;
+
+ /*
+	 * There is a hypothetical issue with drivers that require
+	 * buffer memory in the low 4GB: if the pages are un-
+ * pinned, and swapped out, they can end up swapped back
+ * in above 4GB. If pages are already in memory, then
+ * shmem_read_mapping_page_gfp will ignore the gfpmask,
+ * even if the already in-memory page disobeys the mask.
+ *
+ * It is only a theoretical issue today, because none of
+ * the devices with this limitation can be populated with
+ * enough memory to trigger the issue. But this BUG_ON()
+ * is here as a reminder in case the problem with
+ * shmem_read_mapping_page_gfp() isn't solved by the time
+ * it does become a real issue.
+ *
+ * See this thread: http://lkml.org/lkml/2011/7/11/238
+ */
+ BUG_ON((gfpmask & __GFP_DMA32) &&
+ (page_to_pfn(p) >= 0x00100000UL));
+ }
+
+ return pages;
+
+fail:
+ while (i--)
+ page_cache_release(pages[i]);
+
+ drm_free_large(pages);
+ return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+ struct pl111_gem_bo *bo;
+ bo = PL111_BO_FROM_GEM(obj);
+
+ if (bo->backing_data.shm.pages == NULL) {
+ struct page **p;
+ int npages = obj->size >> PAGE_SHIFT;
+ int i;
+
+ p = _drm_gem_get_pages(obj, GFP_KERNEL);
+ if (IS_ERR(p))
+			return ERR_CAST(p);
+
+ bo->backing_data.shm.pages = p;
+
+ if (bo->backing_data.shm.dma_addrs == NULL) {
+ bo->backing_data.shm.dma_addrs =
+ kzalloc(npages * sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (bo->backing_data.shm.dma_addrs == NULL)
+ goto error_out;
+ }
+
+ if (!(bo->type & PL111_BOT_CACHED)) {
+ for (i = 0; i < npages; ++i) {
+ bo->backing_data.shm.dma_addrs[i] =
+ dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i]))
+ goto error_out;
+ }
+ }
+ }
+
+ return bo->backing_data.shm.pages;
+
+error_out:
+ put_pages(obj, bo->backing_data.shm.pages);
+ bo->backing_data.shm.pages = NULL;
+ return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page **pages;
+ unsigned long pfn;
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int ret;
+
+ mutex_lock(&dev->struct_mutex);
+
+ /*
+	 * Our mmap calls set up a valid vma->vm_pgoff,
+	 * so we can use vmf->pgoff
+ */
+
+ if (bo->type & PL111_BOT_DMA) {
+ pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) +
+ vmf->pgoff;
+ } else { /* PL111_BOT_SHM */
+ pages = get_pages(obj);
+ if (IS_ERR(pages)) {
+ dev_err(obj->dev->dev,
+ "could not get pages: %ld\n", PTR_ERR(pages));
+ ret = PTR_ERR(pages);
+ goto error;
+ }
+ pfn = page_to_pfn(pages[vmf->pgoff]);
+ pl111_gem_sync_to_cpu(bo, vmf->pgoff);
+ }
+
+ DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+ bo, PFN_PHYS(pfn), PFN_PHYS(vmf->pgoff));
+
+ ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+ mutex_unlock(&dev->struct_mutex);
+
+ switch (ret) {
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ *
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+ struct vm_area_struct *vma)
+{
+ struct drm_vma_entry *vma_entry;
+
+ DRM_DEBUG("0x%08lx,0x%08lx\n",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+ atomic_inc(&dev->vma_count);
+
+ vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+ if (vma_entry) {
+ vma_entry->vma = vma;
+ vma_entry->pid = current->pid;
+ list_add(&vma_entry->head, &dev->vmalist);
+ }
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+ struct vm_area_struct *vma)
+{
+ struct drm_vma_entry *pt, *temp;
+
+ DRM_DEBUG("0x%08lx,0x%08lx\n",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+ atomic_dec(&dev->vma_count);
+
+ list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+ if (pt->vma == vma) {
+ list_del(&pt->head);
+ kfree(pt);
+ break;
+ }
+ }
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = vma->vm_private_data;
+
+ drm_gem_object_reference(obj);
+
+ mutex_lock(&obj->dev->struct_mutex);
+ pl111_drm_vm_open_locked(obj->dev, vma);
+ mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct drm_device *dev = obj->dev;
+
+ mutex_lock(&dev->struct_mutex);
+ pl111_drm_vm_close_locked(obj->dev, vma);
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+}
+#endif
diff --git a/include/linux/dma-buf-test-exporter.h b/include/linux/dma-buf-test-exporter.h
new file mode 100644
index 00000000000..98984ba6032
--- /dev/null
+++ b/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+ int op; /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+ int major; /**< Major version */
+ int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+ __u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+ /* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver are supported */
+ /* out */
+ int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+ int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+ int cpu_mappings; /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+ /* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver are supported */
+
+ /* zero = no fail injection, non-zero = inject failure */
+ int fail_attach;
+ int fail_map;
+ int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+ int fd;
+ unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* All ioctls below return 0 on success or -errcode, except DMA_BUF_TE_ALLOC which returns an fd or -errcode */
+#define DMA_BUF_TE_VERSION _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
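+
+/*
+ * Usage sketch (hypothetical test client; the device node name is an
+ * assumption, it is not defined by this header):
+ *
+ *	int te = open("/dev/dma_buf_te", O_RDWR);
+ *	struct dma_buf_te_ioctl_alloc alloc = { .size = 4 };	// in pages
+ *	int buf_fd = ioctl(te, DMA_BUF_TE_ALLOC, &alloc);	// dma_buf fd
+ *	struct dma_buf_te_ioctl_status st = { .fd = buf_fd };
+ *	if (ioctl(te, DMA_BUF_TE_QUERY, &st) == 0)
+ *		printf("attached_devices=%d\n", st.attached_devices);
+ */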
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
diff --git a/include/linux/kds.h b/include/linux/kds.h
new file mode 100644
index 00000000000..1346edae04c
--- /dev/null
+++ b/include/linux/kds.h
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KDS_H_
+#define _KDS_H_
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+
+#define KDS_WAIT_BLOCKING (ULONG_MAX)
+
+struct kds_resource_set;
+
+typedef void (*kds_callback_fn) (void *callback_parameter, void *callback_extra_parameter);
+
+struct kds_callback
+{
+ kds_callback_fn user_cb; /* real cb */
+ int direct; /* do direct or queued call? */
+ struct workqueue_struct *wq;
+};
+
+struct kds_link
+{
+ struct kds_resource_set *parent;
+ struct list_head link;
+ unsigned long state;
+};
+
+struct kds_resource
+{
+ struct kds_link waiters;
+};
+
+/* callback API */
+
+/* Initialize a callback object.
+ *
+ * Typically created per context or per hw resource.
+ *
+ * Callbacks can be performed directly if no nested locking can
+ * happen in the client.
+ *
+ * Nested locking can occur when a lock is held during the kds_async_waitall or
+ * kds_resource_set_release call. If the callback needs to take the same lock,
+ * nested locking will happen.
+ *
+ * If nested locking could happen, non-direct callbacks can be requested.
+ * Callbacks will then be made asynchronously with respect to the triggering call.
+ */
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb);
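+
+/*
+ * Editor's sketch (not part of the original header): a client that may hold
+ * its own lock across kds_async_waitall requests a queued (non-direct)
+ * callback, so the callback runs asynchronously on a workqueue. The
+ * my_ready_cb name is a placeholder.
+ *
+ * @code
+ * static void my_ready_cb(void *param, void *extra)
+ * {
+ * 	// runs from a workqueue since direct == 0; resources are now owned
+ * }
+ *
+ * static struct kds_callback cb;
+ * ...
+ * int err = kds_callback_init(&cb, 0, my_ready_cb); // 0 = queued, not direct
+ * @endcode
+ */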
+
+/* Terminate the use of a callback object.
+ *
+ * If the callback object was set up as non-direct,
+ * any pending callbacks will be flushed first.
+ * Note that to avoid a deadlock, the locks the callbacks need
+ * must not be held when a callback object is terminated.
+ */
+void kds_callback_term(struct kds_callback *cb);
+
+
+/* resource object API */
+
+/* initialize a resource handle for a shared resource */
+void kds_resource_init(struct kds_resource * const resource);
+
+/*
+ * Will return 0 on success.
+ * If the resource is being used or waited on, -EBUSY is returned.
+ * The caller should NOT try to terminate a resource that could still have clients.
+ * After the function returns the resource is no longer known by kds.
+ */
+int kds_resource_term(struct kds_resource *resource);
+
+/* Asynchronous wait for a set of resources.
+ * The callback will be called when all resources are available.
+ * If all the resources were already available, the callback will be called before kds_async_waitall returns.
+ * So one must not hold any locks the callback code-flow can take when calling kds_async_waitall.
+ * The caller is considered to own/use the resources until \a kds_resource_set_release is called.
+ * exclusive_access_bitmap is a bitmap where a high bit means exclusive access while a low bit means shared access.
+ * Use the Linux __set_bit API, where the index of the buffer to control is used as the bit index.
+ *
+ * Standard Linux error return value.
+ */
+int kds_async_waitall(
+ struct kds_resource_set ** const pprset,
+ struct kds_callback *cb,
+ void *callback_parameter,
+ void *callback_extra_parameter,
+ int number_resources,
+ unsigned long *exclusive_access_bitmap,
+ struct kds_resource **resource_list);
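+
+/*
+ * Editor's sketch (not part of the original header): waiting asynchronously
+ * for two resources, requesting exclusive access to the first and shared
+ * access to the second via the bitmap, as described above. res_a/res_b are
+ * placeholders for previously kds_resource_init()-ed resources, and cb/ctx
+ * come from the callback sketch above.
+ *
+ * @code
+ * struct kds_resource *res[2] = { &res_a, &res_b };
+ * unsigned long excl[BITS_TO_LONGS(2)] = { 0 };
+ * struct kds_resource_set *rset = NULL;
+ *
+ * __set_bit(0, excl); // exclusive access to res[0]; bit 1 clear = shared
+ * int err = kds_async_waitall(&rset, &cb, ctx, NULL, 2, excl, res);
+ * @endcode
+ */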
+
+/* Synchronous wait for a set of resources.
+ * The function will return when one of the following has happened:
+ * - all resources have been obtained
+ * - the timeout lapsed while waiting
+ * - a signal was received while waiting
+ *
+ * To wait without a timeout, specify KDS_WAIT_BLOCKING for \a jiffies_timeout, otherwise
+ * give the timeout in jiffies. A zero timeout attempts to obtain all resources and returns
+ * immediately with a timeout if not all resources could be obtained.
+ *
+ * The caller is considered to own/use the resources when the function returns successfully.
+ * The caller must release the resources using \a kds_resource_set_release.
+ *
+ * Calling this function while holding already locked resources or other locking primitives is dangerous.
+ * If this is needed, one must decide on a lock order for the resources and/or the other locking primitives
+ * and always take the resources/locking primitives in that specific order.
+ *
+ * Use the ERR_PTR framework to decode the return value:
+ * NULL = timed out
+ * if IS_ERR then PTR_ERR gives:
+ *   ERESTARTSYS = signal received, retry the call after the signal
+ *   all other values = internal error, lock failed
+ * any other pointer = successful wait; the caller now owns the resources and must call kds_resource_set_release
+ */
+struct kds_resource_set *kds_waitall(
+ int number_resources,
+ unsigned long *exclusive_access_bitmap,
+ struct kds_resource **resource_list,
+		unsigned long jiffies_timeout);
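+
+/*
+ * Editor's sketch (not part of the original header): a synchronous wait with
+ * a one-second timeout, decoding the return value with the ERR_PTR framework
+ * as described above. excl/res are reused from the async sketch above.
+ *
+ * @code
+ * struct kds_resource_set *rset =
+ * 	kds_waitall(2, excl, res, msecs_to_jiffies(1000));
+ *
+ * if (!rset) {
+ * 	// timed out
+ * } else if (IS_ERR(rset)) {
+ * 	int err = PTR_ERR(rset); // ERESTARTSYS => retry after the signal
+ * } else {
+ * 	// now the owner of the resources; use them, then release:
+ * 	kds_resource_set_release(&rset);
+ * }
+ * @endcode
+ */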
+
+/* Release resources after use.
+ * The caller must be prepared for other async callbacks to trigger,
+ * and so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your pointer to handle a race
+ * between a cancellation and a completion.
+ *
+ * If the caller can't guarantee that such a race won't occur, then
+ * the passed-in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ */
+void kds_resource_set_release(struct kds_resource_set **pprset);
+
+/* Release resources after use and wait for callbacks to complete.
+ * The caller must be prepared for other async callbacks to trigger,
+ * and so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your pointer to handle a race
+ * between a cancellation and a completion.
+ *
+ * If the caller can't guarantee that such a race won't occur, then
+ * the passed-in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ *
+ * This should be used to cancel waits which are pending on a kds
+ * resource.
+ *
+ * It is a bug to call this from atomic contexts and from within
+ * a kds callback that now owns the kds_resource.
+ */
+void kds_resource_set_release_sync(struct kds_resource_set **pprset);
+#endif /* _KDS_H_ */
diff --git a/include/linux/ump-common.h b/include/linux/ump-common.h
new file mode 100644
index 00000000000..0015bda6ffc
--- /dev/null
+++ b/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used in both the user and kernel space sides of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+ #define CHECK_RESULT __attribute__((__warn_unused_result__))
+ #define INLINE __inline__
+#else
+ #define CHECK_RESULT
+ #define INLINE __inline
+#endif
+
+#ifndef STATIC
+ #define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+ UMP_OK = 0, /**< indicates success */
+ UMP_ERROR = 1 /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are:
+ * @li @a UMP_PROT_<device>_RD read permission
+ * @li @a UMP_PROT_<device>_WR write permission
+ * @li @a UMP_HINT_<device>_RD read often
+ * @li @a UMP_HINT_<device>_WR written often
+ *
+ * 5 devices are currently supported, one of which is the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W, X, Y and Z by UMP, as it has no knowledge of their real names/identifiers.
+ * As an example, device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation:
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR the allocation must be physically linear. Typical for devices without an MMU and with no IOMMU to help them.
+ * @li @a UMP_PROT_SHAREABLE the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocations are growable unless they're @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on what the system supports.
+ * A flag-composition sketch follows the @ref ump_alloc_flags typedef below.
+ */
+typedef enum
+{
+ /* Generic helpers */
+ UMP_PROT_DEVICE_RD = (1u << 0),
+ UMP_PROT_DEVICE_WR = (1u << 1),
+ UMP_HINT_DEVICE_RD = (1u << 2),
+ UMP_HINT_DEVICE_WR = (1u << 3),
+ UMP_DEVICE_MASK = 0xF,
+ UMP_DEVICE_CPU_SHIFT = 0,
+ UMP_DEVICE_W_SHIFT = 4,
+ UMP_DEVICE_X_SHIFT = 8,
+ UMP_DEVICE_Y_SHIFT = 12,
+ UMP_DEVICE_Z_SHIFT = 16,
+
+ /* CPU protection and hints. */
+ UMP_PROT_CPU_RD = (1u << 0),
+ UMP_PROT_CPU_WR = (1u << 1),
+ UMP_HINT_CPU_RD = (1u << 2),
+ UMP_HINT_CPU_WR = (1u << 3),
+
+ /* device W */
+ UMP_PROT_W_RD = (1u << 4),
+ UMP_PROT_W_WR = (1u << 5),
+ UMP_HINT_W_RD = (1u << 6),
+ UMP_HINT_W_WR = (1u << 7),
+
+ /* device X */
+ UMP_PROT_X_RD = (1u << 8),
+ UMP_PROT_X_WR = (1u << 9),
+ UMP_HINT_X_RD = (1u << 10),
+ UMP_HINT_X_WR = (1u << 11),
+
+ /* device Y */
+ UMP_PROT_Y_RD = (1u << 12),
+ UMP_PROT_Y_WR = (1u << 13),
+ UMP_HINT_Y_RD = (1u << 14),
+ UMP_HINT_Y_WR = (1u << 15),
+
+ /* device Z */
+ UMP_PROT_Z_RD = (1u << 16),
+ UMP_PROT_Z_WR = (1u << 17),
+ UMP_HINT_Z_RD = (1u << 18),
+ UMP_HINT_Z_WR = (1u << 19),
+
+ /* 20-26 reserved for future use */
+ UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+	/** Allocations marked as @a UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the CPU */
+ UMP_CONSTRAINT_UNCACHED = (1u << 27),
+ /** Require 32-bit physically addressable memory */
+ UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+ /** For devices without an MMU and with no IOMMU assistance. */
+ UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+ /** Shareable must be set to allow the allocation to be used by other processes, the default is non-shared */
+ UMP_PROT_SHAREABLE = (1u << 30)
+ /* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
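+
+/*
+ * Editor's sketch (not part of the original header): composing flags for a
+ * shareable allocation that the CPU writes and device Z (e.g. a GPU) reads,
+ * using the generic helpers and per-device shifts defined above. The choice
+ * of device Z is illustrative.
+ *
+ * @code
+ * ump_alloc_flags flags =
+ * 	UMP_PROT_CPU_WR | UMP_HINT_CPU_WR |
+ * 	((UMP_PROT_DEVICE_RD | UMP_HINT_DEVICE_RD) << UMP_DEVICE_Z_SHIFT) |
+ * 	UMP_PROT_SHAREABLE;
+ * @endcode
+ */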
+
+
+/**
+ * Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS	(UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+                                             UMP_PROT_W_RD | UMP_PROT_W_WR |   \
+                                             UMP_PROT_X_RD | UMP_PROT_X_WR |   \
+                                             UMP_PROT_Y_RD | UMP_PROT_Y_WR |   \
+                                             UMP_PROT_Z_RD | UMP_PROT_Z_WR |   \
+                                             UMP_PROT_SHAREABLE |               \
+                                             UMP_CONSTRAINT_32BIT_ADDRESSABLE)
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+	/**
+	 * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+	 * After a clean the contents of memory are considered to be "owned" by the device.
+	 */
+	UMP_MSYNC_CLEAN = 1,
+
+	/** Cleans any dirty cache lines to main memory and ejects all lines from the cache.
+	 * After a clean & invalidate the contents of memory are considered to be "owned" by the CPU.
+	 * Any subsequent access will fetch data from main memory.
+	 *
+	 * @note Because CPUs do speculative prefetching, a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done both before and after interacting with hardware.
+	 */
+	UMP_MSYNC_CLEAN_AND_INVALIDATE
+};
+
+typedef uint32_t ump_cpu_msync_op;
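+
+/*
+ * Editor's sketch (not part of the original header): the ordering the @note
+ * above implies, around a hardware job. ump_cpu_msync_now is the user-space
+ * function referenced above; its exact prototype is assumed here as
+ * (handle, op, address, size) for illustration only, and run_hw_job() is a
+ * hypothetical placeholder.
+ *
+ * @code
+ * ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, ptr, size); // before HW access
+ * run_hw_job();                                                    // device works on the buffer
+ * ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, ptr, size); // after HW access
+ * @endcode
+ */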
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequential number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ *                                 Takes an int* (pointer to a file descriptor)
+ *                                 Another ION reference is taken, which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+ UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+ UMP_EXTERNAL_MEM_TYPE_ION = 1,
+ UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+ /** the allocation is mapped as noncached. */
+ UMP_REF_DRV_CONSTRAINT_NONE = 0,
+ /** not supported. */
+ UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+ /** the allocation is mapped as cached by the cpu. */
+ UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
diff --git a/include/linux/ump-import.h b/include/linux/ump-import.h
new file mode 100644
index 00000000000..89ce727d4f8
--- /dev/null
+++ b/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+ /**
+ * Linux module of the import handler
+ */
+ struct module * linux_module;
+
+ /**
+ * UMP session usage begin.
+ *
+ * Called when a UMP session first is bound to the handler.
+ * Typically used to set up any import module specific per-session data.
+ * The function returns a pointer to this data in the output pointer custom_session_data
+ * which will be passed to \a session_end and \a import.
+ *
+ * Note: custom_session_data must be set to non-NULL if successful.
+	 * If no pointer is needed, set it to a magic value to validate against instead.
+ *
+ * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+ * @return 0 on success, error code if the session could not be initiated.
+ */
+ int (*session_begin)(void ** custom_session_data);
+
+ /**
+ * UMP session usage end.
+ *
+ * Called when a UMP session is no longer using the handler.
+ * Only called if @a session_begin returned OK.
+ *
+ * @param[in] custom_session_data The value set by the session_begin handler
+ */
+ void (*session_end)(void * custom_session_data);
+
+ /**
+ * Import request.
+ *
+ * Called when a client has asked to import a resource of the type the import module was installed for.
+ * Only called if @a session_begin returned OK.
+ *
+ * The requested flags must be verified to be valid to apply to the imported memory.
+ * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+ * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+ *
+ * @param[in] custom_session_data The value set by the session_begin handler
+ * @param[in] phandle Pointer to the handle to import
+ * @param flags The requested UMPv2 flags to assign to the imported handle
+ * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+ */
+ ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param type Type of the memory to register a handler for
+ * @param[in] handler Handler structure to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
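+
+/*
+ * Editor's sketch (not part of the original header): the registration pattern
+ * for an ION import handler built from the hooks above. The my_* functions
+ * are placeholders for implementations of the three hooks.
+ *
+ * @code
+ * static struct ump_import_handler my_ion_handler = {
+ * 	.linux_module  = THIS_MODULE,
+ * 	.session_begin = my_session_begin,
+ * 	.session_end   = my_session_end,
+ * 	.import        = my_import,
+ * };
+ *
+ * // in module init:
+ * int ret = ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &my_ion_handler);
+ * ...
+ * // in module exit:
+ * ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+ * @endcode
+ */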
+
+#endif /* _UMP_IMPORT_H_ */
diff --git a/include/linux/ump-ioctl.h b/include/linux/ump-ioctl.h
new file mode 100644
index 00000000000..451403e7c1e
--- /dev/null
+++ b/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H_
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are made, build the ump_struct_size_checker test for both 32-bit and 64-bit targets. Both must compile successfully before committing.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+ void * value; /**< client should store their pointers here */
+ uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+ uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
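+
+/*
+ * Editor's sketch (not part of the original header): how a 64-bit kernel
+ * might pick the right member when servicing a 32-bit client, per the
+ * member documentation above. The use of is_compat_task() here is an
+ * illustrative assumption, not part of this header.
+ *
+ * @code
+ * void *p = is_compat_task() ?
+ * 	(void *)(uintptr_t)u.compat_value : u.value;
+ * @endcode
+ */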
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+ uint64_t size; /**< [in] Size in bytes to allocate */
+ ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+ ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+ uint64_t size; /**< [out] Size of allocation */
+ ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+ uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+ ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+ ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+ ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+ uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+ ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+ uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+ ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+ uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+ ump_pointer phandle; /**< [in] Pointer to handle to import */
+ uint32_t type; /**< [in] Type of handle to import */
+ ump_alloc_flags alloc_flags; /**< [in] Flags to assign to the imported memory */
+ ump_secure_id secure_id; /**< [out] UMP ID representing the imported memory */
+ uint32_t padding; /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+	ump_secure_id secure_id;      /**< [in]  ID of the allocation to query the flags of */
+ ump_alloc_flags alloc_flags; /**< [out] Flags to use when allocating */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC, 1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC, 2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC, 3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC, 4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC, 5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC, 6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/* max ioctl sequential number */
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
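+
+/*
+ * Editor's illustration (not part of the original header): one plausible way
+ * the masks above combine a secure ID and a byte offset into a single mmap
+ * cookie. The bit layout shown (ID above the offset) is an assumption made
+ * for illustration; consult the driver implementation for the authoritative
+ * encoding.
+ *
+ * @code
+ * static inline uint64_t ump_linux_cookie_make(ump_secure_id id, uint64_t off)
+ * {
+ * 	return (((uint64_t)id & UMP_LINUX_ID_MASK) << UMP_LINUX_OFFSET_BITS) |
+ * 	       (off & UMP_LINUX_OFFSET_MASK);
+ * }
+ * @endcode
+ */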
+
+#endif /* _UMP_IOCTL_H_ */
diff --git a/include/linux/ump.h b/include/linux/ump.h
new file mode 100644
index 00000000000..16f9c39f69c
--- /dev/null
+++ b/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+ uint64_t addr; /**< The physical address of the block */
+ uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives:
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a true to indicate that access to the handle is allowed, or
+ * @a false to state that no access is permitted.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are:
+ * @li the handle to the allocation
+ * @li the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func         Pointer to a function which will be called when the allocation is resolved from a
+ *                                secure ID, before the allocation itself is returned to user-space.
+ *                                NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
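+
+/*
+ * Editor's sketch (not part of the original header): a kernel-side allocation
+ * with no security filter or finalizer, exporting its secure ID so another
+ * process can resolve it. The flag choice is illustrative.
+ *
+ * @code
+ * ump_dd_handle h = ump_dd_allocate_64(4096,
+ * 		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_SHAREABLE,
+ * 		NULL, NULL, NULL);
+ * if (h != UMP_DD_INVALID_MEMORY_HANDLE) {
+ * 	ump_secure_id id = ump_dd_secure_id_get(h);
+ * 	// publish id to the other process, then drop our reference:
+ * 	ump_dd_release(h);
+ * }
+ * @endcode
+ */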
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * This is just a copy of the flags given to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64.
+ * @param mem The handle to retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared, of course).
+ * Unless the allocation was marked as shared, the returned ID will only be resolvable in the process that made the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on a non-shared allocation from a different process, @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling this with @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
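+
+/*
+ * Editor's sketch (not part of the original header): iterating the physical
+ * blocks of an allocation, e.g. while building device MMU tables as the
+ * description above suggests. map_one_block() is a hypothetical helper.
+ *
+ * @code
+ * uint64_t count, i;
+ * const ump_dd_physical_block_64 *blocks;
+ *
+ * ump_dd_phys_blocks_get_64(h, &count, &blocks);
+ * for (i = 0; i < count; i++)
+ * 	map_one_block(blocks[i].addr, blocks[i].size); // placeholder mapping step
+ * @endcode
+ */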
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * ump_dd_release may only be called to match a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id.
+ * If called on @a UMP_DD_INVALID_MEMORY_HANDLE the function returns early without doing anything.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped, the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but is instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
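+
+/*
+ * Editor's sketch (not part of the original header): wrapping one externally
+ * managed, physically contiguous page as a UMP handle. my_final_release() is
+ * a placeholder that returns the page to its real owner, as described above.
+ *
+ * @code
+ * ump_dd_physical_block_64 block = { .addr = phys, .size = 4096 };
+ * ump_dd_handle h = ump_dd_create_from_phys_blocks_64(&block, 1,
+ * 		UMP_PROT_CPU_RD | UMP_PROT_CPU_WR,
+ * 		NULL, my_final_release, my_ctx); // placeholders
+ * @endcode
+ */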
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+ UMP_DD_SUCCESS, /**< indicates success */
+ UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+ unsigned long addr; /**< The physical address of the block */
+ unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, which can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get" function.
+ *
+ * @return UMP_DD_HANDLE_INVALID indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either too large or too small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ * @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
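+
+/*
+ * Editor's sketch (not part of the original header): the v1 count-then-fetch
+ * pattern described above, sizing the array with ump_dd_phys_block_count_get
+ * before calling ump_dd_phys_blocks_get.
+ *
+ * @code
+ * unsigned long n = ump_dd_phys_block_count_get(mem);
+ * ump_dd_physical_block *blocks = kmalloc_array(n, sizeof(*blocks), GFP_KERNEL);
+ *
+ * if (blocks && ump_dd_phys_blocks_get(mem, blocks, n) == UMP_DD_SUCCESS) {
+ * 	// blocks[0..n-1] now describe the physical layout
+ * }
+ * kfree(blocks);
+ * @endcode
+ */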
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */