gator: Version 5.16

Signed-off-by: Jon Medhurst <tixy@linaro.org>
diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile
index 3af8b8d..0d4ca68 100644
--- a/drivers/gator/Makefile
+++ b/drivers/gator/Makefile
@@ -21,10 +21,13 @@
 		gator_events_mali_t6xx_hw.o
 include $(M)/mali_t6xx.mk
 else
-gator-y +=	gator_events_mali_400.o
+gator-y +=	gator_events_mali_4xx.o
 endif
 gator-y +=	gator_events_mali_common.o
 EXTRA_CFLAGS +=	-DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT)
+ifneq ($(GATOR_MALI_INTERFACE_STYLE),)
+EXTRA_CFLAGS += -DGATOR_MALI_INTERFACE_STYLE=$(GATOR_MALI_INTERFACE_STYLE)
+endif
 endif
 
 # GATOR_TEST controls whether to include (=1) or exclude (=0) test code. 
@@ -33,9 +36,12 @@
 
 gator-$(CONFIG_ARM) +=	gator_events_armv6.o \
 			gator_events_armv7.o \
+			gator_events_ccn-504.o \
 			gator_events_l2c-310.o \
 			gator_events_scorpion.o
 
+gator-$(CONFIG_ARM64) +=	gator_events_ccn-504.o
+
 $(obj)/gator_main.o: gator_events.h
 
 clean-files := gator_events.h
diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h
index 205cbcd..2e122da 100644
--- a/drivers/gator/gator.h
+++ b/drivers/gator/gator.h
@@ -20,8 +20,6 @@
 #define GATOR_CPU_FREQ_SUPPORT  (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ)
 #define GATOR_IKS_SUPPORT       defined(CONFIG_BL_SWITCHER)
 
-#define GATOR_LIVE 1
-
 // cpu ids
 #define ARM1136     0xb36
 #define ARM1156     0xb56
@@ -31,6 +29,7 @@
 #define CORTEX_A7   0xc07
 #define CORTEX_A8   0xc08
 #define CORTEX_A9   0xc09
+#define CORTEX_A12  0xc0d
 #define CORTEX_A15  0xc0f
 #define SCORPION    0x00f
 #define SCORPIONMP  0x02d
@@ -46,9 +45,14 @@
 
 struct gator_cpu {
 	const int cpuid;
+	// Human readable name
 	const char core_name[MAXSIZE_CORE_NAME];
+	// Perf PMU name
 	const char * const pmu_name;
+	// gatorfs event name
 	const char * const pmnc_name;
+	// compatible from Documentation/devicetree/bindings/arm/cpus.txt
+	const char * const dt_name;
 	const int pmnc_counters;
 };
 
diff --git a/drivers/gator/gator_annotate.c b/drivers/gator/gator_annotate.c
index ad9f309..5b9399b 100644
--- a/drivers/gator/gator_annotate.c
+++ b/drivers/gator/gator_annotate.c
@@ -39,14 +39,17 @@
 static ssize_t annotate_write(struct file *file, char const __user *buf, size_t count_orig, loff_t *offset)
 {
 	int pid, cpu, header_size, available, contiguous, length1, length2, size, count = count_orig & 0x7fffffff;
+	bool interrupt_context;
 
 	if (*offset) {
 		return -EINVAL;
 	}
 
-	// Annotations are not supported in interrupt context
-	if (in_interrupt()) {
-		printk(KERN_WARNING "gator: Annotations are not supported in interrupt context\n");
+	interrupt_context = in_interrupt();
+	// Annotations are not supported in interrupt context, but may work if you comment out the next four lines of code.
+	//   By doing so, annotations in interrupt context can result in deadlocks and lost data.
+	if (interrupt_context) {
+		printk(KERN_WARNING "gator: Annotations are not supported in interrupt context. Edit gator_annotate.c in the gator driver to enable annotations in interrupt context.\n");
 		return -EINVAL;
 	}
 
@@ -77,7 +80,19 @@
 	if (size <= 0) {
 		// Buffer is full, wait until space is available
 		spin_unlock(&annotate_lock);
+
+		// Drop the annotation as blocking is not allowed in interrupt context
+		if (interrupt_context) {
+			return -EINVAL;
+		}
+
 		wait_event_interruptible(gator_annotate_wait, buffer_bytes_available(cpu, ANNOTATE_BUF) > header_size || !collect_annotations);
+
+		// Check to see if a signal is pending
+		if (signal_pending(current)) {
+			return -EINTR;
+		}
+
 		goto retry;
 	}
 
diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c
index c332187..5f98a1c 100644
--- a/drivers/gator/gator_cookies.c
+++ b/drivers/gator/gator_cookies.c
@@ -240,13 +240,13 @@
 
 	if (strcmp(text, "app_process") == 0) {
 		if (!translate_app_process(&text, cpu, task, from_wq))
-			return INVALID_COOKIE;
+			return UNRESOLVED_COOKIE;
 	}
 
 	// Can be called from interrupt handler or from work queue or from scheduler trace
 	local_irq_save(flags);
 
-	cookie = INVALID_COOKIE;
+	cookie = UNRESOLVED_COOKIE;
 	if (marshal_cookie_header(text)) {
 		cookie = per_cpu(cookie_next_key, cpu) += nr_cpu_ids;
 		cookiemap_add(key, cookie);
@@ -272,7 +272,7 @@
 		return get_cookie(cpu, task, text, false);
 	}
 
-	return INVALID_COOKIE;
+	return UNRESOLVED_COOKIE;
 }
 
 static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsigned long addr, off_t *offset)
@@ -302,7 +302,7 @@
 	}
 
 	if (!vma)
-		cookie = INVALID_COOKIE;
+		cookie = UNRESOLVED_COOKIE;
 
 	return cookie;
 }
diff --git a/drivers/gator/gator_events_ccn-504.c b/drivers/gator/gator_events_ccn-504.c
new file mode 100644
index 0000000..b91a9a1
--- /dev/null
+++ b/drivers/gator/gator_events_ccn-504.c
@@ -0,0 +1,306 @@
+/**
+ * Copyright (C) ARM Limited 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*******************************************************************************
+ * WARNING: This code is an experimental implementation of the CCN-504 hardware
+ * counters which has not been tested on the hardware. Commented debug
+ * statements are present and can be uncommented for diagnostic purposes.
+ ******************************************************************************/
+
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include "gator.h"
+
+#define PERIPHBASE 0x2E000000
+
+#define NUM_REGIONS 256
+#define REGION_SIZE (64*1024)
+#define REGION_DEBUG 1
+#define REGION_XP 64
+
+// DT (Debug) region
+#define PMEVCNTSR0    0x0150
+#define PMCCNTRSR     0x0190
+#define PMCR          0x01A8
+#define PMSR          0x01B0
+#define PMSR_REQ      0x01B8
+#define PMSR_CLR      0x01C0
+
+// XP region
+#define DT_CONFIG     0x0300
+
+// Multiple
+#define PMU_EVENT_SEL 0x0600
+#define OLY_ID        0xFF00
+
+#define CCNT 4
+#define CNTMAX (4 + 1)
+
+#define get_pmu_event_id(event) (((event) >> 0) & 0xFF)
+#define get_node_type(event) (((event) >> 8) & 0xFF)
+#define get_region(event) (((event) >> 16) & 0xFF)
+
+MODULE_PARM_DESC(ccn504_addr, "CCN-504 physical base address");
+static unsigned long ccn504_addr = 0;
+module_param(ccn504_addr, ulong, 0444);
+
+static void __iomem *gator_events_ccn504_base;
+static unsigned long gator_events_ccn504_enabled[CNTMAX];
+static unsigned long gator_events_ccn504_event[CNTMAX];
+static unsigned long gator_events_ccn504_key[CNTMAX];
+static int gator_events_ccn504_buffer[2*CNTMAX];
+
+static void gator_events_ccn504_create_shutdown(void)
+{
+	if (gator_events_ccn504_base != NULL) {
+		iounmap(gator_events_ccn504_base); // release the mapping made by ioremap() in gator_events_ccn504_init
+	}
+}
+
+static int gator_events_ccn504_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *dir;
+	int i;
+	char buf[32];
+	// Create one gatorfs directory per counter (CCN-504_cnt0..3 and CCN-504_ccnt); returns -1 if a mkdir fails, else 0
+	for (i = 0; i < CNTMAX; ++i) {
+		if (i == CCNT) {
+			snprintf(buf, sizeof(buf), "CCN-504_ccnt");
+		} else {
+			snprintf(buf, sizeof(buf), "CCN-504_cnt%i", i);
+		}
+		dir = gatorfs_mkdir(sb, root, buf);
+		if (!dir) {
+			return -1;
+		}
+		// The CCNT entry gets no "event" file; "key" is created through gatorfs_create_ro_ulong
+		gatorfs_create_ulong(sb, dir, "enabled", &gator_events_ccn504_enabled[i]);
+		if (i != CCNT) {
+			gatorfs_create_ulong(sb, dir, "event", &gator_events_ccn504_event[i]);
+		}
+		gatorfs_create_ro_ulong(sb, dir, "key", &gator_events_ccn504_key[i]);
+	}
+
+	return 0;
+}
+
+static void gator_events_ccn504_set_dt_config(int xp_node_id, int event_num, int value)
+{
+	u32 dt_config;
+	// Read-modify-write: OR (value + event_num), shifted left by 4*event_num, into DT_CONFIG of region REGION_XP + xp_node_id
+	dt_config = readl(gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
+	dt_config |= (value + event_num) << (4*event_num);
+	//printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, dt_config, (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
+	writel(dt_config, gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
+}
+
+/**
+ * Enable the CCN-504 PMU and program an event selection for every enabled
+ * event counter. Returns 0 on success, or -1 if the node found at an event's
+ * region does not report the node type encoded in that event.
+ */
+static int gator_events_ccn504_start(void)
+{
+	int i;
+
+	// Disable INTREQ on overflow
+	// [6] ovfl_intr_en = 0
+	// perhaps set to 1?
+	// [5] cntr_rst = 0
+	// No register pairing
+	// [4:1] cntcfg = 0
+	// Enable PMU features
+	// [0] pmu_en = 1
+	//printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0x1, REGION_DEBUG*REGION_SIZE + PMCR);
+	writel(0x1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMCR);
+
+	// Assume no other pmu_event_sel registers are set
+	// cycle counter does not need to be enabled
+	for (i = 0; i < CCNT; ++i) {
+		int pmu_event_id, node_type, region;
+		u32 pmu_event_sel;
+		u32 oly_id_whole, oly_id, node_id;
+
+		if (!gator_events_ccn504_enabled[i]) {
+			continue;
+		}
+
+		pmu_event_id = get_pmu_event_id(gator_events_ccn504_event[i]);
+		node_type = get_node_type(gator_events_ccn504_event[i]);
+		region = get_region(gator_events_ccn504_event[i]);
+		//printk(KERN_ERR "%s(%s:%i) pmu_event_id: %x node_type: %x region: %x\n", __FUNCTION__, __FILE__, __LINE__, pmu_event_id, node_type, region);
+
+		// Verify the node_type: exact match, or (assumed intent, TODO confirm) node_type 0x16 with one of the other ids listed below
+		oly_id_whole = readl(gator_events_ccn504_base + region*REGION_SIZE + OLY_ID);
+		oly_id = oly_id_whole & 0x1F;
+		node_id = (oly_id_whole >> 8) & 0x7F;
+		if ((oly_id != node_type) &&
+				!((node_type == 0x16) && ((oly_id == 0x14) || (oly_id == 0x15) || (oly_id == 0x18) || (oly_id == 0x19) || (oly_id == 0x1A)))) {
+			printk(KERN_ERR "%s(%s:%i) oly_id is %x expected %x\n", __FUNCTION__, __FILE__, __LINE__, oly_id, node_type);
+			return -1;
+		}
+
+		// Set the control register
+		pmu_event_sel = readl(gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+		switch (node_type) {
+		case 0x08: // XP
+			pmu_event_sel |= pmu_event_id << (7*i);
+			gator_events_ccn504_set_dt_config(node_id, i, 0x4);
+			break;
+		case 0x04: // HN-F
+		case 0x16: // RN-I
+		case 0x10: // SBAS
+			pmu_event_sel |= pmu_event_id << (4*i);
+			gator_events_ccn504_set_dt_config(node_id/2, i, (node_id & 1) == 0 ? 0x8 : 0xC);
+			break;
+		}
+		//printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, pmu_event_sel, region*REGION_SIZE + PMU_EVENT_SEL);
+		writel(pmu_event_sel, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+	}
+
+	return 0;
+}
+
+// Undo start(): clear the event selection of every enabled counter, then DT_CONFIG of the 11 XP regions
+static void gator_events_ccn504_stop(void)
+{
+	int i;
+
+	// cycle counter does not need to be disabled
+	for (i = 0; i < CCNT; ++i) {
+		int region = get_region(gator_events_ccn504_event[i]);
+		// start() only programs enabled counters, so leave the regions of disabled ones untouched
+		if (!gator_events_ccn504_enabled[i]) {
+			continue;
+		}
+		//printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0, region*REGION_SIZE + PMU_EVENT_SEL);
+		writel(0, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+	}
+
+	// Clear dt_config
+	for (i = 0; i < 11; ++i) {
+		//printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0, (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
+		writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
+	}
+}
+
+// Primary core only: snapshot the PMU, store key/value pairs for enabled counters, return the number of ints stored
+static int gator_events_ccn504_read(int **buffer)
+{
+	int i, len = 0;
+
+	if (!on_primary_core()) {
+		return 0;
+	}
+
+	// Verify the pmsr register is zero
+	//i = 0;
+	while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) != 0) {
+		//++i;
+	}
+	//printk(KERN_ERR "%s(%s:%i) %i\n", __FUNCTION__, __FILE__, __LINE__, i);
+
+	// Request a PMU snapshot
+	writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_REQ);
+
+	// Wait for the snapshot
+	//i = 0;
+	while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) == 0) {
+		//++i;
+	}
+	//printk(KERN_ERR "%s(%s:%i) %i\n", __FUNCTION__, __FILE__, __LINE__, i);
+
+	// Read the shadow registers
+	for (i = 0; i < CNTMAX; ++i) {
+		if (!gator_events_ccn504_enabled[i]) {
+			continue;
+		}
+		gator_events_ccn504_buffer[len++] = gator_events_ccn504_key[i];
+		gator_events_ccn504_buffer[len++] = readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + (i == CCNT ? PMCCNTRSR : PMEVCNTSR0 + 8*i));
+		// Are the counters registers cleared when read? Is that what the cntr_rst bit on the pmcr register does?
+	}
+	// Clear the PMU snapshot status
+	writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_CLR);
+
+	if (buffer) {
+		*buffer = gator_events_ccn504_buffer; // hand the pairs to the caller; without this they are never seen
+	}
+	return len;
+}
+
+static void __maybe_unused gator_events_ccn504_enumerate(int pos, int size)
+{
+	int i;
+	u32 oly_id;
+	// Debug aid: log the raw OLY_ID register of regions pos .. pos+size-1
+	for (i = pos; i < pos + size; ++i) {
+		oly_id = readl(gator_events_ccn504_base + i*REGION_SIZE + OLY_ID);
+		printk(KERN_ERR "%s(%s:%i) %i %08x\n", __FUNCTION__, __FILE__, __LINE__, i, oly_id);
+	}
+}
+
+static struct gator_interface gator_events_ccn504_interface = {
+	.shutdown = gator_events_ccn504_create_shutdown,
+	.create_files = gator_events_ccn504_create_files,
+	.start = gator_events_ccn504_start,
+	.stop = gator_events_ccn504_stop,
+	.read = gator_events_ccn504_read,
+};
+
+int gator_events_ccn504_init(void)
+{
+	int i;
+	// Map the CCN-504 registers and install the interface; returns -1 unless the ccn504_addr module parameter is non-zero
+	if (ccn504_addr == 0) {
+		return -1;
+	}
+
+	gator_events_ccn504_base = ioremap(ccn504_addr, NUM_REGIONS*REGION_SIZE);
+	if (gator_events_ccn504_base == NULL) {
+		printk(KERN_ERR "%s(%s:%i) ioremap returned NULL\n", __FUNCTION__, __FILE__, __LINE__);
+		return -1;
+	}
+	//printk(KERN_ERR "%s(%s:%i)\n", __FUNCTION__, __FILE__, __LINE__);
+
+	// Test - can memory be read
+	{
+		//gator_events_ccn504_enumerate(0, NUM_REGIONS);
+
+#if 0
+		// DT
+		gator_events_ccn504_enumerate(1, 1);
+		// HN-F
+		gator_events_ccn504_enumerate(32, 8);
+		// XP
+		gator_events_ccn504_enumerate(64, 11);
+		// RN-I
+		gator_events_ccn504_enumerate(128, 1);
+		gator_events_ccn504_enumerate(130, 1);
+		gator_events_ccn504_enumerate(134, 1);
+		gator_events_ccn504_enumerate(140, 1);
+		gator_events_ccn504_enumerate(144, 1);
+		gator_events_ccn504_enumerate(148, 1);
+		// SBAS
+		gator_events_ccn504_enumerate(129, 1);
+		gator_events_ccn504_enumerate(137, 1);
+		gator_events_ccn504_enumerate(139, 1);
+		gator_events_ccn504_enumerate(147, 1);
+#endif
+	}
+	// Start with every counter disabled and event 0, and fetch a key for each from gator_events_get_key()
+	for (i = 0; i < CNTMAX; ++i) {
+		gator_events_ccn504_enabled[i] = 0;
+		gator_events_ccn504_event[i] = 0;
+		gator_events_ccn504_key[i] = gator_events_get_key();
+	}
+
+	return gator_events_install(&gator_events_ccn504_interface);
+}
+
+gator_events_init(gator_events_ccn504_init);
diff --git a/drivers/gator/gator_events_mali_400.c b/drivers/gator/gator_events_mali_400.c
deleted file mode 100644
index 38c97d1..0000000
--- a/drivers/gator/gator_events_mali_400.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/**
- * Copyright (C) ARM Limited 2010-2013. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "gator.h"
-
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/math64.h>
-
-#include "linux/mali_linux_trace.h"
-
-#include "gator_events_mali_common.h"
-#include "gator_events_mali_400.h"
-
-/*
- * There are (currently) three different variants of the comms between gator and Mali:
- * 1 (deprecated): No software counter support
- * 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears
- * 3 (default): Single tracepoint for all s/w counters in a bundle.
- * Interface style 3 is the default if no other is specified.  1 and 2 will be eliminated when
- * existing Mali DDKs are upgraded.
- */
-
-#if !defined(GATOR_MALI_INTERFACE_STYLE)
-#define GATOR_MALI_INTERFACE_STYLE (3)
-#endif
-
-/*
- * List of possible actions allowing DDK to be controlled by Streamline.
- * The following numbers are used by DDK to control the frame buffer dumping.
- */
-#define FBDUMP_CONTROL_ENABLE (1)
-#define FBDUMP_CONTROL_RATE (2)
-#define SW_EVENTS_ENABLE      (3)
-#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
-
-/*
- * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
- */
-#if (MALI_SUPPORT != MALI_400)
-#error MALI_SUPPORT set to an invalid device code: expecting MALI_400
-#endif
-
-/*
- * The number of fragment processors.  Update to suit your hardware implementation.
- */
-#define NUM_FP_UNITS            (4)
-
-enum counters {
-	/* Timeline activity */
-	ACTIVITY_VP = 0,
-	ACTIVITY_FP0,
-	ACTIVITY_FP1,
-	ACTIVITY_FP2,
-	ACTIVITY_FP3,
-
-	/* L2 cache counters */
-	COUNTER_L2_C0,
-	COUNTER_L2_C1,
-
-	/* Vertex processor counters */
-	COUNTER_VP_C0,
-	COUNTER_VP_C1,
-
-	/* Fragment processor counters */
-	COUNTER_FP0_C0,
-	COUNTER_FP0_C1,
-	COUNTER_FP1_C0,
-	COUNTER_FP1_C1,
-	COUNTER_FP2_C0,
-	COUNTER_FP2_C1,
-	COUNTER_FP3_C0,
-	COUNTER_FP3_C1,
-
-	/* EGL Software Counters */
-	COUNTER_EGL_BLIT_TIME,
-
-	/* GLES Software Counters */
-	COUNTER_GLES_DRAW_ELEMENTS_CALLS,
-	COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
-	COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
-	COUNTER_GLES_DRAW_ARRAYS_CALLS,
-	COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
-	COUNTER_GLES_DRAW_POINTS,
-	COUNTER_GLES_DRAW_LINES,
-	COUNTER_GLES_DRAW_LINE_LOOP,
-	COUNTER_GLES_DRAW_LINE_STRIP,
-	COUNTER_GLES_DRAW_TRIANGLES,
-	COUNTER_GLES_DRAW_TRIANGLE_STRIP,
-	COUNTER_GLES_DRAW_TRIANGLE_FAN,
-	COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
-	COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
-	COUNTER_GLES_UPLOAD_TEXTURE_TIME,
-	COUNTER_GLES_UPLOAD_VBO_TIME,
-	COUNTER_GLES_NUM_FLUSHES,
-	COUNTER_GLES_NUM_VSHADERS_GENERATED,
-	COUNTER_GLES_NUM_FSHADERS_GENERATED,
-	COUNTER_GLES_VSHADER_GEN_TIME,
-	COUNTER_GLES_FSHADER_GEN_TIME,
-	COUNTER_GLES_INPUT_TRIANGLES,
-	COUNTER_GLES_VXCACHE_HIT,
-	COUNTER_GLES_VXCACHE_MISS,
-	COUNTER_GLES_VXCACHE_COLLISION,
-	COUNTER_GLES_CULLED_TRIANGLES,
-	COUNTER_GLES_CULLED_LINES,
-	COUNTER_GLES_BACKFACE_TRIANGLES,
-	COUNTER_GLES_GBCLIP_TRIANGLES,
-	COUNTER_GLES_GBCLIP_LINES,
-	COUNTER_GLES_TRIANGLES_DRAWN,
-	COUNTER_GLES_DRAWCALL_TIME,
-	COUNTER_GLES_TRIANGLES_COUNT,
-	COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
-	COUNTER_GLES_STRIP_TRIANGLES_COUNT,
-	COUNTER_GLES_FAN_TRIANGLES_COUNT,
-	COUNTER_GLES_LINES_COUNT,
-	COUNTER_GLES_INDEPENDENT_LINES_COUNT,
-	COUNTER_GLES_STRIP_LINES_COUNT,
-	COUNTER_GLES_LOOP_LINES_COUNT,
-
-	COUNTER_FILMSTRIP,
-	COUNTER_FREQUENCY,
-	COUNTER_VOLTAGE,
-
-	NUMBER_OF_EVENTS
-};
-
-#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP
-#define LAST_ACTIVITY_EVENT     ACTIVITY_FP3
-
-#define FIRST_HW_COUNTER        COUNTER_L2_C0
-#define LAST_HW_COUNTER         COUNTER_FP3_C1
-
-#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
-#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
-
-#define FIRST_SPECIAL_COUNTER   COUNTER_FILMSTRIP
-#define LAST_SPECIAL_COUNTER    COUNTER_VOLTAGE
-
-/* gatorfs variables for counter enable state,
- * the event the counter should count and the
- * 'key' (a unique id set by gatord and returned
- * by gator.ko)
- */
-static unsigned long counter_enabled[NUMBER_OF_EVENTS];
-static unsigned long counter_event[NUMBER_OF_EVENTS];
-static unsigned long counter_key[NUMBER_OF_EVENTS];
-
-/* The data we have recorded */
-static u32 counter_data[NUMBER_OF_EVENTS];
-/* The address to sample (or 0 if samples are sent to us) */
-static u32 *counter_address[NUMBER_OF_EVENTS];
-
-/* An array used to return the data we recorded
- * as key,value pairs hence the *2
- */
-static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
-static unsigned long counter_prev[NUMBER_OF_EVENTS];
-
-/* Note whether tracepoints have been registered */
-static int trace_registered;
-
-/**
- * Calculate the difference and handle the overflow.
- */
-static u32 get_difference(u32 start, u32 end)
-{
-	if (start - end >= 0) {
-		return start - end;
-	}
-
-	// Mali counters are unsigned 32 bit values that wrap.
-	return (4294967295u - end) + start;
-}
-
-/**
- * Returns non-zero if the given counter ID is an activity counter.
- */
-static inline int is_activity_counter(unsigned int event_id)
-{
-	return (event_id >= FIRST_ACTIVITY_EVENT &&
-		event_id <= LAST_ACTIVITY_EVENT);
-}
-
-/**
- * Returns non-zero if the given counter ID is a hardware counter.
- */
-static inline int is_hw_counter(unsigned int event_id)
-{
-	return (event_id >= FIRST_HW_COUNTER && event_id <= LAST_HW_COUNTER);
-}
-
-#if GATOR_MALI_INTERFACE_STYLE == 2
-/**
- * Returns non-zero if the given counter ID is a software counter.
- */
-static inline int is_sw_counter(unsigned int event_id)
-{
-	return (event_id >= FIRST_SW_COUNTER && event_id <= LAST_SW_COUNTER);
-}
-#endif
-
-#if GATOR_MALI_INTERFACE_STYLE == 2
-/*
- * The Mali DDK uses s64 types to contain software counter values, but gator
- * can only use a maximum of 32 bits. This function scales a software counter
- * to an appropriate range.
- */
-static u32 scale_sw_counter_value(unsigned int event_id, signed long long value)
-{
-	u32 scaled_value;
-
-	switch (event_id) {
-	case COUNTER_GLES_UPLOAD_TEXTURE_TIME:
-	case COUNTER_GLES_UPLOAD_VBO_TIME:
-		scaled_value = (u32)div_s64(value, 1000000);
-		break;
-	default:
-		scaled_value = (u32)value;
-		break;
-	}
-
-	return scaled_value;
-}
-#endif
-
-/* Probe for continuously sampled counter */
-#if 0				//WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING
-GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr))
-{
-	/* Turning on too many pr_debug statements in frequently called functions
-	 * can cause stability and/or performance problems
-	 */
-	//pr_debug("gator: mali_sample_address %d %d\n", event_id, addr);
-	if (event_id >= ACTIVITY_VP && event_id <= COUNTER_FP3_C1) {
-		counter_address[event_id] = addr;
-	}
-}
-#endif
-
-/* Probe for hardware counter events */
-GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value))
-{
-	/* Turning on too many pr_debug statements in frequently called functions
-	 * can cause stability and/or performance problems
-	 */
-	//pr_debug("gator: mali_hw_counter %d %d\n", event_id, value);
-	if (is_hw_counter(event_id)) {
-		counter_data[event_id] = value;
-	}
-}
-
-#if GATOR_MALI_INTERFACE_STYLE == 2
-GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value))
-{
-	if (is_sw_counter(event_id)) {
-		counter_data[event_id] = scale_sw_counter_value(event_id, value);
-	}
-}
-#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */
-
-#if GATOR_MALI_INTERFACE_STYLE == 3
-GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
-{
-	u32 i;
-
-	/* Copy over the values for those counters which are enabled. */
-	for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
-		if (counter_enabled[i]) {
-			counter_data[i] = (u32)(counters[i - FIRST_SW_COUNTER]);
-		}
-	}
-}
-#endif /* GATOR_MALI_INTERFACE_STYLE == 3 */
-
-static int create_files(struct super_block *sb, struct dentry *root)
-{
-	struct dentry *dir;
-	int event;
-	int n_fp = NUM_FP_UNITS;
-
-	const char *mali_name = gator_mali_get_mali_name();
-
-	/*
-	 * Create the filesystem entries for vertex processor, fragment processor
-	 * and L2 cache timeline and hardware counters. Software counters get 
-	 * special handling after this block.
-	 */
-	for (event = FIRST_ACTIVITY_EVENT; event <= LAST_HW_COUNTER; event++) {
-		char buf[40];
-
-		/* 
-		 * We can skip this event if it's for a non-existent fragment
-		 * processor.
-		 */
-		if (((event - ACTIVITY_FP0 >= n_fp) && (event < COUNTER_L2_C0))
-		    || (((event - COUNTER_FP0_C0) / 2 >= n_fp))) {
-			continue;
-		}
-
-		/* Otherwise, set up the filesystem entry for this event. */
-		switch (event) {
-		case ACTIVITY_VP:
-			snprintf(buf, sizeof buf, "ARM_%s_VP_active", mali_name);
-			break;
-		case ACTIVITY_FP0:
-		case ACTIVITY_FP1:
-		case ACTIVITY_FP2:
-		case ACTIVITY_FP3:
-			snprintf(buf, sizeof buf, "ARM_%s_FP%d_active",
-				 mali_name, event - ACTIVITY_FP0);
-			break;
-		case COUNTER_L2_C0:
-		case COUNTER_L2_C1:
-			snprintf(buf, sizeof buf, "ARM_%s_L2_cnt%d",
-				 mali_name, event - COUNTER_L2_C0);
-			break;
-		case COUNTER_VP_C0:
-		case COUNTER_VP_C1:
-			snprintf(buf, sizeof buf, "ARM_%s_VP_cnt%d",
-				 mali_name, event - COUNTER_VP_C0);
-			break;
-		case COUNTER_FP0_C0:
-		case COUNTER_FP0_C1:
-		case COUNTER_FP1_C0:
-		case COUNTER_FP1_C1:
-		case COUNTER_FP2_C0:
-		case COUNTER_FP2_C1:
-		case COUNTER_FP3_C0:
-		case COUNTER_FP3_C1:
-			snprintf(buf, sizeof buf, "ARM_%s_FP%d_cnt%d",
-				 mali_name, (event - COUNTER_FP0_C0) / 2,
-				 (event - COUNTER_FP0_C0) % 2);
-			break;
-		default:
-			printk("gator: trying to create file for non-existent counter (%d)\n", event);
-			continue;
-		}
-
-		dir = gatorfs_mkdir(sb, root, buf);
-
-		if (!dir) {
-			return -1;
-		}
-
-		gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
-
-		/* Only create an event node for counters that can change what they count */
-		if (event >= COUNTER_L2_C0) {
-			gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
-		}
-
-		gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
-	}
-
-	/* Now set up the software counter entries */
-	for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) {
-		char buf[40];
-
-		snprintf(buf, sizeof(buf), "ARM_%s_SW_%d", mali_name, event);
-
-		dir = gatorfs_mkdir(sb, root, buf);
-
-		if (!dir) {
-			return -1;
-		}
-
-		gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
-		gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
-	}
-
-	/* Now set up the special counter entries */
-	for (event = FIRST_SPECIAL_COUNTER; event <= LAST_SPECIAL_COUNTER; event++) {
-		char buf[40];
-
-		switch (event) {
-		case COUNTER_FILMSTRIP:
-			snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name);
-			break;
-
-		case COUNTER_FREQUENCY:
-			snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name);
-			break;
-
-		case COUNTER_VOLTAGE:
-			snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name);
-			break;
-
-		default:
-			break;
-		}
-
-		dir = gatorfs_mkdir(sb, root, buf);
-
-		if (!dir) {
-			return -1;
-		}
-
-		gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
-		gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
-		gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
-	}
-
-	return 0;
-}
-
-/*
- * Local store for the get_counters entry point into the DDK.
- * This is stored here since it is used very regularly.
- */
-static mali_profiling_get_counters_type *mali_get_counters = NULL;
-
-/*
- * Examine list of software counters and determine if any one is enabled.
- * Returns 1 if any counter is enabled, 0 if none is.
- */
-static int is_any_sw_counter_enabled(void)
-{
-	unsigned int i;
-
-	for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
-		if (counter_enabled[i]) {
-			return 1;	/* At least one counter is enabled */
-		}
-	}
-
-	return 0;		/* No s/w counters enabled */
-}
-
-static void mali_counter_initialize(void)
-{
-	/* If a Mali driver is present and exporting the appropriate symbol
-	 * then we can request the HW counters (of which there are only 2)
-	 * be configured to count the desired events
-	 */
-	mali_profiling_set_event_type *mali_set_hw_event;
-	mali_osk_fb_control_set_type *mali_set_fb_event;
-	mali_profiling_control_type *mali_control;
-
-	mali_set_hw_event = symbol_get(_mali_profiling_set_event);
-
-	if (mali_set_hw_event) {
-		int i;
-
-		pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
-
-		for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) {
-			if (counter_enabled[i]) {
-				mali_set_hw_event(i, counter_event[i]);
-			} else {
-				mali_set_hw_event(i, 0xFFFFFFFF);
-			}
-		}
-
-		symbol_put(_mali_profiling_set_event);
-	} else {
-		printk("gator: mali online _mali_profiling_set_event symbol not found\n");
-	}
-
-	mali_set_fb_event = symbol_get(_mali_osk_fb_control_set);
-
-	if (mali_set_fb_event) {
-		pr_debug("gator: mali online _mali_osk_fb_control_set symbol @ %p\n", mali_set_fb_event);
-
-		mali_set_fb_event(0, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0));
-
-		symbol_put(_mali_osk_fb_control_set);
-	} else {
-		printk("gator: mali online _mali_osk_fb_control_set symbol not found\n");
-	}
-
-	/* Generic control interface for Mali DDK. */
-	mali_control = symbol_get(_mali_profiling_control);
-	if (mali_control) {
-		/* The event attribute in the XML file keeps the actual frame rate. */
-		unsigned int rate = counter_event[COUNTER_FILMSTRIP] & 0xff;
-		unsigned int resize_factor = (counter_event[COUNTER_FILMSTRIP] >> 8) & 0xff;
-
-		pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
-
-		mali_control(SW_EVENTS_ENABLE, (is_any_sw_counter_enabled() ? 1 : 0));
-		mali_control(FBDUMP_CONTROL_ENABLE, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0));
-		mali_control(FBDUMP_CONTROL_RATE, rate);
-		mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
-
-		pr_debug("gator: sent mali_control enabled=%d, rate=%d\n", (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0), rate);
-
-		symbol_put(_mali_profiling_control);
-	} else {
-		printk("gator: mali online _mali_profiling_control symbol not found\n");
-	}
-
-	mali_get_counters = symbol_get(_mali_profiling_get_counters);
-	if (mali_get_counters) {
-		pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters);
-		counter_prev[COUNTER_L2_C0] = 0;
-		counter_prev[COUNTER_L2_C1] = 0;
-	} else {
-		pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined");
-	}
-}
-
-static void mali_counter_deinitialize(void)
-{
-	mali_profiling_set_event_type *mali_set_hw_event;
-	mali_osk_fb_control_set_type *mali_set_fb_event;
-	mali_profiling_control_type *mali_control;
-
-	mali_set_hw_event = symbol_get(_mali_profiling_set_event);
-
-	if (mali_set_hw_event) {
-		int i;
-
-		pr_debug("gator: mali offline _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
-		for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) {
-			mali_set_hw_event(i, 0xFFFFFFFF);
-		}
-
-		symbol_put(_mali_profiling_set_event);
-	} else {
-		printk("gator: mali offline _mali_profiling_set_event symbol not found\n");
-	}
-
-	mali_set_fb_event = symbol_get(_mali_osk_fb_control_set);
-
-	if (mali_set_fb_event) {
-		pr_debug("gator: mali offline _mali_osk_fb_control_set symbol @ %p\n", mali_set_fb_event);
-
-		mali_set_fb_event(0, 0);
-
-		symbol_put(_mali_osk_fb_control_set);
-	} else {
-		printk("gator: mali offline _mali_osk_fb_control_set symbol not found\n");
-	}
-
-	/* Generic control interface for Mali DDK. */
-	mali_control = symbol_get(_mali_profiling_control);
-
-	if (mali_control) {
-		pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_set_fb_event);
-
-		/* Reset the DDK state - disable counter collection */
-		mali_control(SW_EVENTS_ENABLE, 0);
-
-		mali_control(FBDUMP_CONTROL_ENABLE, 0);
-
-		symbol_put(_mali_profiling_control);
-	} else {
-		printk("gator: mali offline _mali_profiling_control symbol not found\n");
-	}
-
-	if (mali_get_counters) {
-		symbol_put(_mali_profiling_get_counters);
-	}
-
-}
-
-static int start(void)
-{
-	// register tracepoints
-	if (GATOR_REGISTER_TRACE(mali_hw_counter)) {
-		printk("gator: mali_hw_counter tracepoint failed to activate\n");
-		return -1;
-	}
-
-#if GATOR_MALI_INTERFACE_STYLE == 1
-	/* None. */
-#elif GATOR_MALI_INTERFACE_STYLE == 2
-	/* For patched Mali driver. */
-	if (GATOR_REGISTER_TRACE(mali_sw_counter)) {
-		printk("gator: mali_sw_counter tracepoint failed to activate\n");
-		return -1;
-	}
-#elif GATOR_MALI_INTERFACE_STYLE == 3
-/* For Mali drivers with built-in support. */
-	if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
-		printk("gator: mali_sw_counters tracepoint failed to activate\n");
-		return -1;
-	}
-#else
-#error Unknown GATOR_MALI_INTERFACE_STYLE option.
-#endif
-
-	trace_registered = 1;
-
-	mali_counter_initialize();
-	return 0;
-}
-
-static void stop(void)
-{
-	unsigned int cnt;
-
-	pr_debug("gator: mali stop\n");
-
-	if (trace_registered) {
-		GATOR_UNREGISTER_TRACE(mali_hw_counter);
-
-#if GATOR_MALI_INTERFACE_STYLE == 1
-		/* None. */
-#elif GATOR_MALI_INTERFACE_STYLE == 2
-		/* For patched Mali driver. */
-		GATOR_UNREGISTER_TRACE(mali_sw_counter);
-#elif GATOR_MALI_INTERFACE_STYLE == 3
-		/* For Mali drivers with built-in support. */
-		GATOR_UNREGISTER_TRACE(mali_sw_counters);
-#else
-#error Unknown GATOR_MALI_INTERFACE_STYLE option.
-#endif
-
-		pr_debug("gator: mali timeline tracepoint deactivated\n");
-
-		trace_registered = 0;
-	}
-
-	for (cnt = FIRST_ACTIVITY_EVENT; cnt < NUMBER_OF_EVENTS; cnt++) {
-		counter_enabled[cnt] = 0;
-		counter_event[cnt] = 0;
-		counter_address[cnt] = NULL;
-	}
-
-	mali_counter_deinitialize();
-}
-
-static int read(int **buffer)
-{
-	int cnt, len = 0;
-
-	if (!on_primary_core())
-		return 0;
-
-	// Read the L2 C0 and C1 here.
-	if (counter_enabled[COUNTER_L2_C0] || counter_enabled[COUNTER_L2_C1]) {
-		u32 src0 = 0;
-		u32 val0 = 0;
-		u32 src1 = 0;
-		u32 val1 = 0;
-
-		// Poke the driver to get the counter values
-		if (mali_get_counters) {
-			mali_get_counters(&src0, &val0, &src1, &val1);
-		}
-
-		if (counter_enabled[COUNTER_L2_C0]) {
-			// Calculate and save src0's counter val0
-			counter_dump[len++] = counter_key[COUNTER_L2_C0];
-			counter_dump[len++] = get_difference(val0, counter_prev[COUNTER_L2_C0]);
-		}
-
-		if (counter_enabled[COUNTER_L2_C1]) {
-			// Calculate and save src1's counter val1
-			counter_dump[len++] = counter_key[COUNTER_L2_C1];
-			counter_dump[len++] = get_difference(val1, counter_prev[COUNTER_L2_C1]);
-		}
-
-		// Save the previous values for the counters.
-		counter_prev[COUNTER_L2_C0] = val0;
-		counter_prev[COUNTER_L2_C1] = val1;
-	}
-
-	// Process other (non-timeline) counters.
-	for (cnt = COUNTER_VP_C0; cnt <= LAST_SW_COUNTER; cnt++) {
-		if (counter_enabled[cnt]) {
-			counter_dump[len++] = counter_key[cnt];
-			counter_dump[len++] = counter_data[cnt];
-
-			counter_data[cnt] = 0;
-		}
-	}
-
-	/*
-	 * Add in the voltage and frequency counters if enabled.  Note that, since these are
-	 * actually passed as events, the counter value should not be cleared.
-	 */
-	cnt = COUNTER_FREQUENCY;
-	if (counter_enabled[cnt]) {
-		counter_dump[len++] = counter_key[cnt];
-		counter_dump[len++] = counter_data[cnt];
-	}
-
-	cnt = COUNTER_VOLTAGE;
-	if (counter_enabled[cnt]) {
-		counter_dump[len++] = counter_key[cnt];
-		counter_dump[len++] = counter_data[cnt];
-	}
-
-	if (buffer) {
-		*buffer = (int *)counter_dump;
-	}
-
-	return len;
-}
-
-static struct gator_interface gator_events_mali_interface = {
-	.create_files = create_files,
-	.start = start,
-	.stop = stop,
-	.read = read,
-};
-
-extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv)
-{
-	counter_data[COUNTER_FREQUENCY] = frequency_mhz;
-	counter_data[COUNTER_VOLTAGE] = voltage_mv;
-}
-
-int gator_events_mali_init(void)
-{
-	unsigned int cnt;
-
-	pr_debug("gator: mali init\n");
-
-	for (cnt = FIRST_ACTIVITY_EVENT; cnt < NUMBER_OF_EVENTS; cnt++) {
-		counter_enabled[cnt] = 0;
-		counter_event[cnt] = 0;
-		counter_key[cnt] = gator_events_get_key();
-		counter_address[cnt] = NULL;
-		counter_data[cnt] = 0;
-	}
-
-	trace_registered = 0;
-
-	return gator_events_install(&gator_events_mali_interface);
-}
-
-gator_events_init(gator_events_mali_init);
diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c
new file mode 100644
index 0000000..dd275f7
--- /dev/null
+++ b/drivers/gator/gator_events_mali_4xx.c
@@ -0,0 +1,751 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+
+#include "linux/mali_linux_trace.h"
+
+#include "gator_events_mali_common.h"
+#include "gator_events_mali_4xx.h"
+
+/*
+ * There are (currently) four different variants of the comms between gator and Mali:
+ * 1 (deprecated): No software counter support
+ * 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears
+ * 3 (default): Single tracepoint for all s/w counters in a bundle.
+ * Interface style 3 is the default if no other is specified.  1 and 2 will be eliminated when
+ * existing Mali DDKs are upgraded.
+ * 4: As style 3, but for the Utgard (Mali-450) driver.
+ */
+
+#if !defined(GATOR_MALI_INTERFACE_STYLE)
+#define GATOR_MALI_INTERFACE_STYLE (3)
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE < 4
+#include "mali/mali_mjollnir_profiling_gator_api.h"
+#else
+#include "mali/mali_utgard_profiling_gator_api.h"
+#endif
+
+/*
+ * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
+ */
+#if (MALI_SUPPORT != MALI_4xx)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
+#endif
+
+/* gatorfs variables for counter enable state,
+ * the event the counter should count and the
+ * 'key' (a unique id set by gatord and returned
+ * by gator.ko)
+ */
+static unsigned long counter_enabled[NUMBER_OF_EVENTS];
+static unsigned long counter_event[NUMBER_OF_EVENTS];
+static unsigned long counter_key[NUMBER_OF_EVENTS];
+
+/* The data we have recorded */
+static u32 counter_data[NUMBER_OF_EVENTS];
+/* The address to sample (or 0 if samples are sent to us) */
+static u32 *counter_address[NUMBER_OF_EVENTS];
+
+/* An array used to return the data we recorded
+ * as key,value pairs hence the *2
+ */
+static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
+static unsigned long counter_prev[NUMBER_OF_EVENTS];
+
+/* Note whether tracepoints have been registered */
+static int trace_registered;
+
+/*
+ * These numbers define the actual numbers of each block type that exist in the system. Initially
+ * these are set to the MAX_NUM_* maxima used in the initialisers below; if the driver is capable
+ * of being queried (newer drivers only) then the values may be revised.
+ */
+static unsigned int n_vp_cores = MAX_NUM_VP_CORES;
+static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
+static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
+
+/**
+ * Return the number of counts between two samples of a wrapping Mali counter.
+ * @param start the newer sample
+ * @param end the older sample, subtracted from start
+ *
+ * Mali counters are unsigned 32 bit values that wrap.  u32 subtraction is modulo 2^32, so it
+ * already yields the correct delta across a wrap; no separate overflow branch is required.
+ */
+static u32 get_difference(u32 start, u32 end)
+{
+	return start - end;
+}
+
+/**
+ * Tests whether the given counter ID is an activity counter, i.e. lies within
+ * [FIRST_ACTIVITY_EVENT, LAST_ACTIVITY_EVENT].  Returns non-zero if so.
+ */
+static inline int is_activity_counter(unsigned int event_id)
+{
+	return !(event_id < FIRST_ACTIVITY_EVENT || event_id > LAST_ACTIVITY_EVENT);
+}
+
+/**
+ * Non-zero when event_id lies within [FIRST_HW_COUNTER, LAST_HW_COUNTER].
+ */
+static inline int is_hw_counter(unsigned int event_id)
+{
+	return !(event_id < FIRST_HW_COUNTER || event_id > LAST_HW_COUNTER);
+}
+
+/*
+ * These are provided for utgard compatibility.
+ */
+typedef void _mali_profiling_get_mali_version_type(struct _mali_profiling_mali_version *values);
+typedef u32 _mali_profiling_get_l2_counters_type(_mali_profiling_l2_counter_values *values);
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/**
+ * Non-zero when event_id lies within [FIRST_SW_COUNTER, LAST_SW_COUNTER].
+ */
+static inline int is_sw_counter(unsigned int event_id)
+{
+	return !(event_id < FIRST_SW_COUNTER || event_id > LAST_SW_COUNTER);
+}
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/*
+ * Reduce a software counter value from the Mali DDK to the 32 bits gator can record.
+ *
+ * @param event_id ID of the software counter the value belongs to
+ * @param value the signed 64 bit value supplied by the DDK
+ *
+ * @return value / 1000000 for the two GLES upload-time counters, value itself for
+ * every other counter; either way converted to u32.
+ */
+static u32 scale_sw_counter_value(unsigned int event_id, signed long long value)
+{
+	/* Only the upload-time counters are scaled down. */
+	const int is_upload_time = (event_id == COUNTER_GLES_UPLOAD_TEXTURE_TIME ||
+				    event_id == COUNTER_GLES_UPLOAD_VBO_TIME);
+
+	if (is_upload_time) {
+		return (u32)div_s64(value, 1000000);
+	}
+
+	return (u32)value;
+}
+#endif
+
+/* Probe for continuously sampled counter */
+#if 0				//WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING
+GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr))
+{
+	/* Turning on too many pr_debug statements in frequently called functions
+	 * can cause stability and/or performance problems
+	 */
+	//pr_debug("gator: mali_sample_address %d %d\n", event_id, addr);
+	if (event_id >= ACTIVITY_VP && event_id <= COUNTER_FP3_C1) {
+		counter_address[event_id] = addr;
+	}
+}
+#endif
+
+/* Tracepoint probe: latch the latest value reported for a hardware counter; other IDs are ignored */
+GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value))
+{
+	/* Turning on too many pr_debug statements in frequently called functions
+	 * can cause stability and/or performance problems
+	 */
+	//pr_debug("gator: mali_hw_counter %d %d\n", event_id, value);
+	if (is_hw_counter(event_id)) {
+		counter_data[event_id] = value;
+	}
+}
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value))
+{
+	if (is_sw_counter(event_id)) {	/* IDs outside the s/w counter range are dropped */
+		counter_data[event_id] = scale_sw_counter_value(event_id, value);	/* stored reduced to 32 bits */
+	}
+}
+#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */
+
+#if GATOR_MALI_INTERFACE_STYLE >= 3
+GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
+{
+	u32 i;
+
+	/* counters[] is indexed from FIRST_SW_COUNTER; copy only the enabled ones (pid, tid, surface_id are unused). */
+	for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
+		if (counter_enabled[i]) {
+			counter_data[i] = (u32)(counters[i - FIRST_SW_COUNTER]);
+		}
+	}
+}
+#endif /* GATOR_MALI_INTERFACE_STYLE >= 3 */
+
+/**
+ * Create a gatorfs directory for one event holding 'enabled' and 'key' items and, optionally, an 'event' item.
+ * @param sb the superblock
+ * @param root Filesystem root
+ * @param name The name of the directory to create
+ * @param event The ID of the event; indexes counter_event, counter_enabled and counter_key
+ * @param create_event_item boolean indicating whether to create an 'event' filesystem entry. True to create.
+ *
+ * @return 0 if ok, -1 if the directory could not be created. Results of the item creates are not checked.
+ */
+static int create_fs_entry(struct super_block *sb, struct dentry *root, const char *name, int event, int create_event_item)
+{
+	struct dentry *dir;
+
+	dir = gatorfs_mkdir(sb, root, name);
+
+	if (!dir) {
+		return -1;
+	}
+
+	if (create_event_item) {
+		gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
+	}
+
+	gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
+	gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
+
+	return 0;
+}
+
+#if GATOR_MALI_INTERFACE_STYLE > 3
+/*
+ * Query the DDK for its core counts if it exports _mali_profiling_get_mali_version; otherwise keep the defaults.
+ */
+static void initialise_version_info(void)
+{
+	_mali_profiling_get_mali_version_type *mali_profiling_get_mali_version_symbol;
+
+	mali_profiling_get_mali_version_symbol = symbol_get(_mali_profiling_get_mali_version);
+
+	if (mali_profiling_get_mali_version_symbol) {
+		struct _mali_profiling_mali_version version_info;
+
+		pr_debug("gator: mali online _mali_profiling_get_mali_version symbol @ %p\n",
+				mali_profiling_get_mali_version_symbol);
+
+		/*
+		 * Revise the number of each core type from the DDK.  NOTE(review): values are not clamped to MAX_NUM_*.
+		 */
+		mali_profiling_get_mali_version_symbol(&version_info);
+
+		n_fp_cores = version_info.num_of_fp_cores;
+		n_vp_cores = version_info.num_of_vp_cores;
+		n_l2_cores = version_info.num_of_l2_cores;
+
+		/* Release the function - we're done with it. */
+		symbol_put(_mali_profiling_get_mali_version);
+	} else {
+		printk("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
+	}
+}
+#endif
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+	int event;
+	const char *mali_name = gator_mali_get_mali_name();
+
+	char buf[40];	/* scratch space for entry names; snprintf truncates anything longer */
+	int core_id;
+	int counter_number;
+
+	pr_debug("gator: Initialising counters with style = %d\n", GATOR_MALI_INTERFACE_STYLE);
+
+#if GATOR_MALI_INTERFACE_STYLE > 3
+	/*
+	 * Initialise first: this sets up the number of cores available (on compatible DDK versions).
+	 * Ideally this would not need guarding but other parts of the code depend on the interface style being set
+	 * correctly; if it is not then the system can enter an inconsistent state.
+	 */
+	initialise_version_info();
+#endif
+
+	/* Vertex processor counters: one 'active' entry plus two 'cntN' entries per core */
+	for (core_id = 0; core_id < n_vp_cores; core_id++) {
+		int activity_counter_id = ACTIVITY_VP_0;	/* NOTE(review): unlike the FP loop, not offset by core_id */
+		snprintf(buf, sizeof buf, "ARM_%s_VP_%d_active", mali_name, core_id);
+		if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+			return -1;
+		}
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			int counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+
+			snprintf(buf, sizeof buf, "ARM_%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+				return -1;
+			}
+		}
+	}
+
+	/* Fragment processors' counters: one 'active' entry plus two 'cntN' entries per core */
+	for (core_id = 0; core_id < n_fp_cores; core_id++) {
+		int activity_counter_id = ACTIVITY_FP_0 + core_id;
+
+		snprintf(buf, sizeof buf, "ARM_%s_FP_%d_active", mali_name, core_id);
+		if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+			return -1;
+		}
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			int counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+
+			snprintf(buf, sizeof buf, "ARM_%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+				return -1;
+			}
+		}
+	}
+
+	/* L2 Cache counters: two 'cntN' entries per cache, no 'active' entry */
+	for (core_id = 0; core_id < n_l2_cores; core_id++) {
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			int counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+
+			snprintf(buf, sizeof buf, "ARM_%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+				return -1;
+			}
+		}
+	}
+
+	/* Now set up the software counter entries (created without an 'event' item) */
+	for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) {
+		snprintf(buf, sizeof(buf), "ARM_%s_SW_%d", mali_name, event - FIRST_SW_COUNTER);
+
+		if (create_fs_entry(sb, root, buf, event, 0) != 0) {
+			return -1;
+		}
+	}
+
+	/* Now set up the special counter entries */
+	snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_FILMSTRIP, 1) != 0) {
+		return -1;
+	}
+
+#ifdef DVFS_REPORTED_BY_DDK
+	snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_FREQUENCY, 1) != 0) {
+		return -1;
+	}
+
+	snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_VOLTAGE, 1) != 0) {
+		return -1;
+	}
+#endif
+
+	return 0;
+}
+
+/*
+ * Local store for the get_counters entry point into the DDK.
+ * This is stored here since it is used very regularly.
+ */
+static mali_profiling_get_counters_type *mali_get_counters = NULL;
+static _mali_profiling_get_l2_counters_type *mali_get_l2_counters = NULL;
+
+/*
+ * Scan counter_enabled[] over the inclusive index range [first_counter, last_counter].
+ * Returns 1 as soon as an enabled counter is found, 0 if the range holds none.
+ */
+static int is_any_counter_enabled(unsigned int first_counter, unsigned int last_counter)
+{
+	unsigned int idx = first_counter;
+
+	while (idx <= last_counter) {
+		if (counter_enabled[idx] != 0) {
+			return 1;
+		}
+		idx++;
+	}
+	return 0;
+}
+
+static void init_counters(unsigned int from_counter, unsigned int to_counter)
+{
+	unsigned int counter_id;
+
+	/* If a Mali driver is present and exporting _mali_profiling_set_event then
+	 * program every counter in the inclusive range [from_counter, to_counter]:
+	 * enabled counters get their selected event, all others get 0xFFFFFFFF.
+	 */
+	mali_profiling_set_event_type *mali_set_hw_event;
+
+	mali_set_hw_event = symbol_get(_mali_profiling_set_event);
+
+	if (mali_set_hw_event) {
+		pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
+
+		for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
+			if (counter_enabled[counter_id]) {
+				mali_set_hw_event(counter_id, counter_event[counter_id]);
+			} else {
+				mali_set_hw_event(counter_id, 0xFFFFFFFF);	/* presumably the DDK's "counter off" value - confirm */
+			}
+		}
+
+		symbol_put(_mali_profiling_set_event);
+	} else {
+		printk("gator: mali online _mali_profiling_set_event symbol not found\n");
+	}
+}
+
+/*
+ * Program the DDK for a capture: set up the L2/VP/FP hardware counters, configure filmstrip and
+ * software-counter collection, cache the entry points that read() polls and zero the stored data.
+ */
+static void mali_counter_initialize(void)
+{
+	int i;
+	int core_id;
+	mali_osk_fb_control_set_type *mali_set_fb_event;
+	mali_profiling_control_type *mali_control;
+
+	init_counters(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1);
+	init_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1);
+	init_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1);
+
+	mali_set_fb_event = symbol_get(_mali_osk_fb_control_set);
+	if (mali_set_fb_event) {
+		pr_debug("gator: mali online _mali_osk_fb_control_set symbol @ %p\n", mali_set_fb_event);
+
+		mali_set_fb_event(0, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0));
+
+		symbol_put(_mali_osk_fb_control_set);
+	} else {
+		printk("gator: mali online _mali_osk_fb_control_set symbol not found\n");
+	}
+
+	/* Generic control interface for Mali DDK. */
+	mali_control = symbol_get(_mali_profiling_control);
+	if (mali_control) {
+		/* The event attribute in the XML file keeps the frame rate (bits 0-7) and resize factor (bits 8-15). */
+		unsigned int rate = counter_event[COUNTER_FILMSTRIP] & 0xff;
+		unsigned int resize_factor = (counter_event[COUNTER_FILMSTRIP] >> 8) & 0xff;
+
+		pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
+
+		mali_control(SW_COUNTER_ENABLE, (is_any_counter_enabled(FIRST_SW_COUNTER, LAST_SW_COUNTER) ? 1 : 0));
+		mali_control(FBDUMP_CONTROL_ENABLE, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0));
+		mali_control(FBDUMP_CONTROL_RATE, rate);
+		mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
+
+		pr_debug("gator: sent mali_control enabled=%d, rate=%d\n", (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0), rate);
+
+		symbol_put(_mali_profiling_control);
+	} else {
+		printk("gator: mali online _mali_profiling_control symbol not found\n");
+	}
+
+	mali_get_counters = symbol_get(_mali_profiling_get_counters);
+	if (mali_get_counters) {
+		pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters);
+	} else {
+		pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined\n");
+	}
+
+	mali_get_l2_counters = symbol_get(_mali_profiling_get_l2_counters);
+	if (mali_get_l2_counters) {
+		pr_debug("gator: mali online _mali_profiling_get_l2_counters symbol @ %p\n", mali_get_l2_counters);
+	} else {
+		pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined\n");
+	}
+
+	if (!mali_get_counters && !mali_get_l2_counters) {
+		pr_debug("gator: WARNING: no L2 counters available\n");
+		n_l2_cores = 0;
+	}
+
+	for (core_id = 0; core_id < n_l2_cores; core_id++) {
+		int counter_id = COUNTER_L2_0_C0 + (2 * core_id);
+		counter_prev[counter_id] = 0;
+		counter_prev[counter_id + 1] = 0;
+	}
+
+	/* Clear counters in the start */
+	for (i = 0; i < NUMBER_OF_EVENTS; i++) {
+		counter_data[i] = 0;
+	}
+}
+
+/*
+ * Undo mali_counter_initialize(): switch every hardware counter off, stop framebuffer dumping
+ * and software counters, and release the DDK entry points that were cached for read().
+ */
+static void mali_counter_deinitialize(void)
+{
+	mali_profiling_set_event_type *mali_set_hw_event;
+	mali_osk_fb_control_set_type *mali_set_fb_event;
+	mali_profiling_control_type *mali_control;
+
+	mali_set_hw_event = symbol_get(_mali_profiling_set_event);
+	if (mali_set_hw_event) {
+		int i;
+
+		pr_debug("gator: mali offline _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
+		for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) {
+			mali_set_hw_event(i, 0xFFFFFFFF);
+		}
+
+		symbol_put(_mali_profiling_set_event);
+	} else {
+		printk("gator: mali offline _mali_profiling_set_event symbol not found\n");
+	}
+
+	mali_set_fb_event = symbol_get(_mali_osk_fb_control_set);
+	if (mali_set_fb_event) {
+		pr_debug("gator: mali offline _mali_osk_fb_control_set symbol @ %p\n", mali_set_fb_event);
+		mali_set_fb_event(0, 0);
+		symbol_put(_mali_osk_fb_control_set);
+	} else {
+		printk("gator: mali offline _mali_osk_fb_control_set symbol not found\n");
+	}
+
+	/* Generic control interface for Mali DDK. */
+	mali_control = symbol_get(_mali_profiling_control);
+	if (mali_control) {
+		pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control);
+
+		/* Reset the DDK state - disable counter collection */
+		mali_control(SW_COUNTER_ENABLE, 0);
+		mali_control(FBDUMP_CONTROL_ENABLE, 0);
+
+		symbol_put(_mali_profiling_control);
+	} else {
+		printk("gator: mali offline _mali_profiling_control symbol not found\n");
+	}
+
+	/* Drop the references taken in mali_counter_initialize() and forget the now-stale pointers. */
+	if (mali_get_counters) {
+		symbol_put(_mali_profiling_get_counters);
+		mali_get_counters = NULL;
+	}
+	if (mali_get_l2_counters) {
+		symbol_put(_mali_profiling_get_l2_counters);
+		mali_get_l2_counters = NULL;
+	}
+}
+
+/* Register the Mali tracepoints, then configure the DDK.  Returns 0 on success, -1 on failure. */
+static int start(void)
+{
+	if (GATOR_REGISTER_TRACE(mali_hw_counter)) {
+		printk("gator: mali_hw_counter tracepoint failed to activate\n");
+		return -1;
+	}
+#if GATOR_MALI_INTERFACE_STYLE == 1
+	/* None. */
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+	/* For patched Mali driver. */
+	if (GATOR_REGISTER_TRACE(mali_sw_counter)) {
+		printk("gator: mali_sw_counter tracepoint failed to activate\n");
+		GATOR_UNREGISTER_TRACE(mali_hw_counter);	/* stop() will not undo it: trace_registered is still 0 */
+		return -1;
+	}
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+	/* For Mali drivers with built-in support. */
+	if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
+		printk("gator: mali_sw_counters tracepoint failed to activate\n");
+		GATOR_UNREGISTER_TRACE(mali_hw_counter);	/* stop() will not undo it: trace_registered is still 0 */
+		return -1;
+	}
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
+#endif
+
+	trace_registered = 1;
+	mali_counter_initialize();
+	return 0;
+}
+
+static void stop(void)
+{
+	unsigned int cnt;
+
+	pr_debug("gator: mali stop\n");
+
+	if (trace_registered) {	/* set by start() only once every tracepoint registered */
+		GATOR_UNREGISTER_TRACE(mali_hw_counter);
+
+#if GATOR_MALI_INTERFACE_STYLE == 1
+		/* None. */
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+		/* For patched Mali driver. */
+		GATOR_UNREGISTER_TRACE(mali_sw_counter);
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+		/* For Mali drivers with built-in support. */
+		GATOR_UNREGISTER_TRACE(mali_sw_counters);
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
+#endif
+
+		pr_debug("gator: mali timeline tracepoint deactivated\n");
+
+		trace_registered = 0;
+	}
+
+	for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {	/* forget the capture configuration; counter_key is kept */
+		counter_enabled[cnt] = 0;
+		counter_event[cnt] = 0;
+		counter_address[cnt] = NULL;
+	}
+
+	mali_counter_deinitialize();	/* switch the DDK's counters off and release its symbols */
+}
+
+/* Append (key, value) for each enabled counter in [from_counter, to_counter], then zero its data. */
+static void dump_counters(unsigned int from_counter, unsigned int to_counter, unsigned int *len)
+{
+	unsigned int id;
+
+	for (id = from_counter; id <= to_counter; id++) {
+		if (!counter_enabled[id])
+			continue;
+		counter_dump[(*len)++] = counter_key[id];
+		counter_dump[(*len)++] = counter_data[id];
+		counter_data[id] = 0;
+	}
+}
+
+/* Fill counter_dump with (key, value) pairs; returns the entry count, 0 when not on the primary core. */
+static int read(int **buffer)
+{
+	unsigned int len = 0;	/* unsigned, as dump_counters() expects */
+
+	if (!on_primary_core())
+		return 0;
+
+	// Read the L2 C0 and C1 here.  The enabled check takes an inclusive range, hence the - 1.
+	if (n_l2_cores > 0 && is_any_counter_enabled(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1)) {
+		unsigned int unavailable_l2_caches = ~0u;	/* nothing is readable until the driver says so */
+		_mali_profiling_l2_counter_values cache_values;
+		unsigned int cache_id;
+		struct _mali_profiling_core_counters *per_core;
+
+		/* Poke the driver to get the counter values. */
+		if (mali_get_l2_counters) {
+			unavailable_l2_caches = mali_get_l2_counters(&cache_values);
+		} else if (mali_get_counters) {
+			/* Older style: only cache 0 is filled in, so leave every other cache masked out. */
+			per_core = &cache_values.cores[0];
+			mali_get_counters(&per_core->source0, &per_core->value0, &per_core->source1, &per_core->value1);
+			unavailable_l2_caches = ~1u;
+		}
+
+		/* Fill in the two cache counter values for each cache block. */
+		for (cache_id = 0; cache_id < n_l2_cores; cache_id++) {
+			unsigned int counter_id_0 = COUNTER_L2_0_C0 + (2 * cache_id);
+			unsigned int counter_id_1 = counter_id_0 + 1;
+
+			if ((1u << cache_id) & unavailable_l2_caches) {
+				continue; /* This cache is unavailable (powered-off, possibly). */
+			}
+
+			per_core = &cache_values.cores[cache_id];
+
+			if (counter_enabled[counter_id_0]) {
+				// Calculate and save src0's counter val0
+				counter_dump[len++] = counter_key[counter_id_0];
+				counter_dump[len++] = get_difference(per_core->value0, counter_prev[counter_id_0]);
+			}
+
+			if (counter_enabled[counter_id_1]) {
+				// Calculate and save src1's counter val1
+				counter_dump[len++] = counter_key[counter_id_1];
+				counter_dump[len++] = get_difference(per_core->value1, counter_prev[counter_id_1]);
+			}
+
+			// Save the previous values for the counters.
+			counter_prev[counter_id_0] = per_core->value0;
+			counter_prev[counter_id_1] = per_core->value1;
+		}
+	}
+
+	/* Process other (non-timeline) counters. */
+	dump_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1, &len);
+	dump_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1, &len);
+	dump_counters(FIRST_SW_COUNTER, LAST_SW_COUNTER, &len);
+
+#ifdef DVFS_REPORTED_BY_DDK
+	{
+		int cnt;
+		/*
+		 * Add in the voltage and frequency counters if enabled.  Note that, since these are
+		 * actually passed as events, the counter value should not be cleared.
+		 */
+		cnt = COUNTER_FREQUENCY;
+		if (counter_enabled[cnt]) {
+			counter_dump[len++] = counter_key[cnt];
+			counter_dump[len++] = counter_data[cnt];
+		}
+
+		cnt = COUNTER_VOLTAGE;
+		if (counter_enabled[cnt]) {
+			counter_dump[len++] = counter_key[cnt];
+			counter_dump[len++] = counter_data[cnt];
+		}
+	}
+#endif
+
+	if (buffer) {
+		*buffer = (int *)counter_dump;
+	}
+
+	return len;
+}
+
+static struct gator_interface gator_events_mali_interface = {	/* handed to gator_events_install() at init */
+	.create_files = create_files,	/* builds the per-counter gatorfs entries */
+	.start = start,	/* registers tracepoints and programs the DDK */
+	.stop = stop,	/* unregisters tracepoints and resets the DDK */
+	.read = read,	/* returns the enabled counters as key,value pairs */
+};
+
+extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv)
+{	/* does nothing unless DVFS_REPORTED_BY_DDK is defined */
+#ifdef DVFS_REPORTED_BY_DDK
+	counter_data[COUNTER_FREQUENCY] = frequency_mhz;	/* latest value; read() reports it without clearing */
+	counter_data[COUNTER_VOLTAGE] = voltage_mv;	/* latest value; read() reports it without clearing */
+#endif
+}
+
+int gator_events_mali_init(void)
+{
+	unsigned int cnt;
+
+	pr_debug("gator: mali init\n");
+
+	for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {	/* reset every counter slot */
+		counter_enabled[cnt] = 0;
+		counter_event[cnt] = 0;
+		counter_key[cnt] = gator_events_get_key();	/* one key requested per counter */
+		counter_address[cnt] = NULL;
+		counter_data[cnt] = 0;
+	}
+
+	trace_registered = 0;
+
+	return gator_events_install(&gator_events_mali_interface);	/* install result is returned unchanged */
+}
+
+gator_events_init(gator_events_mali_init);
diff --git a/drivers/gator/gator_events_mali_400.h b/drivers/gator/gator_events_mali_4xx.h
similarity index 67%
rename from drivers/gator/gator_events_mali_400.h
rename to drivers/gator/gator_events_mali_4xx.h
index 43aec49..413ad0f 100644
--- a/drivers/gator/gator_events_mali_400.h
+++ b/drivers/gator/gator_events_mali_4xx.h
@@ -8,11 +8,11 @@
  */
 
 /*
- * Header contains common definitions for the Mali-400 processors.
+ * Header contains common definitions for the Mali-4xx processors.
  */
-#if !defined(GATOR_EVENTS_MALI_400_H)
-#define GATOR_EVENTS_MALI_400_H
+#if !defined(GATOR_EVENTS_MALI_4xx_H)
+#define GATOR_EVENTS_MALI_4xx_H
 
 extern void gator_events_mali_log_dvfs_event(unsigned int d0, unsigned int d1);
 
-#endif /* GATOR_EVENTS_MALI_400_H */
+#endif /* GATOR_EVENTS_MALI_4xx_H */
diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c
index 22a517d..5a98b37 100644
--- a/drivers/gator/gator_events_mali_common.c
+++ b/drivers/gator/gator_events_mali_common.c
@@ -20,8 +20,8 @@
 	switch (id) {
 	case MALI_T6xx:
 		return "Mali-T6xx";
-	case MALI_400:
-		return "Mali-400";
+	case MALI_4xx:
+		return "Mali-4xx";
 	default:
 		pr_debug("gator: Mali-T6xx: unknown Mali ID (%d)\n", id);
 		return "Mali-Unknown";
diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h
index 27eaacc..d67ee2d 100644
--- a/drivers/gator/gator_events_mali_common.h
+++ b/drivers/gator/gator_events_mali_common.h
@@ -19,7 +19,7 @@
 #include <asm/io.h>
 
 /* Device codes for each known GPU */
-#define MALI_400     (0x0b07)
+#define MALI_4xx     (0x0b07)
 #define MALI_T6xx    (0x0056)
 
 /* Ensure that MALI_SUPPORT has been defined to something. */
@@ -40,9 +40,9 @@
 } mali_counter;
 
 /*
- * Mali-400
+ * Mali-4xx
  */
-typedef void mali_profiling_set_event_type(unsigned int, unsigned int);
+typedef int mali_profiling_set_event_type(unsigned int, int);
 typedef void mali_osk_fb_control_set_type(unsigned int, unsigned int);
 typedef void mali_profiling_control_type(unsigned int, unsigned int);
 typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
@@ -50,13 +50,13 @@
 /*
  * Driver entry points for functions called directly by gator.
  */
-extern void _mali_profiling_set_event(unsigned int, unsigned int);
+extern int _mali_profiling_set_event(unsigned int, int);
 extern void _mali_osk_fb_control_set(unsigned int, unsigned int);
 extern void _mali_profiling_control(unsigned int, unsigned int);
 extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
 
 /**
- * Returns a name which identifies the GPU type (eg Mali-400, Mali-T6xx).
+ * Returns a name which identifies the GPU type (eg Mali-4xx, Mali-T6xx).
  *
  * @return The name as a constant string.
  */
diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_t6xx_hw.c
index 4f49c1d..f557350 100644
--- a/drivers/gator/gator_events_mali_t6xx_hw.c
+++ b/drivers/gator/gator_events_mali_t6xx_hw.c
@@ -304,11 +304,11 @@
 	"",
 	"",
 	"",
-	"MMU_TABLE_WALK",
-	"MMU_REPLAY_MISS",
-	"MMU_REPLAY_FULL",
-	"MMU_NEW_MISS",
 	"MMU_HIT",
+	"MMU_NEW_MISS",
+	"MMU_REPLAY_FULL",
+	"MMU_REPLAY_MISS",
+	"MMU_TABLE_WALK",
 	"",
 	"",
 	"",
@@ -316,11 +316,11 @@
 	"",
 	"",
 	"",
-	"UTLB_STALL",
-	"UTLB_REPLAY_MISS",
-	"UTLB_REPLAY_FULL",
-	"UTLB_NEW_MISS",
 	"UTLB_HIT",
+	"UTLB_NEW_MISS",
+	"UTLB_REPLAY_FULL",
+	"UTLB_REPLAY_MISS",
+	"UTLB_STALL",
 	"",
 	"",
 	"",
diff --git a/drivers/gator/gator_events_mmaped.c b/drivers/gator/gator_events_mmaped.c
index 0027564..f7670f6 100644
--- a/drivers/gator/gator_events_mmaped.c
+++ b/drivers/gator/gator_events_mmaped.c
@@ -13,9 +13,9 @@
  *
  * <counter_set name="mmaped_cnt" count="3"/>
  * <category name="mmaped" counter_set="mmaped_cnt" per_cpu="no">
- *   <event event="0x0" title="Simulated" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
- *   <event event="0x1" title="Simulated" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
- *   <event event="0x2" title="Simulated" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
+ *   <event event="0x0" title="Simulated1" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
+ *   <event event="0x1" title="Simulated2" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
+ *   <event event="0x2" title="Simulated3" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
  * </category>
  */
 
diff --git a/drivers/gator/gator_hrtimer_gator.c b/drivers/gator/gator_hrtimer_gator.c
index 8c35d49..b0c947a 100644
--- a/drivers/gator/gator_hrtimer_gator.c
+++ b/drivers/gator/gator_hrtimer_gator.c
@@ -13,6 +13,7 @@
 
 void (*callback)(void);
 DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer);
+DEFINE_PER_CPU(ktime_t, hrtimer_expire);
 DEFINE_PER_CPU(int, hrtimer_is_active);
 static ktime_t profiling_interval;
 static void gator_hrtimer_online(void);
@@ -20,7 +21,9 @@
 
 static enum hrtimer_restart gator_hrtimer_notify(struct hrtimer *hrtimer)
 {
-	hrtimer_forward_now(hrtimer, profiling_interval);
+	int cpu = get_logical_cpu();
+	hrtimer_forward(hrtimer, per_cpu(hrtimer_expire, cpu), profiling_interval);
+	per_cpu(hrtimer_expire, cpu) = ktime_add(per_cpu(hrtimer_expire, cpu), profiling_interval);
 	(*callback)();
 	return HRTIMER_RESTART;
 }
@@ -34,12 +37,13 @@
 		return;
 
 	per_cpu(hrtimer_is_active, cpu) = 1;
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	hrtimer->function = gator_hrtimer_notify;
 #ifdef CONFIG_PREEMPT_RT_BASE
 	hrtimer->irqsafe = 1;
 #endif
-	hrtimer_start(hrtimer, profiling_interval, HRTIMER_MODE_REL_PINNED);
+	per_cpu(hrtimer_expire, cpu) = ktime_add(hrtimer->base->get_time(), profiling_interval);
+	hrtimer_start(hrtimer, per_cpu(hrtimer_expire, cpu), HRTIMER_MODE_ABS_PINNED);
 }
 
 static void gator_hrtimer_offline(void)
diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c
index 932be26..24233d7 100644
--- a/drivers/gator/gator_iks.c
+++ b/drivers/gator/gator_iks.c
@@ -14,18 +14,36 @@
 #include <asm/smp_plat.h>
 #include <trace/events/power_cpu_migrate.h>
 
+static bool map_cpuids;
 static int mpidr_cpuids[NR_CPUS];
+static const struct gator_cpu * mpidr_cpus[NR_CPUS];
 static int __lcpu_to_pcpu[NR_CPUS];
 
+static const struct gator_cpu *gator_find_cpu_by_dt_name(const char *const name)
+{
+	const struct gator_cpu *entry;
+
+	/* Walk gator_cpus up to its terminating entry (cpuid == 0) looking for a dt_name match */
+	for (entry = gator_cpus; entry->cpuid != 0; ++entry) {
+		if (entry->dt_name == NULL)
+			continue;
+		if (strcmp(entry->dt_name, name) == 0)
+			return entry;
+	}
+	return NULL;
+}
+
 static void calc_first_cluster_size(void)
 {
 	int len;
 	const u32 *val;
+	const char *compatible;
 	struct device_node *cn = NULL;
 	int mpidr_cpuids_count = 0;
 
 	// Zero is a valid cpuid, so initialize the array to 0xff's
 	memset(&mpidr_cpuids, 0xff, sizeof(mpidr_cpuids));
+	memset(&mpidr_cpus, 0, sizeof(mpidr_cpus));
 
 	while ((cn = of_find_node_by_type(cn, "cpu"))) {
 		BUG_ON(mpidr_cpuids_count >= NR_CPUS);
@@ -35,12 +53,18 @@
 			pr_err("%s missing reg property\n", cn->full_name);
 			continue;
 		}
+		compatible = of_get_property(cn, "compatible", NULL);
+		if (compatible == NULL) {
+			pr_err("%s missing compatible property\n", cn->full_name);
+			continue;
+		}
 
 		mpidr_cpuids[mpidr_cpuids_count] = be32_to_cpup(val);
+		mpidr_cpus[mpidr_cpuids_count] = gator_find_cpu_by_dt_name(compatible);
 		++mpidr_cpuids_count;
 	}
 
-	BUG_ON(mpidr_cpuids_count != nr_cpu_ids);
+	map_cpuids = (mpidr_cpuids_count == nr_cpu_ids);
 }
 
 static int linearize_mpidr(int mpidr)
@@ -58,6 +82,10 @@
 int lcpu_to_pcpu(const int lcpu)
 {
 	int pcpu;
+
+	if (!map_cpuids)
+		return lcpu;
+
 	BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0);
 	pcpu = __lcpu_to_pcpu[lcpu];
 	BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0);
@@ -67,6 +95,10 @@
 int pcpu_to_lcpu(const int pcpu)
 {
 	int lcpu;
+
+	if (!map_cpuids)
+		return pcpu;
+
 	BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0);
 	for (lcpu = 0; lcpu < nr_cpu_ids; ++lcpu) {
 		if (__lcpu_to_pcpu[lcpu] == pcpu) {
@@ -111,9 +143,24 @@
 	gator_update_cpu_mapping(cpu_hwid);
 }
 
+/* Send the core name for every cpu slot that has a gator_cpu recorded in mpidr_cpus. */
+static void gator_send_iks_core_names(void)
+{
+	int idx;
+	for (idx = 0; idx < nr_cpu_ids; ++idx) {
+		const struct gator_cpu *const core = mpidr_cpus[idx];
+		if (core)
+			gator_send_core_name(idx, core->cpuid, core);
+	}
+}
+
 static int gator_migrate_start(void)
 {
 	int retval = 0;
+
+	if (!map_cpuids)
+		return retval;
+
 	if (retval == 0)
 		retval = GATOR_REGISTER_TRACE(cpu_migrate_begin);
 	if (retval == 0)
@@ -130,6 +177,9 @@
 
 static void gator_migrate_stop(void)
 {
+	if (!map_cpuids)
+		return;
+
 	GATOR_UNREGISTER_TRACE(cpu_migrate_current);
 	GATOR_UNREGISTER_TRACE(cpu_migrate_finish);
 	GATOR_UNREGISTER_TRACE(cpu_migrate_begin);
@@ -138,6 +188,7 @@
 #else
 
 #define calc_first_cluster_size()
+#define gator_send_iks_core_names()
 #define gator_migrate_start() 0
 #define gator_migrate_stop()
 
diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c
index 46fe31d..7dd70d9 100644
--- a/drivers/gator/gator_main.c
+++ b/drivers/gator/gator_main.c
@@ -8,7 +8,7 @@
  */
 
 // This version must match the gator daemon version
-static unsigned long gator_protocol_version = 14;
+static unsigned long gator_protocol_version = 16;
 
 #include <linux/slab.h>
 #include <linux/cpu.h>
@@ -64,13 +64,13 @@
 #define NAME_BUFFER_SIZE          (64*1024)
 #define COUNTER_BUFFER_SIZE       (64*1024)	// counters have the core as part of the data and the core value in the frame header may be discarded
 #define BLOCK_COUNTER_BUFFER_SIZE (128*1024)
-#define ANNOTATE_BUFFER_SIZE      (64*1024)	// annotate counters have the core as part of the data and the core value in the frame header may be discarded
+#define ANNOTATE_BUFFER_SIZE      (128*1024)	// annotate counters have the core as part of the data and the core value in the frame header may be discarded
 #define SCHED_TRACE_BUFFER_SIZE   (128*1024)
 #define GPU_TRACE_BUFFER_SIZE     (64*1024)	// gpu trace counters have the core as part of the data and the core value in the frame header may be discarded
 #define IDLE_BUFFER_SIZE          (32*1024)	// idle counters have the core as part of the data and the core value in the frame header may be discarded
 
 #define NO_COOKIE      0U
-#define INVALID_COOKIE ~0U
+#define UNRESOLVED_COOKIE ~0U
 
 #define FRAME_SUMMARY       1
 #define FRAME_BACKTRACE     2
@@ -93,6 +93,7 @@
 
 #define MESSAGE_SCHED_SWITCH 1
 #define MESSAGE_SCHED_EXIT   2
+#define MESSAGE_SCHED_START  3
 
 #define MESSAGE_IDLE_ENTER 1
 #define MESSAGE_IDLE_EXIT 2
@@ -167,7 +168,7 @@
 static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len);
 static void gator_buffer_write_string(int cpu, int buftype, const char *x);
 static void gator_add_trace(int cpu, unsigned long address);
-static void gator_add_sample(int cpu, struct pt_regs *const regs);
+static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time);
 static u64 gator_get_time(void);
 
 // Size of the buffer, must be a power of 2. Effectively constant, set in gator_op_setup.
@@ -185,11 +186,8 @@
 static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], buffer_space_available);
 // The buffer. Allocated in gator_op_setup
 static DEFINE_PER_CPU(char *[NUM_GATOR_BUFS], gator_buffer);
-
-#if GATOR_LIVE
 // The time after which the buffer should be committed for live display
 static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
-#endif
 
 /******************************************************************************
  * Application Includes
@@ -198,11 +196,11 @@
 #include "gator_hrtimer_perf.c"
 #include "gator_hrtimer_gator.c"
 #include "gator_cookies.c"
+#include "gator_annotate.c"
 #include "gator_trace_sched.c"
 #include "gator_trace_power.c"
 #include "gator_trace_gpu.c"
 #include "gator_backtrace.c"
-#include "gator_annotate.c"
 #include "gator_fs.c"
 #include "gator_pack.c"
 
@@ -215,24 +213,28 @@
 		.cpuid = ARM1136,
 		.core_name = "ARM1136",
 		.pmnc_name = "ARM_ARM11",
+		.dt_name = "arm,arm1136",
 		.pmnc_counters = 3,
 	},
 	{
 		.cpuid = ARM1156,
 		.core_name = "ARM1156",
 		.pmnc_name = "ARM_ARM11",
+		.dt_name = "arm,arm1156",
 		.pmnc_counters = 3,
 	},
 	{
 		.cpuid = ARM1176,
 		.core_name = "ARM1176",
 		.pmnc_name = "ARM_ARM11",
+		.dt_name = "arm,arm1176",
 		.pmnc_counters = 3,
 	},
 	{
 		.cpuid = ARM11MPCORE,
 		.core_name = "ARM11MPCore",
 		.pmnc_name = "ARM_ARM11MPCore",
+		.dt_name = "arm,arm11mpcore",
 		.pmnc_counters = 3,
 	},
 	{
@@ -240,6 +242,7 @@
 		.core_name = "Cortex-A5",
 		.pmu_name = "ARMv7_Cortex_A5",
 		.pmnc_name = "ARM_Cortex-A5",
+		.dt_name = "arm,cortex-a5",
 		.pmnc_counters = 2,
 	},
 	{
@@ -247,6 +250,7 @@
 		.core_name = "Cortex-A7",
 		.pmu_name = "ARMv7_Cortex_A7",
 		.pmnc_name = "ARM_Cortex-A7",
+		.dt_name = "arm,cortex-a7",
 		.pmnc_counters = 4,
 	},
 	{
@@ -254,6 +258,7 @@
 		.core_name = "Cortex-A8",
 		.pmu_name = "ARMv7_Cortex_A8",
 		.pmnc_name = "ARM_Cortex-A8",
+		.dt_name = "arm,cortex-a8",
 		.pmnc_counters = 4,
 	},
 	{
@@ -261,6 +266,15 @@
 		.core_name = "Cortex-A9",
 		.pmu_name = "ARMv7_Cortex_A9",
 		.pmnc_name = "ARM_Cortex-A9",
+		.dt_name = "arm,cortex-a9",
+		.pmnc_counters = 6,
+	},
+	{
+		.cpuid = CORTEX_A12,
+		.core_name = "Cortex-A12",
+		.pmu_name = "ARMv7_Cortex_A12",
+		.pmnc_name = "ARM_Cortex-A12",
+		.dt_name = "arm,cortex-a12",
 		.pmnc_counters = 6,
 	},
 	{
@@ -268,6 +282,7 @@
 		.core_name = "Cortex-A15",
 		.pmu_name = "ARMv7_Cortex_A15",
 		.pmnc_name = "ARM_Cortex-A15",
+		.dt_name = "arm,cortex-a15",
 		.pmnc_counters = 6,
 	},
 	{
@@ -304,12 +319,14 @@
 		.cpuid = CORTEX_A53,
 		.core_name = "Cortex-A53",
 		.pmnc_name = "ARM_Cortex-A53",
+		.dt_name = "arm,cortex-a53",
 		.pmnc_counters = 6,
 	},
 	{
 		.cpuid = CORTEX_A57,
 		.core_name = "Cortex-A57",
 		.pmnc_name = "ARM_Cortex-A57",
+		.dt_name = "arm,cortex-a57",
 		.pmnc_counters = 6,
 	},
 	{
@@ -491,13 +508,11 @@
 
 	per_cpu(gator_buffer_commit, cpu)[buftype] = per_cpu(gator_buffer_write, cpu)[buftype];
 
-#if GATOR_LIVE
 	if (gator_live_rate > 0) {
 		while (time > per_cpu(gator_buffer_commit_time, cpu)) {
 			per_cpu(gator_buffer_commit_time, cpu) += gator_live_rate;
 		}
 	}
-#endif
 
 	marshal_frame(cpu, buftype);
 
@@ -521,14 +536,14 @@
 	off_t offset = 0;
 	unsigned long cookie = get_address_cookie(cpu, current, address & ~1, &offset);
 
-	if (cookie == NO_COOKIE || cookie == INVALID_COOKIE) {
+	if (cookie == NO_COOKIE || cookie == UNRESOLVED_COOKIE) {
 		offset = address;
 	}
 
 	marshal_backtrace(offset & ~1, cookie);
 }
 
-static void gator_add_sample(int cpu, struct pt_regs *const regs)
+static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time)
 {
 	bool inKernel;
 	unsigned long exec_cookie;
@@ -539,7 +554,7 @@
 	inKernel = !user_mode(regs);
 	exec_cookie = get_exec_cookie(cpu, current);
 
-	if (!marshal_backtrace_header(exec_cookie, current->tgid, current->pid, inKernel))
+	if (!marshal_backtrace_header(exec_cookie, current->tgid, current->pid, inKernel, time))
 		return;
 
 	if (inKernel) {
@@ -553,7 +568,7 @@
 			arm_backtrace_eabi(cpu, regs, gator_backtrace_depth);
 	}
 
-	marshal_backtrace_footer();
+	marshal_backtrace_footer(time);
 }
 
 /******************************************************************************
@@ -567,14 +582,15 @@
 
 void gator_backtrace_handler(struct pt_regs *const regs)
 {
+	u64 time = gator_get_time();
 	int cpu = get_physical_cpu();
 
 	// Output backtrace
-	gator_add_sample(cpu, regs);
+	gator_add_sample(cpu, regs, time);
 
 	// Collect counters
 	if (!per_cpu(collecting, cpu)) {
-		collect_counters();
+		collect_counters(time);
 	}
 }
 
@@ -640,6 +656,25 @@
 	}
 }
 
+#if defined(__arm__) || defined(__aarch64__)
+static void gator_send_core_name(int cpu, const u32 cpuid, const struct gator_cpu *const gator_cpu) {
+	const char *core_name = NULL;
+	char core_name_buf[32];
+
+	if (!sent_core_name[cpu]) {
+		if (gator_cpu != NULL) {
+			core_name = gator_cpu->core_name;
+		} else {
+			snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+			core_name = core_name_buf;
+		}
+
+		marshal_core_name(cpu, cpuid, core_name);
+		sent_core_name[cpu] = true;
+	}
+}
+#endif
+
 // This function runs in interrupt context and on the appropriate core
 static void gator_timer_online(void *migrate)
 {
@@ -669,20 +704,8 @@
 
 #if defined(__arm__) || defined(__aarch64__)
 	if (!sent_core_name[cpu]) {
-		const char *core_name = NULL;
 		const u32 cpuid = gator_cpuid();
-		const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(cpuid);
-		char core_name_buf[32];
-
-		if (gator_cpu != NULL) {
-			core_name = gator_cpu->core_name;
-		} else {
-			snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
-			core_name = core_name_buf;
-		}
-
-		marshal_core_name(cpuid, core_name);
-		sent_core_name[cpu] = true;
+		gator_send_core_name(cpu, cpuid, gator_find_cpu_by_cpuid(cpuid));
 	}
 #endif
 }
@@ -699,6 +722,8 @@
 	}
 }
 
+#include "gator_iks.c"
+
 int gator_timer_start(unsigned long sample_rate)
 {
 	int cpu;
@@ -718,6 +743,7 @@
 	if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1)
 		return -1;
 
+	gator_send_iks_core_names();
 	for_each_online_cpu(cpu) {
 		gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
 	}
@@ -740,6 +766,7 @@
 
 	// getrawmonotonic is not monotonic on all systems. Detect and attempt to correct these cases.
 	// up to 0.5ms delta has been seen on some systems, which can skew Streamline data when viewing at high resolution.
+	// This doesn't work well with interrupts, but that's OK - the real concern is to catch big jumps in time
 	prev_timestamp = per_cpu(last_timestamp, cpu);
 	if (prev_timestamp <= timestamp) {
 		per_cpu(last_timestamp, cpu) = timestamp;
@@ -759,8 +786,6 @@
 /******************************************************************************
  * cpu hotplug and pm notifiers
  ******************************************************************************/
-#include "gator_iks.c"
-
 static int __cpuinit gator_hotcpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	int cpu = lcpu_to_pcpu((long)hcpu);
@@ -865,7 +890,7 @@
 	gator_monotonic_started = gator_get_time();
 	local_irq_restore(flags);
 
-	marshal_summary(timestamp, uptime, uname_buf);
+	marshal_summary(timestamp, uptime, gator_monotonic_started, uname_buf);
 }
 
 int gator_events_install(struct gator_interface *interface)
@@ -1062,9 +1087,7 @@
 			per_cpu(gator_buffer_write, cpu)[i] = 0;
 			per_cpu(gator_buffer_commit, cpu)[i] = 0;
 			per_cpu(buffer_space_available, cpu)[i] = true;
-#if GATOR_LIVE
 			per_cpu(gator_buffer_commit_time, cpu) = gator_live_rate;
-#endif
 
 			// Annotation is a special case that only uses a single buffer
 			if (cpu > 0 && i == ANNOTATE_BUF) {
@@ -1138,9 +1161,7 @@
 			per_cpu(gator_buffer_write, cpu)[i] = 0;
 			per_cpu(gator_buffer_commit, cpu)[i] = 0;
 			per_cpu(buffer_space_available, cpu)[i] = true;
-#if GATOR_LIVE
 			per_cpu(gator_buffer_commit_time, cpu) = 0;
-#endif
 		}
 		mutex_unlock(&gator_buffer_mutex);
 	}
@@ -1297,7 +1318,7 @@
 		if (buftype == ANNOTATE_BUF) {
 			wake_up(&gator_annotate_wait);
 		}
-	}	while (buffer_commit_ready(&cpu, &buftype));
+	} while (buffer_commit_ready(&cpu, &buftype));
 
 	mutex_unlock(&gator_buffer_mutex);
 
diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c
index 72dd5ef..3282de8 100644
--- a/drivers/gator/gator_marshaling.c
+++ b/drivers/gator/gator_marshaling.c
@@ -19,7 +19,11 @@
 	/* Add another character so the length isn't 0x0a bytes */ \
 	"5"
 
-static void marshal_summary(long long timestamp, long long uptime, const char * uname)
+#ifdef MALI_SUPPORT
+#include "gator_events_mali_common.h"
+#endif
+
+static void marshal_summary(long long timestamp, long long uptime, long long monotonic_delta, const char * uname)
 {
 	unsigned long flags;
 	int cpu = 0;
@@ -28,12 +32,24 @@
 	gator_buffer_write_string(cpu, SUMMARY_BUF, NEWLINE_CANARY);
 	gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, timestamp);
 	gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, uptime);
+	gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, monotonic_delta);
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "uname");
 	gator_buffer_write_string(cpu, SUMMARY_BUF, uname);
 #if GATOR_IKS_SUPPORT
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "iks");
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "");
 #endif
+	// Let Streamline know which GPU is used so that it can label the GPU Activity appropriately. This is a temporary fix, to be improved in a future release.
+#ifdef MALI_SUPPORT
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "mali_type");
+#if (MALI_SUPPORT == MALI_4xx)
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "4xx");
+#elif (MALI_SUPPORT == MALI_T6xx)
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "6xx");
+#else
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "unknown");
+#endif
+#endif
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "");
 	// Commit the buffer now so it can be one of the first frames read by Streamline
 	gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
@@ -73,23 +89,23 @@
 	local_irq_restore(flags);
 }
 
-static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, int inKernel)
+static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, int inKernel, u64 time)
 {
 	int cpu = get_physical_cpu();
-	u64 time = gator_get_time();
-	if (buffer_check_space(cpu, BACKTRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32 + gator_backtrace_depth * 2 * MAXSIZE_PACK32)) {
-		gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, exec_cookie);
-		gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, tgid);
-		gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, pid);
-		gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, inKernel);
-		return true;
+	if (!buffer_check_space(cpu, BACKTRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32 + gator_backtrace_depth * 2 * MAXSIZE_PACK32)) {
+		// Check and commit; commit is set to occur once buffer is 3/4 full
+		buffer_check(cpu, BACKTRACE_BUF, time);
+
+		return false;
 	}
 
-	// Check and commit; commit is set to occur once buffer is 3/4 full
-	buffer_check(cpu, BACKTRACE_BUF, time);
+	gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, time);
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, exec_cookie);
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, tgid);
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, pid);
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, inKernel);
 
-	return false;
+	return true;
 }
 
 static void marshal_backtrace(unsigned long address, int cookie)
@@ -99,13 +115,13 @@
 	gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, address);
 }
 
-static void marshal_backtrace_footer(void)
+static void marshal_backtrace_footer(u64 time)
 {
 	int cpu = get_physical_cpu();
 	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, MESSAGE_END_BACKTRACE);
 
 	// Check and commit; commit is set to occur once buffer is 3/4 full
-	buffer_check(cpu, BACKTRACE_BUF, gator_get_time());
+	buffer_check(cpu, BACKTRACE_BUF, time);
 }
 
 static bool marshal_event_header(u64 time)
@@ -239,6 +255,28 @@
 	local_irq_restore(flags);
 }
 
+static void marshal_sched_trace_start(int tgid, int pid, int cookie)
+{
+	unsigned long cpu = get_physical_cpu(), flags;
+	u64 time;
+
+	if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])
+		return;
+
+	local_irq_save(flags);
+	time = gator_get_time();
+	if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_START);
+		gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie);
+	}
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, SCHED_TRACE_BUF, time);
+	local_irq_restore(flags);
+}
+
 static void marshal_sched_trace_switch(int tgid, int pid, int cookie, int state)
 {
 	unsigned long cpu = get_physical_cpu(), flags;
@@ -357,17 +395,19 @@
 }
 
 #if defined(__arm__) || defined(__aarch64__)
-static void marshal_core_name(const int cpuid, const char *name)
+static void marshal_core_name(const int core, const int cpuid, const char *name)
 {
 	int cpu = get_physical_cpu();
 	unsigned long flags;
 	local_irq_save(flags);
 	if (buffer_check_space(cpu, NAME_BUF, MAXSIZE_PACK32 + MAXSIZE_CORE_NAME)) {
 		gator_buffer_write_packed_int(cpu, NAME_BUF, HRTIMER_CORE_NAME);
+		gator_buffer_write_packed_int(cpu, NAME_BUF, core);
 		gator_buffer_write_packed_int(cpu, NAME_BUF, cpuid);
 		gator_buffer_write_string(cpu, NAME_BUF, name);
 	}
-	buffer_check(cpu, NAME_BUF, gator_get_time());
+	// Commit core names now so that they can show up in live
+	gator_commit_buffer(cpu, NAME_BUF, gator_get_time());
 	local_irq_restore(flags);
 }
 #endif
diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c
index 61ecbe3..12623c4 100644
--- a/drivers/gator/gator_trace_gpu.c
+++ b/drivers/gator/gator_trace_gpu.c
@@ -40,21 +40,27 @@
 	NUMBER_OF_GPU_UNITS
 };
 
-#define MALI_400     (0x0b07)
+#define MALI_4xx     (0x0b07)
 #define MALI_T6xx    (0x0056)
 
 struct mali_gpu_job {
 	int count;
-	int last_core;
 	int last_tgid;
 	int last_pid;
+	int last_job_id;
 };
 
 #define NUMBER_OF_GPU_CORES 16
 static struct mali_gpu_job mali_gpu_jobs[NUMBER_OF_GPU_UNITS][NUMBER_OF_GPU_CORES];
 static DEFINE_SPINLOCK(mali_gpu_jobs_lock);
 
-static void mali_gpu_enqueue(int unit, int core, int tgid, int pid)
+/* Only one event should be running on a unit and core at a time (ie, a start
+ * event can only be followed by a stop and vice versa), but because the kernel
+ * only knows when a job is enqueued and not started, it is possible to see
+ * start1, start2, stop1, stop2. Change it back into start1, stop1, start2,
+ * stop2 by queueing up start2 and releasing it when stop1 is received.
+ */
+static void mali_gpu_enqueue(int unit, int core, int tgid, int pid, int job_id)
 {
 	int count;
 
@@ -63,23 +69,23 @@
 	BUG_ON(count < 0);
 	++mali_gpu_jobs[unit][core].count;
 	if (count) {
-		mali_gpu_jobs[unit][core].last_core = core;
 		mali_gpu_jobs[unit][core].last_tgid = tgid;
 		mali_gpu_jobs[unit][core].last_pid = pid;
+		mali_gpu_jobs[unit][core].last_job_id = job_id;
 	}
 	spin_unlock(&mali_gpu_jobs_lock);
 
 	if (!count) {
-		marshal_sched_gpu_start(unit, core, tgid, pid);
+		marshal_sched_gpu_start(unit, core, tgid, pid/*, job_id*/);
 	}
 }
 
 static void mali_gpu_stop(int unit, int core)
 {
 	int count;
-	int last_core = 0;
 	int last_tgid = 0;
 	int last_pid = 0;
+	int last_job_id = 0;
 
 	spin_lock(&mali_gpu_jobs_lock);
 	if (mali_gpu_jobs[unit][core].count == 0) {
@@ -89,20 +95,20 @@
 	--mali_gpu_jobs[unit][core].count;
 	count = mali_gpu_jobs[unit][core].count;
 	if (count) {
-		last_core = mali_gpu_jobs[unit][core].last_core;
 		last_tgid = mali_gpu_jobs[unit][core].last_tgid;
 		last_pid = mali_gpu_jobs[unit][core].last_pid;
+		last_job_id = mali_gpu_jobs[unit][core].last_job_id;
 	}
 	spin_unlock(&mali_gpu_jobs_lock);
 
 	marshal_sched_gpu_stop(unit, core);
 	if (count) {
-		marshal_sched_gpu_start(unit, last_core, last_tgid, last_pid);
+		marshal_sched_gpu_start(unit, core, last_tgid, last_pid/*, last_job_id*/);
 	}
 }
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
-#include "gator_events_mali_400.h"
+#include "gator_events_mali_4xx.h"
 
 /*
  * Taken from MALI_PROFILING_EVENT_CHANNEL_* in Mali DDK.
@@ -141,10 +147,10 @@
 	case EVENT_TYPE_START:
 		if (component == EVENT_CHANNEL_VP0) {
 			/* tgid = d0; pid = d1; */
-			mali_gpu_enqueue(GPU_UNIT_VP, 0, d0, d1);
+			mali_gpu_enqueue(GPU_UNIT_VP, 0, d0, d1, 0);
 		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
 			/* tgid = d0; pid = d1; */
-			mali_gpu_enqueue(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0, d0, d1);
+			mali_gpu_enqueue(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0, d0, d1, 0);
 		}
 		break;
 
@@ -173,9 +179,16 @@
 #endif
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+#if defined(MALI_JOB_SLOTS_EVENT_CHANGED)
+GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id))
+#else
 GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid))
+#endif
 {
 	unsigned int component, state, unit;
+#if !defined(MALI_JOB_SLOTS_EVENT_CHANGED)
+	unsigned char job_id = 0;
+#endif
 
 	component = (event_id >> 16) & 0xFF;	// component is an 8-bit field
 	state = (event_id >> 24) & 0xF;	// state is a 4-bit field
@@ -197,7 +210,7 @@
 	if (unit != GPU_UNIT_NONE) {
 		switch (state) {
 		case EVENT_TYPE_START:
-			mali_gpu_enqueue(unit, 0, tgid, (pid != 0 ? pid : tgid));
+			mali_gpu_enqueue(unit, 0, tgid, (pid != 0 ? pid : tgid), job_id);
 			break;
 		case EVENT_TYPE_STOP:
 			mali_gpu_stop(unit, 0);
@@ -214,7 +227,7 @@
 
 GATOR_DEFINE_PROBE(gpu_activity_start, TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p))
 {
-	mali_gpu_enqueue(gpu_unit, gpu_core, (int)p->tgid, (int)p->pid);
+	mali_gpu_enqueue(gpu_unit, gpu_core, (int)p->tgid, (int)p->pid, 0);
 }
 
 GATOR_DEFINE_PROBE(gpu_activity_stop, TP_PROTO(int gpu_unit, int gpu_core))
@@ -229,6 +242,7 @@
 	 * Absence of gpu trace points is not an error
 	 */
 
+	memset(&mali_gpu_jobs, 0, sizeof(mali_gpu_jobs));
 	gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c
index e989f6a..e98815e 100644
--- a/drivers/gator/gator_trace_sched.c
+++ b/drivers/gator/gator_trace_sched.c
@@ -10,9 +10,6 @@
 #include <trace/events/sched.h>
 #include "gator.h"
 
-#define SCHED_SWITCH			1
-#define SCHED_PROCESS_EXIT		2
-
 #define TASK_MAP_ENTRIES		1024	/* must be power of 2 */
 #define TASK_MAX_COLLISIONS		2
 
@@ -92,14 +89,12 @@
 	}
 }
 
-static void collect_counters(void)
+static void collect_counters(u64 time)
 {
 	int *buffer, len, cpu = get_physical_cpu();
 	long long *buffer64;
 	struct gator_interface *gi;
-	u64 time;
 
-	time = gator_get_time();
 	if (marshal_event_header(time)) {
 		list_for_each_entry(gi, &gator_events, list) {
 			if (gi->read) {
@@ -113,49 +108,19 @@
 		// Only check after writing all counters so that time and corresponding counters appear in the same frame
 		buffer_check(cpu, BLOCK_COUNTER_BUF, time);
 
-#if GATOR_LIVE
 		// Commit buffers on timeout
 		if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) {
 			static const int buftypes[] = { COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF };
 			int i;
-			for (i = 0; i < sizeof(buftypes)/sizeof(buftypes[0]); ++i) {
+			for (i = 0; i < ARRAY_SIZE(buftypes); ++i) {
 				gator_commit_buffer(cpu, buftypes[i], time);
 			}
+			// Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full
+			if (on_primary_core() && spin_trylock(&annotate_lock)) {
+				gator_commit_buffer(0, ANNOTATE_BUF, time);
+				spin_unlock(&annotate_lock);
+			}
 		}
-#endif
-	}
-}
-
-static void probe_sched_write(int type, struct task_struct *task, struct task_struct *old_task)
-{
-	int cookie = 0, state = 0;
-	int cpu = get_physical_cpu();
-	int tgid = task->tgid;
-	int pid = task->pid;
-
-	if (type == SCHED_SWITCH) {
-		// do as much work as possible before disabling interrupts
-		cookie = get_exec_cookie(cpu, task);
-		emit_pid_name(task);
-		if (old_task->state == TASK_RUNNING) {
-			state = STATE_CONTENTION;
-		} else if (old_task->in_iowait) {
-			state = STATE_WAIT_ON_IO;
-		} else {
-			state = STATE_WAIT_ON_OTHER;
-		}
-
-		per_cpu(collecting, cpu) = 1;
-		collect_counters();
-		per_cpu(collecting, cpu) = 0;
-	}
-
-	// marshal_sched_trace() disables interrupts as the free may trigger while switch is writing to the buffer; disabling preemption is not sufficient
-	// is disable interrupts necessary now that exit is used instead of free?
-	if (type == SCHED_SWITCH) {
-		marshal_sched_trace_switch(tgid, pid, cookie, state);
-	} else {
-		marshal_sched_trace_exit(tgid, pid);
 	}
 }
 
@@ -165,18 +130,48 @@
 	marshal_sched_trace_switch(0, 0, 0, 0);
 }
 
+GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
+{
+	int cookie;
+	int cpu = get_physical_cpu();
+
+	cookie = get_exec_cookie(cpu, child);
+	emit_pid_name(child);
+
+	marshal_sched_trace_start(child->tgid, child->pid, cookie);
+}
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
 GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
 #else
 GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
 #endif
 {
-	probe_sched_write(SCHED_SWITCH, next, prev);
+	int cookie;
+	int state;
+	int cpu = get_physical_cpu();
+
+	// do as much work as possible before disabling interrupts
+	cookie = get_exec_cookie(cpu, next);
+	emit_pid_name(next);
+	if (prev->state == TASK_RUNNING) {
+		state = STATE_CONTENTION;
+	} else if (prev->in_iowait) {
+		state = STATE_WAIT_ON_IO;
+	} else {
+		state = STATE_WAIT_ON_OTHER;
+	}
+
+	per_cpu(collecting, cpu) = 1;
+	collect_counters(gator_get_time());
+	per_cpu(collecting, cpu) = 0;
+
+	marshal_sched_trace_switch(next->tgid, next->pid, cookie, state);
 }
 
-GATOR_DEFINE_PROBE(sched_process_exit, TP_PROTO(struct task_struct *p))
+GATOR_DEFINE_PROBE(sched_process_free, TP_PROTO(struct task_struct *p))
 {
-	probe_sched_write(SCHED_PROCESS_EXIT, p, 0);
+	marshal_sched_trace_exit(p->tgid, p->pid);
 }
 
 static void do_nothing(void *info)
@@ -188,10 +183,12 @@
 static int register_scheduler_tracepoints(void)
 {
 	// register tracepoints
+	if (GATOR_REGISTER_TRACE(sched_process_fork))
+		goto fail_sched_process_fork;
 	if (GATOR_REGISTER_TRACE(sched_switch))
 		goto fail_sched_switch;
-	if (GATOR_REGISTER_TRACE(sched_process_exit))
-		goto fail_sched_process_exit;
+	if (GATOR_REGISTER_TRACE(sched_process_free))
+		goto fail_sched_process_free;
 	pr_debug("gator: registered tracepoints\n");
 
 	// Now that the scheduler tracepoint is registered, force a context switch
@@ -201,9 +198,11 @@
 	return 0;
 
 	// unregister tracepoints on error
-fail_sched_process_exit:
+fail_sched_process_free:
 	GATOR_UNREGISTER_TRACE(sched_switch);
 fail_sched_switch:
+	GATOR_UNREGISTER_TRACE(sched_process_fork);
+fail_sched_process_fork:
 	pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
 
 	return -1;
@@ -231,8 +230,9 @@
 
 static void unregister_scheduler_tracepoints(void)
 {
+	GATOR_UNREGISTER_TRACE(sched_process_fork);
 	GATOR_UNREGISTER_TRACE(sched_switch);
-	GATOR_UNREGISTER_TRACE(sched_process_exit);
+	GATOR_UNREGISTER_TRACE(sched_process_free);
 	pr_debug("gator: unregistered tracepoints\n");
 }
 
diff --git a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
new file mode 100644
index 0000000..3db4543
--- /dev/null
+++ b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
@@ -0,0 +1,164 @@
+/*
+ * This confidential and proprietary software may be used only as
+ * authorised by a licensing agreement from ARM Limited
+ * (C) COPYRIGHT 2013 ARM Limited
+ * ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorised
+ * copies and copies may only be made to the extent permitted
+ * by a licensing agreement from ARM Limited.
+ */
+
+#ifndef __MALI_MJOLLNIR_PROFILING_GATOR_API_H__
+#define __MALI_MJOLLNIR_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/*
+ * The number of processor cores.  Update to suit your hardware implementation.
+ */
+#define MAX_NUM_FP_CORES            (4)
+#define MAX_NUM_VP_CORES            (1)
+#define MAX_NUM_L2_CACHE_CORES      (1)
+
+enum counters
+{
+    /* Timeline activity */
+    ACTIVITY_VP_0 = 0,
+    ACTIVITY_FP_0,
+    ACTIVITY_FP_1,
+    ACTIVITY_FP_2,
+    ACTIVITY_FP_3,
+
+    /* L2 cache counters */
+    COUNTER_L2_0_C0,
+    COUNTER_L2_0_C1,
+
+    /* Vertex processor counters */
+    COUNTER_VP_0_C0,
+    COUNTER_VP_0_C1,
+
+    /* Fragment processor counters */
+    COUNTER_FP_0_C0,
+    COUNTER_FP_0_C1,
+    COUNTER_FP_1_C0,
+    COUNTER_FP_1_C1,
+    COUNTER_FP_2_C0,
+    COUNTER_FP_2_C1,
+    COUNTER_FP_3_C0,
+    COUNTER_FP_3_C1,
+
+    /* EGL Software Counters */
+    COUNTER_EGL_BLIT_TIME,
+
+    /* GLES Software Counters */
+    COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+    COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+    COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+    COUNTER_GLES_DRAW_ARRAYS_CALLS,
+    COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+    COUNTER_GLES_DRAW_POINTS,
+    COUNTER_GLES_DRAW_LINES,
+    COUNTER_GLES_DRAW_LINE_LOOP,
+    COUNTER_GLES_DRAW_LINE_STRIP,
+    COUNTER_GLES_DRAW_TRIANGLES,
+    COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+    COUNTER_GLES_DRAW_TRIANGLE_FAN,
+    COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+    COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+    COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+    COUNTER_GLES_UPLOAD_VBO_TIME,
+    COUNTER_GLES_NUM_FLUSHES,
+    COUNTER_GLES_NUM_VSHADERS_GENERATED,
+    COUNTER_GLES_NUM_FSHADERS_GENERATED,
+    COUNTER_GLES_VSHADER_GEN_TIME,
+    COUNTER_GLES_FSHADER_GEN_TIME,
+    COUNTER_GLES_INPUT_TRIANGLES,
+    COUNTER_GLES_VXCACHE_HIT,
+    COUNTER_GLES_VXCACHE_MISS,
+    COUNTER_GLES_VXCACHE_COLLISION,
+    COUNTER_GLES_CULLED_TRIANGLES,
+    COUNTER_GLES_CULLED_LINES,
+    COUNTER_GLES_BACKFACE_TRIANGLES,
+    COUNTER_GLES_GBCLIP_TRIANGLES,
+    COUNTER_GLES_GBCLIP_LINES,
+    COUNTER_GLES_TRIANGLES_DRAWN,
+    COUNTER_GLES_DRAWCALL_TIME,
+    COUNTER_GLES_TRIANGLES_COUNT,
+    COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+    COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+    COUNTER_GLES_FAN_TRIANGLES_COUNT,
+    COUNTER_GLES_LINES_COUNT,
+    COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+    COUNTER_GLES_STRIP_LINES_COUNT,
+    COUNTER_GLES_LOOP_LINES_COUNT,
+
+    COUNTER_FILMSTRIP,
+    COUNTER_FREQUENCY,
+    COUNTER_VOLTAGE,
+
+    NUMBER_OF_EVENTS
+};
+
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_3
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0
+#define LAST_HW_COUNTER         COUNTER_FP_3_C1
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
+
+/* Signifies that the system is able to report voltage and frequency numbers. */
+#define DVFS_REPORTED_BY_DDK 1
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters
+{
+    u32 source0;
+    u32 value0;
+    u32 source1;
+    u32 value1;
+} _mali_profiling_core_counters;
+
+/*
+ * For compatibility with utgard.
+ */
+typedef struct _mali_profiling_l2_counter_values
+{
+    struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+typedef struct _mali_profiling_mali_version
+{
+    u32 mali_product_id;
+    u32 mali_version_major;
+    u32 mali_version_minor;
+    u32 num_of_l2_cores;
+    u32 num_of_fp_cores;
+    u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+extern void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+extern u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+/*
+ * List of possible actions allowing DDK to be controlled by Streamline.
+ * The following numbers are used by DDK to control the frame buffer dumping.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE      (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MJOLLNIR_PROFILING_GATOR_API_H__ */
diff --git a/drivers/gator/mali/mali_utgard_profiling_gator_api.h b/drivers/gator/mali/mali_utgard_profiling_gator_api.h
new file mode 100644
index 0000000..c02a1a4
--- /dev/null
+++ b/drivers/gator/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,202 @@
+/*
+ * This confidential and proprietary software may be used only as
+ * authorised by a licensing agreement from ARM Limited
+ * (C) COPYRIGHT 2013 ARM Limited
+ * ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorised
+ * copies and copies may only be made to the extent permitted
+ * by a licensing agreement from ARM Limited.
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define MALI_PROFILING_API_VERSION 4 /* presumably the value _mali_profiling_get_api_version() reports -- TODO confirm */
+
+#define MAX_NUM_L2_CACHE_CORES 3 /* matches COUNTER_L2_0_* .. COUNTER_L2_2_* in the enum below */
+#define MAX_NUM_FP_CORES 8       /* matches ACTIVITY_FP_0 .. ACTIVITY_FP_7 in the enum below */
+#define MAX_NUM_VP_CORES 1       /* matches ACTIVITY_VP_0 in the enum below */
+
+/** The list of events supported by the Mali DDK; ids are consecutive, starting at 0. */
+typedef enum
+{
+    /* Vertex processor activity */
+    ACTIVITY_VP_0 = 0,
+
+    /* Fragment processor activity */
+    ACTIVITY_FP_0, /* 1 */
+    ACTIVITY_FP_1,
+    ACTIVITY_FP_2,
+    ACTIVITY_FP_3,
+    ACTIVITY_FP_4,
+    ACTIVITY_FP_5,
+    ACTIVITY_FP_6,
+    ACTIVITY_FP_7,
+
+    /* L2 cache counters (two counters, C0 and C1, per L2 cache core) */
+    COUNTER_L2_0_C0, /* 9 */
+    COUNTER_L2_0_C1,
+    COUNTER_L2_1_C0,
+    COUNTER_L2_1_C1,
+    COUNTER_L2_2_C0,
+    COUNTER_L2_2_C1,
+
+    /* Vertex processor counters */
+    COUNTER_VP_0_C0, /* 15 */
+    COUNTER_VP_0_C1,
+
+    /* Fragment processor counters (two counters, C0 and C1, per core) */
+    COUNTER_FP_0_C0, /* 17 */
+    COUNTER_FP_0_C1,
+    COUNTER_FP_1_C0,
+    COUNTER_FP_1_C1,
+    COUNTER_FP_2_C0,
+    COUNTER_FP_2_C1,
+    COUNTER_FP_3_C0,
+    COUNTER_FP_3_C1,
+    COUNTER_FP_4_C0,
+    COUNTER_FP_4_C1,
+    COUNTER_FP_5_C0,
+    COUNTER_FP_5_C1,
+    COUNTER_FP_6_C0,
+    COUNTER_FP_6_C1,
+    COUNTER_FP_7_C0,
+    COUNTER_FP_7_C1, /* 32 */
+
+    /*
+     * If more hardware counters are added, the _mali_osk_hw_counter_table
+     * (not defined in this header) should also be updated.
+     */
+
+    /* EGL software counters */
+    COUNTER_EGL_BLIT_TIME, /* 33 */
+
+    /* GLES software counters */
+    COUNTER_GLES_DRAW_ELEMENTS_CALLS, /* 34 */
+    COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+    COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+    COUNTER_GLES_DRAW_ARRAYS_CALLS,
+    COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+    COUNTER_GLES_DRAW_POINTS,
+    COUNTER_GLES_DRAW_LINES,
+    COUNTER_GLES_DRAW_LINE_LOOP,
+    COUNTER_GLES_DRAW_LINE_STRIP,
+    COUNTER_GLES_DRAW_TRIANGLES,
+    COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+    COUNTER_GLES_DRAW_TRIANGLE_FAN,
+    COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+    COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+    COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+    COUNTER_GLES_UPLOAD_VBO_TIME,
+    COUNTER_GLES_NUM_FLUSHES,
+    COUNTER_GLES_NUM_VSHADERS_GENERATED,
+    COUNTER_GLES_NUM_FSHADERS_GENERATED,
+    COUNTER_GLES_VSHADER_GEN_TIME,
+    COUNTER_GLES_FSHADER_GEN_TIME,
+    COUNTER_GLES_INPUT_TRIANGLES,
+    COUNTER_GLES_VXCACHE_HIT,
+    COUNTER_GLES_VXCACHE_MISS,
+    COUNTER_GLES_VXCACHE_COLLISION,
+    COUNTER_GLES_CULLED_TRIANGLES,
+    COUNTER_GLES_CULLED_LINES,
+    COUNTER_GLES_BACKFACE_TRIANGLES,
+    COUNTER_GLES_GBCLIP_TRIANGLES,
+    COUNTER_GLES_GBCLIP_LINES,
+    COUNTER_GLES_TRIANGLES_DRAWN,
+    COUNTER_GLES_DRAWCALL_TIME,
+    COUNTER_GLES_TRIANGLES_COUNT,
+    COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+    COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+    COUNTER_GLES_FAN_TRIANGLES_COUNT,
+    COUNTER_GLES_LINES_COUNT,
+    COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+    COUNTER_GLES_STRIP_LINES_COUNT,
+    COUNTER_GLES_LOOP_LINES_COUNT, /* 73 */
+
+    /* Framebuffer capture pseudo-counter */
+    COUNTER_FILMSTRIP, /* 74 */
+
+    NUMBER_OF_EVENTS /* 75: count of the ids above, not an event itself */
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0   /* first id of the activity group in _mali_osk_counter_id */
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_7   /* last id of the activity group */
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0 /* first id of the L2/VP/FP hardware counter group */
+#define LAST_HW_COUNTER         COUNTER_FP_7_C1 /* last id of the hardware counter group */
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME         /* first id of the EGL/GLES software counter group */
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT /* last id of the software counter group */
+
+#define FIRST_SPECIAL_COUNTER   COUNTER_FILMSTRIP /* single-entry group: first and last are the same id */
+#define LAST_SPECIAL_COUNTER    COUNTER_FILMSTRIP
+
+/**
+ * Structure to pass performance counter data of one Mali core: two source/value pairs.
+ */
+typedef struct _mali_profiling_core_counters
+{
+	u32 source0; /* presumably the event source selected for counter 0 -- TODO confirm */
+	u32 value0;  /* presumably the value read from counter 0 -- TODO confirm */
+	u32 source1; /* presumably the event source selected for counter 1 -- TODO confirm */
+	u32 value1;  /* presumably the value read from counter 1 -- TODO confirm */
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of up to MAX_NUM_L2_CACHE_CORES Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values
+{
+	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version
+{
+	u32 mali_product_id;
+	u32 mali_version_major;
+	u32 mali_version_minor;
+	u32 num_of_l2_cores;
+	u32 num_of_fp_cores;
+	u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */