gator: Version 5.19

Signed-off-by: Jon Medhurst <tixy@linaro.org>
diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile
index 3dc9d05..2f86823 100644
--- a/drivers/gator/Makefile
+++ b/drivers/gator/Makefile
@@ -7,13 +7,14 @@
 obj-$(CONFIG_GATOR) := gator.o
 
 gator-y :=	gator_main.o \
-		gator_events_irq.o \
-		gator_events_sched.o \
-		gator_events_net.o \
 		gator_events_block.o \
+		gator_events_irq.o \
 		gator_events_meminfo.o \
-		gator_events_perf_pmu.o \
 		gator_events_mmapped.o \
+		gator_events_net.o \
+		gator_events_perf_pmu.o \
+		gator_events_sched.o \
+		gator_events_threads.o \
 
 # Convert the old GATOR_WITH_MALI_SUPPORT to the new kernel flags
 ifneq ($(GATOR_WITH_MALI_SUPPORT),)
@@ -48,10 +49,14 @@
   ccflags-$(CONFIG_GATOR_MALI_T6XX) += -DMALI_SUPPORT=MALI_T6xx
 endif
 
-# GATOR_TEST controls whether to include (=1) or exclude (=0) test code. 
+# GATOR_TEST controls whether to include (=1) or exclude (=0) test code.
 GATOR_TEST ?= 0
 EXTRA_CFLAGS +=	-DGATOR_TEST=$(GATOR_TEST)
 
+# Autodetect the block_rq_complete API: 1 = original, 0 = new (prototype takes nr_bytes). $(srctree) keeps the lookup working for O= builds.
+OLD_BLOCK_RQ_COMPLETE := $(shell grep -A3 block_rq_complete $(srctree)/include/trace/events/block.h | grep nr_bytes > /dev/null; echo $$?)
+EXTRA_CFLAGS += -DOLD_BLOCK_RQ_COMPLETE=$(OLD_BLOCK_RQ_COMPLETE)
+
 gator-$(CONFIG_ARM) +=	gator_events_armv6.o \
 			gator_events_armv7.o \
 			gator_events_ccn-504.o \
diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h
index 586cd9e..5ad0254 100644
--- a/drivers/gator/gator.h
+++ b/drivers/gator/gator.h
@@ -42,6 +42,10 @@
 #define AARCH64     0xd0f
 #define OTHER       0xfff
 
+// gpu enums
+#define MALI_4xx     1
+#define MALI_T6xx    2
+
 #define MAXSIZE_CORE_NAME 32
 
 struct gator_cpu {
@@ -82,13 +86,21 @@
 		register_trace_##probe_name(probe_##probe_name)
 #	define GATOR_UNREGISTER_TRACE(probe_name) \
 		unregister_trace_##probe_name(probe_##probe_name)
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
 #	define GATOR_DEFINE_PROBE(probe_name, proto) \
 		static void probe_##probe_name(void *data, PARAMS(proto))
 #	define GATOR_REGISTER_TRACE(probe_name) \
 		register_trace_##probe_name(probe_##probe_name, NULL)
 #	define GATOR_UNREGISTER_TRACE(probe_name) \
 		unregister_trace_##probe_name(probe_##probe_name, NULL)
+#else
+#	define GATOR_DEFINE_PROBE(probe_name, proto) \
+		extern struct tracepoint *gator_tracepoint_##probe_name; \
+		static void probe_##probe_name(void *data, PARAMS(proto))
+#	define GATOR_REGISTER_TRACE(probe_name) \
+		tracepoint_probe_register(gator_tracepoint_##probe_name, probe_##probe_name, NULL)
+#	define GATOR_UNREGISTER_TRACE(probe_name) \
+		tracepoint_probe_unregister(gator_tracepoint_##probe_name, probe_##probe_name, NULL)
 #endif
 
 /******************************************************************************
@@ -115,6 +127,8 @@
 
 void gator_backtrace_handler(struct pt_regs *const regs);
 
+void gator_marshal_activity_switch(int core, int key, int activity, int pid);
+
 #if !GATOR_IKS_SUPPORT
 
 #define get_physical_cpu() smp_processor_id()
diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c
index 9f305cf..e03c165 100644
--- a/drivers/gator/gator_backtrace.c
+++ b/drivers/gator/gator_backtrace.c
@@ -178,7 +178,7 @@
 	marshal_backtrace(PC_REG & ~1, NO_COOKIE, 1);
 #endif
 }
- 
+
 static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time)
 {
 	bool in_kernel;
diff --git a/drivers/gator/gator_buffer.c b/drivers/gator/gator_buffer.c
index eba22df..dfbc97d 100644
--- a/drivers/gator/gator_buffer.c
+++ b/drivers/gator/gator_buffer.c
@@ -37,12 +37,12 @@
 	case SCHED_TRACE_BUF:
 		frame = FRAME_SCHED_TRACE;
 		break;
-	case GPU_TRACE_BUF:
-		frame = FRAME_GPU_TRACE;
-		break;
 	case IDLE_BUF:
 		frame = FRAME_IDLE;
 		break;
+	case ACTIVITY_BUF:
+		frame = FRAME_ACTIVITY;
+		break;
 	default:
 		frame = -1;
 		break;
diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c
index 153119b..bd8a9ba 100644
--- a/drivers/gator/gator_events_armv7.c
+++ b/drivers/gator/gator_events_armv7.c
@@ -27,9 +27,9 @@
 // ccnt reg
 #define CCNT_REG	(1 << 31)
 
-#define CCNT 		0
+#define CCNT		0
 #define CNT0		1
-#define CNTMAX 		(6+1)
+#define CNTMAX		(6+1)
 
 static const char *pmnc_name;
 static int pmnc_counters;
diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c
index b2bc414..03eed4f 100644
--- a/drivers/gator/gator_events_block.c
+++ b/drivers/gator/gator_events_block.c
@@ -28,15 +28,25 @@
 static atomic_t blockCnt[BLOCK_TOTAL];
 static int blockGet[BLOCK_TOTAL * 4];
 
+// The block_rq_complete tracepoint prototype changed in 3.15, and that change has been backported to some older kernels. The Makefile tries to autodetect the correct value; if autodetection fails, change the #if below.
+#if OLD_BLOCK_RQ_COMPLETE
 GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq))
+#else
+GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq, unsigned int nr_bytes))
+#endif
 {
-	int write, size;
+	int write;
+	unsigned int size;
 
 	if (!rq)
 		return;
 
 	write = rq->cmd_flags & EVENTWRITE;
+#if OLD_BLOCK_RQ_COMPLETE
 	size = rq->resid_len;
+#else
+	size = nr_bytes;
+#endif
 
 	if (!size)
 		return;
diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c
index 85d4764..9e1c706 100644
--- a/drivers/gator/gator_events_mali_4xx.c
+++ b/drivers/gator/gator_events_mali_4xx.c
@@ -18,17 +18,27 @@
 #include "gator_events_mali_4xx.h"
 
 /*
- * There are (currently) four different variants of the comms between gator and Mali:
- * 1 (deprecated): No software counter support
- * 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears
- * 3 (default): Single tracepoint for all s/w counters in a bundle.
- * Interface style 3 is the default if no other is specified.  1 and 2 will be eliminated when
- * existing Mali DDKs are upgraded.
- * 4. As above, but for the Utgard (Mali-450) driver.
- */
+* There have been four different variants of the comms between gator and Mali depending on driver version:
+* # | DDK vsn range             | Support                                                             | Notes
+*
+* 1 | (obsolete)                | No software counter support                                         | Obsolete patches
+* 2 | (obsolete)                | Tracepoint called for each separate s/w counter value as it appears | Obsolete patches
+* 3 | r3p0-04rel0 - r3p2-01rel2 | Single tracepoint for all s/w counters in a bundle.                 |
+* 4 | r3p2-01rel3 - date        | As above but with extensions for MP devices (Mali-450)              | At least r4p0-00rel1
+*/
 
 #if !defined(GATOR_MALI_INTERFACE_STYLE)
-#define GATOR_MALI_INTERFACE_STYLE (3)
+#define GATOR_MALI_INTERFACE_STYLE (4)
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE == 1
+#error GATOR_MALI_INTERFACE_STYLE 1 is obsolete
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+#error GATOR_MALI_INTERFACE_STYLE 2 is obsolete
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+// Valid GATOR_MALI_INTERFACE_STYLE
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
 #endif
 
 #if GATOR_MALI_INTERFACE_STYLE < 4
@@ -44,6 +54,8 @@
 #error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
 #endif
 
+static const char mali_name[] = "Mali-4xx";
+
 /* gatorfs variables for counter enable state,
  * the event the counter should count and the
  * 'key' (a unique id set by gatord and returned
@@ -63,6 +75,7 @@
  */
 static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
 static unsigned long counter_prev[NUMBER_OF_EVENTS];
+static bool prev_set[NUMBER_OF_EVENTS];
 
 /* Note whether tracepoints have been registered */
 static int trace_registered;
@@ -76,18 +89,11 @@
 static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
 static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
 
-/**
- * Calculate the difference and handle the overflow.
- */
-static u32 get_difference(u32 start, u32 end)
-{
-	if (start - end >= 0) {
-		return start - end;
-	}
-
-	// Mali counters are unsigned 32 bit values that wrap.
-	return (4294967295u - end) + start;
-}
+extern mali_counter mali_activity[2];
+static const char* const mali_activity_names[] = {
+	"fragment",
+	"vertex",
+};
 
 /**
  * Returns non-zero if the given counter ID is an activity counter.
@@ -112,40 +118,6 @@
 typedef void _mali_profiling_get_mali_version_type(struct _mali_profiling_mali_version *values);
 typedef u32 _mali_profiling_get_l2_counters_type(_mali_profiling_l2_counter_values *values);
 
-#if GATOR_MALI_INTERFACE_STYLE == 2
-/**
- * Returns non-zero if the given counter ID is a software counter.
- */
-static inline int is_sw_counter(unsigned int event_id)
-{
-	return (event_id >= FIRST_SW_COUNTER && event_id <= LAST_SW_COUNTER);
-}
-#endif
-
-#if GATOR_MALI_INTERFACE_STYLE == 2
-/*
- * The Mali DDK uses s64 types to contain software counter values, but gator
- * can only use a maximum of 32 bits. This function scales a software counter
- * to an appropriate range.
- */
-static u32 scale_sw_counter_value(unsigned int event_id, signed long long value)
-{
-	u32 scaled_value;
-
-	switch (event_id) {
-	case COUNTER_GLES_UPLOAD_TEXTURE_TIME:
-	case COUNTER_GLES_UPLOAD_VBO_TIME:
-		scaled_value = (u32)div_s64(value, 1000000);
-		break;
-	default:
-		scaled_value = (u32)value;
-		break;
-	}
-
-	return scaled_value;
-}
-#endif
-
 /* Probe for continuously sampled counter */
 #if 0				//WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING
 GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr))
@@ -172,16 +144,6 @@
 	}
 }
 
-#if GATOR_MALI_INTERFACE_STYLE == 2
-GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value))
-{
-	if (is_sw_counter(event_id)) {
-		counter_data[event_id] = scale_sw_counter_value(event_id, value);
-	}
-}
-#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */
-
-#if GATOR_MALI_INTERFACE_STYLE >= 3
 GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
 {
 	u32 i;
@@ -193,7 +155,6 @@
 		}
 	}
 }
-#endif /* GATOR_MALI_INTERFACE_STYLE >= 3 */
 
 /**
  * Create a single filesystem entry for a specified event.
@@ -254,6 +215,7 @@
 		symbol_put(_mali_profiling_get_mali_version);
 	} else {
 		printk("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
+		printk("gator:  check your Mali DDK version versus the GATOR_MALI_INTERFACE_STYLE setting\n");
 	}
 }
 #endif
@@ -261,7 +223,6 @@
 static int create_files(struct super_block *sb, struct dentry *root)
 {
 	int event;
-	const char *mali_name = gator_mali_get_mali_name();
 
 	char buf[40];
 	int core_id;
@@ -278,6 +239,14 @@
 	initialise_version_info();
 #endif
 
+	mali_activity[0].cores = n_fp_cores;
+	mali_activity[1].cores = n_vp_cores;
+	for (event = 0; event < ARRAY_SIZE(mali_activity); event++) {
+		if (gator_mali_create_file_system(mali_name, mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0) {
+			return -1;
+		}
+	}
+
 	/* Vertex processor counters */
 	for (core_id = 0; core_id < n_vp_cores; core_id++) {
 		int activity_counter_id = ACTIVITY_VP_0;
@@ -413,7 +382,6 @@
 static void mali_counter_initialize(void)
 {
 	int i;
-	int core_id;
 
 	mali_profiling_control_type *mali_control;
 
@@ -463,15 +431,10 @@
 		n_l2_cores = 0;
 	}
 
-	for (core_id = 0; core_id < n_l2_cores; core_id++) {
-		int counter_id = COUNTER_L2_0_C0 + (2 * core_id);
-		counter_prev[counter_id] = 0;
-		counter_prev[counter_id + 1] = 0;
-	}
-
 	/* Clear counters in the start */
 	for (i = 0; i < NUMBER_OF_EVENTS; i++) {
 		counter_data[i] = 0;
+		prev_set[i] = false;
 	}
 }
 
@@ -528,23 +491,11 @@
 		return -1;
 	}
 
-#if GATOR_MALI_INTERFACE_STYLE == 1
-	/* None. */
-#elif GATOR_MALI_INTERFACE_STYLE == 2
-	/* For patched Mali driver. */
-	if (GATOR_REGISTER_TRACE(mali_sw_counter)) {
-		printk("gator: mali_sw_counter tracepoint failed to activate\n");
-		return -1;
-	}
-#elif GATOR_MALI_INTERFACE_STYLE >= 3
 	/* For Mali drivers with built-in support. */
 	if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
 		printk("gator: mali_sw_counters tracepoint failed to activate\n");
 		return -1;
 	}
-#else
-#error Unknown GATOR_MALI_INTERFACE_STYLE option.
-#endif
 
 	trace_registered = 1;
 
@@ -561,17 +512,8 @@
 	if (trace_registered) {
 		GATOR_UNREGISTER_TRACE(mali_hw_counter);
 
-#if GATOR_MALI_INTERFACE_STYLE == 1
-		/* None. */
-#elif GATOR_MALI_INTERFACE_STYLE == 2
-		/* For patched Mali driver. */
-		GATOR_UNREGISTER_TRACE(mali_sw_counter);
-#elif GATOR_MALI_INTERFACE_STYLE >= 3
 		/* For Mali drivers with built-in support. */
 		GATOR_UNREGISTER_TRACE(mali_sw_counters);
-#else
-#error Unknown GATOR_MALI_INTERFACE_STYLE option.
-#endif
 
 		pr_debug("gator: mali timeline tracepoint deactivated\n");
 
@@ -636,21 +578,23 @@
 
 			per_core = &cache_values.cores[cache_id];
 
-			if (counter_enabled[counter_id_0]) {
+			if (counter_enabled[counter_id_0] && prev_set[counter_id_0]) {
 				// Calculate and save src0's counter val0
 				counter_dump[len++] = counter_key[counter_id_0];
-				counter_dump[len++] = get_difference(per_core->value0, counter_prev[counter_id_0]);
+				counter_dump[len++] = per_core->value0 - counter_prev[counter_id_0];
 			}
 
-			if (counter_enabled[counter_id_1]) {
+			if (counter_enabled[counter_id_1] && prev_set[counter_id_1]) {
 				// Calculate and save src1's counter val1
 				counter_dump[len++] = counter_key[counter_id_1];
-				counter_dump[len++] = get_difference(per_core->value1, counter_prev[counter_id_1]);
+				counter_dump[len++] = per_core->value1 - counter_prev[counter_id_1];
 			}
 
 			// Save the previous values for the counters.
 			counter_prev[counter_id_0] = per_core->value0;
+			prev_set[counter_id_0] = true;
 			counter_prev[counter_id_1] = per_core->value1;
+			prev_set[counter_id_1] = true;
 		}
 	}
 
@@ -709,6 +653,8 @@
 
 	pr_debug("gator: mali init\n");
 
+	gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
+
 	for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
 		counter_enabled[cnt] = 0;
 		counter_event[cnt] = 0;
diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c
index dc58dcf..4f2cce4c 100644
--- a/drivers/gator/gator_events_mali_common.c
+++ b/drivers/gator/gator_events_mali_common.c
@@ -8,26 +8,6 @@
  */
 #include "gator_events_mali_common.h"
 
-static u32 gator_mali_get_id(void)
-{
-	return MALI_SUPPORT;
-}
-
-extern const char *gator_mali_get_mali_name(void)
-{
-	u32 id = gator_mali_get_id();
-
-	switch (id) {
-	case MALI_T6xx:
-		return "Mali-T6xx";
-	case MALI_4xx:
-		return "Mali-4xx";
-	default:
-		pr_debug("gator: Mali-T6xx: unknown Mali ID (%d)\n", id);
-		return "Mali-Unknown";
-	}
-}
-
 extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event)
 {
 	int err;
@@ -42,24 +22,31 @@
 		dir = gatorfs_mkdir(sb, root, buf);
 
 		if (dir == NULL) {
-			pr_debug("gator: Mali-T6xx: error creating file system for: %s (%s)", event_name, buf);
+			pr_debug("gator: %s: error creating file system for: %s (%s)", mali_name, event_name, buf);
 			return -1;
 		}
 
 		err = gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled);
 		if (err != 0) {
-			pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ulong for: %s (%s)", event_name, buf);
+			pr_debug("gator: %s: error calling gatorfs_create_ulong for: %s (%s)", mali_name, event_name, buf);
 			return -1;
 		}
 		err = gatorfs_create_ro_ulong(sb, dir, "key", &counter->key);
 		if (err != 0) {
-			pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf);
+			pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
 			return -1;
 		}
+		if (counter->cores != -1) {
+			err = gatorfs_create_ro_ulong(sb, dir, "cores", &counter->cores);
+			if (err != 0) {
+				pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
+				return -1;
+			}
+		}
 		if (event != NULL) {
 			err = gatorfs_create_ulong(sb, dir, "event", event);
 			if (err != 0) {
-				pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf);
+				pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
 				return -1;
 			}
 		}
@@ -77,5 +64,6 @@
 
 		counter->key = gator_events_get_key();
 		counter->enabled = 0;
+		counter->cores = -1;
 	}
 }
diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h
index 41c2a3c..91d871b 100644
--- a/drivers/gator/gator_events_mali_common.h
+++ b/drivers/gator/gator_events_mali_common.h
@@ -18,10 +18,6 @@
 #include <linux/slab.h>
 #include <asm/io.h>
 
-/* Device codes for each known GPU */
-#define MALI_4xx     (0x0b07)
-#define MALI_T6xx    (0x0056)
-
 /* Ensure that MALI_SUPPORT has been defined to something. */
 #ifndef MALI_SUPPORT
 #error MALI_SUPPORT not defined!
@@ -35,8 +31,12 @@
  * Runtime state information for a counter.
  */
 typedef struct {
-	unsigned long key;	/* 'key' (a unique id set by gatord and returned by gator.ko) */
-	unsigned long enabled;	/* counter enable state */
+	// 'key' (a unique id set by gatord and returned by gator.ko)
+	unsigned long key;
+	// counter enable state
+	unsigned long enabled;
+	// for activity counters, the number of cores, otherwise -1 (i.e. ULONG_MAX, as the field is unsigned)
+	unsigned long cores;
 } mali_counter;
 
 /*
@@ -54,17 +54,9 @@
 extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
 
 /**
- * Returns a name which identifies the GPU type (eg Mali-4xx, Mali-T6xx).
- *
- * @return The name as a constant string.
- */
-extern const char *gator_mali_get_mali_name(void);
-
-/**
  * Creates a filesystem entry under /dev/gator relating to the specified event name and key, and
  * associate the key/enable values with this entry point.
  *
- * @param mali_name A name related to the type of GPU, obtained from a call to gator_mali_get_mali_name()
  * @param event_name The name of the event.
  * @param sb Linux super block
  * @param root Directory under which the entry will be created.
diff --git a/drivers/gator/gator_events_mali_t6xx.c b/drivers/gator/gator_events_mali_t6xx.c
index 76f14ee..e56ba84 100644
--- a/drivers/gator/gator_events_mali_t6xx.c
+++ b/drivers/gator/gator_events_mali_t6xx.c
@@ -32,6 +32,8 @@
 #error MALI_SUPPORT set to an invalid device code: expecting MALI_T6xx
 #endif
 
+static const char mali_name[] = "Mali-T6xx";
+
 /* Counters for Mali-T6xx:
  *
  *  - Timeline events
@@ -292,7 +294,6 @@
 	 * Create the filesystem for all events
 	 */
 	int counter_index = 0;
-	const char *mali_name = gator_mali_get_mali_name();
 	mali_profiling_control_type *mali_control;
 
 	for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) {
@@ -317,7 +318,7 @@
 	}
 
 	mali_control = symbol_get(_mali_profiling_control);
-	if (mali_control) {	
+	if (mali_control) {
 		if (gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event) != 0) {
 			return -1;
 		}
diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_t6xx_hw.c
index dfbc91f..3a072bb 100644
--- a/drivers/gator/gator_events_mali_t6xx_hw.c
+++ b/drivers/gator/gator_events_mali_t6xx_hw.c
@@ -16,7 +16,10 @@
 #include <asm/io.h>
 
 /* Mali T6xx DDK includes */
-#ifdef MALI_DIR_MIDGARD
+#if defined(MALI_SIMPLE_API)
+/* Header with wrapper functions to kbase structures and functions */
+#include "mali/mali_dd_gator_api.h"
+#elif defined(MALI_DIR_MIDGARD)
 /* New DDK Directory structure with kernel/drivers/gpu/arm/midgard*/
 #include "mali_linux_trace.h"
 #include "mali_kbase.h"
@@ -28,37 +31,49 @@
 #include "kbase/src/linux/mali_kbase_mem_linux.h"
 #endif
 
-#include "gator_events_mali_common.h"
-
 /* If API version is not specified then assume API version 1. */
 #ifndef MALI_DDK_GATOR_API_VERSION
 #define MALI_DDK_GATOR_API_VERSION 1
 #endif
 
-#if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2)
-#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3 DDK).
+#if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2) && (MALI_DDK_GATOR_API_VERSION != 3)
+#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3 DDK, or 3 for r? DDK).
 #endif
 
+#include "gator_events_mali_common.h"
+
 /*
  * Mali-T6xx
  */
+#if MALI_DDK_GATOR_API_VERSION == 3
+typedef uint32_t kbase_dd_instr_hwcnt_dump_irq_type(struct mali_dd_hwcnt_handles *);
+typedef uint32_t kbase_dd_instr_hwcnt_dump_complete_type(struct mali_dd_hwcnt_handles *, uint32_t *);
+typedef struct mali_dd_hwcnt_handles* mali_dd_hwcnt_init_type(struct mali_dd_hwcnt_info *);
+typedef void mali_dd_hwcnt_clear_type(struct mali_dd_hwcnt_info *, struct mali_dd_hwcnt_handles *);
+
+static kbase_dd_instr_hwcnt_dump_irq_type *kbase_dd_instr_hwcnt_dump_irq_symbol;
+static kbase_dd_instr_hwcnt_dump_complete_type *kbase_dd_instr_hwcnt_dump_complete_symbol;
+static mali_dd_hwcnt_init_type *mali_dd_hwcnt_init_symbol;
+static mali_dd_hwcnt_clear_type *mali_dd_hwcnt_clear_symbol;
+
+#else
 typedef struct kbase_device *kbase_find_device_type(int);
-typedef kbase_context *kbase_create_context_type(kbase_device *);
-typedef void kbase_destroy_context_type(kbase_context *);
+typedef struct kbase_context *kbase_create_context_type(struct kbase_device *);
+typedef void kbase_destroy_context_type(struct kbase_context *);
 
 #if MALI_DDK_GATOR_API_VERSION == 1
-typedef void *kbase_va_alloc_type(kbase_context *, u32);
-typedef void kbase_va_free_type(kbase_context *, void *);
+typedef void *kbase_va_alloc_type(struct kbase_context *, u32);
+typedef void kbase_va_free_type(struct kbase_context *, void *);
 #elif MALI_DDK_GATOR_API_VERSION == 2
-typedef void *kbase_va_alloc_type(kbase_context *, u32, kbase_hwc_dma_mapping * handle);
-typedef void kbase_va_free_type(kbase_context *, kbase_hwc_dma_mapping * handle);
+typedef void *kbase_va_alloc_type(struct kbase_context *, u32, kbase_hwc_dma_mapping * handle);
+typedef void kbase_va_free_type(struct kbase_context *, kbase_hwc_dma_mapping * handle);
 #endif
 
-typedef mali_error kbase_instr_hwcnt_enable_type(kbase_context *, kbase_uk_hwcnt_setup *);
-typedef mali_error kbase_instr_hwcnt_disable_type(kbase_context *);
-typedef mali_error kbase_instr_hwcnt_clear_type(kbase_context *);
-typedef mali_error kbase_instr_hwcnt_dump_irq_type(kbase_context *);
-typedef mali_bool kbase_instr_hwcnt_dump_complete_type(kbase_context *, mali_bool *);
+typedef mali_error kbase_instr_hwcnt_enable_type(struct kbase_context *, struct kbase_uk_hwcnt_setup *);
+typedef mali_error kbase_instr_hwcnt_disable_type(struct kbase_context *);
+typedef mali_error kbase_instr_hwcnt_clear_type(struct kbase_context *);
+typedef mali_error kbase_instr_hwcnt_dump_irq_type(struct kbase_context *);
+typedef mali_bool kbase_instr_hwcnt_dump_complete_type(struct kbase_context *, mali_bool *);
 
 static kbase_find_device_type *kbase_find_device_symbol;
 static kbase_create_context_type *kbase_create_context_symbol;
@@ -70,6 +85,7 @@
 static kbase_instr_hwcnt_disable_type *kbase_instr_hwcnt_disable_symbol;
 static kbase_va_free_type *kbase_va_free_symbol;
 static kbase_destroy_context_type *kbase_destroy_context_symbol;
+#endif
 
 static long shader_present_low = 0;
 
@@ -99,6 +115,8 @@
 	MMU_BLOCK
 };
 
+static const char mali_name[] = "Mali-T6xx";
+
 /* Counters for Mali-T6xx:
  *
  *  - HW counters, 4 blocks
@@ -381,6 +399,14 @@
 #define GET_HW_BLOCK(c) (((c) >> 6) & 0x3)
 #define GET_COUNTER_OFFSET(c) ((c) & 0x3f)
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+/* Opaque handles for kbase_context and kbase_hwc_dma_mapping */
+static struct mali_dd_hwcnt_handles *handles;
+
+/* Information about hardware counters */
+static struct mali_dd_hwcnt_info *in_out_info;
+
+#else
 /* Memory to dump hardware counters into */
 static void *kernel_dump_buffer;
 
@@ -390,14 +416,9 @@
 #endif
 
 /* kbase context and device */
-static kbase_context *kbcontext = NULL;
+static struct kbase_context *kbcontext = NULL;
 static struct kbase_device *kbdevice = NULL;
-
-/*
- * The following function has no external prototype in older DDK revisions.  When the DDK
- * is updated then this should be removed.
- */
-struct kbase_device *kbase_find_device(int minor);
+#endif
 
 static volatile bool kbase_device_busy = false;
 static unsigned int num_hardware_counters_enabled;
@@ -412,6 +433,13 @@
  */
 static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2];
 
+extern mali_counter mali_activity[3];
+static const char* const mali_activity_names[] = {
+	"fragment",
+	"vertex",
+	"opencl",
+};
+
 #define SYMBOL_GET(FUNCTION, ERROR_COUNT) \
 	if(FUNCTION ## _symbol) \
 	{ \
@@ -431,8 +459,8 @@
 #define SYMBOL_CLEANUP(FUNCTION) \
 	if(FUNCTION ## _symbol) \
 	{ \
-        symbol_put(FUNCTION); \
-        FUNCTION ## _symbol = NULL; \
+		symbol_put(FUNCTION); \
+		FUNCTION ## _symbol = NULL; \
 	}
 
 /**
@@ -442,6 +470,12 @@
 static int init_symbols(void)
 {
 	int error_count = 0;
+#if MALI_DDK_GATOR_API_VERSION == 3
+	SYMBOL_GET(kbase_dd_instr_hwcnt_dump_irq, error_count);
+	SYMBOL_GET(kbase_dd_instr_hwcnt_dump_complete, error_count);
+	SYMBOL_GET(mali_dd_hwcnt_init, error_count);
+	SYMBOL_GET(mali_dd_hwcnt_clear, error_count);
+#else
 	SYMBOL_GET(kbase_find_device, error_count);
 	SYMBOL_GET(kbase_create_context, error_count);
 	SYMBOL_GET(kbase_va_alloc, error_count);
@@ -452,6 +486,7 @@
 	SYMBOL_GET(kbase_instr_hwcnt_disable, error_count);
 	SYMBOL_GET(kbase_va_free, error_count);
 	SYMBOL_GET(kbase_destroy_context, error_count);
+#endif
 
 	return error_count;
 }
@@ -461,6 +496,12 @@
  */
 static void clean_symbols(void)
 {
+#if MALI_DDK_GATOR_API_VERSION == 3
+	SYMBOL_CLEANUP(kbase_dd_instr_hwcnt_dump_irq);
+	SYMBOL_CLEANUP(kbase_dd_instr_hwcnt_dump_complete);
+	SYMBOL_CLEANUP(mali_dd_hwcnt_init);
+	SYMBOL_CLEANUP(mali_dd_hwcnt_clear);
+#else
 	SYMBOL_CLEANUP(kbase_find_device);
 	SYMBOL_CLEANUP(kbase_create_context);
 	SYMBOL_CLEANUP(kbase_va_alloc);
@@ -471,6 +512,7 @@
 	SYMBOL_CLEANUP(kbase_instr_hwcnt_disable);
 	SYMBOL_CLEANUP(kbase_va_free);
 	SYMBOL_CLEANUP(kbase_destroy_context);
+#endif
 }
 
 /**
@@ -502,11 +544,13 @@
 
 static int start(void)
 {
-	kbase_uk_hwcnt_setup setup;
-	mali_error err;
-	int cnt;
-	u16 bitmask[] = { 0, 0, 0, 0 };
+#if MALI_DDK_GATOR_API_VERSION < 3
+	struct kbase_uk_hwcnt_setup setup;
 	unsigned long long shadersPresent = 0;
+	u16 bitmask[] = { 0, 0, 0, 0 };
+	mali_error err;
+#endif
+	int cnt;
 
 	/* Setup HW counters */
 	num_hardware_counters_enabled = 0;
@@ -515,18 +559,52 @@
 		pr_debug("Unexpected number of hardware counters defined: expecting 256, got %d\n", NUMBER_OF_HARDWARE_COUNTERS);
 	}
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+	/* Allocate the mali_dd_hwcnt_info structure and clear its bitmasks. NOTE(review): the kmalloc() result is used below without a NULL check. */
+	in_out_info = kmalloc(sizeof(struct mali_dd_hwcnt_info), GFP_KERNEL);
+	for (cnt = 0; cnt < 4; cnt++){
+		in_out_info->bitmask[cnt] = 0;
+	}
+#endif
 	/* Calculate enable bitmasks based on counters_enabled array */
 	for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
 		const mali_counter *counter = &counters[cnt];
 		if (counter->enabled) {
 			int block = GET_HW_BLOCK(cnt);
 			int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
+#if MALI_DDK_GATOR_API_VERSION == 3
+			in_out_info->bitmask[block] |= (1 << enable_bit);
+#else
 			bitmask[block] |= (1 << enable_bit);
+#endif
 			pr_debug("gator: Mali-T6xx: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
 			num_hardware_counters_enabled++;
 		}
 	}
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+	/* Create a kbase context for HW counters */
+	if (num_hardware_counters_enabled > 0) {
+		if (init_symbols() > 0) {
+			clean_symbols();
+			/* No Mali driver code entrypoints found - not a fault. */
+			return 0;
+		}
+
+		handles = mali_dd_hwcnt_init_symbol(in_out_info);
+
+		if(handles == NULL) {
+			goto out;
+		}
+
+		/* See if we can get the number of shader cores */
+		shader_present_low = (unsigned long)in_out_info->shader_present_bitmap;
+
+		kbase_device_busy = false;
+	}
+
+	return 0;
+#else
 	/* Create a kbase context for HW counters */
 	if (num_hardware_counters_enabled > 0) {
 		if (init_symbols() > 0) {
@@ -606,6 +684,7 @@
 
 destroy_context:
 	kbase_destroy_context_symbol(kbcontext);
+#endif
 
 out:
 	clean_symbols();
@@ -615,7 +694,11 @@
 static void stop(void)
 {
 	unsigned int cnt;
-	kbase_context *temp_kbcontext;
+#if MALI_DDK_GATOR_API_VERSION == 3
+	struct mali_dd_hwcnt_handles *temp_hand;
+#else
+	struct kbase_context *temp_kbcontext;
+#endif
 
 	pr_debug("gator: Mali-T6xx: stop\n");
 
@@ -625,6 +708,20 @@
 	}
 
 	/* Destroy the context for HW counters */
+#if MALI_DDK_GATOR_API_VERSION == 3
+	if (num_hardware_counters_enabled > 0 && handles != NULL) {
+		/*
+		 * Set the global variable to NULL before destroying it, because
+		 * other functions check it before using it.
+		 */
+		temp_hand = handles;
+		handles = NULL;
+
+		mali_dd_hwcnt_clear_symbol(in_out_info, temp_hand);
+
+		kfree(in_out_info);
+
+#else
 	if (num_hardware_counters_enabled > 0 && kbcontext != NULL) {
 		/*
 		 * Set the global variable to NULL before destroying it, because
@@ -642,6 +739,7 @@
 #endif
 
 		kbase_destroy_context_symbol(temp_kbcontext);
+#endif
 
 		pr_debug("gator: Mali-T6xx: hardware counters stopped\n");
 
@@ -654,7 +752,7 @@
 	int cnt;
 	int len = 0;
 	u32 value = 0;
-	mali_bool success;
+	uint32_t success;
 
 	struct timespec current_time;
 	static u32 prev_time_s = 0;
@@ -686,12 +784,21 @@
 			0x500	/* VITHAR_MEMORY_SYSTEM,   Block 3 */
 		};
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+		if (!handles) {
+			return -1;
+		}
+
+		/* Mali symbols can be called safely since handles is valid (checked above) */
+		if (kbase_dd_instr_hwcnt_dump_complete_symbol(handles, &success) == MALI_TRUE) {
+#else
 		if (!kbcontext) {
 			return -1;
 		}
 
 		/* Mali symbols can be called safely since a kbcontext is valid */
 		if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) {
+#endif
 			kbase_device_busy = false;
 
 			if (success == MALI_TRUE) {
@@ -702,7 +809,11 @@
 						const int block = GET_HW_BLOCK(cnt);
 						const int counter_offset = GET_COUNTER_OFFSET(cnt);
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+						const char* block_base_address = (char*)in_out_info->kernel_dump_buffer + vithar_blocks[block];
+#else
 						const char* block_base_address = (char*)kernel_dump_buffer + vithar_blocks[block];
+#endif
 
 						/* If counter belongs to shader block need to take into account all cores */
 						if (block == SHADER_BLOCK) {
@@ -741,7 +852,11 @@
 
 		if (!kbase_device_busy) {
 			kbase_device_busy = true;
+#if MALI_DDK_GATOR_API_VERSION == 3
+			kbase_dd_instr_hwcnt_dump_irq_symbol(handles);
+#else
 			kbase_instr_hwcnt_dump_irq_symbol(kbcontext);
+#endif
 		}
 	}
 
@@ -760,7 +875,12 @@
 	 * Create the filesystem for all events
 	 */
 	int counter_index = 0;
-	const char *mali_name = gator_mali_get_mali_name();
+
+	for (event = 0; event < ARRAY_SIZE(mali_activity); event++) {
+		if (gator_mali_create_file_system(mali_name, mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0) {
+			return -1;
+		}
+	}
 
 	for (event = 0; event < NUMBER_OF_HARDWARE_COUNTERS; event++) {
 		if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event], NULL) != 0)
@@ -786,6 +906,7 @@
 	test_all_is_read_scheduled();
 #endif
 
+	gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
 	gator_mali_initialise_counters(counters, NUMBER_OF_HARDWARE_COUNTERS);
 
 	return gator_events_install(&gator_events_mali_t6xx_interface);
diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c
index 3b248ec..5bc01c4 100644
--- a/drivers/gator/gator_events_mmapped.c
+++ b/drivers/gator/gator_events_mmapped.c
@@ -8,21 +8,25 @@
  * published by the Free Software Foundation.
  *
  * Similar entries to those below must be present in the events.xml file.
- * To add them to the events.xml, create an events-mmap.xml with the 
+ * To add them to the events.xml, create an events-mmap.xml with the
  * following contents and rebuild gatord:
  *
- * <counter_set name="mmapped_cnt" count="3"/>
- * <category name="mmapped" counter_set="mmapped_cnt" per_cpu="no">
- *   <event event="0x0" title="Simulated1" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
- *   <event event="0x1" title="Simulated2" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
- *   <event event="0x2" title="Simulated3" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
+ * <category name="mmapped">
+ *   <event counter="mmapped_cnt0" title="Simulated1" name="Sine" display="maximum" class="absolute" description="Sort-of-sine"/>
+ *   <event counter="mmapped_cnt1" title="Simulated2" name="Triangle" display="maximum" class="absolute" description="Triangular wave"/>
+ *   <event counter="mmapped_cnt2" title="Simulated3" name="PWM" display="maximum" class="absolute" description="PWM Signal"/>
  * </category>
  *
- * When adding custom events, be sure do the following
+ * When adding custom events, be sure to do the following:
  * - add any needed .c files to the gator driver Makefile
  * - call gator_events_install in the events init function
  * - add the init function to GATOR_EVENTS_LIST in gator_main.c
  * - add a new events-*.xml file to the gator daemon and rebuild
+ *
+ * Troubleshooting:
+ * - verify the new events are part of events.xml, which is created when building the daemon
+ * - verify the new events exist at /dev/gator/events/ once gatord is launched
+ * - verify the counter name in the XML matches the name at /dev/gator/events
  */
 
 #include <linux/init.h>
@@ -37,7 +41,6 @@
 
 static struct {
 	unsigned long enabled;
-	unsigned long event;
 	unsigned long key;
 } mmapped_counters[MMAPPED_COUNTERS_NUM];
 
@@ -47,7 +50,7 @@
 
 /* Adds mmapped_cntX directories and enabled, event, and key files to /dev/gator/events */
 static int gator_events_mmapped_create_files(struct super_block *sb,
-					    struct dentry *root)
+					     struct dentry *root)
 {
 	int i;
 
@@ -61,8 +64,6 @@
 			return -1;
 		gatorfs_create_ulong(sb, dir, "enabled",
 				     &mmapped_counters[i].enabled);
-		gatorfs_create_ulong(sb, dir, "event",
-				     &mmapped_counters[i].event);
 		gatorfs_create_ro_ulong(sb, dir, "key",
 					&mmapped_counters[i].key);
 	}
@@ -177,8 +178,7 @@
 		if (mmapped_counters[i].enabled) {
 			mmapped_buffer[len++] = mmapped_counters[i].key;
 			mmapped_buffer[len++] =
-			    mmapped_simulate(mmapped_counters[i].event,
-					    delta_in_us);
+			    mmapped_simulate(i, delta_in_us);
 		}
 	}
 
diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c
index 8b2d67a..06bbad5 100644
--- a/drivers/gator/gator_events_perf_pmu.c
+++ b/drivers/gator/gator_events_perf_pmu.c
@@ -470,10 +470,10 @@
 
 	switch (probe_cci_revision()) {
 	case 0:
-		cci_name = "cci-400";
+		cci_name = "CCI_400";
 		break;
 	case 1:
-		cci_name = "cci-400-r1";
+		cci_name = "CCI_400-r1";
 		break;
 	default:
 		pr_debug("gator: unrecognized cci-400 revision\n");
@@ -549,7 +549,7 @@
 		}
 
 		if (pe->pmu != NULL && type == pe->pmu->type) {
-			if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0) {
+			if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0 || strcmp("CCI_400-r1", pe->pmu->name) == 0) {
 				gator_events_perf_pmu_cci_init(type);
 			} else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) {
 				found_cpu = true;
diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c
index 8ca251a..2e5be8d 100644
--- a/drivers/gator/gator_events_scorpion.c
+++ b/drivers/gator/gator_events_scorpion.c
@@ -26,9 +26,9 @@
 // ccnt reg
 #define CCNT_REG	(1 << 31)
 
-#define CCNT 		0
+#define CCNT		0
 #define CNT0		1
-#define CNTMAX 		(4+1)
+#define CNTMAX		(4+1)
 
 static unsigned long pmnc_enabled[CNTMAX];
 static unsigned long pmnc_event[CNTMAX];
diff --git a/drivers/gator/gator_events_threads.c b/drivers/gator/gator_events_threads.c
new file mode 100644
index 0000000..9de8586
--- /dev/null
+++ b/drivers/gator/gator_events_threads.c
@@ -0,0 +1,115 @@
+/*
+ * Sample activity provider
+ *
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * See gator_events_mmapped.c for additional directions and
+ * troubleshooting.
+ *
+ * For this sample to work these entries must be present in the
+ * events.xml file. So create an events-threads.xml in the gator
+ * daemon source directory with the following contents and rebuild
+ * gatord:
+ *
+ * <category name="threads">
+ *   <event counter="Linux_threads" title="Linux" name="Threads" class="activity" activity1="odd" activity_color1="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" description="Linux syscall activity"/>
+ * </category>
+ */
+
+#include <trace/events/sched.h>
+
+#include "gator.h"
+
+static ulong threads_enabled;
+static ulong threads_key;
+static ulong threads_cores;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
+#else
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
+#endif
+{
+	int cpu = get_physical_cpu();
+	int pid = next->pid;
+	if (pid == 0) {
+		// idle
+		gator_marshal_activity_switch(cpu, threads_key, 0, 0);
+	} else if (pid & 1) {
+		// odd
+		gator_marshal_activity_switch(cpu, threads_key, 1, pid);
+	} else {
+		// even
+		//gator_marshal_activity_switch(cpu, threads_key, 2, current->pid);
+		// Multiple activities are not yet supported so emit idle
+		gator_marshal_activity_switch(cpu, threads_key, 0, 0);
+	}
+}
+
+// Adds Linux_threads directory and enabled, key, and cores files to /dev/gator/events
+static int gator_events_threads_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *dir;
+
+	dir = gatorfs_mkdir(sb, root, "Linux_threads");
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &threads_enabled);
+	gatorfs_create_ro_ulong(sb, dir, "key", &threads_key);
+	// Number of cores associated with this activity
+	gatorfs_create_ro_ulong(sb, dir, "cores", &threads_cores);
+
+	return 0;
+}
+
+static int gator_events_threads_start(void)
+{
+	int cpu;
+
+	if (threads_enabled) {
+		preempt_disable();
+		for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
+			gator_marshal_activity_switch(cpu, threads_key, 0, 0);
+		}
+		preempt_enable();
+
+		if (GATOR_REGISTER_TRACE(sched_switch)) {
+			goto fail_sched_switch;
+		}
+	}
+
+	return 0;
+
+fail_sched_switch:
+	return -1;
+}
+
+static void gator_events_threads_stop(void)
+{
+	if (threads_enabled) {
+		GATOR_UNREGISTER_TRACE(sched_switch);
+	}
+
+	threads_enabled = 0;
+}
+
+static struct gator_interface gator_events_threads_interface = {
+	.create_files = gator_events_threads_create_files,
+	.start = gator_events_threads_start,
+	.stop = gator_events_threads_stop,
+};
+
+// Must not be static. Ensure that this init function is added to GATOR_EVENTS_LIST in gator_main.c
+int __init gator_events_threads_init(void)
+{
+	threads_enabled = 0;
+	threads_key = gator_events_get_key();
+	threads_cores = nr_cpu_ids;
+
+	return gator_events_install(&gator_events_threads_interface);
+}
diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c
index e90dfcc..9180b874 100644
--- a/drivers/gator/gator_iks.c
+++ b/drivers/gator/gator_iks.c
@@ -150,7 +150,7 @@
 	preempt_disable();
 	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
 		if (mpidr_cpus[cpu] != NULL) {
-			gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid, mpidr_cpus[cpu]);
+			gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid);
 		}
 	}
 	preempt_enable();
diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c
index e67f7c5..0d867f2 100644
--- a/drivers/gator/gator_main.c
+++ b/drivers/gator/gator_main.c
@@ -8,7 +8,7 @@
  */
 
 // This version must match the gator daemon version
-#define PROTOCOL_VERSION 18
+#define PROTOCOL_VERSION 19
 static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 
 #include <linux/slab.h>
@@ -71,8 +71,8 @@
 #define BLOCK_COUNTER_BUFFER_SIZE (128*1024)
 #define ANNOTATE_BUFFER_SIZE      (128*1024)	// annotate counters have the core as part of the data and the core value in the frame header may be discarded
 #define SCHED_TRACE_BUFFER_SIZE   (128*1024)
-#define GPU_TRACE_BUFFER_SIZE     (64*1024)	// gpu trace counters have the core as part of the data and the core value in the frame header may be discarded
 #define IDLE_BUFFER_SIZE          (32*1024)	// idle counters have the core as part of the data and the core value in the frame header may be discarded
+#define ACTIVITY_BUFFER_SIZE      (128*1024)
 
 #define NO_COOKIE      0U
 #define UNRESOLVED_COOKIE ~0U
@@ -84,8 +84,8 @@
 #define FRAME_BLOCK_COUNTER 5
 #define FRAME_ANNOTATE      6
 #define FRAME_SCHED_TRACE   7
-#define FRAME_GPU_TRACE     8
 #define FRAME_IDLE          9
+#define FRAME_ACTIVITY     13
 
 #define MESSAGE_END_BACKTRACE 1
 
@@ -94,14 +94,9 @@
 #define MESSAGE_THREAD_NAME 2
 #define MESSAGE_LINK        4
 
-// GPU Trace Frame Messages
-#define MESSAGE_GPU_START 1
-#define MESSAGE_GPU_STOP  2
-
 // Scheduler Trace Frame Messages
 #define MESSAGE_SCHED_SWITCH 1
 #define MESSAGE_SCHED_EXIT   2
-#define MESSAGE_SCHED_START  3
 
 // Idle Frame Messages
 #define MESSAGE_IDLE_ENTER 1
@@ -111,6 +106,10 @@
 #define MESSAGE_SUMMARY   1
 #define MESSAGE_CORE_NAME 3
 
+// Activity Frame Messages
+#define MESSAGE_SWITCH 2
+#define MESSAGE_EXIT   3
+
 #define MAXSIZE_PACK32     5
 #define MAXSIZE_PACK64    10
 
@@ -132,8 +131,8 @@
 	BLOCK_COUNTER_BUF,
 	ANNOTATE_BUF,
 	SCHED_TRACE_BUF,
-	GPU_TRACE_BUF,
 	IDLE_BUF,
+	ACTIVITY_BUF,
 	NUM_GATOR_BUFS
 };
 
@@ -175,6 +174,7 @@
 
 static bool printed_monotonic_warning;
 
+static u32 gator_cpuids[NR_CPUS];
 static bool sent_core_name[NR_CPUS];
 
 static DEFINE_PER_CPU(bool, in_scheduler_context);
@@ -226,6 +226,7 @@
 	GATOR_EVENT(gator_events_perf_pmu_init) \
 	GATOR_EVENT(gator_events_sched_init) \
 	GATOR_EVENT(gator_events_scorpion_init) \
+	GATOR_EVENT(gator_events_threads_init) \
 
 #define GATOR_EVENT(EVENT_INIT) __weak int EVENT_INIT(void);
 GATOR_EVENTS_LIST
@@ -570,25 +571,37 @@
 	}
 }
 
-#if defined(__arm__) || defined(__aarch64__)
-static void gator_send_core_name(int cpu, const u32 cpuid, const struct gator_cpu *const gator_cpu)
+static void gator_send_core_name(const int cpu, const u32 cpuid)
 {
-	const char *core_name = NULL;
-	char core_name_buf[32];
+#if defined(__arm__) || defined(__aarch64__)
+	if (!sent_core_name[cpu] || (cpuid != gator_cpuids[cpu])) {
+		const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(cpuid);
+		const char *core_name = NULL;
+		char core_name_buf[32];
 
-	if (!sent_core_name[cpu]) {
+		// Save off this cpuid
+		gator_cpuids[cpu] = cpuid;
 		if (gator_cpu != NULL) {
 			core_name = gator_cpu->core_name;
 		} else {
-			snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+			if (cpuid == -1) {
+				snprintf(core_name_buf, sizeof(core_name_buf), "Unknown");
+			} else {
+				snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+			}
 			core_name = core_name_buf;
 		}
 
 		marshal_core_name(cpu, cpuid, core_name);
 		sent_core_name[cpu] = true;
 	}
-}
 #endif
+}
+
+static void gator_read_cpuid(void * arg)
+{
+	gator_cpuids[get_physical_cpu()] = gator_cpuid();
+}
 
 // This function runs in interrupt context and on the appropriate core
 static void gator_timer_online(void *migrate)
@@ -598,6 +611,9 @@
 	int *buffer;
 	u64 time;
 
+	// Send what is currently running on this core
+	marshal_sched_trace_switch(current->pid, 0);
+
 	gator_trace_power_online();
 
 	// online any events and output counters
@@ -617,12 +633,7 @@
 		gator_hrtimer_online();
 	}
 
-#if defined(__arm__) || defined(__aarch64__)
-	if (!sent_core_name[cpu]) {
-		const u32 cpuid = gator_cpuid();
-		gator_send_core_name(cpu, cpuid, gator_find_cpu_by_cpuid(cpuid));
-	}
-#endif
+	gator_send_core_name(cpu, gator_cpuid());
 }
 
 // This function runs in interrupt context and may be running on a core other than core 'cpu'
@@ -658,6 +669,13 @@
 	if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1)
 		return -1;
 
+	// Send off the previously saved cpuids
+	for_each_present_cpu(cpu) {
+		preempt_disable();
+		gator_send_core_name(cpu, gator_cpuids[cpu]);
+		preempt_enable();
+	}
+
 	gator_send_iks_core_names();
 	for_each_online_cpu(cpu) {
 		gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
@@ -1009,12 +1027,12 @@
 	gator_buffer_size[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE;
 	gator_buffer_mask[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE - 1;
 
-	gator_buffer_size[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE;
-	gator_buffer_mask[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE - 1;
-
 	gator_buffer_size[IDLE_BUF] = IDLE_BUFFER_SIZE;
 	gator_buffer_mask[IDLE_BUF] = IDLE_BUFFER_SIZE - 1;
 
+	gator_buffer_size[ACTIVITY_BUF] = ACTIVITY_BUFFER_SIZE;
+	gator_buffer_mask[ACTIVITY_BUF] = ACTIVITY_BUFFER_SIZE - 1;
+
 	// Initialize percpu per buffer variables
 	for (i = 0; i < NUM_GATOR_BUFS; i++) {
 		// Verify buffers are a power of 2
@@ -1349,8 +1367,62 @@
 /******************************************************************************
  * Module
  ******************************************************************************/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
+
+#define GATOR_TRACEPOINTS \
+	GATOR_HANDLE_TRACEPOINT(block_rq_complete); \
+	GATOR_HANDLE_TRACEPOINT(cpu_frequency); \
+	GATOR_HANDLE_TRACEPOINT(cpu_idle); \
+	GATOR_HANDLE_TRACEPOINT(cpu_migrate_begin); \
+	GATOR_HANDLE_TRACEPOINT(cpu_migrate_current); \
+	GATOR_HANDLE_TRACEPOINT(cpu_migrate_finish); \
+	GATOR_HANDLE_TRACEPOINT(irq_handler_exit); \
+	GATOR_HANDLE_TRACEPOINT(mali_hw_counter); \
+	GATOR_HANDLE_TRACEPOINT(mali_job_slots_event); \
+	GATOR_HANDLE_TRACEPOINT(mali_mmu_as_in_use); \
+	GATOR_HANDLE_TRACEPOINT(mali_mmu_as_released); \
+	GATOR_HANDLE_TRACEPOINT(mali_page_fault_insert_pages); \
+	GATOR_HANDLE_TRACEPOINT(mali_pm_status); \
+	GATOR_HANDLE_TRACEPOINT(mali_sw_counter); \
+	GATOR_HANDLE_TRACEPOINT(mali_sw_counters); \
+	GATOR_HANDLE_TRACEPOINT(mali_timeline_event); \
+	GATOR_HANDLE_TRACEPOINT(mali_total_alloc_pages_change); \
+	GATOR_HANDLE_TRACEPOINT(mm_page_alloc); \
+	GATOR_HANDLE_TRACEPOINT(mm_page_free); \
+	GATOR_HANDLE_TRACEPOINT(mm_page_free_batched); \
+	GATOR_HANDLE_TRACEPOINT(sched_process_exec); \
+	GATOR_HANDLE_TRACEPOINT(sched_process_fork); \
+	GATOR_HANDLE_TRACEPOINT(sched_process_free); \
+	GATOR_HANDLE_TRACEPOINT(sched_switch); \
+	GATOR_HANDLE_TRACEPOINT(softirq_exit); \
+
+#define GATOR_HANDLE_TRACEPOINT(probe_name) \
+	struct tracepoint *gator_tracepoint_##probe_name
+GATOR_TRACEPOINTS;
+#undef GATOR_HANDLE_TRACEPOINT
+
+static void gator_fct(struct tracepoint *tp, void *priv)
+{
+#define GATOR_HANDLE_TRACEPOINT(probe_name) \
+	if (strcmp(tp->name, #probe_name) == 0) { \
+		gator_tracepoint_##probe_name = tp; \
+		return; \
+	}
+GATOR_TRACEPOINTS;
+#undef GATOR_HANDLE_TRACEPOINT
+}
+
+#else
+
+#define for_each_kernel_tracepoint(fct, priv)
+
+#endif
+
 static int __init gator_module_init(void)
 {
+	for_each_kernel_tracepoint(gator_fct, NULL);
+
 	if (gatorfs_register()) {
 		return -1;
 	}
@@ -1362,6 +1434,10 @@
 
 	setup_timer(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0);
 
+	// Initialize the list of cpuids
+	memset(gator_cpuids, -1, sizeof(gator_cpuids));
+	on_each_cpu(gator_read_cpuid, NULL, 1);
+
 	return 0;
 }
 
diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c
index fd413ad..97b4ae6 100644
--- a/drivers/gator/gator_marshaling.c
+++ b/drivers/gator/gator_marshaling.c
@@ -231,75 +231,28 @@
 	// Check and commit; commit is set to occur once buffer is 3/4 full
 	buffer_check(cpu, COUNTER_BUF, time);
 }
+
+static void marshal_event_single64(int core, int key, long long value)
+{
+	unsigned long flags, cpu;
+	u64 time;
+
+	local_irq_save(flags);
+	cpu = get_physical_cpu();
+	time = gator_get_time();
+	if (buffer_check_space(cpu, COUNTER_BUF, 2 * MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int64(cpu, COUNTER_BUF, time);
+		gator_buffer_write_packed_int(cpu, COUNTER_BUF, core);
+		gator_buffer_write_packed_int(cpu, COUNTER_BUF, key);
+		gator_buffer_write_packed_int64(cpu, COUNTER_BUF, value);
+	}
+	local_irq_restore(flags);
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, COUNTER_BUF, time);
+}
 #endif
 
-static void marshal_sched_gpu_start(int unit, int core, int tgid, int pid)
-{
-	unsigned long cpu = get_physical_cpu(), flags;
-	u64 time;
-
-	if (!per_cpu(gator_buffer, cpu)[GPU_TRACE_BUF])
-		return;
-
-	local_irq_save(flags);
-	time = gator_get_time();
-	if (buffer_check_space(cpu, GPU_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, MESSAGE_GPU_START);
-		gator_buffer_write_packed_int64(cpu, GPU_TRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, unit);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, core);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, tgid);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, pid);
-	}
-	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
-	buffer_check(cpu, GPU_TRACE_BUF, time);
-}
-
-static void marshal_sched_gpu_stop(int unit, int core)
-{
-	unsigned long cpu = get_physical_cpu(), flags;
-	u64 time;
-
-	if (!per_cpu(gator_buffer, cpu)[GPU_TRACE_BUF])
-		return;
-
-	local_irq_save(flags);
-	time = gator_get_time();
-	if (buffer_check_space(cpu, GPU_TRACE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, MESSAGE_GPU_STOP);
-		gator_buffer_write_packed_int64(cpu, GPU_TRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, unit);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, core);
-	}
-	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
-	buffer_check(cpu, GPU_TRACE_BUF, time);
-}
-
-static void marshal_sched_trace_start(int tgid, int pid, int cookie)
-{
-	unsigned long cpu = get_physical_cpu(), flags;
-	u64 time;
-
-	if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])
-		return;
-
-	local_irq_save(flags);
-	time = gator_get_time();
-	if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_START);
-		gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie);
-	}
-	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
-	buffer_check(cpu, SCHED_TRACE_BUF, time);
-}
-
-static void marshal_sched_trace_switch(int tgid, int pid, int cookie, int state)
+static void marshal_sched_trace_switch(int pid, int state)
 {
 	unsigned long cpu = get_physical_cpu(), flags;
 	u64 time;
@@ -312,9 +265,7 @@
 	if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
 		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_SWITCH);
 		gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid);
 		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie);
 		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, state);
 	}
 	local_irq_restore(flags);
@@ -379,3 +330,33 @@
 	gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
 }
 #endif
+
+static void marshal_activity_switch(int core, int key, int activity, int pid, int state)
+{
+	unsigned long cpu = get_physical_cpu(), flags;
+	u64 time;
+
+	if (!per_cpu(gator_buffer, cpu)[ACTIVITY_BUF])
+		return;
+
+	local_irq_save(flags);
+	time = gator_get_time();
+	if (buffer_check_space(cpu, ACTIVITY_BUF, MAXSIZE_PACK64 + 6 * MAXSIZE_PACK32)) {	// one packed time plus six packed ints: message, core, key, activity, pid, state
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, MESSAGE_SWITCH);
+		gator_buffer_write_packed_int64(cpu, ACTIVITY_BUF, time);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, core);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, key);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, activity);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, pid);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, state);
+	}
+	local_irq_restore(flags);
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, ACTIVITY_BUF, time);
+}
+
+void gator_marshal_activity_switch(int core, int key, int activity, int pid)
+{
+	// state is reserved for cpu use only
+	marshal_activity_switch(core, key, activity, pid, 0);
+}
diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c
index 6332098..a8b9e7d 100644
--- a/drivers/gator/gator_trace_gpu.c
+++ b/drivers/gator/gator_trace_gpu.c
@@ -23,8 +23,6 @@
 #endif
 #endif
 
-#include "gator_trace_gpu.h"
-
 /*
  * Taken from MALI_PROFILING_EVENT_TYPE_* items in Mali DDK.
  */
@@ -37,7 +35,6 @@
 /* Note whether tracepoints have been registered */
 static int mali_timeline_trace_registered;
 static int mali_job_slots_trace_registered;
-static int gpu_trace_registered;
 
 enum {
 	GPU_UNIT_NONE = 0,
@@ -47,19 +44,19 @@
 	NUMBER_OF_GPU_UNITS
 };
 
-#define MALI_4xx     (0x0b07)
-#define MALI_T6xx    (0x0056)
+#if defined(MALI_SUPPORT)
 
-struct mali_gpu_job {
+struct mali_activity {
+	int core;
+	int key;
 	int count;
-	int last_tgid;
+	int last_activity;
 	int last_pid;
-	int last_job_id;
 };
 
 #define NUMBER_OF_GPU_CORES 16
-static struct mali_gpu_job mali_gpu_jobs[NUMBER_OF_GPU_UNITS][NUMBER_OF_GPU_CORES];
-static DEFINE_SPINLOCK(mali_gpu_jobs_lock);
+static struct mali_activity mali_activities[NUMBER_OF_GPU_UNITS*NUMBER_OF_GPU_CORES];
+static DEFINE_SPINLOCK(mali_activities_lock);
 
 /* Only one event should be running on a unit and core at a time (ie, a start
  * event can only be followed by a stop and vice versa), but because the kernel
@@ -67,53 +64,97 @@
  * start1, start2, stop1, stop2. Change it back into start1, stop1, start2,
  * stop2 by queueing up start2 and releasing it when stop1 is received.
  */
-static void mali_gpu_enqueue(int unit, int core, int tgid, int pid, int job_id)
+
+static int mali_activity_index(int core, int key)
 {
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mali_activities); ++i) {
+		if ((mali_activities[i].core == core) && (mali_activities[i].key == key)) {
+			break;
+		}
+		if ((mali_activities[i].core == 0) && (mali_activities[i].key == 0)) {
+			mali_activities[i].core = core;
+			mali_activities[i].key = key;
+			break;
+		}
+	}
+	BUG_ON(i >= ARRAY_SIZE(mali_activities));
+
+	return i;
+}
+
+static void mali_activity_enqueue(int core, int key, int activity, int pid)
+{
+	int i;
 	int count;
 
-	spin_lock(&mali_gpu_jobs_lock);
-	count = mali_gpu_jobs[unit][core].count;
+	spin_lock(&mali_activities_lock);
+	i = mali_activity_index(core, key);
+
+	count = mali_activities[i].count;
 	BUG_ON(count < 0);
-	++mali_gpu_jobs[unit][core].count;
+	++mali_activities[i].count;
 	if (count) {
-		mali_gpu_jobs[unit][core].last_tgid = tgid;
-		mali_gpu_jobs[unit][core].last_pid = pid;
-		mali_gpu_jobs[unit][core].last_job_id = job_id;
+		mali_activities[i].last_activity = activity;
+		mali_activities[i].last_pid = pid;
 	}
-	spin_unlock(&mali_gpu_jobs_lock);
+	spin_unlock(&mali_activities_lock);
 
 	if (!count) {
-		marshal_sched_gpu_start(unit, core, tgid, pid/*, job_id*/);
+		gator_marshal_activity_switch(core, key, activity, pid);
 	}
 }
 
-static void mali_gpu_stop(int unit, int core)
+static void mali_activity_stop(int core, int key)
 {
+	int i;
 	int count;
-	int last_tgid = 0;
+	int last_activity = 0;
 	int last_pid = 0;
-	//int last_job_id = 0;
 
-	spin_lock(&mali_gpu_jobs_lock);
-	if (mali_gpu_jobs[unit][core].count == 0) {
-		spin_unlock(&mali_gpu_jobs_lock);
+	spin_lock(&mali_activities_lock);
+	i = mali_activity_index(core, key);
+
+	if (mali_activities[i].count == 0) {
+		spin_unlock(&mali_activities_lock);
 		return;
 	}
-	--mali_gpu_jobs[unit][core].count;
-	count = mali_gpu_jobs[unit][core].count;
+	--mali_activities[i].count;
+	count = mali_activities[i].count;
 	if (count) {
-		last_tgid = mali_gpu_jobs[unit][core].last_tgid;
-		last_pid = mali_gpu_jobs[unit][core].last_pid;
-		//last_job_id = mali_gpu_jobs[unit][core].last_job_id;
+		last_activity = mali_activities[i].last_activity;
+		last_pid = mali_activities[i].last_pid;
 	}
-	spin_unlock(&mali_gpu_jobs_lock);
+	spin_unlock(&mali_activities_lock);
 
-	marshal_sched_gpu_stop(unit, core);
+	gator_marshal_activity_switch(core, key, 0, 0);
 	if (count) {
-		marshal_sched_gpu_start(unit, core, last_tgid, last_pid/*, last_job_id*/);
+		gator_marshal_activity_switch(core, key, last_activity, last_pid);
 	}
 }
 
+void mali_activity_clear(mali_counter mali_activity[], size_t mali_activity_size)
+{
+	int activity;
+	int cores;
+	int core;
+
+	for (activity = 0; activity < mali_activity_size; ++activity) {
+		cores = mali_activity[activity].cores;
+		if (cores < 0) {
+			cores = 1;
+		}
+		for (core = 0; core < cores; ++core) {
+			if (mali_activity[activity].enabled) {
+				gator_marshal_activity_switch(core, mali_activity[activity].key, 0, 0);
+			}
+		}
+	}
+}
+
+#endif
+
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
 #include "gator_events_mali_4xx.h"
 
@@ -142,6 +183,8 @@
 	EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1,
 };
 
+mali_counter mali_activity[2];
+
 GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned int d2, unsigned int d3, unsigned int d4))
 {
 	unsigned int component, state;
@@ -154,18 +197,26 @@
 	case EVENT_TYPE_START:
 		if (component == EVENT_CHANNEL_VP0) {
 			/* tgid = d0; pid = d1; */
-			mali_gpu_enqueue(GPU_UNIT_VP, 0, d0, d1, 0);
+			if (mali_activity[1].enabled) {
+				mali_activity_enqueue(0, mali_activity[1].key, 1, d1);
+			}
 		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
 			/* tgid = d0; pid = d1; */
-			mali_gpu_enqueue(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0, d0, d1, 0);
+			if (mali_activity[0].enabled) {
+				mali_activity_enqueue(component - EVENT_CHANNEL_FP0, mali_activity[0].key, 1, d1);
+			}
 		}
 		break;
 
 	case EVENT_TYPE_STOP:
 		if (component == EVENT_CHANNEL_VP0) {
-			mali_gpu_stop(GPU_UNIT_VP, 0);
+			if (mali_activity[1].enabled) {
+				mali_activity_stop(0, mali_activity[1].key);
+			}
 		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
-			mali_gpu_stop(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0);
+			if (mali_activity[0].enabled) {
+				mali_activity_stop(component - EVENT_CHANNEL_FP0, mali_activity[0].key);
+			}
 		}
 		break;
 
@@ -186,6 +237,9 @@
 #endif
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+
+mali_counter mali_activity[3];
+
 #if defined(MALI_JOB_SLOTS_EVENT_CHANGED)
 GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id))
 #else
@@ -217,31 +271,21 @@
 	if (unit != GPU_UNIT_NONE) {
 		switch (state) {
 		case EVENT_TYPE_START:
-			mali_gpu_enqueue(unit, 0, tgid, (pid != 0 ? pid : tgid), job_id);
+			if (mali_activity[component].enabled) {
+				mali_activity_enqueue(0, mali_activity[component].key, 1, (pid != 0 ? pid : tgid));
+			}
 			break;
 		case EVENT_TYPE_STOP:
-			mali_gpu_stop(unit, 0);
+		default: // Some jobs can be soft-stopped, so ensure that this terminates the activity trace.
+			if (mali_activity[component].enabled) {
+				mali_activity_stop(0, mali_activity[component].key);
+			}
 			break;
-		default:
-			/*
-			 * Some jobs can be soft-stopped, so ensure that this terminates the activity trace.
-			 */
-			mali_gpu_stop(unit, 0);
 		}
 	}
 }
 #endif
 
-GATOR_DEFINE_PROBE(gpu_activity_start, TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p))
-{
-	mali_gpu_enqueue(gpu_unit, gpu_core, (int)p->tgid, (int)p->pid, 0);
-}
-
-GATOR_DEFINE_PROBE(gpu_activity_stop, TP_PROTO(int gpu_unit, int gpu_core))
-{
-	mali_gpu_stop(gpu_unit, gpu_core);
-}
-
 static int gator_trace_gpu_start(void)
 {
 	/*
@@ -249,32 +293,25 @@
 	 * Absence of gpu trace points is not an error
 	 */
 
-	memset(&mali_gpu_jobs, 0, sizeof(mali_gpu_jobs));
-	gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
+#if defined(MALI_SUPPORT)
+	memset(&mali_activities, 0, sizeof(mali_activities));
+#endif
+	mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
+	mali_activity_clear(mali_activity, ARRAY_SIZE(mali_activity));
 	if (!GATOR_REGISTER_TRACE(mali_timeline_event)) {
 		mali_timeline_trace_registered = 1;
 	}
 #endif
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+	mali_activity_clear(mali_activity, ARRAY_SIZE(mali_activity));
 	if (!GATOR_REGISTER_TRACE(mali_job_slots_event)) {
 		mali_job_slots_trace_registered = 1;
 	}
 #endif
 
-	if (!mali_timeline_trace_registered) {
-		if (GATOR_REGISTER_TRACE(gpu_activity_start)) {
-			return 0;
-		}
-		if (GATOR_REGISTER_TRACE(gpu_activity_stop)) {
-			GATOR_UNREGISTER_TRACE(gpu_activity_start);
-			return 0;
-		}
-		gpu_trace_registered = 1;
-	}
-
 	return 0;
 }
 
@@ -292,10 +329,5 @@
 	}
 #endif
 
-	if (gpu_trace_registered) {
-		GATOR_UNREGISTER_TRACE(gpu_activity_stop);
-		GATOR_UNREGISTER_TRACE(gpu_activity_start);
-	}
-
-	gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
+	mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
 }
diff --git a/drivers/gator/gator_trace_gpu.h b/drivers/gator/gator_trace_gpu.h
deleted file mode 100644
index 5113d45..0000000
--- a/drivers/gator/gator_trace_gpu.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * Copyright (C) ARM Limited 2010-2014. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#undef TRACE_GPU
-#define TRACE_GPU gpu
-
-#if !defined(_TRACE_GPU_H)
-#define _TRACE_GPU_H
-
-#include <linux/tracepoint.h>
-
-/*
- * UNIT - the GPU processor type
- *  1 = Vertex Processor
- *  2 = Fragment Processor
- *
- * CORE - the GPU processor core number
- *  this is not the CPU core number
- */
-
-/*
- * Tracepoint for calling GPU unit start activity on core
- */
-TRACE_EVENT(gpu_activity_start,
-
-	    TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p),
-
-	    TP_ARGS(gpu_unit, gpu_core, p),
-
-	    TP_STRUCT__entry(
-			     __field(int, gpu_unit)
-			     __field(int, gpu_core)
-			     __array(char, comm, TASK_COMM_LEN)
-			     __field(pid_t, pid)
-	    ),
-
-	    TP_fast_assign(
-			   __entry->gpu_unit = gpu_unit;
-			   __entry->gpu_core = gpu_core;
-			   memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-			   __entry->pid = p->pid;
-	    ),
-
-	    TP_printk("unit=%d core=%d comm=%s pid=%d",
-		      __entry->gpu_unit, __entry->gpu_core, __entry->comm,
-		      __entry->pid)
-    );
-
-/*
- * Tracepoint for calling GPU unit stop activity on core
- */
-TRACE_EVENT(gpu_activity_stop,
-
-	    TP_PROTO(int gpu_unit, int gpu_core),
-
-	    TP_ARGS(gpu_unit, gpu_core),
-
-	    TP_STRUCT__entry(
-			     __field(int, gpu_unit)
-			     __field(int, gpu_core)
-	    ),
-
-	    TP_fast_assign(
-			   __entry->gpu_unit = gpu_unit;
-			   __entry->gpu_core = gpu_core;
-	    ),
-
-	    TP_printk("unit=%d core=%d", __entry->gpu_unit, __entry->gpu_core)
-    );
-
-#endif /* _TRACE_GPU_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/drivers/gator/gator_trace_power.c b/drivers/gator/gator_trace_power.c
index 1895bb9..f2754b1 100644
--- a/drivers/gator/gator_trace_power.c
+++ b/drivers/gator/gator_trace_power.c
@@ -75,7 +75,7 @@
 GATOR_DEFINE_PROBE(cpu_frequency, TP_PROTO(unsigned int frequency, unsigned int cpu))
 {
 	cpu = lcpu_to_pcpu(cpu);
-	marshal_event_single(cpu, power_cpu_key[POWER_CPU_FREQ], frequency * 1000);
+	marshal_event_single64(cpu, power_cpu_key[POWER_CPU_FREQ], frequency * 1000LL);
 }
 
 GATOR_DEFINE_PROBE(cpu_idle, TP_PROTO(unsigned int state, unsigned int cpu))
@@ -109,7 +109,7 @@
 	int pcpu = get_physical_cpu();
 	int lcpu = get_logical_cpu();
 	if (power_cpu_enabled[POWER_CPU_FREQ]) {
-		marshal_event_single(pcpu, power_cpu_key[POWER_CPU_FREQ], cpufreq_quick_get(lcpu) * 1000);
+		marshal_event_single64(pcpu, power_cpu_key[POWER_CPU_FREQ], cpufreq_quick_get(lcpu) * 1000LL);
 	}
 }
 
diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c
index 52990e9..6550086 100644
--- a/drivers/gator/gator_trace_sched.c
+++ b/drivers/gator/gator_trace_sched.c
@@ -114,7 +114,7 @@
 
 		// Commit buffers on timeout
 		if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) {
-			static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF };
+			static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF, ACTIVITY_BUF };
 			int i;
 
 			for (i = 0; i < ARRAY_SIZE(buftypes); ++i) {
@@ -137,35 +137,44 @@
 // special case used during a suspend of the system
 static void trace_sched_insert_idle(void)
 {
-	marshal_sched_trace_switch(0, 0, 0, 0);
+	marshal_sched_trace_switch(0, 0);
 }
 
-GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
+static void gator_trace_emit_link(struct task_struct *p)
 {
 	int cookie;
 	int cpu = get_physical_cpu();
 
-	cookie = get_exec_cookie(cpu, child);
-	emit_pid_name(child);
+	cookie = get_exec_cookie(cpu, p);
+	emit_pid_name(p);
 
-	marshal_sched_trace_start(child->tgid, child->pid, cookie);
+	marshal_link(cookie, p->tgid, p->pid);
 }
 
+GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
+{
+	gator_trace_emit_link(child);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+GATOR_DEFINE_PROBE(sched_process_exec, TP_PROTO(struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm))
+{
+	gator_trace_emit_link(p);
+}
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
 GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
 #else
 GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
 #endif
 {
-	int cookie;
 	int state;
 	int cpu = get_physical_cpu();
 
 	per_cpu(in_scheduler_context, cpu) = true;
 
 	// do as much work as possible before disabling interrupts
-	cookie = get_exec_cookie(cpu, next);
-	emit_pid_name(next);
 	if (prev->state == TASK_RUNNING) {
 		state = STATE_CONTENTION;
 	} else if (prev->in_iowait) {
@@ -178,7 +187,10 @@
 	collect_counters(gator_get_time(), prev);
 	per_cpu(collecting, cpu) = 0;
 
-	marshal_sched_trace_switch(next->tgid, next->pid, cookie, state);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+	gator_trace_emit_link(next);
+#endif
+	marshal_sched_trace_switch(next->pid, state);
 
 	per_cpu(in_scheduler_context, cpu) = false;
 }
@@ -199,6 +211,10 @@
 	// register tracepoints
 	if (GATOR_REGISTER_TRACE(sched_process_fork))
 		goto fail_sched_process_fork;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	if (GATOR_REGISTER_TRACE(sched_process_exec))
+		goto fail_sched_process_exec;
+#endif
 	if (GATOR_REGISTER_TRACE(sched_switch))
 		goto fail_sched_switch;
 	if (GATOR_REGISTER_TRACE(sched_process_free))
@@ -216,15 +232,42 @@
 	GATOR_UNREGISTER_TRACE(sched_switch);
 fail_sched_switch:
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	GATOR_UNREGISTER_TRACE(sched_process_exec);
+fail_sched_process_exec:	/* exec itself failed to register: only fork is left to undo */
+#endif
 	GATOR_UNREGISTER_TRACE(sched_process_fork);
 fail_sched_process_fork:
 	pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
 
 	return -1;
 }
 
+static void unregister_scheduler_tracepoints(void)
+{
+	GATOR_UNREGISTER_TRACE(sched_process_fork);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	GATOR_UNREGISTER_TRACE(sched_process_exec);
+#endif
+	GATOR_UNREGISTER_TRACE(sched_switch);
+	GATOR_UNREGISTER_TRACE(sched_process_free);
+	pr_debug("gator: unregistered tracepoints\n");
+}
+
+static void gator_trace_sched_stop(void)
+{
+	int cpu;
+
+	unregister_scheduler_tracepoints();
+
+	for_each_present_cpu(cpu) {
+		kfree(per_cpu(taskname_keys, cpu));
+	}
+}
+
 static int gator_trace_sched_start(void)
 {
 	int cpu, size;
+	int ret;
 
 	for_each_present_cpu(cpu) {
 		size = TASK_MAP_ENTRIES * TASK_MAX_COLLISIONS * sizeof(uint64_t);
@@ -234,7 +277,9 @@
 		memset(per_cpu(taskname_keys, cpu), 0, size);
 	}
 
-	return register_scheduler_tracepoints();
+	ret = register_scheduler_tracepoints();
+
+	return ret;
 }
 
 static void gator_trace_sched_offline(void)
@@ -242,24 +287,6 @@
 	trace_sched_insert_idle();
 }
 
-static void unregister_scheduler_tracepoints(void)
-{
-	GATOR_UNREGISTER_TRACE(sched_process_fork);
-	GATOR_UNREGISTER_TRACE(sched_switch);
-	GATOR_UNREGISTER_TRACE(sched_process_free);
-	pr_debug("gator: unregistered tracepoints\n");
-}
-
-static void gator_trace_sched_stop(void)
-{
-	int cpu;
-	unregister_scheduler_tracepoints();
-
-	for_each_present_cpu(cpu) {
-		kfree(per_cpu(taskname_keys, cpu));
-	}
-}
-
 static void gator_trace_sched_init(void)
 {
 	int i;
diff --git a/drivers/gator/mali/mali_dd_gator_api.h b/drivers/gator/mali/mali_dd_gator_api.h
new file mode 100644
index 0000000..104b34f
--- /dev/null
+++ b/drivers/gator/mali/mali_dd_gator_api.h
@@ -0,0 +1,40 @@
+#if !defined(MALI_DDK_GATOR_API_VERSION)
+	#define MALI_DDK_GATOR_API_VERSION 3
+#endif
+#if !defined(MALI_TRUE)
+	#define MALI_TRUE                ((unsigned int)1)
+#endif
+
+#if !defined(MALI_FALSE)
+	#define MALI_FALSE               ((unsigned int)0)
+#endif
+
+struct mali_dd_hwcnt_info {
+
+	/* Passed from Gator to kbase */
+	//u32 in_mali_dd_hwcnt_version;
+	unsigned short int bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	unsigned int size;
+
+	unsigned int gpu_id;
+
+	unsigned int nr_cores;
+
+	unsigned int nr_core_groups;
+
+	/* The cached present bitmaps - these are the same as the corresponding hardware registers */
+	unsigned long int shader_present_bitmap;
+};
+
+struct mali_dd_hwcnt_handles;
+extern struct mali_dd_hwcnt_handles* mali_dd_hwcnt_init(struct mali_dd_hwcnt_info *in_out_info);
+extern void mali_dd_hwcnt_clear(struct mali_dd_hwcnt_info *in_out_info, struct mali_dd_hwcnt_handles *opaque_handles);
+extern unsigned int kbase_dd_instr_hwcnt_dump_complete(struct mali_dd_hwcnt_handles *opaque_handles, unsigned int * const success);
+extern unsigned int kbase_dd_instr_hwcnt_dump_irq(struct mali_dd_hwcnt_handles *opaque_handles);
diff --git a/drivers/gator/mali_t6xx.mk b/drivers/gator/mali_t6xx.mk
index 059d47a..fa7571d 100644
--- a/drivers/gator/mali_t6xx.mk
+++ b/drivers/gator/mali_t6xx.mk
@@ -21,6 +21,10 @@
 EXTRA_CFLAGS += -DMALI_DIR_MIDGARD=1
 endif
 
+ifneq ($(wildcard $(DDK_DIR)/drivers/gpu/arm/midgard/mali_dd_gator_api.h),)
+EXTRA_CFLAGS += -DMALI_SIMPLE_API=1
+endif
+
 UMP_DIR = $(DDK_DIR)/include/linux
 
 # Include directories in the DDK