aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDrew Richardson <drew.richardson@arm.com>2013-10-23 12:00:00 -0700
committerDrew Richardson <drew.richardson@arm.com>2014-12-19 15:46:30 -0800
commitdf48c99f8da89ef9a91fac638b8e0528507bd088 (patch)
tree28fe6a9ff0438f349987df531cf5c779320b1a38
parentfebe35e3fa5d8ac027b914f14991c95846f49251 (diff)
gator: Version 5.165.16
Signed-off-by: Drew Richardson <drew.richardson@arm.com>
-rw-r--r--README_Streamline.txt2
-rw-r--r--daemon/Buffer.cpp14
-rw-r--r--daemon/Buffer.h5
-rw-r--r--daemon/CapturedXML.cpp8
-rw-r--r--daemon/Child.cpp3
-rw-r--r--daemon/ConfigurationXML.cpp38
-rw-r--r--daemon/ConfigurationXML.h2
-rw-r--r--daemon/Hwmon.cpp2
-rw-r--r--daemon/KMod.cpp4
-rw-r--r--daemon/OlyUtility.cpp5
-rw-r--r--daemon/Sender.cpp9
-rw-r--r--daemon/SessionData.h2
-rw-r--r--daemon/StreamlineSetup.cpp7
-rw-r--r--daemon/StreamlineSetup.h1
-rw-r--r--daemon/events-CCN-504.xml122
-rw-r--r--daemon/events-Cortex-A12.xml86
-rw-r--r--daemon/events-Linux.xml4
-rw-r--r--daemon/events-Mali-4xx.xml (renamed from daemon/events-Mali-400.xml)277
-rw-r--r--daemon/events-Mali-T6xx_hw.xml4
-rw-r--r--daemon/main.cpp115
-rw-r--r--driver/Makefile8
-rw-r--r--driver/gator.h8
-rw-r--r--driver/gator_annotate.c21
-rw-r--r--driver/gator_cookies.c8
-rw-r--r--driver/gator_events_ccn-504.c306
-rw-r--r--driver/gator_events_mali_4xx.c (renamed from driver/gator_events_mali_400.c)555
-rw-r--r--driver/gator_events_mali_4xx.h (renamed from driver/gator_events_mali_400.h)8
-rw-r--r--driver/gator_events_mali_common.c4
-rw-r--r--driver/gator_events_mali_common.h10
-rw-r--r--driver/gator_events_mali_t6xx_hw.c16
-rw-r--r--driver/gator_events_mmaped.c6
-rw-r--r--driver/gator_hrtimer_gator.c10
-rw-r--r--driver/gator_iks.c53
-rw-r--r--driver/gator_main.c95
-rw-r--r--driver/gator_marshaling.c74
-rw-r--r--driver/gator_trace_gpu.c40
-rw-r--r--driver/gator_trace_sched.c98
-rw-r--r--driver/mali/mali_mjollnir_profiling_gator_api.h164
-rw-r--r--driver/mali/mali_utgard_profiling_gator_api.h202
39 files changed, 1813 insertions, 583 deletions
diff --git a/README_Streamline.txt b/README_Streamline.txt
index 6a55f60..744c33f 100644
--- a/README_Streamline.txt
+++ b/README_Streamline.txt
@@ -117,7 +117,7 @@ There is a bug in some Linux kernels where perf misidentifies the CPU type. To s
# ls /sys/bus/event_source/devices/
ARMv7_Cortex_A9 breakpoint software tracepoint
-To workaround the issue try upgrading to a later kernel or comment out the gator_events_perf_pmu_cpu_init(gator_cpu, type); cal in gator_events_perf_pmu.c
+To work around the issue try upgrading to a later kernel or comment out the gator_events_perf_pmu_cpu_init(gator_cpu, type); call in gator_events_perf_pmu.c
*** Profiling the kernel (optional) ***
diff --git a/daemon/Buffer.cpp b/daemon/Buffer.cpp
index 56cf42c..c7abbf3 100644
--- a/daemon/Buffer.cpp
+++ b/daemon/Buffer.cpp
@@ -14,11 +14,7 @@
#define mask (size - 1)
-Buffer::Buffer (const int32_t core, const int32_t buftype, const int size, sem_t *const readerSem) : core(core), buftype(buftype), size(size), readPos(0), writePos(0), commitPos(0), available(true), done(false), buf(new char[size]),
-#ifdef GATOR_LIVE
- commitTime(gSessionData->mLiveRate),
-#endif
- readerSem(readerSem) {
+Buffer::Buffer (const int32_t core, const int32_t buftype, const int size, sem_t *const readerSem) : core(core), buftype(buftype), size(size), readPos(0), writePos(0), commitPos(0), available(true), done(false), buf(new char[size]), commitTime(gSessionData->mLiveRate), readerSem(readerSem) {
if ((size & mask) != 0) {
logg->logError(__FILE__, __LINE__, "Buffer size is not a power of 2");
handleException();
@@ -110,13 +106,11 @@ void Buffer::commit (const uint64_t time) {
logg->logMessage("Committing data readPos: %i writePos: %i commitPos: %i", readPos, writePos, commitPos);
commitPos = writePos;
-#ifdef GATOR_LIVE
if (gSessionData->mLiveRate > 0) {
while (time > commitTime) {
commitTime += gSessionData->mLiveRate;
}
}
-#endif
if (!done) {
frame();
@@ -131,11 +125,7 @@ void Buffer::check (const uint64_t time) {
if (filled < 0) {
filled += size;
}
- if (filled >= ((size * 3) / 4)
-#ifdef GATOR_LIVE
- || (gSessionData->mLiveRate > 0 && time >= commitTime)
-#endif
- ) {
+ if (filled >= ((size * 3) / 4) || (gSessionData->mLiveRate > 0 && time >= commitTime)) {
commit(time);
}
}
diff --git a/daemon/Buffer.h b/daemon/Buffer.h
index c460fb7..f820cfd 100644
--- a/daemon/Buffer.h
+++ b/daemon/Buffer.h
@@ -13,8 +13,6 @@
#include <stdint.h>
#include <semaphore.h>
-#define GATOR_LIVE
-
class Sender;
class Buffer {
@@ -56,10 +54,7 @@ private:
bool available;
bool done;
char *const buf;
-#ifdef GATOR_LIVE
uint64_t commitTime;
-#endif
-
sem_t *const readerSem;
};
diff --git a/daemon/CapturedXML.cpp b/daemon/CapturedXML.cpp
index af726df..30c4c44 100644
--- a/daemon/CapturedXML.cpp
+++ b/daemon/CapturedXML.cpp
@@ -43,6 +43,14 @@ mxml_node_t* CapturedXML::getTree(bool includeTime) {
mxmlElementSetAttrf(target, "cores", "%d", gSessionData->mCores);
mxmlElementSetAttrf(target, "cpuid", "0x%x", gSessionData->mCpuId);
+ if (!gSessionData->mOneShot && (gSessionData->mSampleRate > 0)) {
+ mxmlElementSetAttr(target, "supports_live", "yes");
+ }
+
+ if (gSessionData->mLocalCapture) {
+ mxmlElementSetAttr(target, "local_capture", "yes");
+ }
+
mxml_node_t *counters = NULL;
for (x = 0; x < MAX_PERFORMANCE_COUNTERS; x++) {
const Counter & counter = gSessionData->mCounters[x];
diff --git a/daemon/Child.cpp b/daemon/Child.cpp
index 286c7e7..c054076 100644
--- a/daemon/Child.cpp
+++ b/daemon/Child.cpp
@@ -55,6 +55,9 @@ void handleException() {
child->socket->receiveNBytes(&discard, 1);
}
+ // Ensure all data is flushed
+ child->socket->shutdownConnection();
+
// this indirectly calls close socket which will ensure the data has been sent
delete sender;
}
diff --git a/daemon/ConfigurationXML.cpp b/daemon/ConfigurationXML.cpp
index fb00202..2a5252a 100644
--- a/daemon/ConfigurationXML.cpp
+++ b/daemon/ConfigurationXML.cpp
@@ -27,14 +27,7 @@ ConfigurationXML::ConfigurationXML() {
char path[PATH_MAX];
- if (gSessionData->mConfigurationXMLPath) {
- strncpy(path, gSessionData->mConfigurationXMLPath, PATH_MAX);
- } else {
- if (util->getApplicationFullPath(path, PATH_MAX) != 0) {
- logg->logMessage("Unable to determine the full path of gatord, the cwd will be used");
- }
- strncat(path, "configuration.xml", PATH_MAX - strlen(path) - 1);
- }
+ getPath(path);
mConfigurationXML = util->readFromDisk(path);
for (int retryCount = 0; retryCount < 2; ++retryCount) {
@@ -48,12 +41,7 @@ ConfigurationXML::ConfigurationXML() {
int ret = parse(mConfigurationXML);
if (ret == 1) {
- // remove configuration.xml on disk to use the default
- if (remove(path) != 0) {
- logg->logError(__FILE__, __LINE__, "Invalid configuration.xml file detected and unable to delete it. To resolve, delete configuration.xml on disk");
- handleException();
- }
- logg->logMessage("Invalid configuration.xml file detected and removed");
+ remove();
// Free the current configuration and reload
free((void*)mConfigurationXML);
@@ -197,3 +185,25 @@ void ConfigurationXML::getDefaultConfigurationXml(const char * & xml, unsigned i
xml = (const char *)configuration_xml;
len = configuration_xml_len;
}
+
+void ConfigurationXML::getPath(char* path) {
+ if (gSessionData->mConfigurationXMLPath) {
+ strncpy(path, gSessionData->mConfigurationXMLPath, PATH_MAX);
+ } else {
+ if (util->getApplicationFullPath(path, PATH_MAX) != 0) {
+ logg->logMessage("Unable to determine the full path of gatord, the cwd will be used");
+ }
+ strncat(path, "configuration.xml", PATH_MAX - strlen(path) - 1);
+ }
+}
+
+void ConfigurationXML::remove() {
+ char path[PATH_MAX];
+ getPath(path);
+
+ if (::remove(path) != 0) {
+ logg->logError(__FILE__, __LINE__, "Invalid configuration.xml file detected and unable to delete it. To resolve, delete configuration.xml on disk");
+ handleException();
+ }
+ logg->logMessage("Invalid configuration.xml file detected and removed");
+}
diff --git a/daemon/ConfigurationXML.h b/daemon/ConfigurationXML.h
index f709ad1..eba7dc4 100644
--- a/daemon/ConfigurationXML.h
+++ b/daemon/ConfigurationXML.h
@@ -14,6 +14,8 @@
class ConfigurationXML {
public:
static void getDefaultConfigurationXml(const char * & xml, unsigned int & len);
+ static void getPath(char* path);
+ static void remove();
ConfigurationXML();
~ConfigurationXML();
diff --git a/daemon/Hwmon.cpp b/daemon/Hwmon.cpp
index 9475215..0792568 100644
--- a/daemon/Hwmon.cpp
+++ b/daemon/Hwmon.cpp
@@ -73,7 +73,7 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, int key, const sensors_chip_name
len = snprintf(NULL, 0, "hwmon_%s_%d", chip_name, feature->number) + 1;
name = new char[len];
- len = snprintf(name, len, "hwmon_%s_%d", chip_name, feature->number);
+ snprintf(name, len, "hwmon_%s_%d", chip_name, feature->number);
delete [] chip_name;
diff --git a/daemon/KMod.cpp b/daemon/KMod.cpp
index 04f3330..559297f 100644
--- a/daemon/KMod.cpp
+++ b/daemon/KMod.cpp
@@ -13,6 +13,7 @@
#include <unistd.h>
#include "Collector.h"
+#include "ConfigurationXML.h"
#include "Counter.h"
#include "Logging.h"
@@ -73,7 +74,8 @@ void KMod::setupCounter(Counter &counter) {
}
counter.setCount(count);
} else if (counter.getCount() > 0) {
- logg->logError(__FILE__, __LINE__, "Event Based Sampling is only supported with kernel versions 3.0.0 and higher with CONFIG_PERF_EVENTS=y, and CONFIG_HW_PERF_EVENTS=y\n");
+ ConfigurationXML::remove();
+ logg->logError(__FILE__, __LINE__, "Event Based Sampling is only supported with kernel versions 3.0.0 and higher with CONFIG_PERF_EVENTS=y, and CONFIG_HW_PERF_EVENTS=y. The invalid configuration.xml has been removed.\n");
handleException();
}
}
diff --git a/daemon/OlyUtility.cpp b/daemon/OlyUtility.cpp
index b29a1e9..0b22d6e 100644
--- a/daemon/OlyUtility.cpp
+++ b/daemon/OlyUtility.cpp
@@ -83,7 +83,7 @@ int OlyUtility::getApplicationFullPath(char* fullpath, int sizeOfPath) {
}
fullpath[length] = 0;
- fullpath = getPathPart(fullpath);
+ getPathPart(fullpath);
return 0;
}
@@ -171,7 +171,7 @@ int OlyUtility::appendToDisk(const char* path, const char* data) {
*/
#define TRANSFER_SIZE 1024
int OlyUtility::copyFile(const char* srcFile, const char* dstFile) {
- char* buffer = (char*)malloc(TRANSFER_SIZE);
+ char buffer[TRANSFER_SIZE];
FILE * f_src = fopen(srcFile,"rb");
if (!f_src) {
return 0;
@@ -197,7 +197,6 @@ int OlyUtility::copyFile(const char* srcFile, const char* dstFile) {
}
fclose(f_src);
fclose(f_dst);
- free(buffer);
return 1;
}
diff --git a/daemon/Sender.cpp b/daemon/Sender.cpp
index 159503f..8eb348f 100644
--- a/daemon/Sender.cpp
+++ b/daemon/Sender.cpp
@@ -93,8 +93,13 @@ void Sender::writeData(const char* data, int length, int type) {
logg->logMessage("Sending data with length %d", length);
if (type != RESPONSE_APC_DATA) {
// type and length already added by the Collector for apc data
- mDataSocket->send((char*)&type, 1);
- mDataSocket->send((char*)&length, sizeof(length));
+ unsigned char header[5];
+ header[0] = type;
+ header[1] = (length >> 0) & 0xff;
+ header[2] = (length >> 8) & 0xff;
+ header[3] = (length >> 16) & 0xff;
+ header[4] = (length >> 24) & 0xff;
+ mDataSocket->send((char*)&header, sizeof(header));
}
// 100Kbits/sec * alarmDuration sec / 8 bits/byte
diff --git a/daemon/SessionData.h b/daemon/SessionData.h
index 22a8af0..e72fa5d 100644
--- a/daemon/SessionData.h
+++ b/daemon/SessionData.h
@@ -16,7 +16,7 @@
#define MAX_PERFORMANCE_COUNTERS 50
-#define PROTOCOL_VERSION 14
+#define PROTOCOL_VERSION 16
#define PROTOCOL_DEV 1000 // Differentiates development versions (timestamp) from release versions
struct ImageLinkList {
diff --git a/daemon/StreamlineSetup.cpp b/daemon/StreamlineSetup.cpp
index e196a7d..2faada2 100644
--- a/daemon/StreamlineSetup.cpp
+++ b/daemon/StreamlineSetup.cpp
@@ -255,12 +255,7 @@ void StreamlineSetup::sendCounters() {
void StreamlineSetup::writeConfiguration(char* xml) {
char path[PATH_MAX];
- if (gSessionData->mConfigurationXMLPath) {
- strncpy(path, gSessionData->mConfigurationXMLPath, PATH_MAX);
- } else {
- util->getApplicationFullPath(path, PATH_MAX);
- strncat(path, "configuration.xml", PATH_MAX - strlen(path) - 1);
- }
+ ConfigurationXML::getPath(path);
if (util->writeToDisk(path, xml) < 0) {
logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify write permissions to this path.", path);
diff --git a/daemon/StreamlineSetup.h b/daemon/StreamlineSetup.h
index a27a7ac..092d956 100644
--- a/daemon/StreamlineSetup.h
+++ b/daemon/StreamlineSetup.h
@@ -26,7 +26,6 @@ public:
StreamlineSetup(OlySocket *socket);
~StreamlineSetup();
private:
- int mNumConnections;
OlySocket* mSocket;
char* readCommand(int*);
diff --git a/daemon/events-CCN-504.xml b/daemon/events-CCN-504.xml
new file mode 100644
index 0000000..cfabf65
--- /dev/null
+++ b/daemon/events-CCN-504.xml
@@ -0,0 +1,122 @@
+ <counter_set name="CCN-504_cnt" count="4"/>
+ <category name="CCN-504" counter_set="CCN-504_cnt">
+ <event counter="CCN-504_ccnt" title="CCN-504 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
+
+ <option_set name="XP_Region">
+ <option event_delta="0x400000" name="XP 0" description="Crosspoint 0"/>
+ <option event_delta="0x410000" name="XP 1" description="Crosspoint 1"/>
+ <option event_delta="0x420000" name="XP 2" description="Crosspoint 2"/>
+ <option event_delta="0x430000" name="XP 3" description="Crosspoint 3"/>
+ <option event_delta="0x440000" name="XP 4" description="Crosspoint 4"/>
+ <option event_delta="0x450000" name="XP 5" description="Crosspoint 5"/>
+ <option event_delta="0x460000" name="XP 6" description="Crosspoint 6"/>
+ <option event_delta="0x470000" name="XP 7" description="Crosspoint 7"/>
+ <option event_delta="0x480000" name="XP 8" description="Crosspoint 8"/>
+ <option event_delta="0x490000" name="XP 9" description="Crosspoint 9"/>
+ <option event_delta="0x4A0000" name="XP 10" description="Crosspoint 10"/>
+ </option_set>
+
+ <event event="0x0801" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: H-bit" description="Bus 0: REQ: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0802" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: S-bit" description="Bus 0: REQ: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0803" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: P-Cnt" description="Bus 0: REQ: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0804" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: TknV" description="Bus 0: REQ: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0809" option_set="XP_Region" title="CCN-504" name="Bus 1: REQ: H-bit" description="Bus 1: REQ: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x080A" option_set="XP_Region" title="CCN-504" name="Bus 1: REQ: S-bit" description="Bus 1: REQ: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x080B" option_set="XP_Region" title="CCN-504" name="Bus 1: REQ: P-Cnt" description="Bus 1: REQ: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x080C" option_set="XP_Region" title="CCN-504" name="Bus 1: REQ: TknV" description="Bus 1: REQ: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0811" option_set="XP_Region" title="CCN-504" name="Bus 0: RSP: H-bit" description="Bus 0: RSP: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0812" option_set="XP_Region" title="CCN-504" name="Bus 0: RSP: S-bit" description="Bus 0: RSP: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0813" option_set="XP_Region" title="CCN-504" name="Bus 0: RSP: P-Cnt" description="Bus 0: RSP: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0814" option_set="XP_Region" title="CCN-504" name="Bus 0: RSP: TknV" description="Bus 0: RSP: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0819" option_set="XP_Region" title="CCN-504" name="Bus 1: RSP: H-bit" description="Bus 1: RSP: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x081A" option_set="XP_Region" title="CCN-504" name="Bus 1: RSP: S-bit" description="Bus 1: RSP: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x081B" option_set="XP_Region" title="CCN-504" name="Bus 1: RSP: P-Cnt" description="Bus 1: RSP: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x081C" option_set="XP_Region" title="CCN-504" name="Bus 1: RSP: TknV" description="Bus 1: RSP: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0821" option_set="XP_Region" title="CCN-504" name="Bus 0: SNP: H-bit" description="Bus 0: SNP: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0822" option_set="XP_Region" title="CCN-504" name="Bus 0: SNP: S-bit" description="Bus 0: SNP: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0823" option_set="XP_Region" title="CCN-504" name="Bus 0: SNP: P-Cnt" description="Bus 0: SNP: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0824" option_set="XP_Region" title="CCN-504" name="Bus 0: SNP: TknV" description="Bus 0: SNP: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0829" option_set="XP_Region" title="CCN-504" name="Bus 1: SNP: H-bit" description="Bus 1: SNP: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x082A" option_set="XP_Region" title="CCN-504" name="Bus 1: SNP: S-bit" description="Bus 1: SNP: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x082B" option_set="XP_Region" title="CCN-504" name="Bus 1: SNP: P-Cnt" description="Bus 1: SNP: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x082C" option_set="XP_Region" title="CCN-504" name="Bus 1: SNP: TknV" description="Bus 1: SNP: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0831" option_set="XP_Region" title="CCN-504" name="Bus 0: DAT: H-bit" description="Bus 0: DAT: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0832" option_set="XP_Region" title="CCN-504" name="Bus 0: DAT: S-bit" description="Bus 0: DAT: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0833" option_set="XP_Region" title="CCN-504" name="Bus 0: DAT: P-Cnt" description="Bus 0: DAT: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0834" option_set="XP_Region" title="CCN-504" name="Bus 0: DAT: TknV" description="Bus 0: DAT: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0839" option_set="XP_Region" title="CCN-504" name="Bus 1: DAT: H-bit" description="Bus 1: DAT: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x083A" option_set="XP_Region" title="CCN-504" name="Bus 1: DAT: S-bit" description="Bus 1: DAT: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x083B" option_set="XP_Region" title="CCN-504" name="Bus 1: DAT: P-Cnt" description="Bus 1: DAT: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x083C" option_set="XP_Region" title="CCN-504" name="Bus 1: DAT: TknV" description="Bus 1: DAT: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0871" option_set="XP_Region" title="CCN-504" name="Bus 0: DATB: H-bit" description="Bus 0: DATB: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x0872" option_set="XP_Region" title="CCN-504" name="Bus 0: DATB: S-bit" description="Bus 0: DATB: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x0873" option_set="XP_Region" title="CCN-504" name="Bus 0: DATB: P-Cnt" description="Bus 0: DATB: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x0874" option_set="XP_Region" title="CCN-504" name="Bus 0: DATB: TknV" description="Bus 0: DATB: No TknV, signaled when this XP transmits a valid packet."/>
+ <event event="0x0879" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: H-bit" description="Bus 1: DATB: Set H-bit, signaled when this XP sets the H-bit."/>
+ <event event="0x087A" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: S-bit" description="Bus 1: DATB: Set S-bit, signaled when this XP sets the S-bit."/>
+ <event event="0x087B" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: P-Cnt" description="Bus 1: DATB: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
+ <event event="0x087C" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: TknV" description="Bus 1: DATB: No TknV, signaled when this XP transmits a valid packet."/>
+
+ <option_set name="HN-F_Region">
+ <option event_delta="0x200000" name="HN-F 3" description="Fully-coherent Home Node 3"/>
+ <option event_delta="0x210000" name="HN-F 5" description="Fully-coherent Home Node 5"/>
+ <option event_delta="0x220000" name="HN-F 7" description="Fully-coherent Home Node 7"/>
+ <option event_delta="0x230000" name="HN-F 8" description="Fully-coherent Home Node 8"/>
+ <option event_delta="0x240000" name="HN-F 13" description="Fully-coherent Home Node 13"/>
+ <option event_delta="0x250000" name="HN-F 15" description="Fully-coherent Home Node 15"/>
+ <option event_delta="0x260000" name="HN-F 17" description="Fully-coherent Home Node 17"/>
+ <option event_delta="0x270000" name="HN-F 18" description="Fully-coherent Home Node 18"/>
+ </option_set>
+
+ <event event="0x0401" option_set="HN-F_Region" title="CCN-504" name="Cache Miss" description="Counts the total cache misses. This is the first time lookup result, and is high priority."/>
+ <event event="0x0402" option_set="HN-F_Region" title="CCN-504" name="L3 SF Cache Access" description="Counts the number of cache accesses. This is the first time access, and is high priority."/>
+ <event event="0x0403" option_set="HN-F_Region" title="CCN-504" name="Cache Fill" description="Counts the total allocations in the HN L3 cache, and all cache line allocations to the L3 cache."/>
+ <event event="0x0404" option_set="HN-F_Region" title="CCN-504" name="POCQ Retry" description="Counts the number of requests that have been retried."/>
+ <event event="0x0405" option_set="HN-F_Region" title="CCN-504" name="POCQ Reqs Recvd" description="Counts the number of requests received by HN."/>
+ <event event="0x0406" option_set="HN-F_Region" title="CCN-504" name="SF Hit" description="Counts the number of snoop filter hits."/>
+ <event event="0x0407" option_set="HN-F_Region" title="CCN-504" name="SF Evictions" description="Counts the number of snoop filter evictions. Cache invalidations are initiated."/>
+ <event event="0x0408" option_set="HN-F_Region" title="CCN-504" name="Snoops Sent" description="Counts the number of snoops sent. Does not differentiate between broadcast or directed snoops."/>
+ <event event="0x0409" option_set="HN-F_Region" title="CCN-504" name="Snoops Broadcast" description="Counts the number of snoop broadcasts sent."/>
+ <event event="0x040A" option_set="HN-F_Region" title="CCN-504" name="L3 Eviction" description="Counts the number of L3 evictions."/>
+ <event event="0x040B" option_set="HN-F_Region" title="CCN-504" name="L3 Fill Invalid Way" description="Counts the number of L3 fills to an invalid way."/>
+ <event event="0x040C" option_set="HN-F_Region" title="CCN-504" name="MC Retries" description="Counts the number of transactions retried by the memory controller."/>
+ <event event="0x040D" option_set="HN-F_Region" title="CCN-504" name="MC Reqs" description="Counts the number of requests to the memory controller."/>
+ <event event="0x040E" option_set="HN-F_Region" title="CCN-504" name="QOS HH Retry" description="Counts the number of times a highest-priority QoS class was retried at the HN-F."/>
+
+ <option_set name="RN-I_Region">
+ <option event_delta="0x800000" name="RN-I 0" description="I/O-coherent Requesting Node 0"/>
+ <option event_delta="0x820000" name="RN-I 2" description="I/O-coherent Requesting Node 2"/>
+ <option event_delta="0x860000" name="RN-I 6" description="I/O-coherent Requesting Node 6"/>
+ <option event_delta="0x8C0000" name="RN-I 12" description="I/O-coherent Requesting Node 12"/>
+ <option event_delta="0x900000" name="RN-I 16" description="I/O-coherent Requesting Node 16"/>
+ <option event_delta="0x940000" name="RN-I 20" description="I/O-coherent Requesting Node 20"/>
+ </option_set>
+
+ <event event="0x1601" option_set="RN-I_Region" title="CCN-504" name="S0 RDataBeats" description="S0 RDataBeats."/>
+ <event event="0x1602" option_set="RN-I_Region" title="CCN-504" name="S1 RDataBeats" description="S1 RDataBeats."/>
+ <event event="0x1603" option_set="RN-I_Region" title="CCN-504" name="S2 RDataBeats" description="S2 RDataBeats."/>
+ <event event="0x1604" option_set="RN-I_Region" title="CCN-504" name="RXDAT Flits received" description="RXDAT Flits received."/>
+ <event event="0x1605" option_set="RN-I_Region" title="CCN-504" name="TXDAT Flits sent" description="TXDAT Flits sent."/>
+ <event event="0x1606" option_set="RN-I_Region" title="CCN-504" name="Total TXREQ Flits sent" description="Total TXREQ Flits sent."/>
+ <event event="0x1607" option_set="RN-I_Region" title="CCN-504" name="Retried TXREQ Flits sent" description="Retried TXREQ Flits sent."/>
+ <event event="0x1608" option_set="RN-I_Region" title="CCN-504" name="RRT full" description="RRT full."/>
+ <event event="0x1609" option_set="RN-I_Region" title="CCN-504" name="WRT full" description="WRT full."/>
+ <event event="0x160A" option_set="RN-I_Region" title="CCN-504" name="Replayed TXREQ Flits" description="Replayed TXREQ Flits."/>
+
+ <option_set name="SBAS_Region">
+ <option event_delta="0x810000" name="SBAS 1" description="ACE master to CHI protocol bridge 1"/>
+ <option event_delta="0x890000" name="SBAS 9" description="ACE master to CHI protocol bridge 9"/>
+ <option event_delta="0x8B0000" name="SBAS 11" description="ACE master to CHI protocol bridge 11"/>
+ <option event_delta="0x930000" name="SBAS 19" description="ACE master to CHI protocol bridge 19"/>
+ </option_set>
+
+ <event event="0x1001" option_set="SBAS_Region" title="CCN-504" name="S0 RDataBeats" description="S0 RDataBeats."/>
+ <event event="0x1004" option_set="SBAS_Region" title="CCN-504" name="RXDAT Flits received" description="RXDAT Flits received."/>
+ <event event="0x1005" option_set="SBAS_Region" title="CCN-504" name="TXDAT Flits sent" description="TXDAT Flits sent."/>
+ <event event="0x1006" option_set="SBAS_Region" title="CCN-504" name="Total TXREQ Flits sent" description="Total TXREQ Flits sent."/>
+ <event event="0x1007" option_set="SBAS_Region" title="CCN-504" name="Retried TXREQ Flits sent" description="Retried TXREQ Flits sent."/>
+ <event event="0x1008" option_set="SBAS_Region" title="CCN-504" name="RRT full" description="RRT full."/>
+ <event event="0x1009" option_set="SBAS_Region" title="CCN-504" name="WRT full" description="WRT full."/>
+ <event event="0x100A" option_set="SBAS_Region" title="CCN-504" name="Replayed TXREQ Flits" description="Replayed TXREQ Flits."/>
+
+ </category>
diff --git a/daemon/events-Cortex-A12.xml b/daemon/events-Cortex-A12.xml
new file mode 100644
index 0000000..20a4772
--- /dev/null
+++ b/daemon/events-Cortex-A12.xml
@@ -0,0 +1,86 @@
+ <counter_set name="ARM_Cortex-A12_cnt" count="6"/>
+ <category name="Cortex-A12" counter_set="ARM_Cortex-A12_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARM_Cortex-A12_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
+ <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
+ <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
+ <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
+ <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
+ <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
+ <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
+ <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
+ <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
+ <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
+ <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
+ <event event="0x14" title="Cache" name="L1 inst access" description="Instruction cache access"/>
+ <event event="0x15" title="Cache" name="L1 data write" description="Level 1 data cache Write-Back"/>
+ <event event="0x16" title="Cache" name="L2 data access" description="Level 2 data cache access"/>
+ <event event="0x17" title="Cache" name="L2 data refill" description="Level 2 data cache refill"/>
+ <event event="0x18" title="Cache" name="L2 data write" description="Level 2 data cache Write-Back"/>
+ <event event="0x19" title="Bus" name="Access" description="Bus - Access"/>
+ <event event="0x1b" title="Instruction" name="Speculative" description="Instruction speculatively executed"/>
+ <event event="0x1c" title="Memory" name="Translation table" description="Write to translation table base architecturally executed"/>
+ <event event="0x1d" title="Bus" name="Cycle" description="Bus - Cycle"/>
+ <event event="0x40" title="Cache" name="L1 data read" description="Level 1 data cache access - Read"/>
+ <event event="0x41" title="Cache" name="L1 data access write" description="Level 1 data cache access - Write"/>
+ <event event="0x50" title="Cache" name="L2 data read" description="Level 2 data cache access - Read"/>
+ <event event="0x51" title="Cache" name="L2 data access write" description="Level 2 data cache access - Write"/>
+ <event event="0x56" title="Cache" name="L2 data victim" description="Level 2 data cache Write-Back - Victim"/>
+ <event event="0x57" title="Cache" name="L2 data clean" description="Level 2 data cache Write-Back - Cleaning and coherency"/>
+ <event event="0x58" title="Cache" name="L2 data invalidate" description="Level 2 data cache invalidate"/>
+ <event event="0x60" title="Bus" name="Read" description="Bus access - Read"/>
+ <event event="0x62" title="Bus" name="Access shared" description="Bus access - Normal"/>
+ <event event="0x63" title="Bus" name="Access not shared" description="Bus access - Not normal"/>
+ <event event="0x64" title="Bus" name="Access normal" description="Bus access - Normal"/>
+ <event event="0x65" title="Bus" name="Peripheral" description="Bus access - Peripheral"/>
+ <event event="0x66" title="Memory" name="Read" description="Data memory access - Read"/>
+ <event event="0x67" title="Memory" name="Write" description="Data memory access - Write"/>
+ <event event="0x68" title="Memory" name="Unaligned Read" description="Unaligned access - Read"/>
+ <event event="0x69" title="Memory" name="Unaligned Write" description="Unaligned access - Write"/>
+ <event event="0x6a" title="Memory" name="Unaligned" description="Unaligned access"/>
+ <event event="0x6c" title="Intrinsic" name="LDREX" description="Exclusive instruction speculatively executed - LDREX"/>
+ <event event="0x6e" title="Intrinsic" name="STREX fail" description="Exclusive instruction speculatively executed - STREX fail"/>
+ <event event="0x6f" title="Intrinsic" name="STREX" description="Exclusive instruction speculatively executed - STREX"/>
+ <event event="0x70" title="Instruction" name="Load" description="Instruction speculatively executed - Load"/>
+ <event event="0x71" title="Instruction" name="Store" description="Instruction speculatively executed - Store"/>
+ <event event="0x72" title="Instruction" name="Load/Store" description="Instruction speculatively executed - Load or store"/>
+ <event event="0x73" title="Instruction" name="Integer" description="Instruction speculatively executed - Integer data processing"/>
+ <event event="0x74" title="Instruction" name="Advanced SIMD" description="Instruction speculatively executed - Advanced SIMD"/>
+ <event event="0x75" title="Instruction" name="VFP" description="Instruction speculatively executed - VFP"/>
+ <event event="0x76" title="Instruction" name="Software change" description="Instruction speculatively executed - Software change of the PC"/>
+ <event event="0x78" title="Instruction" name="Immediate branch" description="Branch speculatively executed - Immediate branch"/>
+ <event event="0x79" title="Instruction" name="Procedure return" description="Branch speculatively executed - Procedure return"/>
+ <event event="0x7a" title="Instruction" name="Indirect branch" description="Branch speculatively executed - Indirect branch"/>
+ <event event="0x7c" title="Instruction" name="ISB" description="Barrier speculatively executed - ISB"/>
+ <event event="0x7d" title="Instruction" name="DSB" description="Barrier speculatively executed - DSB"/>
+ <event event="0x7e" title="Instruction" name="DMB" description="Barrier speculatively executed - DMB"/>
+ <event event="0x81" title="Exception" name="Undefined" description="Exception taken, other synchronous"/>
+ <event event="0x8a" title="Exception" name="Hypervisor call" description="Exception taken, Hypervisor Call"/>
+ <event event="0xc0" title="Instruction" name="Stalled Linefill" description="Instruction side stalled due to a Linefill"/>
+ <event event="0xc1" title="Instruction" name="Stalled Page Table Walk" description="Instruction Side stalled due to a Page Table Walk"/>
+ <event event="0xc2" title="Cache" name="4 Ways Read" description="Number of set of 4 ways read in the instruction cache - Tag RAM"/>
+ <event event="0xc3" title="Cache" name="Ways Read" description="Number of ways read in the instruction cache - Data RAM"/>
+ <event event="0xc4" title="Cache" name="BATC Read" description="Number of ways read in the instruction BTAC RAM"/>
+ <event event="0xca" title="Memory" name="Snoop" description="Data snooped from other processor. This event counts memory-read operations that read data from another processor within the local Cortex-A12 cluster, rather than accessing the L2 cache or issuing an external read. It increments on each transaction, rather than on each beat of data"/>
+ <event event="0xd3" title="Slots" name="Load-Store Unit" description="Duration during which all slots in the Load-Store Unit are busy"/>
+ <event event="0xd8" title="Slots" name="Load-Store Issue Queue" description="Duration during which all slots in the Load-Store Issue queue are busy"/>
+ <event event="0xd9" title="Slots" name="Data Processing Issue Queue" description="Duration during which all slots in the Data Processing issue queue are busy"/>
+ <event event="0xda" title="Slots" name="Data Engine Issue Queue" description="Duration during which all slots in the Data Engine issue queue are busy"/>
+ <event event="0xdb" title="NEON" name="Flush" description="Number of NEON instruction which fail their condition code and lead to a flush of the DE pipe"/>
+ <event event="0xdc" title="Hypervisor" name="Traps" description="Number of Trap to hypervisor"/>
+ <event event="0xde" title="PTM" name="EXTOUT 0" description="PTM EXTOUT 0"/>
+ <event event="0xdf" title="PTM" name="EXTOUT 1" description="PTM EXTOUT 1"/>
+ <event event="0xe0" title="MMU" name="Table Walk" description="Duration during which the MMU handle a Page table walk"/>
+ <event event="0xe1" title="MMU" name="Stage1 Table Walk" description="Duration during which the MMU handle a Stage1 Page table walk"/>
+ <event event="0xe2" title="MMU" name="Stage2 Table Walk" description="Duration during which the MMU handle a Stage2 Page table walk"/>
+ <event event="0xe3" title="MMU" name="LSU Table Walk" description="Duration during which the MMU handle a Page table walk requested by the Load Store Unit"/>
+ <event event="0xe4" title="MMU" name="Instruction Table Walk" description="Duration during which the MMU handle a Page table walk requested by the Instruction side"/>
+ <event event="0xe5" title="MMU" name="Preload Table Walk" description="Duration during which the MMU handle a Page table walk requested by a Preload instruction or Prefetch request"/>
+ <event event="0xe6" title="MMU" name="cp15 Table Walk" description="Duration during which the MMU handle a Page table walk requested by a cp15 operation (maintenance by MVA and VA-to-PA operation)"/>
+ <event event="0xe7" title="Cache" name="L1 PLD TLB refill" description="Level 1 PLD TLB refill"/>
+ <event event="0xe8" title="Cache" name="L1 CP15 TLB refill" description="Level 1 CP15 TLB refill"/>
+ <event event="0xe9" title="Cache" name="L1 TLB flush" description="Level 1 TLB flush"/>
+ <event event="0xea" title="Cache" name="L2 TLB access" description="Level 2 TLB access"/>
+ <event event="0xeb" title="Cache" name="L2 TLB miss" description="Level 2 TLB miss"/>
+ </category>
diff --git a/daemon/events-Linux.xml b/daemon/events-Linux.xml
index 05dc613..4a30ad6 100644
--- a/daemon/events-Linux.xml
+++ b/daemon/events-Linux.xml
@@ -11,7 +11,7 @@
<event counter="Linux_meminfo_bufferram" title="Memory" name="Buffer" display="maximum" units="B" average_selection="yes" description="Memory used by OS disk buffers"/>
<event counter="Linux_power_cpu_freq" title="Clock" name="Frequency" per_cpu="yes" display="maximum" units="Hz" series_composition="overlay" average_selection="yes" average_cores="yes" description="Frequency setting of the CPU"/>
<event counter="Linux_power_cpu_idle" title="Idle" name="State" per_cpu="yes" display="maximum" average_selection="yes" description="CPU Idle State + 1, set the Sample Rate to None to prevent the hrtimer from interrupting the system"/>
- <event counter="Linux_cpu_wait_contention" title="CPU Contention" name="Wait" per_cpu="no" display="maximum" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" description="Thread waiting on contended resource"/>
- <event counter="Linux_cpu_wait_io" title="CPU I/O" name="Wait" per_cpu="no" display="maximum" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" description="Thread waiting on I/O resource"/>
+ <event counter="Linux_cpu_wait_contention" title="CPU Contention" name="Wait" per_cpu="no" display="average" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" description="Thread waiting on contended resource"/>
+ <event counter="Linux_cpu_wait_io" title="CPU I/O" name="Wait" per_cpu="no" display="average" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" description="Thread waiting on I/O resource"/>
</category>
diff --git a/daemon/events-Mali-400.xml b/daemon/events-Mali-4xx.xml
index dceccfb..8772ce4 100644
--- a/daemon/events-Mali-400.xml
+++ b/daemon/events-Mali-4xx.xml
@@ -1,8 +1,7 @@
- <counter_set name="ARM_Mali-400_VP_cnt" count="2"/>
- <counter_set name="ARM_Mali-400_L2_cnt" count="2"/>
- <counter_set name="ARM_Mali-400_SW_cnt" count="0"/>
- <counter_set name="ARM_Mali-400_Filmstrip_cnt" count="1"/>
- <category name="Mali-400-VP" counter_set="ARM_Mali-400_VP_cnt" per_cpu="no">
+ <counter_set name="ARM_Mali-4xx_VP_0_cnt" count="2"/>
+ <counter_set name="ARM_Mali-4xx_SW_cnt" count="0"/>
+ <counter_set name="ARM_Mali-4xx_Filmstrip_cnt" count="1"/>
+ <category name="Mali-4xx-VP" counter_set="ARM_Mali-4xx_VP_0_cnt" per_cpu="no">
<event event="0x01" title="Mali GPU Vertex Processor" name="Active cycles" description="Number of cycles per frame the MaliGP2 was active."/>
<event event="0x02" title="Mali GPU Vertex Processor" name="Active cycles, vertex shader" description="Number of cycles per frame the vertex shader unit was active."/>
<event event="0x03" title="Mali GPU Vertex Processor" name="Active cycles, vertex storer" description="Number of cycles per frame the vertex storer unit was active."/>
@@ -30,70 +29,75 @@
<event event="0x21" title="Mali GPU Vertex Processor" name="Active cycles, PLBU tile iterator" description="Number of active cycles per frame spent by the MaliGP2 PLBU iterating over the tiles in the bounding box generating commands (mainly graphics primitives). This includes time spent waiting on the bus."/>
</category>
<category name="Mali GPU Fragment Processor" per_cpu="no">
- <counter_set name="ARM_Mali-400_FP0_cnt" title="Mali-400 FP0" description="Mali GPU Fragment Processor 0" count="2"/>
- <counter_set name="ARM_Mali-400_FP1_cnt" title="Mali-400 FP1" description="Mali GPU Fragment Processor 1" count="2"/>
- <counter_set name="ARM_Mali-400_FP2_cnt" title="Mali-400 FP2" description="Mali GPU Fragment Processor 2" count="2"/>
- <counter_set name="ARM_Mali-400_FP3_cnt" title="Mali-400 FP3" description="Mali GPU Fragment Processor 3" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_0_cnt" title="Mali-4xx FP0" description="Mali GPU Fragment Processor 0" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_1_cnt" title="Mali-4xx FP1" description="Mali GPU Fragment Processor 1" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_2_cnt" title="Mali-4xx FP2" description="Mali GPU Fragment Processor 2" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_3_cnt" title="Mali-4xx FP3" description="Mali GPU Fragment Processor 3" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_4_cnt" title="Mali-4xx FP4" description="Mali GPU Fragment Processor 4" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_5_cnt" title="Mali-4xx FP5" description="Mali GPU Fragment Processor 5" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_6_cnt" title="Mali-4xx FP6" description="Mali GPU Fragment Processor 6" count="2"/>
+ <counter_set name="ARM_Mali-4xx_FP_7_cnt" title="Mali-4xx FP7" description="Mali GPU Fragment Processor 7" count="2"/>
- <event event="0x00" title="Mali-400 FP" name="Active clock cycles" description="Active clock cycles, between polygon start and IRQ."/>
- <event event="0x02" title="Mali-400 FP" name="Total bus reads" description="Total number of 64-bit words read from the bus."/>
- <event event="0x03" title="Mali-400 FP" name="Total bus writes" description="Total number of 64-bit words written to the bus."/>
- <event event="0x04" title="Mali-400 FP" name="Bus read request cycles" description="Number of cycles during which the bus read request signal was HIGH."/>
- <event event="0x05" title="Mali-400 FP" name="Bus write request cycles" description="Number of cycles during which the bus write request signal was HIGH."/>
- <event event="0x06" title="Mali-400 FP" name="Bus read transactions count" description="Number of read requests accepted by the bus."/>
- <event event="0x07" title="Mali-400 FP" name="Bus write transactions" description="Number of write requests accepted by the bus."/>
- <event event="0x09" title="Mali-400 FP" name="Tile writeback writes" description="64-bit words written to the bus by the writeback unit."/>
- <event event="0x0a" title="Mali-400 FP" name="Store unit writes" description="64-bit words written to the bus by the store unit."/>
- <event event="0x0d" title="Mali-400 FP" name="Texture cache uncompressed reads" description="Number of 64-bit words read from the bus into the uncompressed textures cache."/>
- <event event="0x0e" title="Mali-400 FP" name="Polygon list reads" description="Number of 64-bit words read from the bus by the polygon list reader."/>
- <event event="0x0f" title="Mali-400 FP" name="RSW reads" description="Number of 64-bit words read from the bus into the Render State Word register."/>
- <event event="0x10" title="Mali-400 FP" name="Vertex cache reads" description="Number of 64-bit words read from the bus into the vertex cache."/>
- <event event="0x11" title="Mali-400 FP" name="Uniform remapping reads" description="Number of 64-bit words read from the bus when reading from the uniform remapping table."/>
- <event event="0x12" title="Mali-400 FP" name="Program cache reads" description="Number of 64-bit words read from the bus into the fragment shader program cache."/>
- <event event="0x13" title="Mali-400 FP" name="Varying reads" description="Number of 64-bit words containing varyings generated by the vertex processing read from the bus."/>
- <event event="0x14" title="Mali-400 FP" name="Texture descriptors reads" description="Number of 64-bit words containing texture descriptors read from the bus."/>
- <event event="0x15" title="Mali-400 FP" name="Texture descriptor remapping reads" description="Number of 64-bit words read from the bus when reading from the texture descriptor remapping table."/>
- <event event="0x17" title="Mali-400 FP" name="Load unit reads" description="Number of 64-bit words read from the bus by the LOAD sub-instruction."/>
- <event event="0x18" title="Mali-400 FP" name="Polygon count" description="Number of triangles read from the polygon list."/>
- <event event="0x19" title="Mali-400 FP" name="Pixel rectangle count" description="Number of pixel rectangles read from the polygon list."/>
- <event event="0x1a" title="Mali-400 FP" name="Lines count" description="Number of lines read from the polygon list."/>
- <event event="0x1b" title="Mali-400 FP" name="Points count" description="Number of points read from the polygon list."/>
- <event event="0x1c" title="Mali-400 FP" name="Stall cycles PolygonListReader" description="Number of clock cycles the Polygon List Reader waited for output being collected."/>
- <event event="0x1d" title="Mali-400 FP" name="Stall cycles triangle setup" description="Number of clock cycles the TSC waited for input."/>
- <event event="0x1e" title="Mali-400 FP" name="Quad rasterized count" description="Number of 2x?2 quads output from rasterizer."/>
- <event event="0x1f" title="Mali-400 FP" name="Fragment rasterized count" description="Number of fragment rasterized. Fragments/(Quads*4) gives average actual fragments per quad."/>
- <event event="0x20" title="Mali-400 FP" name="Fragment rejected fragment-kill count" description="Number of fragments exiting the fragment shader as killed."/>
- <event event="0x21" title="Mali-400 FP" name="Fragment rejected fwd-fragment-kill count" description="Number of fragments killed by forward fragment kill."/>
- <event event="0x22" title="Mali-400 FP" name="Fragment passed z/stencil count" description="Number of fragments passing Z and stencil test."/>
- <event event="0x23" title="Mali-400 FP" name="Patches rejected early z/stencil count" description="Number of patches rejected by EarlyZ. A patch can be 8x8, 4x4 or 2x2 pixels."/>
- <event event="0x24" title="Mali-400 FP" name="Patches evaluated" description="Number of patches evaluated for EarlyZ rejection."/>
- <event event="0x25" title="Mali-400 FP" name="Instruction completed count" description="Number of fragment shader instruction words completed. It is a function of pixels processed and the length of the shader programs."/>
- <event event="0x26" title="Mali-400 FP" name="Instruction failed rendezvous count" description="Number of fragment shader instructions not completed because of failed Rendezvous."/>
- <event event="0x27" title="Mali-400 FP" name="Instruction failed varying-miss count" description="Number of fragment shader instructions not completed because of failed varying operation."/>
- <event event="0x28" title="Mali-400 FP" name="Instruction failed texture-miss count" description="Number of fragment shader instructions not completed because of failed texture operation."/>
- <event event="0x29" title="Mali-400 FP" name="Instruction failed load-miss count" description="Number of fragment shader instructions not completed because of failed load operation."/>
- <event event="0x2a" title="Mali-400 FP" name="Instruction failed tile read-miss count" description="Number of fragment shader instructions not completed because of failed read from the tilebuffer."/>
- <event event="0x2b" title="Mali-400 FP" name="Instruction failed store-miss count" description="Number of fragment shader instructions not completed because of failed store operation."/>
- <event event="0x2c" title="Mali-400 FP" name="Rendezvous breakage count" description="Number of Rendezvous breakages reported."/>
- <event event="0x2d" title="Mali-400 FP" name="Pipeline bubbles cycle count" description="Number of unused cycles in the fragment shader while rendering is active."/>
- <event event="0x2e" title="Mali-400 FP" name="Texture mapper multipass count" description="Number of texture operations looped because of more texture passes needed."/>
- <event event="0x2f" title="Mali-400 FP" name="Texture mapper cycle count" description="Number of texture operation cycles."/>
- <event event="0x30" title="Mali-400 FP" name="Vertex cache hit count" description="Number of times a requested vertex was found in the cache (Number of vertex cache hits)."/>
- <event event="0x31" title="Mali-400 FP" name="Vertex cache miss count" description="Number of times a requested vertex was not found in the cache (Number of vertex cache misses)."/>
- <event event="0x32" title="Mali-400 FP" name="Varying cache hit count" description="Number of times a requested varying was found in the cache (Number of varying cache hits)."/>
- <event event="0x33" title="Mali-400 FP" name="Varying cache miss count" description="Number of times a requested varying was not found in the cache (Number of varying cache misses)."/>
- <event event="0x34" title="Mali-400 FP" name="Varying cache conflict miss count" description="Number of times a requested varying was not in the cache and its value, retrieved from memory, must overwrite an older cache entry. This happens when an access pattern cannot be serviced by the cache."/>
- <event event="0x35" title="Mali-400 FP" name="Texture cache hit count" description="Number of times a requested texel was found in the texture cache (Number of texture cache hits)."/>
- <event event="0x36" title="Mali-400 FP" name="Texture cache miss count" description="Number of times a requested texel was not found in the texture cache (Number of texture cache misses)."/>
- <event event="0x37" title="Mali-400 FP" name="Texture cache conflict miss count" description="Number of times a requested texel was not in the cache and its value, retrieved from memory, must overwrite an older cache entry. This happens when an access pattern cannot be serviced by the cache."/>
- <event event="0x38" title="Mali-400 FP" name="Compressed texture cache hit count" description="Number of times a requested item was found in the cache."/>
- <event event="0x39" title="Mali-400 FP" name="Compressed texture cache miss count" description="Number of times a requested item was not found in the cache."/>
- <event event="0x3a" title="Mali-400 FP" name="Load/Store cache hit count" description="Number of hits in the load/store cache."/>
- <event event="0x3b" title="Mali-400 FP" name="Load/Store cache miss count" description="Number of misses in the load/store cache."/>
- <event event="0x3c" title="Mali-400 FP" name="Program cache hit count" description="Number of hits in the program cache."/>
- <event event="0x3d" title="Mali-400 FP" name="Program cache miss count" description="Number of misses in the program cache."/>
+ <event event="0x00" title="Mali-4xx FP" name="Active clock cycles" description="Active clock cycles, between polygon start and IRQ."/>
+ <event event="0x02" title="Mali-4xx FP" name="Total bus reads" description="Total number of 64-bit words read from the bus."/>
+ <event event="0x03" title="Mali-4xx FP" name="Total bus writes" description="Total number of 64-bit words written to the bus."/>
+ <event event="0x04" title="Mali-4xx FP" name="Bus read request cycles" description="Number of cycles during which the bus read request signal was HIGH."/>
+ <event event="0x05" title="Mali-4xx FP" name="Bus write request cycles" description="Number of cycles during which the bus write request signal was HIGH."/>
+ <event event="0x06" title="Mali-4xx FP" name="Bus read transactions count" description="Number of read requests accepted by the bus."/>
+ <event event="0x07" title="Mali-4xx FP" name="Bus write transactions" description="Number of write requests accepted by the bus."/>
+ <event event="0x09" title="Mali-4xx FP" name="Tile writeback writes" description="64-bit words written to the bus by the writeback unit."/>
+ <event event="0x0a" title="Mali-4xx FP" name="Store unit writes" description="64-bit words written to the bus by the store unit."/>
+ <event event="0x0d" title="Mali-4xx FP" name="Texture cache uncompressed reads" description="Number of 64-bit words read from the bus into the uncompressed textures cache."/>
+ <event event="0x0e" title="Mali-4xx FP" name="Polygon list reads" description="Number of 64-bit words read from the bus by the polygon list reader."/>
+ <event event="0x0f" title="Mali-4xx FP" name="RSW reads" description="Number of 64-bit words read from the bus into the Render State Word register."/>
+ <event event="0x10" title="Mali-4xx FP" name="Vertex cache reads" description="Number of 64-bit words read from the bus into the vertex cache."/>
+ <event event="0x11" title="Mali-4xx FP" name="Uniform remapping reads" description="Number of 64-bit words read from the bus when reading from the uniform remapping table."/>
+ <event event="0x12" title="Mali-4xx FP" name="Program cache reads" description="Number of 64-bit words read from the bus into the fragment shader program cache."/>
+ <event event="0x13" title="Mali-4xx FP" name="Varying reads" description="Number of 64-bit words containing varyings generated by the vertex processing read from the bus."/>
+ <event event="0x14" title="Mali-4xx FP" name="Texture descriptors reads" description="Number of 64-bit words containing texture descriptors read from the bus."/>
+ <event event="0x15" title="Mali-4xx FP" name="Texture descriptor remapping reads" description="Number of 64-bit words read from the bus when reading from the texture descriptor remapping table."/>
+ <event event="0x17" title="Mali-4xx FP" name="Load unit reads" description="Number of 64-bit words read from the bus by the LOAD sub-instruction."/>
+ <event event="0x18" title="Mali-4xx FP" name="Polygon count" description="Number of triangles read from the polygon list."/>
+ <event event="0x19" title="Mali-4xx FP" name="Pixel rectangle count" description="Number of pixel rectangles read from the polygon list."/>
+ <event event="0x1a" title="Mali-4xx FP" name="Lines count" description="Number of lines read from the polygon list."/>
+ <event event="0x1b" title="Mali-4xx FP" name="Points count" description="Number of points read from the polygon list."/>
+ <event event="0x1c" title="Mali-4xx FP" name="Stall cycles PolygonListReader" description="Number of clock cycles the Polygon List Reader waited for output being collected."/>
+ <event event="0x1d" title="Mali-4xx FP" name="Stall cycles triangle setup" description="Number of clock cycles the TSC waited for input."/>
+ <event event="0x1e" title="Mali-4xx FP" name="Quad rasterized count" description="Number of 2x?2 quads output from rasterizer."/>
+ <event event="0x1f" title="Mali-4xx FP" name="Fragment rasterized count" description="Number of fragment rasterized. Fragments/(Quads*4) gives average actual fragments per quad."/>
+ <event event="0x20" title="Mali-4xx FP" name="Fragment rejected fragment-kill count" description="Number of fragments exiting the fragment shader as killed."/>
+ <event event="0x21" title="Mali-4xx FP" name="Fragment rejected fwd-fragment-kill count" description="Number of fragments killed by forward fragment kill."/>
+ <event event="0x22" title="Mali-4xx FP" name="Fragment passed z/stencil count" description="Number of fragments passing Z and stencil test."/>
+ <event event="0x23" title="Mali-4xx FP" name="Patches rejected early z/stencil count" description="Number of patches rejected by EarlyZ. A patch can be 8x8, 4x4 or 2x2 pixels."/>
+ <event event="0x24" title="Mali-4xx FP" name="Patches evaluated" description="Number of patches evaluated for EarlyZ rejection."/>
+ <event event="0x25" title="Mali-4xx FP" name="Instruction completed count" description="Number of fragment shader instruction words completed. It is a function of pixels processed and the length of the shader programs."/>
+ <event event="0x26" title="Mali-4xx FP" name="Instruction failed rendezvous count" description="Number of fragment shader instructions not completed because of failed Rendezvous."/>
+ <event event="0x27" title="Mali-4xx FP" name="Instruction failed varying-miss count" description="Number of fragment shader instructions not completed because of failed varying operation."/>
+ <event event="0x28" title="Mali-4xx FP" name="Instruction failed texture-miss count" description="Number of fragment shader instructions not completed because of failed texture operation."/>
+ <event event="0x29" title="Mali-4xx FP" name="Instruction failed load-miss count" description="Number of fragment shader instructions not completed because of failed load operation."/>
+ <event event="0x2a" title="Mali-4xx FP" name="Instruction failed tile read-miss count" description="Number of fragment shader instructions not completed because of failed read from the tilebuffer."/>
+ <event event="0x2b" title="Mali-4xx FP" name="Instruction failed store-miss count" description="Number of fragment shader instructions not completed because of failed store operation."/>
+ <event event="0x2c" title="Mali-4xx FP" name="Rendezvous breakage count" description="Number of Rendezvous breakages reported."/>
+ <event event="0x2d" title="Mali-4xx FP" name="Pipeline bubbles cycle count" description="Number of unused cycles in the fragment shader while rendering is active."/>
+ <event event="0x2e" title="Mali-4xx FP" name="Texture mapper multipass count" description="Number of texture operations looped because of more texture passes needed."/>
+ <event event="0x2f" title="Mali-4xx FP" name="Texture mapper cycle count" description="Number of texture operation cycles."/>
+ <event event="0x30" title="Mali-4xx FP" name="Vertex cache hit count" description="Number of times a requested vertex was found in the cache (Number of vertex cache hits)."/>
+ <event event="0x31" title="Mali-4xx FP" name="Vertex cache miss count" description="Number of times a requested vertex was not found in the cache (Number of vertex cache misses)."/>
+ <event event="0x32" title="Mali-4xx FP" name="Varying cache hit count" description="Number of times a requested varying was found in the cache (Number of varying cache hits)."/>
+ <event event="0x33" title="Mali-4xx FP" name="Varying cache miss count" description="Number of times a requested varying was not found in the cache (Number of varying cache misses)."/>
+ <event event="0x34" title="Mali-4xx FP" name="Varying cache conflict miss count" description="Number of times a requested varying was not in the cache and its value, retrieved from memory, must overwrite an older cache entry. This happens when an access pattern cannot be serviced by the cache."/>
+ <event event="0x35" title="Mali-4xx FP" name="Texture cache hit count" description="Number of times a requested texel was found in the texture cache (Number of texture cache hits)."/>
+ <event event="0x36" title="Mali-4xx FP" name="Texture cache miss count" description="Number of times a requested texel was not found in the texture cache (Number of texture cache misses)."/>
+ <event event="0x37" title="Mali-4xx FP" name="Texture cache conflict miss count" description="Number of times a requested texel was not in the cache and its value, retrieved from memory, must overwrite an older cache entry. This happens when an access pattern cannot be serviced by the cache."/>
+ <event event="0x38" title="Mali-4xx FP" name="Compressed texture cache hit count" description="Number of times a requested item was found in the cache."/>
+ <event event="0x39" title="Mali-4xx FP" name="Compressed texture cache miss count" description="Number of times a requested item was not found in the cache."/>
+ <event event="0x3a" title="Mali-4xx FP" name="Load/Store cache hit count" description="Number of hits in the load/store cache."/>
+ <event event="0x3b" title="Mali-4xx FP" name="Load/Store cache miss count" description="Number of misses in the load/store cache."/>
+ <event event="0x3c" title="Mali-4xx FP" name="Program cache hit count" description="Number of hits in the program cache."/>
+ <event event="0x3d" title="Mali-4xx FP" name="Program cache miss count" description="Number of misses in the program cache."/>
</category>
- <category name="Mali-400-L2" counter_set="ARM_Mali-400_L2_cnt" per_cpu="no">
+ <counter_set name="ARM_Mali-4xx_L2_0_cnt" title="Mali-4xx L2 0" description="Mali GPU L2 Cache Core 0" count="2"/>
+ <category name="Mali-4xx-L2_0" counter_set="ARM_Mali-4xx_L2_0_cnt" per_cpu="no">
<event event="0x01" title="Mali L2 Cache" name="Total clock cycles" description="Total clock cycles"/>
<event event="0x02" title="Mali L2 Cache" name="Active clock cycles" description="Active clock cycles"/>
@@ -126,55 +130,122 @@
<event event="0x07" option_set="Slaves" title="Mali L2 Cache" name="Read invalidates" description="Read invalidates"/>
<event event="0x08" option_set="Slaves" title="Mali L2 Cache" name="Cacheable read transactions" description="Cacheable read transactions"/>
</category>
- <category name="ARM Mali-400 Filmstrip" counter_set="ARM_Mali-400_Filmstrip_cnt" per_cpu="no">
+ <counter_set name="ARM_Mali-4xx_L2_1_cnt" title="Mali-4xx L2 1" description="Mali GPU L2 Cache Core 1" count="2"/>
+ <category name="Mali-4xx-L2_1" counter_set="ARM_Mali-4xx_L2_1_cnt" per_cpu="no">
+ <event event="0x01" title="Mali L2 Cache" name="Total clock cycles" description="Total clock cycles"/>
+ <event event="0x02" title="Mali L2 Cache" name="Active clock cycles" description="Active clock cycles"/>
+
+ <option_set name="All">
+ <option event_delta="0x08" name="Master" description="Master"/>
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+
+ <option_set name="Slaves">
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+
+ <event event="0x00" option_set="All" title="Mali L2 Cache" name="Read transactions" description="Read transactions"/>
+ <event event="0x01" option_set="All" title="Mali L2 Cache" name="Write transactions" description="Write transactions"/>
+ <event event="0x02" option_set="All" title="Mali L2 Cache" name="Words read" description="Words read"/>
+ <event event="0x03" option_set="All" title="Mali L2 Cache" name="Words written" description="Words written"/>
+ <event event="0x04" option_set="Slaves" title="Mali L2 Cache" name="Read hits" description="Read hits"/>
+ <event event="0x05" option_set="Slaves" title="Mali L2 Cache" name="Read misses" description="Read misses"/>
+ <event event="0x06" option_set="Slaves" title="Mali L2 Cache" name="Write invalidates" description="Write invalidates"/>
+ <event event="0x07" option_set="Slaves" title="Mali L2 Cache" name="Read invalidates" description="Read invalidates"/>
+ <event event="0x08" option_set="Slaves" title="Mali L2 Cache" name="Cacheable read transactions" description="Cacheable read transactions"/>
+ </category>
+ <counter_set name="ARM_Mali-4xx_L2_2_cnt" title="Mali-4xx L2 2" description="Mali GPU L2 Cache Core 2" count="2"/>
+ <category name="Mali-4xx-L2_2" counter_set="ARM_Mali-4xx_L2_2_cnt" per_cpu="no">
+ <event event="0x01" title="Mali L2 Cache" name="Total clock cycles" description="Total clock cycles"/>
+ <event event="0x02" title="Mali L2 Cache" name="Active clock cycles" description="Active clock cycles"/>
+
+ <option_set name="All">
+ <option event_delta="0x08" name="Master" description="Master"/>
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+
+ <option_set name="Slaves">
+ <option event_delta="0x10" name="All slaves" description="All slaves"/>
+ <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
+ <option event_delta="0x30" name="Slave 1" description="Slave 1"/>
+ <option event_delta="0x40" name="Slave 2" description="Slave 2"/>
+ <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
+ <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
+ </option_set>
+
+ <event event="0x00" option_set="All" title="Mali L2 Cache" name="Read transactions" description="Read transactions"/>
+ <event event="0x01" option_set="All" title="Mali L2 Cache" name="Write transactions" description="Write transactions"/>
+ <event event="0x02" option_set="All" title="Mali L2 Cache" name="Words read" description="Words read"/>
+ <event event="0x03" option_set="All" title="Mali L2 Cache" name="Words written" description="Words written"/>
+ <event event="0x04" option_set="Slaves" title="Mali L2 Cache" name="Read hits" description="Read hits"/>
+ <event event="0x05" option_set="Slaves" title="Mali L2 Cache" name="Read misses" description="Read misses"/>
+ <event event="0x06" option_set="Slaves" title="Mali L2 Cache" name="Write invalidates" description="Write invalidates"/>
+ <event event="0x07" option_set="Slaves" title="Mali L2 Cache" name="Read invalidates" description="Read invalidates"/>
+ <event event="0x08" option_set="Slaves" title="Mali L2 Cache" name="Cacheable read transactions" description="Cacheable read transactions"/>
+ </category>
+ <category name="ARM Mali-4xx Filmstrip" counter_set="ARM_Mali-4xx_Filmstrip_cnt" per_cpu="no">
<option_set name="fs">
<option event_delta="0x3c" name="1:60" description="captures every 60th frame"/>
<option event_delta="0x1e" name="1:30" description="captures every 30th frame"/>
<option event_delta="0xa" name="1:10" description="captures every 10th frame"/>
</option_set>
- <event event="0x0400" option_set="fs" title="ARM Mali-400" name="Filmstrip" description="Scaled framebuffer"/>
+ <event event="0x0400" option_set="fs" title="ARM Mali-4xx" name="Filmstrip" description="Scaled framebuffer"/>
</category>
- <category name="ARM_Mali-400_Voltage" per_cpu="no">
- <event counter="ARM_Mali-400_Voltage" title="Mali GPU Voltage" name="Voltage" display="average" average_selection="yes" units="mV" description="GPU core voltage."/>
+ <category name="ARM_Mali-4xx_Voltage" per_cpu="no">
+ <event counter="ARM_Mali-4xx_Voltage" title="Mali GPU Voltage" name="Voltage" display="average" average_selection="yes" units="mV" description="GPU core voltage."/>
</category>
- <category name="ARM_Mali-400_Frequency" per_cpu="no">
- <event counter="ARM_Mali-400_Frequency" title="Mali GPU Frequency" name="Frequency" display="average" average_selection="yes" units="MHz" description="GPU core frequency."/>
+ <category name="ARM_Mali-4xx_Frequency" per_cpu="no">
+ <event counter="ARM_Mali-4xx_Frequency" title="Mali GPU Frequency" name="Frequency" display="average" average_selection="yes" units="MHz" description="GPU core frequency."/>
</category>
- <category name="Mali-400-SW" counter_set="ARM_Mali-400_SW_cnt" per_cpu="no">
+ <category name="Mali-4xx-SW" counter_set="ARM_Mali-4xx_SW_cnt" per_cpu="no">
<!-- EGL Counters -->
- <event counter="ARM_Mali-400_SW_17" title="Mali EGL Software Counters" name="Blit Time" description="Time spent blitting the framebuffer from video memory to framebuffer."/>
+ <event counter="ARM_Mali-4xx_SW_0" title="Mali EGL Software Counters" name="Blit Time" description="Time spent blitting the framebuffer from video memory to framebuffer."/>
<!-- glDrawElements Counters -->
- <event counter="ARM_Mali-400_SW_18" title="glDrawElements Statistics" name="Calls to glDrawElements" description="Number of calls to glDrawElements."/>
- <event counter="ARM_Mali-400_SW_19" title="glDrawElements Statistics" name="Indices to glDrawElements" description="Number of indices to glDrawElements."/>
- <event counter="ARM_Mali-400_SW_20" title="glDrawElements Statistics" name="Transformed by glDrawElements" description="Number of vertices transformed by glDrawElements."/>
+ <event counter="ARM_Mali-4xx_SW_1" title="glDrawElements Statistics" name="Calls to glDrawElements" description="Number of calls to glDrawElements."/>
+ <event counter="ARM_Mali-4xx_SW_2" title="glDrawElements Statistics" name="Indices to glDrawElements" description="Number of indices to glDrawElements."/>
+ <event counter="ARM_Mali-4xx_SW_3" title="glDrawElements Statistics" name="Transformed by glDrawElements" description="Number of vertices transformed by glDrawElements."/>
<!-- glDrawArrays Counters -->
- <event counter="ARM_Mali-400_SW_21" title="glDrawArrays Statistics" name="Calls to glDrawArrays" description="Number of calls to glDrawArrays."/>
- <event counter="ARM_Mali-400_SW_22" title="glDrawArrays Statistics" name="Transformed by glDrawArrays" description="Number of vertices transformed by glDrawArrays."/>
+ <event counter="ARM_Mali-4xx_SW_4" title="glDrawArrays Statistics" name="Calls to glDrawArrays" description="Number of calls to glDrawArrays."/>
+ <event counter="ARM_Mali-4xx_SW_5" title="glDrawArrays Statistics" name="Transformed by glDrawArrays" description="Number of vertices transformed by glDrawArrays."/>
<!-- Draw Call Counters -->
- <event counter="ARM_Mali-400_SW_23" title="Drawcall Statistics" name="Points" description="Number of calls to glDraw* with parameter GL_POINTS."/>
- <event counter="ARM_Mali-400_SW_24" title="Drawcall Statistics" name="Lines" description="Number of calls to glDraw* with parameter GL_LINES."/>
- <event counter="ARM_Mali-400_SW_25" title="Drawcall Statistics" name="Lineloop" description="Number of calls to glDraw* with parameter GL_LINE_LOOP."/>
- <event counter="ARM_Mali-400_SW_26" title="Drawcall Statistics" name="Linestrip" description="Number of calls to glDraw* with parameter GL_LINE_STRIP."/>
- <event counter="ARM_Mali-400_SW_27" title="Drawcall Statistics" name="Triangles" description="Number of calls to glDraw* with parameter GL_TRIANGLES."/>
- <event counter="ARM_Mali-400_SW_28" title="Drawcall Statistics" name="Trianglestrip" description="Number of calls to glDraw* with parameter GL_TRIANGLE_STRIP."/>
- <event counter="ARM_Mali-400_SW_29" title="Drawcall Statistics" name="Trianglefan" description="Number of calls to glDraw* with parameter GL_TRIANGLE_FAN."/>
- <event counter="ARM_Mali-400_SW_30" title="Drawcall Statistics" name="Vertex Upload Time (us)" description="Time spent uploading vertex attributes and faceindex data not present in a VBO."/>
- <event counter="ARM_Mali-400_SW_31" title="Drawcall Statistics" name="Uniform Bytes Copied (bytes)" description="Number of bytes copied to Mali memory as a result of uniforms update."/>
+ <event counter="ARM_Mali-4xx_SW_6" title="Drawcall Statistics" name="Points" description="Number of calls to glDraw* with parameter GL_POINTS."/>
+ <event counter="ARM_Mali-4xx_SW_7" title="Drawcall Statistics" name="Lines" description="Number of calls to glDraw* with parameter GL_LINES."/>
+ <event counter="ARM_Mali-4xx_SW_8" title="Drawcall Statistics" name="Lineloop" description="Number of calls to glDraw* with parameter GL_LINE_LOOP."/>
+ <event counter="ARM_Mali-4xx_SW_9" title="Drawcall Statistics" name="Linestrip" description="Number of calls to glDraw* with parameter GL_LINE_STRIP."/>
+ <event counter="ARM_Mali-4xx_SW_10" title="Drawcall Statistics" name="Triangles" description="Number of calls to glDraw* with parameter GL_TRIANGLES."/>
+ <event counter="ARM_Mali-4xx_SW_11" title="Drawcall Statistics" name="Trianglestrip" description="Number of calls to glDraw* with parameter GL_TRIANGLE_STRIP."/>
+ <event counter="ARM_Mali-4xx_SW_12" title="Drawcall Statistics" name="Trianglefan" description="Number of calls to glDraw* with parameter GL_TRIANGLE_FAN."/>
+ <event counter="ARM_Mali-4xx_SW_13" title="Drawcall Statistics" name="Vertex Upload Time (us)" description="Time spent uploading vertex attributes and faceindex data not present in a VBO."/>
+ <event counter="ARM_Mali-4xx_SW_14" title="Drawcall Statistics" name="Uniform Bytes Copied (bytes)" description="Number of bytes copied to Mali memory as a result of uniforms update."/>
<!-- Buffer Profiling Counters -->
- <event counter="ARM_Mali-400_SW_32" title="Buffer Profiling" name="Texture Upload Time (ms)" description="Time spent uploading textures."/>
- <event counter="ARM_Mali-400_SW_33" title="Buffer Profiling" name="VBO Upload Time (ms)" description="Time spent uploading vertex buffer objects."/>
- <event counter="ARM_Mali-400_SW_34" title="Buffer Profiling" name="FBO Flushes" description="Number of flushed on framebuffer attachment."/>
+ <event counter="ARM_Mali-4xx_SW_15" title="Buffer Profiling" name="Texture Upload Time (ms)" description="Time spent uploading textures."/>
+ <event counter="ARM_Mali-4xx_SW_16" title="Buffer Profiling" name="VBO Upload Time (ms)" description="Time spent uploading vertex buffer objects."/>
+ <event counter="ARM_Mali-4xx_SW_17" title="Buffer Profiling" name="FBO Flushes" description="Number of flushed on framebuffer attachment."/>
<!-- OpenGL ES 1.1 Emulation -->
- <event counter="ARM_Mali-400_SW_35" title="Fixed-function Emulation" name="# Vertex Shaders Generated" description="Number of vertex shaders generated."/>
- <event counter="ARM_Mali-400_SW_36" title="Fixed-function Emulation" name="# Fragment Shaders Generated" description="Number of fragment shaders generated."/>
+ <event counter="ARM_Mali-4xx_SW_18" title="Fixed-function Emulation" name="# Vertex Shaders Generated" description="Number of vertex shaders generated."/>
+ <event counter="ARM_Mali-4xx_SW_19" title="Fixed-function Emulation" name="# Fragment Shaders Generated" description="Number of fragment shaders generated."/>
<!-- Geometry Statistics -->
- <event counter="ARM_Mali-400_SW_50" title="Geometry Statistics" name="Triangles" description="The total number of triangles passed to GLES per-frame."/>
- <event counter="ARM_Mali-400_SW_51" title="Geometry Statistics" name="Independent Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLES."/>
- <event counter="ARM_Mali-400_SW_52" title="Geometry Statistics" name="Strip Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLE_STRIP."/>
- <event counter="ARM_Mali-400_SW_53" title="Geometry Statistics" name="Fan Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLE_FAN."/>
- <event counter="ARM_Mali-400_SW_54" title="Geometry Statistics" name="Lines" description="Number of lines passed to GLES per-frame."/>
- <event counter="ARM_Mali-400_SW_55" title="Geometry Statistics" name="Independent Lines" description="Number of lines passed to GLES using the mode GL_LINES."/>
- <event counter="ARM_Mali-400_SW_56" title="Geometry Statistics" name="Strip Lines" description="Number of lines passed to GLES using the mode GL_LINE_STRIP."/>
- <event counter="ARM_Mali-400_SW_57" title="Geometry Statistics" name="Loop Lines" description="Number of lines passed to GLES using the mode GL_LINE_LOOP."/>
+ <event counter="ARM_Mali-4xx_SW_33" title="Geometry Statistics" name="Triangles" description="The total number of triangles passed to GLES per-frame."/>
+ <event counter="ARM_Mali-4xx_SW_34" title="Geometry Statistics" name="Independent Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLES."/>
+ <event counter="ARM_Mali-4xx_SW_35" title="Geometry Statistics" name="Strip Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLE_STRIP."/>
+ <event counter="ARM_Mali-4xx_SW_36" title="Geometry Statistics" name="Fan Triangles" description="Number of triangles passed to GLES using the mode GL_TRIANGLE_FAN."/>
+ <event counter="ARM_Mali-4xx_SW_37" title="Geometry Statistics" name="Lines" description="Number of lines passed to GLES per-frame."/>
+ <event counter="ARM_Mali-4xx_SW_38" title="Geometry Statistics" name="Independent Lines" description="Number of lines passed to GLES using the mode GL_LINES."/>
+ <event counter="ARM_Mali-4xx_SW_39" title="Geometry Statistics" name="Strip Lines" description="Number of lines passed to GLES using the mode GL_LINE_STRIP."/>
+ <event counter="ARM_Mali-4xx_SW_40" title="Geometry Statistics" name="Loop Lines" description="Number of lines passed to GLES using the mode GL_LINE_LOOP."/>
</category>
-
diff --git a/daemon/events-Mali-T6xx_hw.xml b/daemon/events-Mali-T6xx_hw.xml
index 1fd9c4e..8cfe7c3 100644
--- a/daemon/events-Mali-T6xx_hw.xml
+++ b/daemon/events-Mali-T6xx_hw.xml
@@ -5,14 +5,14 @@
<event counter="ARM_Mali-T6xx_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles the GPU had a pending interrupt"/>
<event counter="ARM_Mali-T6xx_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) was active"/>
<event counter="ARM_Mali-T6xx_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) was active"/>
- <event counter="ARM_Mali-T6xx_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (compute) was active"/>
+ <event counter="ARM_Mali-T6xx_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) was active"/>
<event counter="ARM_Mali-T6xx_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
<event counter="ARM_Mali-T6xx_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
<event counter="ARM_Mali-T6xx_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
<event counter="ARM_Mali-T6xx_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
<event counter="ARM_Mali-T6xx_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
- <event counter="ARM_Mali-T6xx_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (compute) completed in JS2"/>
+ <event counter="ARM_Mali-T6xx_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
</category>
diff --git a/daemon/main.cpp b/daemon/main.cpp
index a6ddfe2..d1b0913 100644
--- a/daemon/main.cpp
+++ b/daemon/main.cpp
@@ -19,6 +19,8 @@
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
#include "Child.h"
#include "SessionData.h"
#include "OlySocket.h"
@@ -32,7 +34,7 @@ extern Child* child;
static int shutdownFilesystem();
static pthread_mutex_t numSessions_mutex;
static int numSessions = 0;
-static OlySocket* socket = NULL;
+static OlySocket* sock = NULL;
static bool driverRunningAtStart = false;
static bool driverMountedAtStart = false;
@@ -41,11 +43,13 @@ struct cmdline_t {
char* module;
};
+#define DEFAULT_PORT 8080
+
void cleanUp() {
if (shutdownFilesystem() == -1) {
logg->logMessage("Error shutting down gator filesystem");
}
- delete socket;
+ delete sock;
delete util;
delete logg;
}
@@ -100,6 +104,92 @@ static void child_exit(int signum) {
}
}
+static int udpPort(int port) {
+ int s;
+ struct sockaddr_in sockaddr;
+ int on;
+
+ s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (s == -1) {
+ logg->logError(__FILE__, __LINE__, "socket failed");
+ handleException();
+ }
+
+ on = 1;
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on)) != 0) {
+ logg->logError(__FILE__, __LINE__, "setsockopt failed");
+ handleException();
+ }
+
+ memset((void*)&sockaddr, 0, sizeof(sockaddr));
+ sockaddr.sin_family = AF_INET;
+ sockaddr.sin_port = htons(port);
+ sockaddr.sin_addr.s_addr = INADDR_ANY;
+ if (bind(s, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
+ logg->logError(__FILE__, __LINE__, "socket failed");
+ handleException();
+ }
+
+ return s;
+}
+
+#define UDP_ANS_PORT 30000
+#define UDP_REQ_PORT 30001
+
+typedef struct {
+ char rviHeader[8];
+ uint32_t messageID;
+ uint8_t ethernetAddress[8];
+ uint32_t ethernetType;
+ uint32_t dhcp;
+ char dhcpName[40];
+ uint32_t ipAddress;
+ uint32_t defaultGateway;
+ uint32_t subnetMask;
+ uint32_t activeConnections;
+} RVIConfigureInfo;
+
+static const char DST_REQ[] = { 'D', 'S', 'T', '_', 'R', 'E', 'Q', ' ', 0, 0, 0, 0x64 };
+
+static void* answerThread(void* pVoid) {
+ const struct cmdline_t * const cmdline = (struct cmdline_t *)pVoid;
+ RVIConfigureInfo dstAns;
+ int req = udpPort(UDP_REQ_PORT);
+ int ans = udpPort(UDP_ANS_PORT);
+
+ // Format the answer buffer
+ memset(&dstAns, 0, sizeof(dstAns));
+ memcpy(dstAns.rviHeader, "STR_ANS ", sizeof(dstAns.rviHeader));
+ if (gethostname(dstAns.dhcpName, sizeof(dstAns.dhcpName) - 1) != 0) {
+ logg->logError(__FILE__, __LINE__, "gethostname failed");
+ handleException();
+ }
+ // Subvert the defaultGateway field for the port number
+ if (cmdline->port != DEFAULT_PORT) {
+ dstAns.defaultGateway = cmdline->port;
+ }
+ // Subvert the subnetMask field for the protocol version
+ dstAns.subnetMask = PROTOCOL_VERSION;
+
+ for (;;) {
+ char buf[128];
+ struct sockaddr_in sockaddr;
+ socklen_t addrlen;
+ int read;
+ addrlen = sizeof(sockaddr);
+ read = recvfrom(req, &buf, sizeof(buf), 0, (struct sockaddr *)&sockaddr, &addrlen);
+ if (read < 0) {
+ logg->logError(__FILE__, __LINE__, "recvfrom failed");
+ handleException();
+ } else if ((read == 12) && (memcmp(buf, DST_REQ, sizeof(DST_REQ)) == 0)) {
+ if (sendto(ans, &dstAns, sizeof(dstAns), 0, (struct sockaddr *)&sockaddr, addrlen) != sizeof(dstAns)) {
+ logg->logError(__FILE__, __LINE__, "sendto failed");
+ handleException();
+ }
+ }
+ }
+}
+
// retval: -1 = failure; 0 = was already mounted; 1 = successfully mounted
static int mountGatorFS() {
// If already mounted,
@@ -222,14 +312,14 @@ static int shutdownFilesystem() {
static struct cmdline_t parseCommandLine(int argc, char** argv) {
struct cmdline_t cmdline;
- cmdline.port = 8080;
+ cmdline.port = DEFAULT_PORT;
cmdline.module = NULL;
char version_string[256]; // arbitrary length to hold the version information
int c;
// build the version string
if (PROTOCOL_VERSION < PROTOCOL_DEV) {
- snprintf(version_string, sizeof(version_string), "Streamline gatord version %d (DS-5 v5.%d)", PROTOCOL_VERSION, PROTOCOL_VERSION + 1);
+ snprintf(version_string, sizeof(version_string), "Streamline gatord version %d (DS-5 v5.%d)", PROTOCOL_VERSION, PROTOCOL_VERSION);
} else {
snprintf(version_string, sizeof(version_string), "Streamline gatord development version %d", PROTOCOL_VERSION);
}
@@ -277,7 +367,7 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) {
}
// Error checking
- if (cmdline.port != 8080 && gSessionData->mSessionXMLPath != NULL) {
+ if (cmdline.port != DEFAULT_PORT && gSessionData->mSessionXMLPath != NULL) {
logg->logError(__FILE__, __LINE__, "Only a port or a session xml can be specified, not both");
handleException();
}
@@ -339,11 +429,16 @@ int main(int argc, char** argv, char* envp[]) {
child->run();
delete child;
} else {
- socket = new OlySocket(cmdline.port, true);
+ pthread_t answerThreadID;
+ if (pthread_create(&answerThreadID, NULL, answerThread, &cmdline)) {
+ logg->logError(__FILE__, __LINE__, "Failed to create answer thread");
+ handleException();
+ }
+ sock = new OlySocket(cmdline.port, true);
// Forever loop, can be exited via a signal or exception
while (1) {
logg->logMessage("Waiting on connection...");
- socket->acceptConnection();
+ sock->acceptConnection();
int pid = fork();
if (pid < 0) {
@@ -351,14 +446,14 @@ int main(int argc, char** argv, char* envp[]) {
logg->logError(__FILE__, __LINE__, "Fork process failed. Please power cycle the target device if this error persists.");
} else if (pid == 0) {
// Child
- socket->closeServerSocket();
- child = new Child(socket, numSessions + 1);
+ sock->closeServerSocket();
+ child = new Child(sock, numSessions + 1);
child->run();
delete child;
exit(0);
} else {
// Parent
- socket->closeSocket();
+ sock->closeSocket();
pthread_mutex_lock(&numSessions_mutex);
numSessions++;
diff --git a/driver/Makefile b/driver/Makefile
index 3af8b8d..0d4ca68 100644
--- a/driver/Makefile
+++ b/driver/Makefile
@@ -21,10 +21,13 @@ gator-y += gator_events_mali_t6xx.o \
gator_events_mali_t6xx_hw.o
include $(M)/mali_t6xx.mk
else
-gator-y += gator_events_mali_400.o
+gator-y += gator_events_mali_4xx.o
endif
gator-y += gator_events_mali_common.o
EXTRA_CFLAGS += -DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT)
+ifneq ($(GATOR_MALI_INTERFACE_STYLE),)
+EXTRA_CFLAGS += -DGATOR_MALI_INTERFACE_STYLE=$(GATOR_MALI_INTERFACE_STYLE)
+endif
endif
# GATOR_TEST controls whether to include (=1) or exclude (=0) test code.
@@ -33,9 +36,12 @@ EXTRA_CFLAGS += -DGATOR_TEST=$(GATOR_TEST)
gator-$(CONFIG_ARM) += gator_events_armv6.o \
gator_events_armv7.o \
+ gator_events_ccn-504.o \
gator_events_l2c-310.o \
gator_events_scorpion.o
+gator-$(CONFIG_ARM64) += gator_events_ccn-504.o
+
$(obj)/gator_main.o: gator_events.h
clean-files := gator_events.h
diff --git a/driver/gator.h b/driver/gator.h
index 205cbcd..2e122da 100644
--- a/driver/gator.h
+++ b/driver/gator.h
@@ -20,8 +20,6 @@
#define GATOR_CPU_FREQ_SUPPORT (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ)
#define GATOR_IKS_SUPPORT defined(CONFIG_BL_SWITCHER)
-#define GATOR_LIVE 1
-
// cpu ids
#define ARM1136 0xb36
#define ARM1156 0xb56
@@ -31,6 +29,7 @@
#define CORTEX_A7 0xc07
#define CORTEX_A8 0xc08
#define CORTEX_A9 0xc09
+#define CORTEX_A12 0xc0d
#define CORTEX_A15 0xc0f
#define SCORPION 0x00f
#define SCORPIONMP 0x02d
@@ -46,9 +45,14 @@
struct gator_cpu {
const int cpuid;
+ // Human readable name
const char core_name[MAXSIZE_CORE_NAME];
+ // Perf PMU name
const char * const pmu_name;
+ // gatorfs event name
const char * const pmnc_name;
+ // compatible from Documentation/devicetree/bindings/arm/cpus.txt
+ const char * const dt_name;
const int pmnc_counters;
};
diff --git a/driver/gator_annotate.c b/driver/gator_annotate.c
index ad9f309..5b9399b 100644
--- a/driver/gator_annotate.c
+++ b/driver/gator_annotate.c
@@ -39,14 +39,17 @@ static int annotate_copy(struct file *file, char const __user *buf, size_t count
static ssize_t annotate_write(struct file *file, char const __user *buf, size_t count_orig, loff_t *offset)
{
int pid, cpu, header_size, available, contiguous, length1, length2, size, count = count_orig & 0x7fffffff;
+ bool interrupt_context;
if (*offset) {
return -EINVAL;
}
- // Annotations are not supported in interrupt context
- if (in_interrupt()) {
- printk(KERN_WARNING "gator: Annotations are not supported in interrupt context\n");
+ interrupt_context = in_interrupt();
+ // Annotations are not supported in interrupt context, but may work if you comment out the the next four lines of code.
+ // By doing so, annotations in interrupt context can result in deadlocks and lost data.
+ if (interrupt_context) {
+ printk(KERN_WARNING "gator: Annotations are not supported in interrupt context. Edit gator_annotate.c in the gator driver to enable annotations in interrupt context.\n");
return -EINVAL;
}
@@ -77,7 +80,19 @@ static ssize_t annotate_write(struct file *file, char const __user *buf, size_t
if (size <= 0) {
// Buffer is full, wait until space is available
spin_unlock(&annotate_lock);
+
+ // Drop the annotation as blocking is not allowed in interrupt context
+ if (interrupt_context) {
+ return -EINVAL;
+ }
+
wait_event_interruptible(gator_annotate_wait, buffer_bytes_available(cpu, ANNOTATE_BUF) > header_size || !collect_annotations);
+
+ // Check to see if a signal is pending
+ if (signal_pending(current)) {
+ return -EINTR;
+ }
+
goto retry;
}
diff --git a/driver/gator_cookies.c b/driver/gator_cookies.c
index c332187..5f98a1c 100644
--- a/driver/gator_cookies.c
+++ b/driver/gator_cookies.c
@@ -240,13 +240,13 @@ static inline uint32_t get_cookie(int cpu, struct task_struct *task, const char
if (strcmp(text, "app_process") == 0) {
if (!translate_app_process(&text, cpu, task, from_wq))
- return INVALID_COOKIE;
+ return UNRESOLVED_COOKIE;
}
// Can be called from interrupt handler or from work queue or from scheduler trace
local_irq_save(flags);
- cookie = INVALID_COOKIE;
+ cookie = UNRESOLVED_COOKIE;
if (marshal_cookie_header(text)) {
cookie = per_cpu(cookie_next_key, cpu) += nr_cpu_ids;
cookiemap_add(key, cookie);
@@ -272,7 +272,7 @@ static int get_exec_cookie(int cpu, struct task_struct *task)
return get_cookie(cpu, task, text, false);
}
- return INVALID_COOKIE;
+ return UNRESOLVED_COOKIE;
}
static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsigned long addr, off_t *offset)
@@ -302,7 +302,7 @@ static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsig
}
if (!vma)
- cookie = INVALID_COOKIE;
+ cookie = UNRESOLVED_COOKIE;
return cookie;
}
diff --git a/driver/gator_events_ccn-504.c b/driver/gator_events_ccn-504.c
new file mode 100644
index 0000000..b91a9a1
--- /dev/null
+++ b/driver/gator_events_ccn-504.c
@@ -0,0 +1,306 @@
+/**
+ * Copyright (C) ARM Limited 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*******************************************************************************
+ * WARNING: This code is an experimental implementation of the CCN-504 hardware
+ * counters which has not been tested on the hardware. Commented debug
+ * statements are present and can be uncommented for diagnostic purposes.
+ ******************************************************************************/
+
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include "gator.h"
+
+#define PERIPHBASE 0x2E000000
+
+#define NUM_REGIONS 256
+#define REGION_SIZE (64*1024)
+#define REGION_DEBUG 1
+#define REGION_XP 64
+
+// DT (Debug) region
+#define PMEVCNTSR0 0x0150
+#define PMCCNTRSR 0x0190
+#define PMCR 0x01A8
+#define PMSR 0x01B0
+#define PMSR_REQ 0x01B8
+#define PMSR_CLR 0x01C0
+
+// XP region
+#define DT_CONFIG 0x0300
+
+// Multiple
+#define PMU_EVENT_SEL 0x0600
+#define OLY_ID 0xFF00
+
+#define CCNT 4
+#define CNTMAX (4 + 1)
+
+#define get_pmu_event_id(event) (((event) >> 0) & 0xFF)
+#define get_node_type(event) (((event) >> 8) & 0xFF)
+#define get_region(event) (((event) >> 16) & 0xFF)
+
+MODULE_PARM_DESC(ccn504_addr, "CCN-504 physical base address");
+static unsigned long ccn504_addr = 0;
+module_param(ccn504_addr, ulong, 0444);
+
+static void __iomem *gator_events_ccn504_base;
+static unsigned long gator_events_ccn504_enabled[CNTMAX];
+static unsigned long gator_events_ccn504_event[CNTMAX];
+static unsigned long gator_events_ccn504_key[CNTMAX];
+static int gator_events_ccn504_buffer[2*CNTMAX];
+
+static void gator_events_ccn504_create_shutdown(void)
+{
+ if (gator_events_ccn504_base != NULL) {
+ iounmap(gator_events_ccn504_base);
+ }
+}
+
+static int gator_events_ccn504_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+ char buf[32];
+
+ for (i = 0; i < CNTMAX; ++i) {
+ if (i == CCNT) {
+ snprintf(buf, sizeof(buf), "CCN-504_ccnt");
+ } else {
+ snprintf(buf, sizeof(buf), "CCN-504_cnt%i", i);
+ }
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (!dir) {
+ return -1;
+ }
+
+ gatorfs_create_ulong(sb, dir, "enabled", &gator_events_ccn504_enabled[i]);
+ if (i != CCNT) {
+ gatorfs_create_ulong(sb, dir, "event", &gator_events_ccn504_event[i]);
+ }
+ gatorfs_create_ro_ulong(sb, dir, "key", &gator_events_ccn504_key[i]);
+ }
+
+ return 0;
+}
+
+static void gator_events_ccn504_set_dt_config(int xp_node_id, int event_num, int value)
+{
+ u32 dt_config;
+
+ dt_config = readl(gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
+ dt_config |= (value + event_num) << (4*event_num);
+ //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, dt_config, (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
+ writel(dt_config, gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
+}
+
+static int gator_events_ccn504_start(void)
+{
+ int i;
+
+ // Disable INTREQ on overflow
+ // [6] ovfl_intr_en = 0
+ // perhaps set to 1?
+ // [5] cntr_rst = 0
+ // No register paring
+ // [4:1] cntcfg = 0
+ // Enable PMU features
+ // [0] pmu_en = 1
+ //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0x1, REGION_DEBUG*REGION_SIZE + PMCR);
+ writel(0x1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMCR);
+
+ // Assume no other pmu_event_sel registers are set
+
+ // cycle counter does not need to be enabled
+ for (i = 0; i < CCNT; ++i) {
+ int pmu_event_id;
+ int node_type;
+ int region;
+ u32 pmu_event_sel;
+ u32 oly_id_whole;
+ u32 oly_id;
+ u32 node_id;
+
+ if (!gator_events_ccn504_enabled[i]) {
+ continue;
+ }
+
+ pmu_event_id = get_pmu_event_id(gator_events_ccn504_event[i]);
+ node_type = get_node_type(gator_events_ccn504_event[i]);
+ region = get_region(gator_events_ccn504_event[i]);
+ //printk(KERN_ERR "%s(%s:%i) pmu_event_id: %x node_type: %x region: %x\n", __FUNCTION__, __FILE__, __LINE__, pmu_event_id, node_type, region);
+
+ // Verify the node_type
+ oly_id_whole = readl(gator_events_ccn504_base + region*REGION_SIZE + OLY_ID);
+ oly_id = oly_id_whole & 0x1F;
+ node_id = (oly_id_whole >> 8) & 0x7F;
+ if ((oly_id != node_type) ||
+ ((node_type == 0x16) && ((oly_id == 0x14) || (oly_id == 0x15) || (oly_id == 0x16) || (oly_id == 0x18) || (oly_id == 0x19) || (oly_id == 0x1A)))) {
+ printk(KERN_ERR "%s(%s:%i) oly_id is %x expected %x\n", __FUNCTION__, __FILE__, __LINE__, oly_id, node_type);
+ return -1;
+ }
+
+ // Set the control register
+ pmu_event_sel = readl(gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+ switch (node_type) {
+ case 0x08: // XP
+ pmu_event_sel |= pmu_event_id << (7*i);
+ gator_events_ccn504_set_dt_config(node_id, i, 0x4);
+ break;
+ case 0x04: // HN-F
+ case 0x16: // RN-I
+ case 0x10: // SBAS
+ pmu_event_sel |= pmu_event_id << (4*i);
+ gator_events_ccn504_set_dt_config(node_id/2, i, (node_id & 1) == 0 ? 0x8 : 0xC);
+ break;
+ }
+ //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, pmu_event_sel, region*REGION_SIZE + PMU_EVENT_SEL);
+ writel(pmu_event_sel, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+ }
+
+ return 0;
+}
+
+static void gator_events_ccn504_stop(void)
+{
+ int i;
+
+ // cycle counter does not need to be disabled
+ for (i = 0; i < CCNT; ++i) {
+ int node_type;
+ int region;
+
+ node_type = get_node_type(gator_events_ccn504_event[i]);
+ region = get_region(gator_events_ccn504_event[i]);
+
+ //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0, region*REGION_SIZE + PMU_EVENT_SEL);
+ writel(0, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+ }
+
+ // Clear dt_config
+ for (i = 0; i < 11; ++i) {
+ //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0, (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
+ writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
+ }
+}
+
+static int gator_events_ccn504_read(int **buffer)
+{
+ int i;
+ int len = 0;
+
+ if (!on_primary_core()) {
+ return 0;
+ }
+
+ // Verify the pmsr register is zero
+ //i = 0;
+ while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) != 0) {
+ //++i;
+ }
+ //printk(KERN_ERR "%s(%s:%i) %i\n", __FUNCTION__, __FILE__, __LINE__, i);
+
+ // Request a PMU snapshot
+ writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_REQ);
+
+ // Wait for the snapshot
+ //i = 0;
+ while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) == 0) {
+ //++i;
+ }
+ //printk(KERN_ERR "%s(%s:%i) %i\n", __FUNCTION__, __FILE__, __LINE__, i);
+
+ // Read the shadow registers
+ for (i = 0; i < CNTMAX; ++i) {
+ if (!gator_events_ccn504_enabled[i]) {
+ continue;
+ }
+
+ gator_events_ccn504_buffer[len++] = gator_events_ccn504_key[i];
+ gator_events_ccn504_buffer[len++] = readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + (i == CCNT ? PMCCNTRSR : PMEVCNTSR0 + 8*i));
+
+ // Are the counters registers cleared when read? Is that what the cntr_rst bit on the pmcr register does?
+ }
+
+ // Clear the PMU snapshot status
+ writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_CLR);
+
+ return len;
+}
+
+static void __maybe_unused gator_events_ccn504_enumerate(int pos, int size)
+{
+ int i;
+ u32 oly_id;
+
+ for (i = pos; i < pos + size; ++i) {
+ oly_id = readl(gator_events_ccn504_base + i*REGION_SIZE + OLY_ID);
+ printk(KERN_ERR "%s(%s:%i) %i %08x\n", __FUNCTION__, __FILE__, __LINE__, i, oly_id);
+ }
+}
+
+static struct gator_interface gator_events_ccn504_interface = {
+ .shutdown = gator_events_ccn504_create_shutdown,
+ .create_files = gator_events_ccn504_create_files,
+ .start = gator_events_ccn504_start,
+ .stop = gator_events_ccn504_stop,
+ .read = gator_events_ccn504_read,
+};
+
+int gator_events_ccn504_init(void)
+{
+ int i;
+
+ if (ccn504_addr == 0) {
+ return -1;
+ }
+
+ gator_events_ccn504_base = ioremap(ccn504_addr, NUM_REGIONS*REGION_SIZE);
+ if (gator_events_ccn504_base == NULL) {
+ printk(KERN_ERR "%s(%s:%i) ioremap returned NULL\n", __FUNCTION__, __FILE__, __LINE__);
+ return -1;
+ }
+ //printk(KERN_ERR "%s(%s:%i)\n", __FUNCTION__, __FILE__, __LINE__);
+
+ // Test - can memory be read
+ {
+ //gator_events_ccn504_enumerate(0, NUM_REGIONS);
+
+#if 0
+ // DT
+ gator_events_ccn504_enumerate(1, 1);
+ // HN-F
+ gator_events_ccn504_enumerate(32, 8);
+ // XP
+ gator_events_ccn504_enumerate(64, 11);
+ // RN-I
+ gator_events_ccn504_enumerate(128, 1);
+ gator_events_ccn504_enumerate(130, 1);
+ gator_events_ccn504_enumerate(134, 1);
+ gator_events_ccn504_enumerate(140, 1);
+ gator_events_ccn504_enumerate(144, 1);
+ gator_events_ccn504_enumerate(148, 1);
+ // SBAS
+ gator_events_ccn504_enumerate(129, 1);
+ gator_events_ccn504_enumerate(137, 1);
+ gator_events_ccn504_enumerate(139, 1);
+ gator_events_ccn504_enumerate(147, 1);
+#endif
+ }
+
+ for (i = 0; i < CNTMAX; ++i) {
+ gator_events_ccn504_enabled[i] = 0;
+ gator_events_ccn504_event[i] = 0;
+ gator_events_ccn504_key[i] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_ccn504_interface);
+}
+
+gator_events_init(gator_events_ccn504_init);
diff --git a/driver/gator_events_mali_400.c b/driver/gator_events_mali_4xx.c
index 38c97d1..dd275f7 100644
--- a/driver/gator_events_mali_400.c
+++ b/driver/gator_events_mali_4xx.c
@@ -15,132 +15,35 @@
#include "linux/mali_linux_trace.h"
#include "gator_events_mali_common.h"
-#include "gator_events_mali_400.h"
+#include "gator_events_mali_4xx.h"
/*
- * There are (currently) three different variants of the comms between gator and Mali:
+ * There are (currently) four different variants of the comms between gator and Mali:
* 1 (deprecated): No software counter support
* 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears
* 3 (default): Single tracepoint for all s/w counters in a bundle.
* Interface style 3 is the default if no other is specified. 1 and 2 will be eliminated when
* existing Mali DDKs are upgraded.
+ * 4. As above, but for the Utgard (Mali-450) driver.
*/
#if !defined(GATOR_MALI_INTERFACE_STYLE)
#define GATOR_MALI_INTERFACE_STYLE (3)
#endif
-/*
- * List of possible actions allowing DDK to be controlled by Streamline.
- * The following numbers are used by DDK to control the frame buffer dumping.
- */
-#define FBDUMP_CONTROL_ENABLE (1)
-#define FBDUMP_CONTROL_RATE (2)
-#define SW_EVENTS_ENABLE (3)
-#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#if GATOR_MALI_INTERFACE_STYLE < 4
+#include "mali/mali_mjollnir_profiling_gator_api.h"
+#else
+#include "mali/mali_utgard_profiling_gator_api.h"
+#endif
/*
* Check that the MALI_SUPPORT define is set to one of the allowable device codes.
*/
-#if (MALI_SUPPORT != MALI_400)
-#error MALI_SUPPORT set to an invalid device code: expecting MALI_400
+#if (MALI_SUPPORT != MALI_4xx)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
#endif
-/*
- * The number of fragment processors. Update to suit your hardware implementation.
- */
-#define NUM_FP_UNITS (4)
-
-enum counters {
- /* Timeline activity */
- ACTIVITY_VP = 0,
- ACTIVITY_FP0,
- ACTIVITY_FP1,
- ACTIVITY_FP2,
- ACTIVITY_FP3,
-
- /* L2 cache counters */
- COUNTER_L2_C0,
- COUNTER_L2_C1,
-
- /* Vertex processor counters */
- COUNTER_VP_C0,
- COUNTER_VP_C1,
-
- /* Fragment processor counters */
- COUNTER_FP0_C0,
- COUNTER_FP0_C1,
- COUNTER_FP1_C0,
- COUNTER_FP1_C1,
- COUNTER_FP2_C0,
- COUNTER_FP2_C1,
- COUNTER_FP3_C0,
- COUNTER_FP3_C1,
-
- /* EGL Software Counters */
- COUNTER_EGL_BLIT_TIME,
-
- /* GLES Software Counters */
- COUNTER_GLES_DRAW_ELEMENTS_CALLS,
- COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
- COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
- COUNTER_GLES_DRAW_ARRAYS_CALLS,
- COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
- COUNTER_GLES_DRAW_POINTS,
- COUNTER_GLES_DRAW_LINES,
- COUNTER_GLES_DRAW_LINE_LOOP,
- COUNTER_GLES_DRAW_LINE_STRIP,
- COUNTER_GLES_DRAW_TRIANGLES,
- COUNTER_GLES_DRAW_TRIANGLE_STRIP,
- COUNTER_GLES_DRAW_TRIANGLE_FAN,
- COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
- COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
- COUNTER_GLES_UPLOAD_TEXTURE_TIME,
- COUNTER_GLES_UPLOAD_VBO_TIME,
- COUNTER_GLES_NUM_FLUSHES,
- COUNTER_GLES_NUM_VSHADERS_GENERATED,
- COUNTER_GLES_NUM_FSHADERS_GENERATED,
- COUNTER_GLES_VSHADER_GEN_TIME,
- COUNTER_GLES_FSHADER_GEN_TIME,
- COUNTER_GLES_INPUT_TRIANGLES,
- COUNTER_GLES_VXCACHE_HIT,
- COUNTER_GLES_VXCACHE_MISS,
- COUNTER_GLES_VXCACHE_COLLISION,
- COUNTER_GLES_CULLED_TRIANGLES,
- COUNTER_GLES_CULLED_LINES,
- COUNTER_GLES_BACKFACE_TRIANGLES,
- COUNTER_GLES_GBCLIP_TRIANGLES,
- COUNTER_GLES_GBCLIP_LINES,
- COUNTER_GLES_TRIANGLES_DRAWN,
- COUNTER_GLES_DRAWCALL_TIME,
- COUNTER_GLES_TRIANGLES_COUNT,
- COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
- COUNTER_GLES_STRIP_TRIANGLES_COUNT,
- COUNTER_GLES_FAN_TRIANGLES_COUNT,
- COUNTER_GLES_LINES_COUNT,
- COUNTER_GLES_INDEPENDENT_LINES_COUNT,
- COUNTER_GLES_STRIP_LINES_COUNT,
- COUNTER_GLES_LOOP_LINES_COUNT,
-
- COUNTER_FILMSTRIP,
- COUNTER_FREQUENCY,
- COUNTER_VOLTAGE,
-
- NUMBER_OF_EVENTS
-};
-
-#define FIRST_ACTIVITY_EVENT ACTIVITY_VP
-#define LAST_ACTIVITY_EVENT ACTIVITY_FP3
-
-#define FIRST_HW_COUNTER COUNTER_L2_C0
-#define LAST_HW_COUNTER COUNTER_FP3_C1
-
-#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME
-#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT
-
-#define FIRST_SPECIAL_COUNTER COUNTER_FILMSTRIP
-#define LAST_SPECIAL_COUNTER COUNTER_VOLTAGE
-
/* gatorfs variables for counter enable state,
* the event the counter should count and the
* 'key' (a unique id set by gatord and returned
@@ -164,6 +67,15 @@ static unsigned long counter_prev[NUMBER_OF_EVENTS];
/* Note whether tracepoints have been registered */
static int trace_registered;
+/*
+ * These numbers define the actual numbers of each block type that exist in the system. Initially
+ * these are set to the maxima defined above; if the driver is capable of being queried (newer
+ * drivers only) then the values may be revised.
+ */
+static unsigned int n_vp_cores = MAX_NUM_VP_CORES;
+static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
+static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
+
/**
* Calculate the difference and handle the overflow.
*/
@@ -194,6 +106,12 @@ static inline int is_hw_counter(unsigned int event_id)
return (event_id >= FIRST_HW_COUNTER && event_id <= LAST_HW_COUNTER);
}
+/*
+ * These are provided for utgard compatibility.
+ */
+typedef void _mali_profiling_get_mali_version_type(struct _mali_profiling_mali_version *values);
+typedef u32 _mali_profiling_get_l2_counters_type(_mali_profiling_l2_counter_values *values);
+
#if GATOR_MALI_INTERFACE_STYLE == 2
/**
* Returns non-zero if the given counter ID is a software counter.
@@ -263,7 +181,7 @@ GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long
}
#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */
-#if GATOR_MALI_INTERFACE_STYLE == 3
+#if GATOR_MALI_INTERFACE_STYLE >= 3
GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
{
u32 i;
@@ -275,136 +193,167 @@ GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surfac
}
}
}
-#endif /* GATOR_MALI_INTERFACE_STYLE == 3 */
+#endif /* GATOR_MALI_INTERFACE_STYLE >= 3 */
-static int create_files(struct super_block *sb, struct dentry *root)
+/**
+ * Create a single filesystem entry for a specified event.
+ * @param sb the superblock
+ * @param root Filesystem root
+ * @param name The name of the entry to create
+ * @param event The ID of the event
+ * @param create_event_item boolean indicating whether to create an 'event' filesystem entry. True to create.
+ *
+ * @return 0 if ok, non-zero if the create failed.
+ */
+static int create_fs_entry(struct super_block *sb, struct dentry *root, const char *name, int event, int create_event_item)
{
struct dentry *dir;
- int event;
- int n_fp = NUM_FP_UNITS;
- const char *mali_name = gator_mali_get_mali_name();
+ dir = gatorfs_mkdir(sb, root, name);
- /*
- * Create the filesystem entries for vertex processor, fragment processor
- * and L2 cache timeline and hardware counters. Software counters get
- * special handling after this block.
- */
- for (event = FIRST_ACTIVITY_EVENT; event <= LAST_HW_COUNTER; event++) {
- char buf[40];
+ if (!dir) {
+ return -1;
+ }
- /*
- * We can skip this event if it's for a non-existent fragment
- * processor.
- */
- if (((event - ACTIVITY_FP0 >= n_fp) && (event < COUNTER_L2_C0))
- || (((event - COUNTER_FP0_C0) / 2 >= n_fp))) {
- continue;
- }
+ if (create_event_item) {
+ gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
+ }
- /* Otherwise, set up the filesystem entry for this event. */
- switch (event) {
- case ACTIVITY_VP:
- snprintf(buf, sizeof buf, "ARM_%s_VP_active", mali_name);
- break;
- case ACTIVITY_FP0:
- case ACTIVITY_FP1:
- case ACTIVITY_FP2:
- case ACTIVITY_FP3:
- snprintf(buf, sizeof buf, "ARM_%s_FP%d_active",
- mali_name, event - ACTIVITY_FP0);
- break;
- case COUNTER_L2_C0:
- case COUNTER_L2_C1:
- snprintf(buf, sizeof buf, "ARM_%s_L2_cnt%d",
- mali_name, event - COUNTER_L2_C0);
- break;
- case COUNTER_VP_C0:
- case COUNTER_VP_C1:
- snprintf(buf, sizeof buf, "ARM_%s_VP_cnt%d",
- mali_name, event - COUNTER_VP_C0);
- break;
- case COUNTER_FP0_C0:
- case COUNTER_FP0_C1:
- case COUNTER_FP1_C0:
- case COUNTER_FP1_C1:
- case COUNTER_FP2_C0:
- case COUNTER_FP2_C1:
- case COUNTER_FP3_C0:
- case COUNTER_FP3_C1:
- snprintf(buf, sizeof buf, "ARM_%s_FP%d_cnt%d",
- mali_name, (event - COUNTER_FP0_C0) / 2,
- (event - COUNTER_FP0_C0) % 2);
- break;
- default:
- printk("gator: trying to create file for non-existent counter (%d)\n", event);
- continue;
- }
+ gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
- dir = gatorfs_mkdir(sb, root, buf);
+ return 0;
+}
- if (!dir) {
- return -1;
- }
+#if GATOR_MALI_INTERFACE_STYLE > 3
+/*
+ * Read the version info structure if available
+ */
+static void initialise_version_info(void)
+{
+ _mali_profiling_get_mali_version_type *mali_profiling_get_mali_version_symbol;
- gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
+ mali_profiling_get_mali_version_symbol = symbol_get(_mali_profiling_get_mali_version);
- /* Only create an event node for counters that can change what they count */
- if (event >= COUNTER_L2_C0) {
- gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
- }
+ if (mali_profiling_get_mali_version_symbol) {
+ struct _mali_profiling_mali_version version_info;
- gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
+ pr_debug("gator: mali online _mali_profiling_get_mali_version symbol @ %p\n",
+ mali_profiling_get_mali_version_symbol);
+
+ /*
+ * Revise the number of each different core type using information derived from the DDK.
+ */
+ mali_profiling_get_mali_version_symbol(&version_info);
+
+ n_fp_cores = version_info.num_of_fp_cores;
+ n_vp_cores = version_info.num_of_vp_cores;
+ n_l2_cores = version_info.num_of_l2_cores;
+
+ /* Release the function - we're done with it. */
+ symbol_put(_mali_profiling_get_mali_version);
+ } else {
+ printk("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
}
+}
+#endif
- /* Now set up the software counter entries */
- for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) {
- char buf[40];
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+ int event;
+ const char *mali_name = gator_mali_get_mali_name();
- snprintf(buf, sizeof(buf), "ARM_%s_SW_%d", mali_name, event);
+ char buf[40];
+ int core_id;
+ int counter_number;
- dir = gatorfs_mkdir(sb, root, buf);
+ pr_debug("gator: Initialising counters with style = %d\n", GATOR_MALI_INTERFACE_STYLE);
- if (!dir) {
+#if GATOR_MALI_INTERFACE_STYLE > 3
+ /*
+ * Initialise first: this sets up the number of cores available (on compatible DDK versions).
+ * Ideally this would not need guarding but other parts of the code depend on the interface style being set
+ * correctly; if it is not then the system can enter an inconsistent state.
+ */
+ initialise_version_info();
+#endif
+
+ /* Vertex processor counters */
+ for (core_id = 0; core_id < n_vp_cores; core_id++) {
+ int activity_counter_id = ACTIVITY_VP_0;
+ snprintf(buf, sizeof buf, "ARM_%s_VP_%d_active", mali_name, core_id);
+ if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
return -1;
}
- gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
- gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+
+ snprintf(buf, sizeof buf, "ARM_%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+ return -1;
+ }
+ }
}
- /* Now set up the special counter entries */
- for (event = FIRST_SPECIAL_COUNTER; event <= LAST_SPECIAL_COUNTER; event++) {
- char buf[40];
+ /* Fragment processors' counters */
+ for (core_id = 0; core_id < n_fp_cores; core_id++) {
+ int activity_counter_id = ACTIVITY_FP_0 + core_id;
- switch (event) {
- case COUNTER_FILMSTRIP:
- snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name);
- break;
+ snprintf(buf, sizeof buf, "ARM_%s_FP_%d_active", mali_name, core_id);
+ if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+ return -1;
+ }
- case COUNTER_FREQUENCY:
- snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name);
- break;
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
- case COUNTER_VOLTAGE:
- snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name);
- break;
+ snprintf(buf, sizeof buf, "ARM_%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+ return -1;
+ }
+ }
+ }
- default:
- break;
+ /* L2 Cache counters */
+ for (core_id = 0; core_id < n_l2_cores; core_id++) {
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+
+ snprintf(buf, sizeof buf, "ARM_%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+ return -1;
+ }
}
+ }
- dir = gatorfs_mkdir(sb, root, buf);
+ /* Now set up the software counter entries */
+ for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) {
+ snprintf(buf, sizeof(buf), "ARM_%s_SW_%d", mali_name, event - FIRST_SW_COUNTER);
- if (!dir) {
+ if (create_fs_entry(sb, root, buf, event, 0) != 0) {
return -1;
}
+ }
- gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
- gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
- gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
+ /* Now set up the special counter entries */
+ snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_FILMSTRIP, 1) != 0) {
+ return -1;
+ }
+
+#ifdef DVFS_REPORTED_BY_DDK
+ snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_FREQUENCY, 1) != 0) {
+ return -1;
}
+ snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_VOLTAGE, 1) != 0) {
+ return -1;
+ }
+#endif
+
return 0;
}
@@ -413,16 +362,17 @@ static int create_files(struct super_block *sb, struct dentry *root)
* This is stored here since it is used very regularly.
*/
static mali_profiling_get_counters_type *mali_get_counters = NULL;
+static _mali_profiling_get_l2_counters_type *mali_get_l2_counters = NULL;
/*
- * Examine list of software counters and determine if any one is enabled.
+ * Examine list of counters between two index limits and determine if any one is enabled.
* Returns 1 if any counter is enabled, 0 if none is.
*/
-static int is_any_sw_counter_enabled(void)
+static int is_any_counter_enabled(unsigned int first_counter, unsigned int last_counter)
{
unsigned int i;
- for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
+ for (i = first_counter; i <= last_counter; i++) {
if (counter_enabled[i]) {
return 1; /* At least one counter is enabled */
}
@@ -431,28 +381,26 @@ static int is_any_sw_counter_enabled(void)
return 0; /* No s/w counters enabled */
}
-static void mali_counter_initialize(void)
+static void init_counters(unsigned int from_counter, unsigned int to_counter)
{
+ unsigned int counter_id;
+
/* If a Mali driver is present and exporting the appropriate symbol
* then we can request the HW counters (of which there are only 2)
* be configured to count the desired events
*/
mali_profiling_set_event_type *mali_set_hw_event;
- mali_osk_fb_control_set_type *mali_set_fb_event;
- mali_profiling_control_type *mali_control;
mali_set_hw_event = symbol_get(_mali_profiling_set_event);
if (mali_set_hw_event) {
- int i;
-
pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
- for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) {
- if (counter_enabled[i]) {
- mali_set_hw_event(i, counter_event[i]);
+ for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
+ if (counter_enabled[counter_id]) {
+ mali_set_hw_event(counter_id, counter_event[counter_id]);
} else {
- mali_set_hw_event(i, 0xFFFFFFFF);
+ mali_set_hw_event(counter_id, 0xFFFFFFFF);
}
}
@@ -460,6 +408,19 @@ static void mali_counter_initialize(void)
} else {
printk("gator: mali online _mali_profiling_set_event symbol not found\n");
}
+}
+
+static void mali_counter_initialize(void)
+{
+ int i;
+ int core_id;
+
+ mali_osk_fb_control_set_type *mali_set_fb_event;
+ mali_profiling_control_type *mali_control;
+
+ init_counters(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1);
+ init_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1);
+ init_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1);
mali_set_fb_event = symbol_get(_mali_osk_fb_control_set);
@@ -482,7 +443,7 @@ static void mali_counter_initialize(void)
pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
- mali_control(SW_EVENTS_ENABLE, (is_any_sw_counter_enabled() ? 1 : 0));
+ mali_control(SW_COUNTER_ENABLE, (is_any_counter_enabled(FIRST_SW_COUNTER, LAST_SW_COUNTER) ? 1 : 0));
mali_control(FBDUMP_CONTROL_ENABLE, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0));
mali_control(FBDUMP_CONTROL_RATE, rate);
mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
@@ -497,11 +458,34 @@ static void mali_counter_initialize(void)
mali_get_counters = symbol_get(_mali_profiling_get_counters);
if (mali_get_counters) {
pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters);
- counter_prev[COUNTER_L2_C0] = 0;
- counter_prev[COUNTER_L2_C1] = 0;
+
} else {
pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined");
}
+
+ mali_get_l2_counters = symbol_get(_mali_profiling_get_l2_counters);
+ if (mali_get_l2_counters) {
+ pr_debug("gator: mali online _mali_profiling_get_l2_counters symbol @ %p\n", mali_get_l2_counters);
+
+ } else {
+ pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined");
+ }
+
+ if (!mali_get_counters && !mali_get_l2_counters) {
+ pr_debug("gator: WARNING: no L2 counters available");
+ n_l2_cores = 0;
+ }
+
+ for (core_id = 0; core_id < n_l2_cores; core_id++) {
+ int counter_id = COUNTER_L2_0_C0 + (2 * core_id);
+ counter_prev[counter_id] = 0;
+ counter_prev[counter_id + 1] = 0;
+ }
+
+ /* Clear counters in the start */
+ for (i = 0; i < NUMBER_OF_EVENTS; i++) {
+ counter_data[i] = 0;
+ }
}
static void mali_counter_deinitialize(void)
@@ -544,7 +528,7 @@ static void mali_counter_deinitialize(void)
pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_set_fb_event);
/* Reset the DDK state - disable counter collection */
- mali_control(SW_EVENTS_ENABLE, 0);
+ mali_control(SW_COUNTER_ENABLE, 0);
mali_control(FBDUMP_CONTROL_ENABLE, 0);
@@ -557,6 +541,9 @@ static void mali_counter_deinitialize(void)
symbol_put(_mali_profiling_get_counters);
}
+ if (mali_get_l2_counters) {
+ symbol_put(_mali_profiling_get_l2_counters);
+ }
}
static int start(void)
@@ -575,8 +562,8 @@ static int start(void)
printk("gator: mali_sw_counter tracepoint failed to activate\n");
return -1;
}
-#elif GATOR_MALI_INTERFACE_STYLE == 3
-/* For Mali drivers with built-in support. */
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+ /* For Mali drivers with built-in support. */
if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
printk("gator: mali_sw_counters tracepoint failed to activate\n");
return -1;
@@ -605,7 +592,7 @@ static void stop(void)
#elif GATOR_MALI_INTERFACE_STYLE == 2
/* For patched Mali driver. */
GATOR_UNREGISTER_TRACE(mali_sw_counter);
-#elif GATOR_MALI_INTERFACE_STYLE == 3
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
/* For Mali drivers with built-in support. */
GATOR_UNREGISTER_TRACE(mali_sw_counters);
#else
@@ -617,7 +604,7 @@ static void stop(void)
trace_registered = 0;
}
- for (cnt = FIRST_ACTIVITY_EVENT; cnt < NUMBER_OF_EVENTS; cnt++) {
+ for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
counter_enabled[cnt] = 0;
counter_event[cnt] = 0;
counter_address[cnt] = NULL;
@@ -626,67 +613,99 @@ static void stop(void)
mali_counter_deinitialize();
}
+static void dump_counters(unsigned int from_counter, unsigned int to_counter, unsigned int *len)
+{
+ unsigned int counter_id;
+
+ for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
+ if (counter_enabled[counter_id]) {
+ counter_dump[(*len)++] = counter_key[counter_id];
+ counter_dump[(*len)++] = counter_data[counter_id];
+
+ counter_data[counter_id] = 0;
+ }
+ }
+}
+
static int read(int **buffer)
{
- int cnt, len = 0;
+ int len = 0;
if (!on_primary_core())
return 0;
// Read the L2 C0 and C1 here.
- if (counter_enabled[COUNTER_L2_C0] || counter_enabled[COUNTER_L2_C1]) {
- u32 src0 = 0;
- u32 val0 = 0;
- u32 src1 = 0;
- u32 val1 = 0;
-
- // Poke the driver to get the counter values
- if (mali_get_counters) {
- mali_get_counters(&src0, &val0, &src1, &val1);
+ if (n_l2_cores > 0 && is_any_counter_enabled(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores))) {
+ unsigned int unavailable_l2_caches = 0;
+ _mali_profiling_l2_counter_values cache_values;
+ unsigned int cache_id;
+ struct _mali_profiling_core_counters *per_core;
+
+ /* Poke the driver to get the counter values - older style; only one L2 cache */
+ if (mali_get_l2_counters) {
+ unavailable_l2_caches = mali_get_l2_counters(&cache_values);
+ } else if (mali_get_counters) {
+ per_core = &cache_values.cores[0];
+ mali_get_counters(&per_core->source0, &per_core->value0, &per_core->source1, &per_core->value1);
+ } else {
+ /* This should never happen, as n_l2_caches is only set > 0 if one of the above functions is found. */
}
- if (counter_enabled[COUNTER_L2_C0]) {
- // Calculate and save src0's counter val0
- counter_dump[len++] = counter_key[COUNTER_L2_C0];
- counter_dump[len++] = get_difference(val0, counter_prev[COUNTER_L2_C0]);
- }
+ /* Fill in the two cache counter values for each cache block. */
+ for (cache_id = 0; cache_id < n_l2_cores; cache_id++) {
+ unsigned int counter_id_0 = COUNTER_L2_0_C0 + (2 * cache_id);
+ unsigned int counter_id_1 = counter_id_0 + 1;
- if (counter_enabled[COUNTER_L2_C1]) {
- // Calculate and save src1's counter val1
- counter_dump[len++] = counter_key[COUNTER_L2_C1];
- counter_dump[len++] = get_difference(val1, counter_prev[COUNTER_L2_C1]);
- }
+ if ((1 << cache_id) & unavailable_l2_caches) {
+ continue; /* This cache is unavailable (powered-off, possibly). */
+ }
+
+ per_core = &cache_values.cores[cache_id];
+
+ if (counter_enabled[counter_id_0]) {
+ // Calculate and save src0's counter val0
+ counter_dump[len++] = counter_key[counter_id_0];
+ counter_dump[len++] = get_difference(per_core->value0, counter_prev[counter_id_0]);
+ }
- // Save the previous values for the counters.
- counter_prev[COUNTER_L2_C0] = val0;
- counter_prev[COUNTER_L2_C1] = val1;
+ if (counter_enabled[counter_id_1]) {
+ // Calculate and save src1's counter val1
+ counter_dump[len++] = counter_key[counter_id_1];
+ counter_dump[len++] = get_difference(per_core->value1, counter_prev[counter_id_1]);
+ }
+
+ // Save the previous values for the counters.
+ counter_prev[counter_id_0] = per_core->value0;
+ counter_prev[counter_id_1] = per_core->value1;
+ }
}
- // Process other (non-timeline) counters.
- for (cnt = COUNTER_VP_C0; cnt <= LAST_SW_COUNTER; cnt++) {
+ /* Process other (non-timeline) counters. */
+ dump_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1, &len);
+ dump_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1, &len);
+
+ dump_counters(FIRST_SW_COUNTER, LAST_SW_COUNTER, &len);
+
+#ifdef DVFS_REPORTED_BY_DDK
+ {
+ int cnt;
+ /*
+ * Add in the voltage and frequency counters if enabled. Note that, since these are
+ * actually passed as events, the counter value should not be cleared.
+ */
+ cnt = COUNTER_FREQUENCY;
if (counter_enabled[cnt]) {
counter_dump[len++] = counter_key[cnt];
counter_dump[len++] = counter_data[cnt];
-
- counter_data[cnt] = 0;
}
- }
-
- /*
- * Add in the voltage and frequency counters if enabled. Note that, since these are
- * actually passed as events, the counter value should not be cleared.
- */
- cnt = COUNTER_FREQUENCY;
- if (counter_enabled[cnt]) {
- counter_dump[len++] = counter_key[cnt];
- counter_dump[len++] = counter_data[cnt];
- }
- cnt = COUNTER_VOLTAGE;
- if (counter_enabled[cnt]) {
- counter_dump[len++] = counter_key[cnt];
- counter_dump[len++] = counter_data[cnt];
+ cnt = COUNTER_VOLTAGE;
+ if (counter_enabled[cnt]) {
+ counter_dump[len++] = counter_key[cnt];
+ counter_dump[len++] = counter_data[cnt];
+ }
}
+#endif
if (buffer) {
*buffer = (int *)counter_dump;
@@ -704,8 +723,10 @@ static struct gator_interface gator_events_mali_interface = {
extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv)
{
+#ifdef DVFS_REPORTED_BY_DDK
counter_data[COUNTER_FREQUENCY] = frequency_mhz;
counter_data[COUNTER_VOLTAGE] = voltage_mv;
+#endif
}
int gator_events_mali_init(void)
@@ -714,7 +735,7 @@ int gator_events_mali_init(void)
pr_debug("gator: mali init\n");
- for (cnt = FIRST_ACTIVITY_EVENT; cnt < NUMBER_OF_EVENTS; cnt++) {
+ for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
counter_enabled[cnt] = 0;
counter_event[cnt] = 0;
counter_key[cnt] = gator_events_get_key();
diff --git a/driver/gator_events_mali_400.h b/driver/gator_events_mali_4xx.h
index 43aec49..413ad0f 100644
--- a/driver/gator_events_mali_400.h
+++ b/driver/gator_events_mali_4xx.h
@@ -8,11 +8,11 @@
*/
/*
- * Header contains common definitions for the Mali-400 processors.
+ * Header contains common definitions for the Mali-4xx processors.
*/
-#if !defined(GATOR_EVENTS_MALI_400_H)
-#define GATOR_EVENTS_MALI_400_H
+#if !defined(GATOR_EVENTS_MALI_4xx_H)
+#define GATOR_EVENTS_MALI_4xx_H
extern void gator_events_mali_log_dvfs_event(unsigned int d0, unsigned int d1);
-#endif /* GATOR_EVENTS_MALI_400_H */
+#endif /* GATOR_EVENTS_MALI_4xx_H */
diff --git a/driver/gator_events_mali_common.c b/driver/gator_events_mali_common.c
index 22a517d..5a98b37 100644
--- a/driver/gator_events_mali_common.c
+++ b/driver/gator_events_mali_common.c
@@ -20,8 +20,8 @@ extern const char *gator_mali_get_mali_name(void)
switch (id) {
case MALI_T6xx:
return "Mali-T6xx";
- case MALI_400:
- return "Mali-400";
+ case MALI_4xx:
+ return "Mali-4xx";
default:
pr_debug("gator: Mali-T6xx: unknown Mali ID (%d)\n", id);
return "Mali-Unknown";
diff --git a/driver/gator_events_mali_common.h b/driver/gator_events_mali_common.h
index 27eaacc..d67ee2d 100644
--- a/driver/gator_events_mali_common.h
+++ b/driver/gator_events_mali_common.h
@@ -19,7 +19,7 @@
#include <asm/io.h>
/* Device codes for each known GPU */
-#define MALI_400 (0x0b07)
+#define MALI_4xx (0x0b07)
#define MALI_T6xx (0x0056)
/* Ensure that MALI_SUPPORT has been defined to something. */
@@ -40,9 +40,9 @@ typedef struct {
} mali_counter;
/*
- * Mali-400
+ * Mali-4xx
*/
-typedef void mali_profiling_set_event_type(unsigned int, unsigned int);
+typedef int mali_profiling_set_event_type(unsigned int, int);
typedef void mali_osk_fb_control_set_type(unsigned int, unsigned int);
typedef void mali_profiling_control_type(unsigned int, unsigned int);
typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
@@ -50,13 +50,13 @@ typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, un
/*
* Driver entry points for functions called directly by gator.
*/
-extern void _mali_profiling_set_event(unsigned int, unsigned int);
+extern int _mali_profiling_set_event(unsigned int, int);
extern void _mali_osk_fb_control_set(unsigned int, unsigned int);
extern void _mali_profiling_control(unsigned int, unsigned int);
extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
/**
- * Returns a name which identifies the GPU type (eg Mali-400, Mali-T6xx).
+ * Returns a name which identifies the GPU type (eg Mali-4xx, Mali-T6xx).
*
* @return The name as a constant string.
*/
diff --git a/driver/gator_events_mali_t6xx_hw.c b/driver/gator_events_mali_t6xx_hw.c
index 4f49c1d..f557350 100644
--- a/driver/gator_events_mali_t6xx_hw.c
+++ b/driver/gator_events_mali_t6xx_hw.c
@@ -304,11 +304,11 @@ static const char *const hardware_counter_names[] = {
"",
"",
"",
- "MMU_TABLE_WALK",
- "MMU_REPLAY_MISS",
- "MMU_REPLAY_FULL",
- "MMU_NEW_MISS",
"MMU_HIT",
+ "MMU_NEW_MISS",
+ "MMU_REPLAY_FULL",
+ "MMU_REPLAY_MISS",
+ "MMU_TABLE_WALK",
"",
"",
"",
@@ -316,11 +316,11 @@ static const char *const hardware_counter_names[] = {
"",
"",
"",
- "UTLB_STALL",
- "UTLB_REPLAY_MISS",
- "UTLB_REPLAY_FULL",
- "UTLB_NEW_MISS",
"UTLB_HIT",
+ "UTLB_NEW_MISS",
+ "UTLB_REPLAY_FULL",
+ "UTLB_REPLAY_MISS",
+ "UTLB_STALL",
"",
"",
"",
diff --git a/driver/gator_events_mmaped.c b/driver/gator_events_mmaped.c
index 0027564..f7670f6 100644
--- a/driver/gator_events_mmaped.c
+++ b/driver/gator_events_mmaped.c
@@ -13,9 +13,9 @@
*
* <counter_set name="mmaped_cnt" count="3"/>
* <category name="mmaped" counter_set="mmaped_cnt" per_cpu="no">
- * <event event="0x0" title="Simulated" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
- * <event event="0x1" title="Simulated" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
- * <event event="0x2" title="Simulated" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
+ * <event event="0x0" title="Simulated1" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
+ * <event event="0x1" title="Simulated2" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
+ * <event event="0x2" title="Simulated3" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
* </category>
*/
diff --git a/driver/gator_hrtimer_gator.c b/driver/gator_hrtimer_gator.c
index 8c35d49..b0c947a 100644
--- a/driver/gator_hrtimer_gator.c
+++ b/driver/gator_hrtimer_gator.c
@@ -13,6 +13,7 @@
void (*callback)(void);
DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer);
+DEFINE_PER_CPU(ktime_t, hrtimer_expire);
DEFINE_PER_CPU(int, hrtimer_is_active);
static ktime_t profiling_interval;
static void gator_hrtimer_online(void);
@@ -20,7 +21,9 @@ static void gator_hrtimer_offline(void);
static enum hrtimer_restart gator_hrtimer_notify(struct hrtimer *hrtimer)
{
- hrtimer_forward_now(hrtimer, profiling_interval);
+ int cpu = get_logical_cpu();
+ hrtimer_forward(hrtimer, per_cpu(hrtimer_expire, cpu), profiling_interval);
+ per_cpu(hrtimer_expire, cpu) = ktime_add(per_cpu(hrtimer_expire, cpu), profiling_interval);
(*callback)();
return HRTIMER_RESTART;
}
@@ -34,12 +37,13 @@ static void gator_hrtimer_online(void)
return;
per_cpu(hrtimer_is_active, cpu) = 1;
- hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer->function = gator_hrtimer_notify;
#ifdef CONFIG_PREEMPT_RT_BASE
hrtimer->irqsafe = 1;
#endif
- hrtimer_start(hrtimer, profiling_interval, HRTIMER_MODE_REL_PINNED);
+ per_cpu(hrtimer_expire, cpu) = ktime_add(hrtimer->base->get_time(), profiling_interval);
+ hrtimer_start(hrtimer, per_cpu(hrtimer_expire, cpu), HRTIMER_MODE_ABS_PINNED);
}
static void gator_hrtimer_offline(void)
diff --git a/driver/gator_iks.c b/driver/gator_iks.c
index 932be26..24233d7 100644
--- a/driver/gator_iks.c
+++ b/driver/gator_iks.c
@@ -14,18 +14,36 @@
#include <asm/smp_plat.h>
#include <trace/events/power_cpu_migrate.h>
+static bool map_cpuids;
static int mpidr_cpuids[NR_CPUS];
+static const struct gator_cpu * mpidr_cpus[NR_CPUS];
static int __lcpu_to_pcpu[NR_CPUS];
+static const struct gator_cpu *gator_find_cpu_by_dt_name(const char *const name)
+{
+ int i;
+
+ for (i = 0; gator_cpus[i].cpuid != 0; ++i) {
+ const struct gator_cpu *const gator_cpu = &gator_cpus[i];
+ if (gator_cpu->dt_name != NULL && strcmp(gator_cpu->dt_name, name) == 0) {
+ return gator_cpu;
+ }
+ }
+
+ return NULL;
+}
+
static void calc_first_cluster_size(void)
{
int len;
const u32 *val;
+ const char *compatible;
struct device_node *cn = NULL;
int mpidr_cpuids_count = 0;
// Zero is a valid cpuid, so initialize the array to 0xff's
memset(&mpidr_cpuids, 0xff, sizeof(mpidr_cpuids));
+ memset(&mpidr_cpus, 0, sizeof(mpidr_cpus));
while ((cn = of_find_node_by_type(cn, "cpu"))) {
BUG_ON(mpidr_cpuids_count >= NR_CPUS);
@@ -35,12 +53,18 @@ static void calc_first_cluster_size(void)
pr_err("%s missing reg property\n", cn->full_name);
continue;
}
+ compatible = of_get_property(cn, "compatible", NULL);
+ if (compatible == NULL) {
+ pr_err("%s missing compatible property\n", cn->full_name);
+ continue;
+ }
mpidr_cpuids[mpidr_cpuids_count] = be32_to_cpup(val);
+ mpidr_cpus[mpidr_cpuids_count] = gator_find_cpu_by_dt_name(compatible);
++mpidr_cpuids_count;
}
- BUG_ON(mpidr_cpuids_count != nr_cpu_ids);
+ map_cpuids = (mpidr_cpuids_count == nr_cpu_ids);
}
static int linearize_mpidr(int mpidr)
@@ -58,6 +82,10 @@ static int linearize_mpidr(int mpidr)
int lcpu_to_pcpu(const int lcpu)
{
int pcpu;
+
+ if (!map_cpuids)
+ return lcpu;
+
BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0);
pcpu = __lcpu_to_pcpu[lcpu];
BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0);
@@ -67,6 +95,10 @@ int lcpu_to_pcpu(const int lcpu)
int pcpu_to_lcpu(const int pcpu)
{
int lcpu;
+
+ if (!map_cpuids)
+ return pcpu;
+
BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0);
for (lcpu = 0; lcpu < nr_cpu_ids; ++lcpu) {
if (__lcpu_to_pcpu[lcpu] == pcpu) {
@@ -111,9 +143,24 @@ GATOR_DEFINE_PROBE(cpu_migrate_current, TP_PROTO(u64 timestamp, u32 cpu_hwid))
gator_update_cpu_mapping(cpu_hwid);
}
+static void gator_send_iks_core_names(void)
+{
+ int cpu;
+ // Send the cpu names
+ for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
+ if (mpidr_cpus[cpu] != NULL) {
+ gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid, mpidr_cpus[cpu]);
+ }
+ }
+}
+
static int gator_migrate_start(void)
{
int retval = 0;
+
+ if (!map_cpuids)
+ return retval;
+
if (retval == 0)
retval = GATOR_REGISTER_TRACE(cpu_migrate_begin);
if (retval == 0)
@@ -130,6 +177,9 @@ static int gator_migrate_start(void)
static void gator_migrate_stop(void)
{
+ if (!map_cpuids)
+ return;
+
GATOR_UNREGISTER_TRACE(cpu_migrate_current);
GATOR_UNREGISTER_TRACE(cpu_migrate_finish);
GATOR_UNREGISTER_TRACE(cpu_migrate_begin);
@@ -138,6 +188,7 @@ static void gator_migrate_stop(void)
#else
#define calc_first_cluster_size()
+#define gator_send_iks_core_names()
#define gator_migrate_start() 0
#define gator_migrate_stop()
diff --git a/driver/gator_main.c b/driver/gator_main.c
index 46fe31d..7dd70d9 100644
--- a/driver/gator_main.c
+++ b/driver/gator_main.c
@@ -8,7 +8,7 @@
*/
// This version must match the gator daemon version
-static unsigned long gator_protocol_version = 14;
+static unsigned long gator_protocol_version = 16;
#include <linux/slab.h>
#include <linux/cpu.h>
@@ -64,13 +64,13 @@ static unsigned long gator_protocol_version = 14;
#define NAME_BUFFER_SIZE (64*1024)
#define COUNTER_BUFFER_SIZE (64*1024) // counters have the core as part of the data and the core value in the frame header may be discarded
#define BLOCK_COUNTER_BUFFER_SIZE (128*1024)
-#define ANNOTATE_BUFFER_SIZE (64*1024) // annotate counters have the core as part of the data and the core value in the frame header may be discarded
+#define ANNOTATE_BUFFER_SIZE (128*1024) // annotate counters have the core as part of the data and the core value in the frame header may be discarded
#define SCHED_TRACE_BUFFER_SIZE (128*1024)
#define GPU_TRACE_BUFFER_SIZE (64*1024) // gpu trace counters have the core as part of the data and the core value in the frame header may be discarded
#define IDLE_BUFFER_SIZE (32*1024) // idle counters have the core as part of the data and the core value in the frame header may be discarded
#define NO_COOKIE 0U
-#define INVALID_COOKIE ~0U
+#define UNRESOLVED_COOKIE ~0U
#define FRAME_SUMMARY 1
#define FRAME_BACKTRACE 2
@@ -93,6 +93,7 @@ static unsigned long gator_protocol_version = 14;
#define MESSAGE_SCHED_SWITCH 1
#define MESSAGE_SCHED_EXIT 2
+#define MESSAGE_SCHED_START 3
#define MESSAGE_IDLE_ENTER 1
#define MESSAGE_IDLE_EXIT 2
@@ -167,7 +168,7 @@ static void gator_buffer_write_packed_int64(int cpu, int buftype, long long x);
static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len);
static void gator_buffer_write_string(int cpu, int buftype, const char *x);
static void gator_add_trace(int cpu, unsigned long address);
-static void gator_add_sample(int cpu, struct pt_regs *const regs);
+static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time);
static u64 gator_get_time(void);
// Size of the buffer, must be a power of 2. Effectively constant, set in gator_op_setup.
@@ -185,11 +186,8 @@ static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_commit);
static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], buffer_space_available);
// The buffer. Allocated in gator_op_setup
static DEFINE_PER_CPU(char *[NUM_GATOR_BUFS], gator_buffer);
-
-#if GATOR_LIVE
// The time after which the buffer should be committed for live display
static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
-#endif
/******************************************************************************
* Application Includes
@@ -198,11 +196,11 @@ static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
#include "gator_hrtimer_perf.c"
#include "gator_hrtimer_gator.c"
#include "gator_cookies.c"
+#include "gator_annotate.c"
#include "gator_trace_sched.c"
#include "gator_trace_power.c"
#include "gator_trace_gpu.c"
#include "gator_backtrace.c"
-#include "gator_annotate.c"
#include "gator_fs.c"
#include "gator_pack.c"
@@ -215,24 +213,28 @@ const struct gator_cpu gator_cpus[] = {
.cpuid = ARM1136,
.core_name = "ARM1136",
.pmnc_name = "ARM_ARM11",
+ .dt_name = "arm,arm1136",
.pmnc_counters = 3,
},
{
.cpuid = ARM1156,
.core_name = "ARM1156",
.pmnc_name = "ARM_ARM11",
+ .dt_name = "arm,arm1156",
.pmnc_counters = 3,
},
{
.cpuid = ARM1176,
.core_name = "ARM1176",
.pmnc_name = "ARM_ARM11",
+ .dt_name = "arm,arm1176",
.pmnc_counters = 3,
},
{
.cpuid = ARM11MPCORE,
.core_name = "ARM11MPCore",
.pmnc_name = "ARM_ARM11MPCore",
+ .dt_name = "arm,arm11mpcore",
.pmnc_counters = 3,
},
{
@@ -240,6 +242,7 @@ const struct gator_cpu gator_cpus[] = {
.core_name = "Cortex-A5",
.pmu_name = "ARMv7_Cortex_A5",
.pmnc_name = "ARM_Cortex-A5",
+ .dt_name = "arm,cortex-a5",
.pmnc_counters = 2,
},
{
@@ -247,6 +250,7 @@ const struct gator_cpu gator_cpus[] = {
.core_name = "Cortex-A7",
.pmu_name = "ARMv7_Cortex_A7",
.pmnc_name = "ARM_Cortex-A7",
+ .dt_name = "arm,cortex-a7",
.pmnc_counters = 4,
},
{
@@ -254,6 +258,7 @@ const struct gator_cpu gator_cpus[] = {
.core_name = "Cortex-A8",
.pmu_name = "ARMv7_Cortex_A8",
.pmnc_name = "ARM_Cortex-A8",
+ .dt_name = "arm,cortex-a8",
.pmnc_counters = 4,
},
{
@@ -261,6 +266,15 @@ const struct gator_cpu gator_cpus[] = {
.core_name = "Cortex-A9",
.pmu_name = "ARMv7_Cortex_A9",
.pmnc_name = "ARM_Cortex-A9",
+ .dt_name = "arm,cortex-a9",
+ .pmnc_counters = 6,
+ },
+ {
+ .cpuid = CORTEX_A12,
+ .core_name = "Cortex-A12",
+ .pmu_name = "ARMv7_Cortex_A12",
+ .pmnc_name = "ARM_Cortex-A12",
+ .dt_name = "arm,cortex-a12",
.pmnc_counters = 6,
},
{
@@ -268,6 +282,7 @@ const struct gator_cpu gator_cpus[] = {
.core_name = "Cortex-A15",
.pmu_name = "ARMv7_Cortex_A15",
.pmnc_name = "ARM_Cortex-A15",
+ .dt_name = "arm,cortex-a15",
.pmnc_counters = 6,
},
{
@@ -304,12 +319,14 @@ const struct gator_cpu gator_cpus[] = {
.cpuid = CORTEX_A53,
.core_name = "Cortex-A53",
.pmnc_name = "ARM_Cortex-A53",
+ .dt_name = "arm,cortex-a53",
.pmnc_counters = 6,
},
{
.cpuid = CORTEX_A57,
.core_name = "Cortex-A57",
.pmnc_name = "ARM_Cortex-A57",
+ .dt_name = "arm,cortex-a57",
.pmnc_counters = 6,
},
{
@@ -491,13 +508,11 @@ static void gator_commit_buffer(int cpu, int buftype, u64 time)
per_cpu(gator_buffer_commit, cpu)[buftype] = per_cpu(gator_buffer_write, cpu)[buftype];
-#if GATOR_LIVE
if (gator_live_rate > 0) {
while (time > per_cpu(gator_buffer_commit_time, cpu)) {
per_cpu(gator_buffer_commit_time, cpu) += gator_live_rate;
}
}
-#endif
marshal_frame(cpu, buftype);
@@ -521,14 +536,14 @@ static void gator_add_trace(int cpu, unsigned long address)
off_t offset = 0;
unsigned long cookie = get_address_cookie(cpu, current, address & ~1, &offset);
- if (cookie == NO_COOKIE || cookie == INVALID_COOKIE) {
+ if (cookie == NO_COOKIE || cookie == UNRESOLVED_COOKIE) {
offset = address;
}
marshal_backtrace(offset & ~1, cookie);
}
-static void gator_add_sample(int cpu, struct pt_regs *const regs)
+static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time)
{
bool inKernel;
unsigned long exec_cookie;
@@ -539,7 +554,7 @@ static void gator_add_sample(int cpu, struct pt_regs *const regs)
inKernel = !user_mode(regs);
exec_cookie = get_exec_cookie(cpu, current);
- if (!marshal_backtrace_header(exec_cookie, current->tgid, current->pid, inKernel))
+ if (!marshal_backtrace_header(exec_cookie, current->tgid, current->pid, inKernel, time))
return;
if (inKernel) {
@@ -553,7 +568,7 @@ static void gator_add_sample(int cpu, struct pt_regs *const regs)
arm_backtrace_eabi(cpu, regs, gator_backtrace_depth);
}
- marshal_backtrace_footer();
+ marshal_backtrace_footer(time);
}
/******************************************************************************
@@ -567,14 +582,15 @@ static void gator_timer_interrupt(void)
void gator_backtrace_handler(struct pt_regs *const regs)
{
+ u64 time = gator_get_time();
int cpu = get_physical_cpu();
// Output backtrace
- gator_add_sample(cpu, regs);
+ gator_add_sample(cpu, regs, time);
// Collect counters
if (!per_cpu(collecting, cpu)) {
- collect_counters();
+ collect_counters(time);
}
}
@@ -640,6 +656,25 @@ static void gator_timer_stop(void)
}
}
+#if defined(__arm__) || defined(__aarch64__)
+static void gator_send_core_name(int cpu, const u32 cpuid, const struct gator_cpu *const gator_cpu) {
+ const char *core_name = NULL;
+ char core_name_buf[32];
+
+ if (!sent_core_name[cpu]) {
+ if (gator_cpu != NULL) {
+ core_name = gator_cpu->core_name;
+ } else {
+ snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+ core_name = core_name_buf;
+ }
+
+ marshal_core_name(cpu, cpuid, core_name);
+ sent_core_name[cpu] = true;
+ }
+}
+#endif
+
// This function runs in interrupt context and on the appropriate core
static void gator_timer_online(void *migrate)
{
@@ -669,20 +704,8 @@ static void gator_timer_online(void *migrate)
#if defined(__arm__) || defined(__aarch64__)
if (!sent_core_name[cpu]) {
- const char *core_name = NULL;
const u32 cpuid = gator_cpuid();
- const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(cpuid);
- char core_name_buf[32];
-
- if (gator_cpu != NULL) {
- core_name = gator_cpu->core_name;
- } else {
- snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
- core_name = core_name_buf;
- }
-
- marshal_core_name(cpuid, core_name);
- sent_core_name[cpu] = true;
+ gator_send_core_name(cpu, cpuid, gator_find_cpu_by_cpuid(cpuid));
}
#endif
}
@@ -699,6 +722,8 @@ static void gator_timer_online_dispatch(int cpu, bool migrate)
}
}
+#include "gator_iks.c"
+
int gator_timer_start(unsigned long sample_rate)
{
int cpu;
@@ -718,6 +743,7 @@ int gator_timer_start(unsigned long sample_rate)
if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1)
return -1;
+ gator_send_iks_core_names();
for_each_online_cpu(cpu) {
gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
}
@@ -740,6 +766,7 @@ static u64 gator_get_time(void)
// getrawmonotonic is not monotonic on all systems. Detect and attempt to correct these cases.
// up to 0.5ms delta has been seen on some systems, which can skew Streamline data when viewing at high resolution.
+ // This doesn't work well with interrupts, but that it's OK - the real concern is to catch big jumps in time
prev_timestamp = per_cpu(last_timestamp, cpu);
if (prev_timestamp <= timestamp) {
per_cpu(last_timestamp, cpu) = timestamp;
@@ -759,8 +786,6 @@ static u64 gator_get_time(void)
/******************************************************************************
* cpu hotplug and pm notifiers
******************************************************************************/
-#include "gator_iks.c"
-
static int __cpuinit gator_hotcpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
int cpu = lcpu_to_pcpu((long)hcpu);
@@ -865,7 +890,7 @@ static void gator_summary(void)
gator_monotonic_started = gator_get_time();
local_irq_restore(flags);
- marshal_summary(timestamp, uptime, uname_buf);
+ marshal_summary(timestamp, uptime, gator_monotonic_started, uname_buf);
}
int gator_events_install(struct gator_interface *interface)
@@ -1062,9 +1087,7 @@ static int gator_op_setup(void)
per_cpu(gator_buffer_write, cpu)[i] = 0;
per_cpu(gator_buffer_commit, cpu)[i] = 0;
per_cpu(buffer_space_available, cpu)[i] = true;
-#if GATOR_LIVE
per_cpu(gator_buffer_commit_time, cpu) = gator_live_rate;
-#endif
// Annotation is a special case that only uses a single buffer
if (cpu > 0 && i == ANNOTATE_BUF) {
@@ -1138,9 +1161,7 @@ static void gator_shutdown(void)
per_cpu(gator_buffer_write, cpu)[i] = 0;
per_cpu(gator_buffer_commit, cpu)[i] = 0;
per_cpu(buffer_space_available, cpu)[i] = true;
-#if GATOR_LIVE
per_cpu(gator_buffer_commit_time, cpu) = 0;
-#endif
}
mutex_unlock(&gator_buffer_mutex);
}
@@ -1297,7 +1318,7 @@ static ssize_t userspace_buffer_read(struct file *file, char __user *buf, size_t
if (buftype == ANNOTATE_BUF) {
wake_up(&gator_annotate_wait);
}
- } while (buffer_commit_ready(&cpu, &buftype));
+ } while (buffer_commit_ready(&cpu, &buftype));
mutex_unlock(&gator_buffer_mutex);
diff --git a/driver/gator_marshaling.c b/driver/gator_marshaling.c
index 72dd5ef..3282de8 100644
--- a/driver/gator_marshaling.c
+++ b/driver/gator_marshaling.c
@@ -19,7 +19,11 @@
/* Add another character so the length isn't 0x0a bytes */ \
"5"
-static void marshal_summary(long long timestamp, long long uptime, const char * uname)
+#ifdef MALI_SUPPORT
+#include "gator_events_mali_common.h"
+#endif
+
+static void marshal_summary(long long timestamp, long long uptime, long long monotonic_delta, const char * uname)
{
unsigned long flags;
int cpu = 0;
@@ -28,12 +32,24 @@ static void marshal_summary(long long timestamp, long long uptime, const char *
gator_buffer_write_string(cpu, SUMMARY_BUF, NEWLINE_CANARY);
gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, timestamp);
gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, uptime);
+ gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, monotonic_delta);
gator_buffer_write_string(cpu, SUMMARY_BUF, "uname");
gator_buffer_write_string(cpu, SUMMARY_BUF, uname);
#if GATOR_IKS_SUPPORT
gator_buffer_write_string(cpu, SUMMARY_BUF, "iks");
gator_buffer_write_string(cpu, SUMMARY_BUF, "");
#endif
+ // Let Streamline know which GPU is used so that it can label the GPU Activity appropriately. This is a temporary fix, to be improved in a future release.
+#ifdef MALI_SUPPORT
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "mali_type");
+#if (MALI_SUPPORT == MALI_4xx)
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "4xx");
+#elif (MALI_SUPPORT == MALI_T6xx)
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "6xx");
+#else
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "unknown");
+#endif
+#endif
gator_buffer_write_string(cpu, SUMMARY_BUF, "");
// Commit the buffer now so it can be one of the first frames read by Streamline
gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
@@ -73,23 +89,23 @@ static void marshal_thread_name(int pid, char *name)
local_irq_restore(flags);
}
-static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, int inKernel)
+static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, int inKernel, u64 time)
{
int cpu = get_physical_cpu();
- u64 time = gator_get_time();
- if (buffer_check_space(cpu, BACKTRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32 + gator_backtrace_depth * 2 * MAXSIZE_PACK32)) {
- gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, time);
- gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, exec_cookie);
- gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, tgid);
- gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, pid);
- gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, inKernel);
- return true;
+ if (!buffer_check_space(cpu, BACKTRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32 + gator_backtrace_depth * 2 * MAXSIZE_PACK32)) {
+ // Check and commit; commit is set to occur once buffer is 3/4 full
+ buffer_check(cpu, BACKTRACE_BUF, time);
+
+ return false;
}
- // Check and commit; commit is set to occur once buffer is 3/4 full
- buffer_check(cpu, BACKTRACE_BUF, time);
+ gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, time);
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, exec_cookie);
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, tgid);
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, pid);
+ gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, inKernel);
- return false;
+ return true;
}
static void marshal_backtrace(unsigned long address, int cookie)
@@ -99,13 +115,13 @@ static void marshal_backtrace(unsigned long address, int cookie)
gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, address);
}
-static void marshal_backtrace_footer(void)
+static void marshal_backtrace_footer(u64 time)
{
int cpu = get_physical_cpu();
gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, MESSAGE_END_BACKTRACE);
// Check and commit; commit is set to occur once buffer is 3/4 full
- buffer_check(cpu, BACKTRACE_BUF, gator_get_time());
+ buffer_check(cpu, BACKTRACE_BUF, time);
}
static bool marshal_event_header(u64 time)
@@ -239,6 +255,28 @@ static void marshal_sched_gpu_stop(int unit, int core)
local_irq_restore(flags);
}
+static void marshal_sched_trace_start(int tgid, int pid, int cookie)
+{
+ unsigned long cpu = get_physical_cpu(), flags;
+ u64 time;
+
+ if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])
+ return;
+
+ local_irq_save(flags);
+ time = gator_get_time();
+ if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_START);
+ gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid);
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
+ gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie);
+ }
+ // Check and commit; commit is set to occur once buffer is 3/4 full
+ buffer_check(cpu, SCHED_TRACE_BUF, time);
+ local_irq_restore(flags);
+}
+
static void marshal_sched_trace_switch(int tgid, int pid, int cookie, int state)
{
unsigned long cpu = get_physical_cpu(), flags;
@@ -357,17 +395,19 @@ static void marshal_frame(int cpu, int buftype)
}
#if defined(__arm__) || defined(__aarch64__)
-static void marshal_core_name(const int cpuid, const char *name)
+static void marshal_core_name(const int core, const int cpuid, const char *name)
{
int cpu = get_physical_cpu();
unsigned long flags;
local_irq_save(flags);
if (buffer_check_space(cpu, NAME_BUF, MAXSIZE_PACK32 + MAXSIZE_CORE_NAME)) {
gator_buffer_write_packed_int(cpu, NAME_BUF, HRTIMER_CORE_NAME);
+ gator_buffer_write_packed_int(cpu, NAME_BUF, core);
gator_buffer_write_packed_int(cpu, NAME_BUF, cpuid);
gator_buffer_write_string(cpu, NAME_BUF, name);
}
- buffer_check(cpu, NAME_BUF, gator_get_time());
+ // Commit core names now so that they can show up in live
+ gator_commit_buffer(cpu, NAME_BUF, gator_get_time());
local_irq_restore(flags);
}
#endif
diff --git a/driver/gator_trace_gpu.c b/driver/gator_trace_gpu.c
index 61ecbe3..12623c4 100644
--- a/driver/gator_trace_gpu.c
+++ b/driver/gator_trace_gpu.c
@@ -40,21 +40,27 @@ enum {
NUMBER_OF_GPU_UNITS
};
-#define MALI_400 (0x0b07)
+#define MALI_4xx (0x0b07)
#define MALI_T6xx (0x0056)
struct mali_gpu_job {
int count;
- int last_core;
int last_tgid;
int last_pid;
+ int last_job_id;
};
#define NUMBER_OF_GPU_CORES 16
static struct mali_gpu_job mali_gpu_jobs[NUMBER_OF_GPU_UNITS][NUMBER_OF_GPU_CORES];
static DEFINE_SPINLOCK(mali_gpu_jobs_lock);
-static void mali_gpu_enqueue(int unit, int core, int tgid, int pid)
+/* Only one event should be running on a unit and core at a time (ie, a start
+ * event can only be followed by a stop and vice versa), but because the kernel
+ * only knows when a job is enqueued and not started, it is possible for a
+ * start1, start2, stop1, stop2. Change it back into start1, stop1, start2,
+ * stop2 by queueing up start2 and releasing it when stop1 is received.
+ */
+static void mali_gpu_enqueue(int unit, int core, int tgid, int pid, int job_id)
{
int count;
@@ -63,23 +69,23 @@ static void mali_gpu_enqueue(int unit, int core, int tgid, int pid)
BUG_ON(count < 0);
++mali_gpu_jobs[unit][core].count;
if (count) {
- mali_gpu_jobs[unit][core].last_core = core;
mali_gpu_jobs[unit][core].last_tgid = tgid;
mali_gpu_jobs[unit][core].last_pid = pid;
+ mali_gpu_jobs[unit][core].last_job_id = job_id;
}
spin_unlock(&mali_gpu_jobs_lock);
if (!count) {
- marshal_sched_gpu_start(unit, core, tgid, pid);
+ marshal_sched_gpu_start(unit, core, tgid, pid/*, job_id*/);
}
}
static void mali_gpu_stop(int unit, int core)
{
int count;
- int last_core = 0;
int last_tgid = 0;
int last_pid = 0;
+ int last_job_id = 0;
spin_lock(&mali_gpu_jobs_lock);
if (mali_gpu_jobs[unit][core].count == 0) {
@@ -89,20 +95,20 @@ static void mali_gpu_stop(int unit, int core)
--mali_gpu_jobs[unit][core].count;
count = mali_gpu_jobs[unit][core].count;
if (count) {
- last_core = mali_gpu_jobs[unit][core].last_core;
last_tgid = mali_gpu_jobs[unit][core].last_tgid;
last_pid = mali_gpu_jobs[unit][core].last_pid;
+ last_job_id = mali_gpu_jobs[unit][core].last_job_id;
}
spin_unlock(&mali_gpu_jobs_lock);
marshal_sched_gpu_stop(unit, core);
if (count) {
- marshal_sched_gpu_start(unit, last_core, last_tgid, last_pid);
+ marshal_sched_gpu_start(unit, core, last_tgid, last_pid/*, last_job_id*/);
}
}
#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
-#include "gator_events_mali_400.h"
+#include "gator_events_mali_4xx.h"
/*
* Taken from MALI_PROFILING_EVENT_CHANNEL_* in Mali DDK.
@@ -141,10 +147,10 @@ GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned
case EVENT_TYPE_START:
if (component == EVENT_CHANNEL_VP0) {
/* tgid = d0; pid = d1; */
- mali_gpu_enqueue(GPU_UNIT_VP, 0, d0, d1);
+ mali_gpu_enqueue(GPU_UNIT_VP, 0, d0, d1, 0);
} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
/* tgid = d0; pid = d1; */
- mali_gpu_enqueue(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0, d0, d1);
+ mali_gpu_enqueue(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0, d0, d1, 0);
}
break;
@@ -173,9 +179,16 @@ GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned
#endif
#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+#if defined(MALI_JOB_SLOTS_EVENT_CHANGED)
+GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id))
+#else
GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid))
+#endif
{
unsigned int component, state, unit;
+#if !defined(MALI_JOB_SLOTS_EVENT_CHANGED)
+ unsigned char job_id = 0;
+#endif
component = (event_id >> 16) & 0xFF; // component is an 8-bit field
state = (event_id >> 24) & 0xF; // state is a 4-bit field
@@ -197,7 +210,7 @@ GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigne
if (unit != GPU_UNIT_NONE) {
switch (state) {
case EVENT_TYPE_START:
- mali_gpu_enqueue(unit, 0, tgid, (pid != 0 ? pid : tgid));
+ mali_gpu_enqueue(unit, 0, tgid, (pid != 0 ? pid : tgid), job_id);
break;
case EVENT_TYPE_STOP:
mali_gpu_stop(unit, 0);
@@ -214,7 +227,7 @@ GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigne
GATOR_DEFINE_PROBE(gpu_activity_start, TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p))
{
- mali_gpu_enqueue(gpu_unit, gpu_core, (int)p->tgid, (int)p->pid);
+ mali_gpu_enqueue(gpu_unit, gpu_core, (int)p->tgid, (int)p->pid, 0);
}
GATOR_DEFINE_PROBE(gpu_activity_stop, TP_PROTO(int gpu_unit, int gpu_core))
@@ -229,6 +242,7 @@ int gator_trace_gpu_start(void)
* Absence of gpu trace points is not an error
*/
+ memset(&mali_gpu_jobs, sizeof(mali_gpu_jobs), 0);
gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
diff --git a/driver/gator_trace_sched.c b/driver/gator_trace_sched.c
index e989f6a..e98815e 100644
--- a/driver/gator_trace_sched.c
+++ b/driver/gator_trace_sched.c
@@ -10,9 +10,6 @@
#include <trace/events/sched.h>
#include "gator.h"
-#define SCHED_SWITCH 1
-#define SCHED_PROCESS_EXIT 2
-
#define TASK_MAP_ENTRIES 1024 /* must be power of 2 */
#define TASK_MAX_COLLISIONS 2
@@ -92,14 +89,12 @@ void emit_pid_name(struct task_struct *task)
}
}
-static void collect_counters(void)
+static void collect_counters(u64 time)
{
int *buffer, len, cpu = get_physical_cpu();
long long *buffer64;
struct gator_interface *gi;
- u64 time;
- time = gator_get_time();
if (marshal_event_header(time)) {
list_for_each_entry(gi, &gator_events, list) {
if (gi->read) {
@@ -113,49 +108,19 @@ static void collect_counters(void)
// Only check after writing all counters so that time and corresponding counters appear in the same frame
buffer_check(cpu, BLOCK_COUNTER_BUF, time);
-#if GATOR_LIVE
// Commit buffers on timeout
if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) {
static const int buftypes[] = { COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF };
int i;
- for (i = 0; i < sizeof(buftypes)/sizeof(buftypes[0]); ++i) {
+ for (i = 0; i < ARRAY_SIZE(buftypes); ++i) {
gator_commit_buffer(cpu, buftypes[i], time);
}
+ // Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full
+ if (on_primary_core() && spin_trylock(&annotate_lock)) {
+ gator_commit_buffer(0, ANNOTATE_BUF, time);
+ spin_unlock(&annotate_lock);
+ }
}
-#endif
- }
-}
-
-static void probe_sched_write(int type, struct task_struct *task, struct task_struct *old_task)
-{
- int cookie = 0, state = 0;
- int cpu = get_physical_cpu();
- int tgid = task->tgid;
- int pid = task->pid;
-
- if (type == SCHED_SWITCH) {
- // do as much work as possible before disabling interrupts
- cookie = get_exec_cookie(cpu, task);
- emit_pid_name(task);
- if (old_task->state == TASK_RUNNING) {
- state = STATE_CONTENTION;
- } else if (old_task->in_iowait) {
- state = STATE_WAIT_ON_IO;
- } else {
- state = STATE_WAIT_ON_OTHER;
- }
-
- per_cpu(collecting, cpu) = 1;
- collect_counters();
- per_cpu(collecting, cpu) = 0;
- }
-
- // marshal_sched_trace() disables interrupts as the free may trigger while switch is writing to the buffer; disabling preemption is not sufficient
- // is disable interrupts necessary now that exit is used instead of free?
- if (type == SCHED_SWITCH) {
- marshal_sched_trace_switch(tgid, pid, cookie, state);
- } else {
- marshal_sched_trace_exit(tgid, pid);
}
}
@@ -165,18 +130,48 @@ static void trace_sched_insert_idle(void)
marshal_sched_trace_switch(0, 0, 0, 0);
}
+GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
+{
+ int cookie;
+ int cpu = get_physical_cpu();
+
+ cookie = get_exec_cookie(cpu, child);
+ emit_pid_name(child);
+
+ marshal_sched_trace_start(child->tgid, child->pid, cookie);
+}
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
#else
GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
#endif
{
- probe_sched_write(SCHED_SWITCH, next, prev);
+ int cookie;
+ int state;
+ int cpu = get_physical_cpu();
+
+ // do as much work as possible before disabling interrupts
+ cookie = get_exec_cookie(cpu, next);
+ emit_pid_name(next);
+ if (prev->state == TASK_RUNNING) {
+ state = STATE_CONTENTION;
+ } else if (prev->in_iowait) {
+ state = STATE_WAIT_ON_IO;
+ } else {
+ state = STATE_WAIT_ON_OTHER;
+ }
+
+ per_cpu(collecting, cpu) = 1;
+ collect_counters(gator_get_time());
+ per_cpu(collecting, cpu) = 0;
+
+ marshal_sched_trace_switch(next->tgid, next->pid, cookie, state);
}
-GATOR_DEFINE_PROBE(sched_process_exit, TP_PROTO(struct task_struct *p))
+GATOR_DEFINE_PROBE(sched_process_free, TP_PROTO(struct task_struct *p))
{
- probe_sched_write(SCHED_PROCESS_EXIT, p, 0);
+ marshal_sched_trace_exit(p->tgid, p->pid);
}
static void do_nothing(void *info)
@@ -188,10 +183,12 @@ static void do_nothing(void *info)
static int register_scheduler_tracepoints(void)
{
// register tracepoints
+ if (GATOR_REGISTER_TRACE(sched_process_fork))
+ goto fail_sched_process_fork;
if (GATOR_REGISTER_TRACE(sched_switch))
goto fail_sched_switch;
- if (GATOR_REGISTER_TRACE(sched_process_exit))
- goto fail_sched_process_exit;
+ if (GATOR_REGISTER_TRACE(sched_process_free))
+ goto fail_sched_process_free;
pr_debug("gator: registered tracepoints\n");
// Now that the scheduler tracepoint is registered, force a context switch
@@ -201,9 +198,11 @@ static int register_scheduler_tracepoints(void)
return 0;
// unregister tracepoints on error
-fail_sched_process_exit:
+fail_sched_process_free:
GATOR_UNREGISTER_TRACE(sched_switch);
fail_sched_switch:
+ GATOR_UNREGISTER_TRACE(sched_process_fork);
+fail_sched_process_fork:
pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
return -1;
@@ -231,8 +230,9 @@ void gator_trace_sched_offline(void)
static void unregister_scheduler_tracepoints(void)
{
+ GATOR_UNREGISTER_TRACE(sched_process_fork);
GATOR_UNREGISTER_TRACE(sched_switch);
- GATOR_UNREGISTER_TRACE(sched_process_exit);
+ GATOR_UNREGISTER_TRACE(sched_process_free);
pr_debug("gator: unregistered tracepoints\n");
}
diff --git a/driver/mali/mali_mjollnir_profiling_gator_api.h b/driver/mali/mali_mjollnir_profiling_gator_api.h
new file mode 100644
index 0000000..3db4543
--- /dev/null
+++ b/driver/mali/mali_mjollnir_profiling_gator_api.h
@@ -0,0 +1,164 @@
+/*
+ * This confidential and proprietary software may be used only as
+ * authorised by a licensing agreement from ARM Limited
+ * (C) COPYRIGHT 2013 ARM Limited
+ * ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorised
+ * copies and copies may only be made to the extent permitted
+ * by a licensing agreement from ARM Limited.
+ */
+
+#ifndef __MALI_MJOLLNIR_PROFILING_GATOR_API_H__
+#define __MALI_MJOLLNIR_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/*
+ * The number of processor cores. Update to suit your hardware implementation.
+ */
+#define MAX_NUM_FP_CORES (4)
+#define MAX_NUM_VP_CORES (1)
+#define MAX_NUM_L2_CACHE_CORES (1)
+
+enum counters
+{
+ /* Timeline activity */
+ ACTIVITY_VP_0 = 0,
+ ACTIVITY_FP_0,
+ ACTIVITY_FP_1,
+ ACTIVITY_FP_2,
+ ACTIVITY_FP_3,
+
+ /* L2 cache counters */
+ COUNTER_L2_0_C0,
+ COUNTER_L2_0_C1,
+
+ /* Vertex processor counters */
+ COUNTER_VP_0_C0,
+ COUNTER_VP_0_C1,
+
+ /* Fragment processor counters */
+ COUNTER_FP_0_C0,
+ COUNTER_FP_0_C1,
+ COUNTER_FP_1_C0,
+ COUNTER_FP_1_C1,
+ COUNTER_FP_2_C0,
+ COUNTER_FP_2_C1,
+ COUNTER_FP_3_C0,
+ COUNTER_FP_3_C1,
+
+ /* EGL Software Counters */
+ COUNTER_EGL_BLIT_TIME,
+
+ /* GLES Software Counters */
+ COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_ARRAYS_CALLS,
+ COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_POINTS,
+ COUNTER_GLES_DRAW_LINES,
+ COUNTER_GLES_DRAW_LINE_LOOP,
+ COUNTER_GLES_DRAW_LINE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLES,
+ COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLE_FAN,
+ COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+ COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+ COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+ COUNTER_GLES_UPLOAD_VBO_TIME,
+ COUNTER_GLES_NUM_FLUSHES,
+ COUNTER_GLES_NUM_VSHADERS_GENERATED,
+ COUNTER_GLES_NUM_FSHADERS_GENERATED,
+ COUNTER_GLES_VSHADER_GEN_TIME,
+ COUNTER_GLES_FSHADER_GEN_TIME,
+ COUNTER_GLES_INPUT_TRIANGLES,
+ COUNTER_GLES_VXCACHE_HIT,
+ COUNTER_GLES_VXCACHE_MISS,
+ COUNTER_GLES_VXCACHE_COLLISION,
+ COUNTER_GLES_CULLED_TRIANGLES,
+ COUNTER_GLES_CULLED_LINES,
+ COUNTER_GLES_BACKFACE_TRIANGLES,
+ COUNTER_GLES_GBCLIP_TRIANGLES,
+ COUNTER_GLES_GBCLIP_LINES,
+ COUNTER_GLES_TRIANGLES_DRAWN,
+ COUNTER_GLES_DRAWCALL_TIME,
+ COUNTER_GLES_TRIANGLES_COUNT,
+ COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+ COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+ COUNTER_GLES_FAN_TRIANGLES_COUNT,
+ COUNTER_GLES_LINES_COUNT,
+ COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+ COUNTER_GLES_STRIP_LINES_COUNT,
+ COUNTER_GLES_LOOP_LINES_COUNT,
+
+ COUNTER_FILMSTRIP,
+ COUNTER_FREQUENCY,
+ COUNTER_VOLTAGE,
+
+ NUMBER_OF_EVENTS
+};
+
+#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT ACTIVITY_FP_3
+
+#define FIRST_HW_COUNTER COUNTER_L2_0_C0
+#define LAST_HW_COUNTER COUNTER_FP_3_C1
+
+#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT
+
+/* Signifies that the system is able to report voltage and frequency numbers. */
+#define DVFS_REPORTED_BY_DDK 1
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters
+{
+ u32 source0;
+ u32 value0;
+ u32 source1;
+ u32 value1;
+} _mali_profiling_core_counters;
+
+/*
+ * For compatibility with utgard.
+ */
+typedef struct _mali_profiling_l2_counter_values
+{
+ struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+typedef struct _mali_profiling_mali_version
+{
+ u32 mali_product_id;
+ u32 mali_version_major;
+ u32 mali_version_minor;
+ u32 num_of_l2_cores;
+ u32 num_of_fp_cores;
+ u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+extern void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+extern u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+/*
+ * List of possible actions allowing DDK to be controlled by Streamline.
+ * The following numbers are used by DDK to control the frame buffer dumping.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MJOLLNIR_PROFILING_GATOR_API_H__ */
diff --git a/driver/mali/mali_utgard_profiling_gator_api.h b/driver/mali/mali_utgard_profiling_gator_api.h
new file mode 100644
index 0000000..c02a1a4
--- /dev/null
+++ b/driver/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,202 @@
+/*
+ * This confidential and proprietary software may be used only as
+ * authorised by a licensing agreement from ARM Limited
+ * (C) COPYRIGHT 2013 ARM Limited
+ * ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorised
+ * copies and copies may only be made to the extent permitted
+ * by a licensing agreement from ARM Limited.
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+/** The list of events supported by the Mali DDK. */
+typedef enum
+{
+ /* Vertex processor activity */
+ ACTIVITY_VP_0 = 0,
+
+ /* Fragment processor activity */
+ ACTIVITY_FP_0, /* 1 */
+ ACTIVITY_FP_1,
+ ACTIVITY_FP_2,
+ ACTIVITY_FP_3,
+ ACTIVITY_FP_4,
+ ACTIVITY_FP_5,
+ ACTIVITY_FP_6,
+ ACTIVITY_FP_7,
+
+ /* L2 cache counters */
+ COUNTER_L2_0_C0,
+ COUNTER_L2_0_C1,
+ COUNTER_L2_1_C0,
+ COUNTER_L2_1_C1,
+ COUNTER_L2_2_C0,
+ COUNTER_L2_2_C1,
+
+ /* Vertex processor counters */
+ COUNTER_VP_0_C0, /*15*/
+ COUNTER_VP_0_C1,
+
+ /* Fragment processor counters */
+ COUNTER_FP_0_C0,
+ COUNTER_FP_0_C1,
+ COUNTER_FP_1_C0,
+ COUNTER_FP_1_C1,
+ COUNTER_FP_2_C0,
+ COUNTER_FP_2_C1,
+ COUNTER_FP_3_C0,
+ COUNTER_FP_3_C1,
+ COUNTER_FP_4_C0,
+ COUNTER_FP_4_C1,
+ COUNTER_FP_5_C0,
+ COUNTER_FP_5_C1,
+ COUNTER_FP_6_C0,
+ COUNTER_FP_6_C1,
+ COUNTER_FP_7_C0,
+ COUNTER_FP_7_C1, /* 32 */
+
+ /*
+ * If more hardware counters are added, the _mali_osk_hw_counter_table
+ * below should also be updated.
+ */
+
+ /* EGL software counters */
+ COUNTER_EGL_BLIT_TIME,
+
+ /* GLES software counters */
+ COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+ COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_ARRAYS_CALLS,
+ COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+ COUNTER_GLES_DRAW_POINTS,
+ COUNTER_GLES_DRAW_LINES,
+ COUNTER_GLES_DRAW_LINE_LOOP,
+ COUNTER_GLES_DRAW_LINE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLES,
+ COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+ COUNTER_GLES_DRAW_TRIANGLE_FAN,
+ COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+ COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+ COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+ COUNTER_GLES_UPLOAD_VBO_TIME,
+ COUNTER_GLES_NUM_FLUSHES,
+ COUNTER_GLES_NUM_VSHADERS_GENERATED,
+ COUNTER_GLES_NUM_FSHADERS_GENERATED,
+ COUNTER_GLES_VSHADER_GEN_TIME,
+ COUNTER_GLES_FSHADER_GEN_TIME,
+ COUNTER_GLES_INPUT_TRIANGLES,
+ COUNTER_GLES_VXCACHE_HIT,
+ COUNTER_GLES_VXCACHE_MISS,
+ COUNTER_GLES_VXCACHE_COLLISION,
+ COUNTER_GLES_CULLED_TRIANGLES,
+ COUNTER_GLES_CULLED_LINES,
+ COUNTER_GLES_BACKFACE_TRIANGLES,
+ COUNTER_GLES_GBCLIP_TRIANGLES,
+ COUNTER_GLES_GBCLIP_LINES,
+ COUNTER_GLES_TRIANGLES_DRAWN,
+ COUNTER_GLES_DRAWCALL_TIME,
+ COUNTER_GLES_TRIANGLES_COUNT,
+ COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+ COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+ COUNTER_GLES_FAN_TRIANGLES_COUNT,
+ COUNTER_GLES_LINES_COUNT,
+ COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+ COUNTER_GLES_STRIP_LINES_COUNT,
+ COUNTER_GLES_LOOP_LINES_COUNT,
+
+ /* Framebuffer capture pseudo-counter */
+ COUNTER_FILMSTRIP,
+
+ NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER COUNTER_L2_0_C0
+#define LAST_HW_COUNTER COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT
+
+#define FIRST_SPECIAL_COUNTER COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER COUNTER_FILMSTRIP
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters
+{
+ u32 source0;
+ u32 value0;
+ u32 source1;
+ u32 value1;
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values
+{
+ struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version
+{
+ u32 mali_product_id;
+ u32 mali_version_major;
+ u32 mali_version_minor;
+ u32 num_of_l2_cores;
+ u32 num_of_fp_cores;
+ u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */