aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Clark <robdclark@gmail.com>2013-09-14 14:06:43 -0400
committerRob Clark <robdclark@gmail.com>2014-03-04 16:07:44 -0500
commit41bfd7556d05140780adae7b3a7152d277abfed0 (patch)
tree56d21935f391a0222940c701e493e775c04c141d
parent948762cbb103a4c55dba59a0241030d7c5c1f93a (diff)
WIP: perf/timechart: support for GPUfreedreno/ifc6410-drm
Show GPU as another "CPU" on the timeline, so we can see where it is busy and idle. Also show pending flips (green bar on the GPU), as well as places where processes are blocked on a fence (yellow bar on the process). Currently a bit hacky. And looking for drm/msm specific traces. We should probably make a set of generic traces that can be used by all drm drivers, and use these trace events in perf. (For other GPU's we probably want a way to be able to visualize activity on multiple rings.)
-rw-r--r--tools/perf/builtin-timechart.c238
-rw-r--r--tools/perf/util/svghelper.c91
-rw-r--r--tools/perf/util/svghelper.h3
3 files changed, 326 insertions, 6 deletions
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 3b75b2e21ea55..bb68af061ca82 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -365,6 +365,117 @@ struct sched_switch {
int next_prio;
};
+/*
+ * GPU trace entries:
+ */
+
+/* flip_request/flip_complete: */
+struct flip_entry {
+ struct trace_entry te;
+ int id; /* crtc id */
+ void *obj;
+};
+
+/* fence_wait_request/fence_wait_complete: */
+struct fence_entry {
+ struct trace_entry te;
+ uint32_t fence;
+ int ret; /* only valid for fence_wait_complete */
+};
+
+/* gpu_request/gpu_complete: */
+struct gpu_entry {
+ struct trace_entry te;
+ uint32_t fence;
+};
+
+/*
+ * GPU trace events:
+ */
+
+struct flip_event {
+ struct flip_event *next;
+ u64 time;
+ int id;
+ void *obj; /* GEM object ptr */
+ int complete;
+};
+
+struct fence_event {
+ struct fence_event *next;
+ u64 time;
+ uint32_t fence;
+ int pid;
+ int ret; /* only valid for fence_wait_complete */
+ int complete;
+};
+
+struct gpu_event {
+ struct gpu_event *next;
+ u64 time;
+ uint32_t fence;
+ int complete;
+};
+
+static struct flip_event *flip_events;
+static struct fence_event *fence_events;
+static struct gpu_event *gpu_events;
+
+static void
+flip(int cpu, u64 timestamp, int pid, struct trace_entry *te, int complete)
+{
+ struct flip_event *fe;
+ struct flip_entry *fence = (void *)te;
+
+ fe = calloc(sizeof(*fe), 1);
+ if (!fe)
+ return;
+
+ fe->time = timestamp;
+ fe->id = fence->id;
+ fe->obj = fence->obj;
+ fe->complete = complete;
+ fe->next = flip_events;
+ flip_events = fe;
+}
+
+static void
+fence(int cpu, u64 timestamp, int pid, struct trace_entry *te, int complete)
+{
+ struct fence_event *fe;
+ struct fence_entry *fence = (void *)te;
+
+ fe = calloc(sizeof(*fe), 1);
+ if (!fe)
+ return;
+
+ fe->time = timestamp;
+ fe->pid = pid;
+ fe->fence = fence->fence;
+ fe->complete = complete;
+ if (complete)
+ fe->ret = fence->ret;
+ fe->next = fence_events;
+ fence_events = fe;
+}
+
+static void
+gpu(int cpu, u64 timestamp, int pid, struct trace_entry *te, int complete)
+{
+ struct gpu_event *ge;
+ struct gpu_entry *gpu = (void *)te;
+
+ ge = calloc(sizeof(*ge), 1);
+ if (!ge)
+ return;
+
+ ge->time = timestamp;
+ ge->fence = gpu->fence;
+ ge->complete = complete;
+ ge->next = gpu_events;
+ gpu_events = ge;
+}
+
static void c_state_start(int cpu, u64 timestamp, int state)
{
cpus_cstate_start_times[cpu] = timestamp;
@@ -550,6 +661,19 @@ static int process_sample_event(struct perf_tool *tool __used,
else if (strcmp(event_str, "sched:sched_switch") == 0)
sched_switch(sample->cpu, sample->time, te);
+ else if (strcmp(event_str, "msm:msm_flip_request") == 0)
+ flip(sample->cpu, sample->time, sample->pid, te, 0);
+ else if (strcmp(event_str, "msm:msm_flip_complete") == 0)
+ flip(sample->cpu, sample->time, sample->pid, te, 1);
+ else if (strcmp(event_str, "msm:msm_fence_wait_request") == 0)
+ fence(sample->cpu, sample->time, sample->pid, te, 0);
+ else if (strcmp(event_str, "msm:msm_fence_wait_complete") == 0)
+ fence(sample->cpu, sample->time, sample->pid, te, 1);
+ else if (strcmp(event_str, "msm:msm_gpu_request") == 0)
+ gpu(sample->cpu, sample->time, sample->pid, te, 0);
+ else if (strcmp(event_str, "msm:msm_gpu_complete") == 0)
+ gpu(sample->cpu, sample->time, sample->pid, te, 1);
+
#ifdef SUPPORT_OLD_POWER_EVENTS
if (use_old_power_events) {
if (strcmp(event_str, "power:power_start") == 0)
@@ -780,6 +904,93 @@ static void draw_cpu_usage(void)
}
}
+static void draw_gpu_usage(void)
+{
+ struct gpu_event *ge, *prev = NULL;
+ uint32_t submitted_fence = 0;
+ uint32_t completed_fence = 0;
+ char str[32];
+
+ /* remember that we are processing events in reverse order here and
+ * 'prev' is the event following 'ge' in time:
+ */
+
+ for (ge = gpu_events; ge; prev = ge, ge = ge->next) {
+ if (prev) {
+ if (prev->complete)
+ completed_fence = prev->fence;
+ else
+ submitted_fence = prev->fence;
+
+ if (submitted_fence > completed_fence) {
+ if ((submitted_fence - completed_fence) == 1)
+ snprintf(str, sizeof(str), "fence: %u", submitted_fence);
+ else
+ snprintf(str, sizeof(str), "fence: %u-%u", completed_fence + 1, submitted_fence);
+ svg_process(numcpus, ge->time, prev->time, "sample", str);
+
+ }
+ }
+ }
+}
+
+static void draw_fence_bars(void)
+{
+ struct per_pid *p;
+ int Y = 0;
+
+ Y = 2 * numcpus + 2;
+
+ p = all_data;
+ while (p) {
+ struct per_pidcomm *c;
+ struct fence_event *fe, *prev = NULL;
+
+ c = p->all;
+
+ /* remember that we are processing events in reverse order here and
+ * 'prev' is the event following 'fe' in time:
+ *
+ * XXX this probably isn't quite right.. we need to use the pidcomm
+ * that is active during the timespan of the wait request/complete
+ * to deal w/ pid recycling..
+ */
+
+ for (fe = fence_events; fe; prev = fe, fe = fe->next) {
+ if (fe->pid != p->pid)
+ continue;
+ if (prev && prev->complete && !fe->complete)
+ svg_fence(Y, fe->time, prev->time, fe->fence, prev->ret);
+ }
+
+ while (c) {
+ if (c->display)
+ Y++;
+ c = c->next;
+ }
+ p = p->next;
+ }
+
+
+}
+
+static void draw_flips(void)
+{
+ struct flip_event *fe, *prev = NULL;
+
+ /* remember that we are processing events in reverse order here and
+ * 'prev' is the event following 'fe' in time:
+ *
+ * TODO make this support more than one crtc..
+ */
+
+ for (fe = flip_events; fe; prev = fe, fe = fe->next) {
+ if (prev && prev->complete && !fe->complete)
+ svg_flip(numcpus, fe->time, prev->time, fe->id, fe->obj);
+ }
+}
+
+
static void draw_process_bars(void)
{
struct per_pid *p;
@@ -964,7 +1175,7 @@ static void write_svg_file(const char *filename)
if (count < 15)
count = determine_display_tasks(TIME_THRESH / 10);
- open_svg(filename, numcpus, count, first_time, last_time);
+ open_svg(filename, numcpus+1, count, first_time, last_time);
svg_time_grid();
svg_legenda();
@@ -972,8 +1183,19 @@ static void write_svg_file(const char *filename)
for (i = 0; i < numcpus; i++)
svg_cpu_box(i, max_freq, turbo_frequency);
+ svg_gpu_box(numcpus);
+
draw_cpu_usage();
+
+ draw_gpu_usage();
+ draw_flips();
+
+ // XXX ugly hack.. update places that put process after
+ // numcpus -> numcpus+numgpus
+ numcpus++;
+
draw_process_bars();
+ draw_fence_bars();
draw_c_p_states();
draw_wakeups();
@@ -1034,6 +1256,13 @@ static const char * const record_old_args[] = {
"-e", "power:power_frequency",
"-e", "sched:sched_wakeup",
"-e", "sched:sched_switch",
+ /* TODO only enable for gpu record? */
+ "-e", "msm:msm_flip_request",
+ "-e", "msm:msm_flip_complete",
+ "-e", "msm:msm_fence_wait_request",
+ "-e", "msm:msm_fence_wait_complete",
+ "-e", "msm:msm_gpu_request",
+ "-e", "msm:msm_gpu_complete",
};
#endif
@@ -1047,6 +1276,13 @@ static const char * const record_new_args[] = {
"-e", "power:cpu_idle",
"-e", "sched:sched_wakeup",
"-e", "sched:sched_switch",
+ /* TODO only enable for gpu record? */
+ "-e", "msm:msm_flip_request",
+ "-e", "msm:msm_flip_complete",
+ "-e", "msm:msm_fence_wait_request",
+ "-e", "msm:msm_fence_wait_complete",
+ "-e", "msm:msm_gpu_request",
+ "-e", "msm:msm_gpu_complete",
};
static int __cmd_record(int argc, const char **argv)
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 96c866045d60d..45bd2a569dbb8 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -115,6 +115,10 @@ void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end)
fprintf(svgfile, " rect.c5 { fill:rgb(255, 44, 44); fill-opacity:0.5; stroke-width:0; } \n");
fprintf(svgfile, " rect.c6 { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; } \n");
fprintf(svgfile, " line.pstate { stroke:rgb(255,255, 0); stroke-opacity:0.8; stroke-width:2; } \n");
+ fprintf(svgfile, " rect.flip { fill:rgb( 0,255, 0); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " line.flip { stroke:rgb( 0,255, 0); stroke-opacity:0.8; stroke-width:9; } \n");
+ fprintf(svgfile, " rect.fence { fill:rgb(255,255, 0); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " line.fence { stroke:rgb(255,255, 0); stroke-opacity:0.8; stroke-width:9; } \n");
fprintf(svgfile, " ]]>\n </style>\n</defs>\n");
}
@@ -215,6 +219,10 @@ static char *cpu_model(void)
strncpy(cpu_m, &buf[13], 255);
break;
}
+ if (strstr(buf, "Processor")) {
+ strncpy(cpu_m, &buf[12], 255);
+ break;
+ }
}
fclose(file);
}
@@ -255,6 +263,26 @@ void svg_cpu_box(int cpu, u64 __max_freq, u64 __turbo_freq)
10+time2pixels(first_time), cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 4, cpu_model());
}
+void svg_gpu_box(int cpu)
+{
+ char cpu_string[80];
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<rect x=\"%4.8f\" width=\"%4.8f\" y=\"%4.1f\" height=\"%4.1f\" class=\"cpu\"/>\n",
+ time2pixels(first_time),
+ time2pixels(last_time)-time2pixels(first_time),
+ cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+ sprintf(cpu_string, "GPU"); /* might be nice to know gpu name somehow?? */
+ fprintf(svgfile, "<text x=\"%4.8f\" y=\"%4.8f\">%s</text>\n",
+ 10+time2pixels(first_time), cpu2y(cpu) + SLOT_HEIGHT/2, cpu_string);
+
+ fprintf(svgfile, "<text transform=\"translate(%4.8f,%4.8f)\" font-size=\"1.25pt\">%s</text>\n",
+ 10+time2pixels(first_time), cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 4, "adreno 320");
+}
+
void svg_process(int cpu, u64 start, u64 end, const char *type, const char *name)
{
double width;
@@ -349,6 +377,57 @@ void svg_pstate(int cpu, u64 start, u64 end, u64 freq)
}
+void svg_flip(int cpu, u64 start, u64 end, int crtc, const void *obj)
+{
+ double height, width;
+
+ if (!svgfile)
+ return;
+
+ width = time2pixels(end)-time2pixels(start);
+ if (width > 6)
+ width = 6;
+
+ width = round_text_size(width);
+ width /= 1.5;
+ height = 1 + cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 8.0;
+
+ fprintf(svgfile, "<line x1=\"%4.8f\" x2=\"%4.8f\" y1=\"%4.1f\" y2=\"%4.1f\" class=\"flip\"/>\n",
+ time2pixels(start), time2pixels(end), height, height);
+
+ if (width > MIN_TEXT_SIZE) {
+ fprintf(svgfile, "<text x=\"%4.8f\" y=\"%4.8f\" font-size=\"%3.8fpt\">gem:</text>\n",
+ time2pixels(start), height-0.2, width);
+ fprintf(svgfile, "<text x=\"%4.8f\" y=\"%4.8f\" font-size=\"%3.8fpt\">%p</text>\n",
+ time2pixels(start), height+width, width, obj);
+ }
+}
+
+void svg_fence(int Yslot, u64 start, u64 end, u32 fence, int ret)
+{
+ double height, width;
+
+ if (!svgfile)
+ return;
+
+ width = time2pixels(end)-time2pixels(start);
+ if (width > 6)
+ width = 6;
+
+ width = round_text_size(width);
+ width /= 1.5;
+ height = Yslot * SLOT_MULT+SLOT_HEIGHT - 8.0;
+
+ fprintf(svgfile, "<line x1=\"%4.8f\" x2=\"%4.8f\" y1=\"%4.1f\" y2=\"%4.1f\" class=\"fence\"/>\n",
+ time2pixels(start), time2pixels(end), height, height);
+
+ if (width > MIN_TEXT_SIZE) {
+ fprintf(svgfile, "<text x=\"%4.8f\" y=\"%4.8f\" font-size=\"%3.8fpt\">fence:</text>\n",
+ time2pixels(start), height-0.2, width);
+ fprintf(svgfile, "<text x=\"%4.8f\" y=\"%4.8f\" font-size=\"%3.8fpt\">%u (%d)</text>\n",
+ time2pixels(start), height+width, width, fence, ret);
+ }
+}
void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2)
{
@@ -457,11 +536,13 @@ void svg_legenda(void)
svg_legenda_box(0, "Running", "sample");
svg_legenda_box(100, "Idle","c1");
- svg_legenda_box(200, "Deeper Idle", "c3");
- svg_legenda_box(350, "Deepest Idle", "c6");
- svg_legenda_box(550, "Sleeping", "process2");
- svg_legenda_box(650, "Waiting for cpu", "waiting");
- svg_legenda_box(800, "Blocked on IO", "blocked");
+ svg_legenda_box(175, "Deeper Idle", "c3");
+ svg_legenda_box(300, "Deepest Idle", "c6");
+ svg_legenda_box(425, "Sleeping", "process2");
+ svg_legenda_box(525, "Waiting for cpu", "waiting");
+ svg_legenda_box(650, "Blocked on IO", "blocked");
+ svg_legenda_box(775, "Waiting Fence", "fence");
+ svg_legenda_box(900, "Pending flip", "flip");
}
void svg_time_grid(void)
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
index e0781989cc31f..72f07e0e71a95 100644
--- a/tools/perf/util/svghelper.h
+++ b/tools/perf/util/svghelper.h
@@ -8,11 +8,14 @@ extern void svg_box(int Yslot, u64 start, u64 end, const char *type);
extern void svg_sample(int Yslot, int cpu, u64 start, u64 end);
extern void svg_waiting(int Yslot, u64 start, u64 end);
extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
+void svg_gpu_box(int cpu);
extern void svg_process(int cpu, u64 start, u64 end, const char *type, const char *name);
extern void svg_cstate(int cpu, u64 start, u64 end, int type);
extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
+void svg_flip(int cpu, u64 start, u64 end, int crtc, const void *obj);
+void svg_fence(int Yslot, u64 start, u64 end, u32 fence, int ret);
extern void svg_time_grid(void);