#undef TRACE_SYSTEM
#define TRACE_SYSTEM gpu

#if !defined(_TRACE_GPU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_GPU_H

#include <linux/tracepoint.h>
#include <linux/time.h>

/*
 * Helpers used by the TP_printk format strings below: split a nanosecond
 * timestamp into whole seconds and the remaining microseconds, rounding to
 * the nearest microsecond.
 */
#define show_secs_from_ns(ns) \
	({ \
		u64 t = (ns) + (NSEC_PER_USEC / 2); \
		do_div(t, NSEC_PER_SEC); \
		t; \
	})

#define show_usecs_from_ns(ns) \
	({ \
		u64 t = (ns) + (NSEC_PER_USEC / 2); \
		u32 rem; \
		do_div(t, NSEC_PER_USEC); \
		rem = do_div(t, USEC_PER_SEC); \
	})
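
/*
 * For example (illustrative value only), a timestamp of 1234567890 ns is
 * printed by the TP_printk format below as "1.234568": one whole second
 * plus 234567.89 us, rounded to the nearest microsecond.
 */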

/*
 * The gpu_sched_switch event indicates that a switch from one GPU context to
 * another occurred on one of the GPU hardware blocks.
 *
 * The gpu_name argument identifies the GPU hardware block.  Each independently
 * scheduled GPU hardware block should have a different name.  This may be used
 * in different ways for different GPUs.  For example, if a GPU includes
 * multiple processing cores it may use names "GPU 0", "GPU 1", etc.  If a GPU
 * includes a separately scheduled 2D and 3D hardware block, it might use the
 * names "2D" and "3D".
 *
 * The timestamp argument is the time at which the switch occurred on the
 * GPU.  These timestamps are in units of nanoseconds and must track
 * sched_clock approximately, though they need not come from any CPU clock.
 * The timestamps for a single hardware block must be monotonically
 * nondecreasing.  This means that if a variable compensation offset is used
 * to translate from some other clock to sched_clock, then care must be
 * taken when that offset changes: preserving monotonicity may require
 * emitting multiple events with the same timestamp.
 *
 * The next_ctx_id argument identifies the next context that was running on
 * the GPU hardware block.  A value of 0 indicates that the hardware block
 * will be idle.
 *
 * The next_prio argument indicates the priority of the next context at the
 * time of the event.  The exact numeric values may mean different things for
 * different GPUs, but they should follow the rule that lower values indicate a
 * higher priority.
 *
 * The next_job_id argument identifies the batch of work that the GPU will be
 * working on.  This should correspond to a job_id that was previously traced
 * as a gpu_job_enqueue event when the batch of work was created.
 */
TRACE_EVENT(gpu_sched_switch,

	TP_PROTO(const char *gpu_name, u64 timestamp,
		u32 next_ctx_id, s32 next_prio, u32 next_job_id),

	TP_ARGS(gpu_name, timestamp, next_ctx_id, next_prio, next_job_id),

	TP_STRUCT__entry(
		__string(       gpu_name,       gpu_name        )
		__field(        u64,            timestamp       )
		__field(        u32,            next_ctx_id     )
		__field(        s32,            next_prio       )
		__field(        u32,            next_job_id     )
	),

	TP_fast_assign(
		__assign_str(gpu_name, gpu_name);
		__entry->timestamp = timestamp;
		__entry->next_ctx_id = next_ctx_id;
		__entry->next_prio = next_prio;
		__entry->next_job_id = next_job_id;
	),

	TP_printk("gpu_name=%s ts=%llu.%06lu next_ctx_id=%lu next_prio=%ld "
		"next_job_id=%lu",
		__get_str(gpu_name),
		(unsigned long long)show_secs_from_ns(__entry->timestamp),
		(unsigned long)show_usecs_from_ns(__entry->timestamp),
		(unsigned long)__entry->next_ctx_id,
		(long)__entry->next_prio,
		(unsigned long)__entry->next_job_id)
);
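
/*
 * Example usage (illustrative only; the gpu_name string, the timestamp
 * source, and the next_* values are hypothetical driver state rather than
 * anything defined in this header):
 *
 *	trace_gpu_sched_switch("3D", gpu_timestamp_ns,
 *			       next->ctx_id, next->prio, next->job_id);
 *
 * A switch to idle on the block would pass 0 for next_ctx_id.
 */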

/*
 * The gpu_job_enqueue event indicates that a batch of work has been queued up
 * to be processed by the GPU.  This event is not intended to indicate that
 * the batch of work has been submitted to the GPU hardware, but rather that
 * it has been submitted to the GPU kernel driver.
 *
 * This event should be traced on the thread that initiated the work being
 * queued.  For example, if a batch of work is submitted to the kernel by a
 * userland thread, the event should be traced on that thread.
 *
 * The ctx_id field identifies the GPU context in which the batch of work
 * being queued is to be run.
 *
 * The job_id field identifies the batch of work being queued within the given
 * GPU context.  The first batch of work submitted for a given GPU context
 * should have a job_id of 0, and each subsequent batch of work should
 * increment the job_id by 1.
 *
 * The type field identifies the type of the job being enqueued.  The job
 * types may be different for different GPU hardware.  For example, a GPU may
 * differentiate between "2D", "3D", and "compute" jobs.
 */
TRACE_EVENT(gpu_job_enqueue,

	TP_PROTO(u32 ctx_id, u32 job_id, const char *type),

	TP_ARGS(ctx_id, job_id, type),

	TP_STRUCT__entry(
		__field(        u32,            ctx_id          )
		__field(        u32,            job_id          )
		__string(       type,           type            )
	),

	TP_fast_assign(
		__entry->ctx_id = ctx_id;
		__entry->job_id = job_id;
		__assign_str(type, type);
	),

	TP_printk("ctx_id=%lu job_id=%lu type=%s",
		(unsigned long)__entry->ctx_id,
		(unsigned long)__entry->job_id,
		__get_str(type))
);
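
/*
 * Example usage (illustrative only; ctx and its per-context job counter are
 * hypothetical driver state), traced on the thread that submitted the work:
 *
 *	trace_gpu_job_enqueue(ctx->id, ctx->next_job_id++, "3D");
 */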

#undef show_secs_from_ns
#undef show_usecs_from_ns

#endif /* _TRACE_GPU_H */

/* This part must be outside protection */
#include <trace/define_trace.h>