aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/ftrace.c326
-rw-r--r--kernel/trace/ring_buffer.c7
-rw-r--r--kernel/trace/trace.c456
-rw-r--r--kernel/trace/trace.h5
-rw-r--r--kernel/trace/trace_event_perf.c2
-rw-r--r--kernel/trace/trace_events.c321
-rw-r--r--kernel/trace/trace_events_filter.c17
-rw-r--r--kernel/trace/trace_export.c7
-rw-r--r--kernel/trace/trace_irqsoff.c4
-rw-r--r--kernel/trace/trace_kprobe.c42
-rw-r--r--kernel/trace/trace_syscalls.c21
-rw-r--r--kernel/trace/trace_uprobe.c55
12 files changed, 941 insertions, 322 deletions
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6c508ff33c6..4b93b841225 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -85,6 +85,8 @@ int function_trace_stop __read_mostly;
/* Current function tracing op */
struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
+/* What to set function_trace_op to */
+static struct ftrace_ops *set_function_trace_op;
/* List for set_ftrace_pid's pids. */
LIST_HEAD(ftrace_pids);
@@ -278,6 +280,29 @@ static void update_global_ops(void)
global_ops.func = func;
}
+static void ftrace_sync(struct work_struct *work)
+{
+ /*
+ * This function is just a stub to implement a hard force
+ * of synchronize_sched(). This requires synchronizing
+ * tasks even in userspace and idle.
+ *
+ * Yes, function tracing is rude.
+ */
+}
+
+static void ftrace_sync_ipi(void *data)
+{
+ /* Probably not needed, but do it anyway */
+ smp_rmb();
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static void update_function_graph_func(void);
+#else
+static inline void update_function_graph_func(void) { }
+#endif
+
static void update_ftrace_function(void)
{
ftrace_func_t func;
@@ -296,16 +321,61 @@ static void update_ftrace_function(void)
!FTRACE_FORCE_LIST_FUNC)) {
/* Set the ftrace_ops that the arch callback uses */
if (ftrace_ops_list == &global_ops)
- function_trace_op = ftrace_global_list;
+ set_function_trace_op = ftrace_global_list;
else
- function_trace_op = ftrace_ops_list;
+ set_function_trace_op = ftrace_ops_list;
func = ftrace_ops_list->func;
} else {
/* Just use the default ftrace_ops */
- function_trace_op = &ftrace_list_end;
+ set_function_trace_op = &ftrace_list_end;
func = ftrace_ops_list_func;
}
+ /* If there's no change, then do nothing more here */
+ if (ftrace_trace_function == func)
+ return;
+
+ update_function_graph_func();
+
+ /*
+ * If we are using the list function, it doesn't care
+ * about the function_trace_ops.
+ */
+ if (func == ftrace_ops_list_func) {
+ ftrace_trace_function = func;
+ /*
+ * Don't even bother setting function_trace_ops,
+ * it would be racy to do so anyway.
+ */
+ return;
+ }
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+ /*
+ * For static tracing, we need to be a bit more careful.
+ * The function change takes affect immediately. Thus,
+ * we need to coorditate the setting of the function_trace_ops
+ * with the setting of the ftrace_trace_function.
+ *
+ * Set the function to the list ops, which will call the
+ * function we want, albeit indirectly, but it handles the
+ * ftrace_ops and doesn't depend on function_trace_op.
+ */
+ ftrace_trace_function = ftrace_ops_list_func;
+ /*
+ * Make sure all CPUs see this. Yes this is slow, but static
+ * tracing is slow and nasty to have enabled.
+ */
+ schedule_on_each_cpu(ftrace_sync);
+ /* Now all cpus are using the list ops. */
+ function_trace_op = set_function_trace_op;
+ /* Make sure the function_trace_op is visible on all CPUs */
+ smp_wmb();
+ /* Nasty way to force a rmb on all cpus */
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
+ /* OK, we are all set to update the ftrace_trace_function now! */
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
ftrace_trace_function = func;
}
@@ -367,9 +437,6 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,
static int __register_ftrace_function(struct ftrace_ops *ops)
{
- if (unlikely(ftrace_disabled))
- return -ENODEV;
-
if (FTRACE_WARN_ON(ops == &global_ops))
return -EINVAL;
@@ -417,9 +484,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
int ret;
- if (ftrace_disabled)
- return -ENODEV;
-
if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
return -EBUSY;
@@ -434,16 +498,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
ret = remove_ftrace_list_ops(&ftrace_control_list,
&control_ops, ops);
- if (!ret) {
- /*
- * The ftrace_ops is now removed from the list,
- * so there'll be no new users. We must ensure
- * all current users are done before we free
- * the control data.
- */
- synchronize_sched();
- control_ops_free(ops);
- }
} else
ret = remove_ftrace_ops(&ftrace_ops_list, ops);
@@ -453,13 +507,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
if (ftrace_enabled)
update_ftrace_function();
- /*
- * Dynamic ops may be freed, we must make sure that all
- * callers are done before leaving this function.
- */
- if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
- synchronize_sched();
-
return 0;
}
@@ -756,7 +803,7 @@ static int ftrace_profile_init(void)
int cpu;
int ret = 0;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
ret = ftrace_profile_init_cpu(cpu);
if (ret)
break;
@@ -1416,12 +1463,22 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
* the hashes are freed with call_rcu_sched().
*/
static int
-ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
{
struct ftrace_hash *filter_hash;
struct ftrace_hash *notrace_hash;
int ret;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ /*
+ * There's a small race when adding ops that the ftrace handler
+ * that wants regs, may be called without them. We can not
+ * allow that handler to be called if regs is NULL.
+ */
+ if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS))
+ return 0;
+#endif
+
filter_hash = rcu_dereference_raw_notrace(ops->filter_hash);
notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash);
@@ -1948,8 +2005,14 @@ void ftrace_modify_all_code(int command)
else if (command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
- if (command & FTRACE_UPDATE_TRACE_FUNC)
+ if (command & FTRACE_UPDATE_TRACE_FUNC) {
+ function_trace_op = set_function_trace_op;
+ smp_wmb();
+ /* If irqs are disabled, we are in stop machine */
+ if (!irqs_disabled())
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
ftrace_update_ftrace_func(ftrace_trace_function);
+ }
if (command & FTRACE_START_FUNC_RET)
ftrace_enable_ftrace_graph_caller();
@@ -2038,10 +2101,15 @@ static void ftrace_startup_enable(int command)
static int ftrace_startup(struct ftrace_ops *ops, int command)
{
bool hash_enable = true;
+ int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
+ ret = __register_ftrace_function(ops);
+ if (ret)
+ return ret;
+
ftrace_start_up++;
command |= FTRACE_UPDATE_CALLS;
@@ -2063,12 +2131,17 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
return 0;
}
-static void ftrace_shutdown(struct ftrace_ops *ops, int command)
+static int ftrace_shutdown(struct ftrace_ops *ops, int command)
{
bool hash_disable = true;
+ int ret;
if (unlikely(ftrace_disabled))
- return;
+ return -ENODEV;
+
+ ret = __unregister_ftrace_function(ops);
+ if (ret)
+ return ret;
ftrace_start_up--;
/*
@@ -2102,10 +2175,42 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
command |= FTRACE_UPDATE_TRACE_FUNC;
}
- if (!command || !ftrace_enabled)
- return;
+ if (!command || !ftrace_enabled) {
+ /*
+ * If these are control ops, they still need their
+ * per_cpu field freed. Since, function tracing is
+ * not currently active, we can just free them
+ * without synchronizing all CPUs.
+ */
+ if (ops->flags & FTRACE_OPS_FL_CONTROL)
+ control_ops_free(ops);
+ return 0;
+ }
ftrace_run_update_code(command);
+
+ /*
+ * Dynamic ops may be freed, we must make sure that all
+ * callers are done before leaving this function.
+ * The same goes for freeing the per_cpu data of the control
+ * ops.
+ *
+ * Again, normal synchronize_sched() is not good enough.
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
+ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) {
+ schedule_on_each_cpu(ftrace_sync);
+
+ if (ops->flags & FTRACE_OPS_FL_CONTROL)
+ control_ops_free(ops);
+ }
+
+ return 0;
}
static void ftrace_startup_sysctl(void)
@@ -2134,12 +2239,57 @@ static cycle_t ftrace_update_time;
static unsigned long ftrace_update_cnt;
unsigned long ftrace_update_tot_cnt;
-static int ops_traces_mod(struct ftrace_ops *ops)
+static inline int ops_traces_mod(struct ftrace_ops *ops)
{
- struct ftrace_hash *hash;
+ /*
+ * Filter_hash being empty will default to trace module.
+ * But notrace hash requires a test of individual module functions.
+ */
+ return ftrace_hash_empty(ops->filter_hash) &&
+ ftrace_hash_empty(ops->notrace_hash);
+}
- hash = ops->filter_hash;
- return ftrace_hash_empty(hash);
+/*
+ * Check if the current ops references the record.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static inline bool
+ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+ /* If ops isn't enabled, ignore it */
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return 0;
+
+ /* If ops traces all mods, we already accounted for it */
+ if (ops_traces_mod(ops))
+ return 0;
+
+ /* The function must be in the filter */
+ if (!ftrace_hash_empty(ops->filter_hash) &&
+ !ftrace_lookup_ip(ops->filter_hash, rec->ip))
+ return 0;
+
+ /* If in notrace hash, we ignore it too */
+ if (ftrace_lookup_ip(ops->notrace_hash, rec->ip))
+ return 0;
+
+ return 1;
+}
+
+static int referenced_filters(struct dyn_ftrace *rec)
+{
+ struct ftrace_ops *ops;
+ int cnt = 0;
+
+ for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
+ if (ops_references_rec(ops, rec))
+ cnt++;
+ }
+
+ return cnt;
}
static int ftrace_update_code(struct module *mod)
@@ -2148,6 +2298,7 @@ static int ftrace_update_code(struct module *mod)
struct dyn_ftrace *p;
cycle_t start, stop;
unsigned long ref = 0;
+ bool test = false;
int i;
/*
@@ -2161,9 +2312,12 @@ static int ftrace_update_code(struct module *mod)
for (ops = ftrace_ops_list;
ops != &ftrace_list_end; ops = ops->next) {
- if (ops->flags & FTRACE_OPS_FL_ENABLED &&
- ops_traces_mod(ops))
- ref++;
+ if (ops->flags & FTRACE_OPS_FL_ENABLED) {
+ if (ops_traces_mod(ops))
+ ref++;
+ else
+ test = true;
+ }
}
}
@@ -2173,12 +2327,16 @@ static int ftrace_update_code(struct module *mod)
for (pg = ftrace_new_pgs; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
+ int cnt = ref;
+
/* If something went wrong, bail without enabling anything */
if (unlikely(ftrace_disabled))
return -1;
p = &pg->records[i];
- p->flags = ref;
+ if (test)
+ cnt += referenced_filters(p);
+ p->flags = cnt;
/*
* Do the initial record conversion from mcount jump
@@ -2198,7 +2356,7 @@ static int ftrace_update_code(struct module *mod)
* conversion puts the module to the correct state, thus
* passing the ftrace_make_call check.
*/
- if (ftrace_start_up && ref) {
+ if (ftrace_start_up && cnt) {
int failed = __ftrace_replace_code(p, 1);
if (failed)
ftrace_bug(failed, p->ip);
@@ -2957,16 +3115,13 @@ static void __enable_ftrace_function_probe(void)
if (i == FTRACE_FUNC_HASHSIZE)
return;
- ret = __register_ftrace_function(&trace_probe_ops);
- if (!ret)
- ret = ftrace_startup(&trace_probe_ops, 0);
+ ret = ftrace_startup(&trace_probe_ops, 0);
ftrace_probe_registered = 1;
}
static void __disable_ftrace_function_probe(void)
{
- int ret;
int i;
if (!ftrace_probe_registered)
@@ -2979,9 +3134,7 @@ static void __disable_ftrace_function_probe(void)
}
/* no more funcs left */
- ret = __unregister_ftrace_function(&trace_probe_ops);
- if (!ret)
- ftrace_shutdown(&trace_probe_ops, 0);
+ ftrace_shutdown(&trace_probe_ops, 0);
ftrace_probe_registered = 0;
}
@@ -4178,17 +4331,20 @@ core_initcall(ftrace_nodyn_init);
static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
static inline void ftrace_startup_enable(int command) { }
/* Keep as macros so we do not need to define the commands */
-# define ftrace_startup(ops, command) \
- ({ \
- (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
- 0; \
+# define ftrace_startup(ops, command) \
+ ({ \
+ int ___ret = __register_ftrace_function(ops); \
+ if (!___ret) \
+ (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
+ ___ret; \
})
-# define ftrace_shutdown(ops, command) do { } while (0)
+# define ftrace_shutdown(ops, command) __unregister_ftrace_function(ops)
+
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
static inline int
-ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
{
return 1;
}
@@ -4211,7 +4367,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
do_for_each_ftrace_op(op, ftrace_control_list) {
if (!(op->flags & FTRACE_OPS_FL_STUB) &&
!ftrace_function_local_disabled(op) &&
- ftrace_ops_test(op, ip))
+ ftrace_ops_test(op, ip, regs))
op->func(ip, parent_ip, op, regs);
} while_for_each_ftrace_op(op);
trace_recursion_clear(TRACE_CONTROL_BIT);
@@ -4244,7 +4400,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
*/
preempt_disable_notrace();
do_for_each_ftrace_op(op, ftrace_ops_list) {
- if (ftrace_ops_test(op, ip))
+ if (ftrace_ops_test(op, ip, regs))
op->func(ip, parent_ip, op, regs);
} while_for_each_ftrace_op(op);
preempt_enable_notrace();
@@ -4583,9 +4739,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
mutex_lock(&ftrace_lock);
- ret = __register_ftrace_function(ops);
- if (!ret)
- ret = ftrace_startup(ops, 0);
+ ret = ftrace_startup(ops, 0);
mutex_unlock(&ftrace_lock);
@@ -4604,9 +4758,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
int ret;
mutex_lock(&ftrace_lock);
- ret = __unregister_ftrace_function(ops);
- if (!ret)
- ftrace_shutdown(ops, 0);
+ ret = ftrace_shutdown(ops, 0);
mutex_unlock(&ftrace_lock);
return ret;
@@ -4666,6 +4818,7 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
trace_func_graph_ret_t ftrace_graph_return =
(trace_func_graph_ret_t)ftrace_stub;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
+static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
@@ -4800,6 +4953,37 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
return NOTIFY_DONE;
}
+/* Just a place holder for function graph */
+static struct ftrace_ops fgraph_ops __read_mostly = {
+ .func = ftrace_stub,
+ .flags = FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL |
+ FTRACE_OPS_FL_RECURSION_SAFE,
+};
+
+static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
+{
+ if (!ftrace_ops_test(&global_ops, trace->func, NULL))
+ return 0;
+ return __ftrace_graph_entry(trace);
+}
+
+/*
+ * The function graph tracer should only trace the functions defined
+ * by set_ftrace_filter and set_ftrace_notrace. If another function
+ * tracer ops is registered, the graph tracer requires testing the
+ * function against the global ops, and not just trace any function
+ * that any ftrace_ops registered.
+ */
+static void update_function_graph_func(void)
+{
+ if (ftrace_ops_list == &ftrace_list_end ||
+ (ftrace_ops_list == &global_ops &&
+ global_ops.next == &ftrace_list_end))
+ ftrace_graph_entry = __ftrace_graph_entry;
+ else
+ ftrace_graph_entry = ftrace_graph_entry_test;
+}
+
int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
{
@@ -4824,9 +5008,18 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
}
ftrace_graph_return = retfunc;
- ftrace_graph_entry = entryfunc;
- ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
+ /*
+ * Update the indirect function to the entryfunc, and the
+ * function that gets called to the entry_test first. Then
+ * call the update fgraph entry function to determine if
+ * the entryfunc should be called directly or not.
+ */
+ __ftrace_graph_entry = entryfunc;
+ ftrace_graph_entry = ftrace_graph_entry_test;
+ update_function_graph_func();
+
+ ret = ftrace_startup(&fgraph_ops, FTRACE_START_FUNC_RET);
out:
mutex_unlock(&ftrace_lock);
@@ -4843,7 +5036,8 @@ void unregister_ftrace_graph(void)
ftrace_graph_active--;
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
- ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET);
+ __ftrace_graph_entry = ftrace_graph_entry_stub;
+ ftrace_shutdown(&fgraph_ops, FTRACE_STOP_FUNC_RET);
unregister_pm_notifier(&ftrace_suspend_notifier);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e444ff88f0a..fd12cc56371 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2396,6 +2396,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
write &= RB_WRITE_MASK;
tail = write - length;
+ /*
+ * If this is the first commit on the page, then it has the same
+ * timestamp as the page itself.
+ */
+ if (!tail)
+ delta = 0;
+
/* See if we shot pass the end of this buffer page */
if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e71a8be4a6e..6dbdf277c8f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -193,6 +193,37 @@ static struct trace_array global_trace;
LIST_HEAD(ftrace_trace_arrays);
+int trace_array_get(struct trace_array *this_tr)
+{
+ struct trace_array *tr;
+ int ret = -ENODEV;
+
+ mutex_lock(&trace_types_lock);
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr == this_tr) {
+ tr->ref++;
+ ret = 0;
+ break;
+ }
+ }
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
+
+static void __trace_array_put(struct trace_array *this_tr)
+{
+ WARN_ON(!this_tr->ref);
+ this_tr->ref--;
+}
+
+void trace_array_put(struct trace_array *this_tr)
+{
+ mutex_lock(&trace_types_lock);
+ __trace_array_put(this_tr);
+ mutex_unlock(&trace_types_lock);
+}
+
int filter_current_check_discard(struct ring_buffer *buffer,
struct ftrace_event_call *call, void *rec,
struct ring_buffer_event *event)
@@ -201,23 +232,43 @@ int filter_current_check_discard(struct ring_buffer *buffer,
}
EXPORT_SYMBOL_GPL(filter_current_check_discard);
-cycle_t ftrace_now(int cpu)
+cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
u64 ts;
/* Early boot up does not have a buffer yet */
- if (!global_trace.trace_buffer.buffer)
+ if (!buf->buffer)
return trace_clock_local();
- ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu);
- ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts);
+ ts = ring_buffer_time_stamp(buf->buffer, cpu);
+ ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
return ts;
}
+cycle_t ftrace_now(int cpu)
+{
+ return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
+}
+
+/**
+ * tracing_is_enabled - Show if global_trace has been disabled
+ *
+ * Shows if the global trace has been enabled or not. It uses the
+ * mirror flag "buffer_disabled" to be used in fast paths such as for
+ * the irqsoff tracer. But it may be inaccurate due to races. If you
+ * need to know the accurate state, use tracing_is_on() which is a little
+ * slower, but accurate.
+ */
int tracing_is_enabled(void)
{
- return tracing_is_on();
+ /*
+ * For quick access (irqsoff uses this in fast path), just
+ * return the mirror variable of the state of the ring buffer.
+ * It's a little racy, but we don't really care.
+ */
+ smp_rmb();
+ return !global_trace.buffer_disabled;
}
/*
@@ -240,7 +291,7 @@ static struct tracer *trace_types __read_mostly;
/*
* trace_types_lock is used to protect the trace_types list.
*/
-static DEFINE_MUTEX(trace_types_lock);
+DEFINE_MUTEX(trace_types_lock);
/*
* serialize the access of the ring buffer
@@ -330,6 +381,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
+void tracer_tracing_on(struct trace_array *tr)
+{
+ if (tr->trace_buffer.buffer)
+ ring_buffer_record_on(tr->trace_buffer.buffer);
+ /*
+ * This flag is looked at when buffers haven't been allocated
+ * yet, or by some tracers (like irqsoff), that just want to
+ * know if the ring buffer has been disabled, but it can handle
+ * races of where it gets disabled but we still do a record.
+ * As the check is in the fast path of the tracers, it is more
+ * important to be fast than accurate.
+ */
+ tr->buffer_disabled = 0;
+ /* Make the flag seen by readers */
+ smp_wmb();
+}
+
/**
* tracing_on - enable tracing buffers
*
@@ -338,15 +406,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
*/
void tracing_on(void)
{
- if (global_trace.trace_buffer.buffer)
- ring_buffer_record_on(global_trace.trace_buffer.buffer);
- /*
- * This flag is only looked at when buffers haven't been
- * allocated yet. We don't really care about the race
- * between setting this flag and actually turning
- * on the buffer.
- */
- global_trace.buffer_disabled = 0;
+ tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);
@@ -364,6 +424,9 @@ int __trace_puts(unsigned long ip, const char *str, int size)
unsigned long irq_flags;
int alloc;
+ if (unlikely(tracing_selftest_running || tracing_disabled))
+ return 0;
+
alloc = sizeof(*entry) + size + 2; /* possible \n added */
local_save_flags(irq_flags);
@@ -404,6 +467,9 @@ int __trace_bputs(unsigned long ip, const char *str)
unsigned long irq_flags;
int size = sizeof(struct bputs_entry);
+ if (unlikely(tracing_selftest_running || tracing_disabled))
+ return 0;
+
local_save_flags(irq_flags);
buffer = global_trace.trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
@@ -540,6 +606,23 @@ void tracing_snapshot_alloc(void)
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
#endif /* CONFIG_TRACER_SNAPSHOT */
+void tracer_tracing_off(struct trace_array *tr)
+{
+ if (tr->trace_buffer.buffer)
+ ring_buffer_record_off(tr->trace_buffer.buffer);
+ /*
+ * This flag is looked at when buffers haven't been allocated
+ * yet, or by some tracers (like irqsoff), that just want to
+ * know if the ring buffer has been disabled, but it can handle
+ * races of where it gets disabled but we still do a record.
+ * As the check is in the fast path of the tracers, it is more
+ * important to be fast than accurate.
+ */
+ tr->buffer_disabled = 1;
+ /* Make the flag seen by readers */
+ smp_wmb();
+}
+
/**
* tracing_off - turn off tracing buffers
*
@@ -550,26 +633,29 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
*/
void tracing_off(void)
{
- if (global_trace.trace_buffer.buffer)
- ring_buffer_record_off(global_trace.trace_buffer.buffer);
- /*
- * This flag is only looked at when buffers haven't been
- * allocated yet. We don't really care about the race
- * between setting this flag and actually turning
- * on the buffer.
- */
- global_trace.buffer_disabled = 1;
+ tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
/**
+ * tracer_tracing_is_on - show real state of ring buffer enabled
+ * @tr : the trace array to know if ring buffer is enabled
+ *
+ * Shows real state of the ring buffer if it is enabled or not.
+ */
+int tracer_tracing_is_on(struct trace_array *tr)
+{
+ if (tr->trace_buffer.buffer)
+ return ring_buffer_record_is_on(tr->trace_buffer.buffer);
+ return !tr->buffer_disabled;
+}
+
+/**
* tracing_is_on - show state of ring buffers enabled
*/
int tracing_is_on(void)
{
- if (global_trace.trace_buffer.buffer)
- return ring_buffer_record_is_on(global_trace.trace_buffer.buffer);
- return !global_trace.buffer_disabled;
+ return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
@@ -746,9 +832,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
if (isspace(ch)) {
parser->buffer[parser->idx] = 0;
parser->cont = false;
- } else {
+ } else if (parser->idx < parser->size - 1) {
parser->cont = true;
parser->buffer[parser->idx++] = ch;
+ } else {
+ ret = -EINVAL;
+ goto out;
}
*ppos += read;
@@ -1119,7 +1208,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf)
/* Make sure all commits have finished */
synchronize_sched();
- buf->time_start = ftrace_now(buf->cpu);
+ buf->time_start = buffer_ftrace_now(buf, buf->cpu);
for_each_online_cpu(cpu)
ring_buffer_reset_cpu(buffer, cpu);
@@ -1127,23 +1216,17 @@ void tracing_reset_online_cpus(struct trace_buffer *buf)
ring_buffer_record_enable(buffer);
}
-void tracing_reset_current(int cpu)
-{
- tracing_reset(&global_trace.trace_buffer, cpu);
-}
-
+/* Must have trace_types_lock held */
void tracing_reset_all_online_cpus(void)
{
struct trace_array *tr;
- mutex_lock(&trace_types_lock);
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
tracing_reset_online_cpus(&tr->trace_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
tracing_reset_online_cpus(&tr->max_buffer);
#endif
}
- mutex_unlock(&trace_types_lock);
}
#define SAVED_CMDLINES 128
@@ -2760,6 +2843,17 @@ static int s_show(struct seq_file *m, void *v)
return 0;
}
+/*
+ * Should be used after trace_array_get(), trace_types_lock
+ * ensures that i_cdev was already initialized.
+ */
+static inline int tracing_get_cpu(struct inode *inode)
+{
+ if (inode->i_cdev) /* See trace_create_cpu_file() */
+ return (long)inode->i_cdev - 1;
+ return RING_BUFFER_ALL_CPUS;
+}
+
static const struct seq_operations tracer_seq_ops = {
.start = s_start,
.next = s_next,
@@ -2770,8 +2864,7 @@ static const struct seq_operations tracer_seq_ops = {
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
- struct trace_cpu *tc = inode->i_private;
- struct trace_array *tr = tc->tr;
+ struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
int cpu;
@@ -2812,8 +2905,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
iter->trace_buffer = &tr->trace_buffer;
iter->snapshot = snapshot;
iter->pos = -1;
+ iter->cpu_file = tracing_get_cpu(inode);
mutex_init(&iter->mutex);
- iter->cpu_file = tc->cpu;
/* Notify the tracer early; before we stop tracing. */
if (iter->trace && iter->trace->open)
@@ -2850,8 +2943,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
tracing_iter_reset(iter, cpu);
}
- tr->ref++;
-
mutex_unlock(&trace_types_lock);
return iter;
@@ -2874,24 +2965,41 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
return 0;
}
+/*
+ * Open and update trace_array ref count.
+ * Must have the current trace_array passed to it.
+ */
+int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+{
+ struct trace_array *tr = inode->i_private;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
+ filp->private_data = inode->i_private;
+
+ return 0;
+}
+
static int tracing_release(struct inode *inode, struct file *file)
{
+ struct trace_array *tr = inode->i_private;
struct seq_file *m = file->private_data;
struct trace_iterator *iter;
- struct trace_array *tr;
int cpu;
- if (!(file->f_mode & FMODE_READ))
+ if (!(file->f_mode & FMODE_READ)) {
+ trace_array_put(tr);
return 0;
+ }
+ /* Writes do not use seq_file */
iter = m->private;
- tr = iter->tr;
-
mutex_lock(&trace_types_lock);
- WARN_ON(!tr->ref);
- tr->ref--;
-
for_each_tracing_cpu(cpu) {
if (iter->buffer_iter[cpu])
ring_buffer_read_finish(iter->buffer_iter[cpu]);
@@ -2903,6 +3011,9 @@ static int tracing_release(struct inode *inode, struct file *file)
if (!iter->snapshot)
/* reenable tracing if it was previously enabled */
tracing_start_tr(tr);
+
+ __trace_array_put(tr);
+
mutex_unlock(&trace_types_lock);
mutex_destroy(&iter->mutex);
@@ -2910,24 +3021,44 @@ static int tracing_release(struct inode *inode, struct file *file)
kfree(iter->trace);
kfree(iter->buffer_iter);
seq_release_private(inode, file);
+
return 0;
}
+static int tracing_release_generic_tr(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+ return 0;
+}
+
+static int tracing_single_release_tr(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+
+ return single_release(inode, file);
+}
+
static int tracing_open(struct inode *inode, struct file *file)
{
+ struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
int ret = 0;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
/* If this file was open for write, then erase contents */
- if ((file->f_mode & FMODE_WRITE) &&
- (file->f_flags & O_TRUNC)) {
- struct trace_cpu *tc = inode->i_private;
- struct trace_array *tr = tc->tr;
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ int cpu = tracing_get_cpu(inode);
- if (tc->cpu == RING_BUFFER_ALL_CPUS)
+ if (cpu == RING_BUFFER_ALL_CPUS)
tracing_reset_online_cpus(&tr->trace_buffer);
else
- tracing_reset(&tr->trace_buffer, tc->cpu);
+ tracing_reset(&tr->trace_buffer, cpu);
}
if (file->f_mode & FMODE_READ) {
@@ -2937,6 +3068,10 @@ static int tracing_open(struct inode *inode, struct file *file)
else if (trace_flags & TRACE_ITER_LATENCY_FMT)
iter->iter_flags |= TRACE_FILE_LAT_FMT;
}
+
+ if (ret < 0)
+ trace_array_put(tr);
+
return ret;
}
@@ -3293,17 +3428,27 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
static int tracing_trace_options_open(struct inode *inode, struct file *file)
{
+ struct trace_array *tr = inode->i_private;
+ int ret;
+
if (tracing_disabled)
return -ENODEV;
- return single_open(file, tracing_trace_options_show, inode->i_private);
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
+ ret = single_open(file, tracing_trace_options_show, inode->i_private);
+ if (ret < 0)
+ trace_array_put(tr);
+
+ return ret;
}
static const struct file_operations tracing_iter_fops = {
.open = tracing_trace_options_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = tracing_single_release_tr,
.write = tracing_trace_options_write,
};
@@ -3783,20 +3928,23 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
- struct trace_cpu *tc = inode->i_private;
- struct trace_array *tr = tc->tr;
+ struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
int ret = 0;
if (tracing_disabled)
return -ENODEV;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
mutex_lock(&trace_types_lock);
/* create a buffer to store the information to pass to userspace */
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter) {
ret = -ENOMEM;
+ __trace_array_put(tr);
goto out;
}
@@ -3826,9 +3974,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
if (trace_clocks[tr->clock_id].in_ns)
iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
- iter->cpu_file = tc->cpu;
- iter->tr = tc->tr;
- iter->trace_buffer = &tc->tr->trace_buffer;
+ iter->tr = tr;
+ iter->trace_buffer = &tr->trace_buffer;
+ iter->cpu_file = tracing_get_cpu(inode);
mutex_init(&iter->mutex);
filp->private_data = iter;
@@ -3843,6 +3991,7 @@ out:
fail:
kfree(iter->trace);
kfree(iter);
+ __trace_array_put(tr);
mutex_unlock(&trace_types_lock);
return ret;
}
@@ -3850,6 +3999,7 @@ fail:
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
struct trace_iterator *iter = file->private_data;
+ struct trace_array *tr = inode->i_private;
mutex_lock(&trace_types_lock);
@@ -3863,6 +4013,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
kfree(iter->trace);
kfree(iter);
+ trace_array_put(tr);
+
return 0;
}
@@ -3939,7 +4091,7 @@ static int tracing_wait_pipe(struct file *filp)
*
* iter->pos will be 0 if we haven't read anything.
*/
- if (!tracing_is_enabled() && iter->pos)
+ if (!tracing_is_on() && iter->pos)
break;
}
@@ -4000,6 +4152,7 @@ waitagain:
memset(&iter->seq, 0,
sizeof(struct trace_iterator) -
offsetof(struct trace_iterator, seq));
+ cpumask_clear(iter->started);
iter->pos = -1;
trace_event_read_lock();
@@ -4200,15 +4353,16 @@ static ssize_t
tracing_entries_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct trace_cpu *tc = filp->private_data;
- struct trace_array *tr = tc->tr;
+ struct inode *inode = file_inode(filp);
+ struct trace_array *tr = inode->i_private;
+ int cpu = tracing_get_cpu(inode);
char buf[64];
int r = 0;
ssize_t ret;
mutex_lock(&trace_types_lock);
- if (tc->cpu == RING_BUFFER_ALL_CPUS) {
+ if (cpu == RING_BUFFER_ALL_CPUS) {
int cpu, buf_size_same;
unsigned long size;
@@ -4235,7 +4389,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
} else
r = sprintf(buf, "X\n");
} else
- r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10);
+ r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
mutex_unlock(&trace_types_lock);
@@ -4247,7 +4401,8 @@ static ssize_t
tracing_entries_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct trace_cpu *tc = filp->private_data;
+ struct inode *inode = file_inode(filp);
+ struct trace_array *tr = inode->i_private;
unsigned long val;
int ret;
@@ -4261,8 +4416,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
/* value is in KB */
val <<= 10;
-
- ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu);
+ ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
if (ret < 0)
return ret;
@@ -4316,10 +4470,12 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
/* disable tracing ? */
if (trace_flags & TRACE_ITER_STOP_ON_FREE)
- tracing_off();
+ tracer_tracing_off(tr);
/* resize the ring buffer to 0 */
tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
+ trace_array_put(tr);
+
return 0;
}
@@ -4328,6 +4484,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
unsigned long addr = (unsigned long)ubuf;
+ struct trace_array *tr = filp->private_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct print_entry *entry;
@@ -4387,7 +4544,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
local_save_flags(irq_flags);
size = sizeof(*entry) + cnt + 2; /* possible \n added */
- buffer = global_trace.trace_buffer.buffer;
+ buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
irq_flags, preempt_count());
if (!event) {
@@ -4478,12 +4635,12 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
* New clock may not be consistent with the previous clock.
* Reset the buffer so that it doesn't have incomparable timestamps.
*/
- tracing_reset_online_cpus(&global_trace.trace_buffer);
+ tracing_reset_online_cpus(&tr->trace_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
- tracing_reset_online_cpus(&global_trace.max_buffer);
+ tracing_reset_online_cpus(&tr->max_buffer);
#endif
mutex_unlock(&trace_types_lock);
@@ -4495,10 +4652,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
static int tracing_clock_open(struct inode *inode, struct file *file)
{
+ struct trace_array *tr = inode->i_private;
+ int ret;
+
if (tracing_disabled)
return -ENODEV;
- return single_open(file, tracing_clock_show, inode->i_private);
+ if (trace_array_get(tr))
+ return -ENODEV;
+
+ ret = single_open(file, tracing_clock_show, inode->i_private);
+ if (ret < 0)
+ trace_array_put(tr);
+
+ return ret;
}
struct ftrace_buffer_info {
@@ -4510,31 +4677,40 @@ struct ftrace_buffer_info {
#ifdef CONFIG_TRACER_SNAPSHOT
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
- struct trace_cpu *tc = inode->i_private;
+ struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
struct seq_file *m;
int ret = 0;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
if (file->f_mode & FMODE_READ) {
iter = __tracing_open(inode, file, true);
if (IS_ERR(iter))
ret = PTR_ERR(iter);
} else {
/* Writes still need the seq_file to hold the private data */
+ ret = -ENOMEM;
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (!m)
- return -ENOMEM;
+ goto out;
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter) {
kfree(m);
- return -ENOMEM;
+ goto out;
}
- iter->tr = tc->tr;
- iter->trace_buffer = &tc->tr->max_buffer;
- iter->cpu_file = tc->cpu;
+ ret = 0;
+
+ iter->tr = tr;
+ iter->trace_buffer = &tr->max_buffer;
+ iter->cpu_file = tracing_get_cpu(inode);
m->private = iter;
file->private_data = m;
}
+out:
+ if (ret < 0)
+ trace_array_put(tr);
return ret;
}
@@ -4616,9 +4792,12 @@ out:
static int tracing_snapshot_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
+ int ret;
+
+ ret = tracing_release(inode, file);
if (file->f_mode & FMODE_READ)
- return tracing_release(inode, file);
+ return ret;
/* If write only, the seq_file is just a stub */
if (m)
@@ -4684,34 +4863,38 @@ static const struct file_operations tracing_pipe_fops = {
};
static const struct file_operations tracing_entries_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = tracing_entries_read,
.write = tracing_entries_write,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
};
static const struct file_operations tracing_total_entries_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = tracing_total_entries_read,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
};
static const struct file_operations tracing_free_buffer_fops = {
+ .open = tracing_open_generic_tr,
.write = tracing_free_buffer_write,
.release = tracing_free_buffer_release,
};
static const struct file_operations tracing_mark_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.write = tracing_mark_write,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
};
static const struct file_operations trace_clock_fops = {
.open = tracing_clock_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = tracing_single_release_tr,
.write = tracing_clock_write,
};
@@ -4736,23 +4919,26 @@ static const struct file_operations snapshot_raw_fops = {
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
- struct trace_cpu *tc = inode->i_private;
- struct trace_array *tr = tc->tr;
+ struct trace_array *tr = inode->i_private;
struct ftrace_buffer_info *info;
+ int ret;
if (tracing_disabled)
return -ENODEV;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
+ if (!info) {
+ trace_array_put(tr);
return -ENOMEM;
+ }
mutex_lock(&trace_types_lock);
- tr->ref++;
-
info->iter.tr = tr;
- info->iter.cpu_file = tc->cpu;
+ info->iter.cpu_file = tracing_get_cpu(inode);
info->iter.trace = tr->current_trace;
info->iter.trace_buffer = &tr->trace_buffer;
info->spare = NULL;
@@ -4763,7 +4949,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
mutex_unlock(&trace_types_lock);
- return nonseekable_open(inode, filp);
+ ret = nonseekable_open(inode, filp);
+ if (ret < 0)
+ trace_array_put(tr);
+
+ return ret;
}
static unsigned int
@@ -4863,8 +5053,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
- WARN_ON(!iter->tr->ref);
- iter->tr->ref--;
+ __trace_array_put(iter->tr);
if (info->spare)
ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
@@ -5066,14 +5255,14 @@ static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
- struct trace_cpu *tc = filp->private_data;
- struct trace_array *tr = tc->tr;
+ struct inode *inode = file_inode(filp);
+ struct trace_array *tr = inode->i_private;
struct trace_buffer *trace_buf = &tr->trace_buffer;
+ int cpu = tracing_get_cpu(inode);
struct trace_seq *s;
unsigned long cnt;
unsigned long long t;
unsigned long usec_rem;
- int cpu = tc->cpu;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
@@ -5126,9 +5315,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
}
static const struct file_operations tracing_stats_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = tracing_stats_read,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
};
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -5317,10 +5507,20 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
return tr->percpu_dir;
}
+static struct dentry *
+trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
+ void *data, long cpu, const struct file_operations *fops)
+{
+ struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
+
+ if (ret) /* See tracing_get_cpu() */
+ ret->d_inode->i_cdev = (void *)(cpu + 1);
+ return ret;
+}
+
static void
tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
{
- struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
struct dentry *d_cpu;
char cpu_dir[30]; /* 30 characters should be more than enough */
@@ -5336,28 +5536,28 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
}
/* per cpu trace_pipe */
- trace_create_file("trace_pipe", 0444, d_cpu,
- (void *)&data->trace_cpu, &tracing_pipe_fops);
+ trace_create_cpu_file("trace_pipe", 0444, d_cpu,
+ tr, cpu, &tracing_pipe_fops);
/* per cpu trace */
- trace_create_file("trace", 0644, d_cpu,
- (void *)&data->trace_cpu, &tracing_fops);
+ trace_create_cpu_file("trace", 0644, d_cpu,
+ tr, cpu, &tracing_fops);
- trace_create_file("trace_pipe_raw", 0444, d_cpu,
- (void *)&data->trace_cpu, &tracing_buffers_fops);
+ trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
+ tr, cpu, &tracing_buffers_fops);
- trace_create_file("stats", 0444, d_cpu,
- (void *)&data->trace_cpu, &tracing_stats_fops);
+ trace_create_cpu_file("stats", 0444, d_cpu,
+ tr, cpu, &tracing_stats_fops);
- trace_create_file("buffer_size_kb", 0444, d_cpu,
- (void *)&data->trace_cpu, &tracing_entries_fops);
+ trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
+ tr, cpu, &tracing_entries_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
- trace_create_file("snapshot", 0644, d_cpu,
- (void *)&data->trace_cpu, &snapshot_fops);
+ trace_create_cpu_file("snapshot", 0644, d_cpu,
+ tr, cpu, &snapshot_fops);
- trace_create_file("snapshot_raw", 0444, d_cpu,
- (void *)&data->trace_cpu, &snapshot_raw_fops);
+ trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
+ tr, cpu, &snapshot_raw_fops);
#endif
}
@@ -5612,15 +5812,10 @@ rb_simple_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_array *tr = filp->private_data;
- struct ring_buffer *buffer = tr->trace_buffer.buffer;
char buf[64];
int r;
- if (buffer)
- r = ring_buffer_record_is_on(buffer);
- else
- r = 0;
-
+ r = tracer_tracing_is_on(tr);
r = sprintf(buf, "%d\n", r);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
@@ -5642,11 +5837,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
if (buffer) {
mutex_lock(&trace_types_lock);
if (val) {
- ring_buffer_record_on(buffer);
+ tracer_tracing_on(tr);
if (tr->current_trace->start)
tr->current_trace->start(tr);
} else {
- ring_buffer_record_off(buffer);
+ tracer_tracing_off(tr);
if (tr->current_trace->stop)
tr->current_trace->stop(tr);
}
@@ -5659,9 +5854,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
}
static const struct file_operations rb_simple_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = rb_simple_read,
.write = rb_simple_write,
+ .release = tracing_release_generic_tr,
.llseek = default_llseek,
};
@@ -5688,6 +5884,8 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
+ buf->tr = tr;
+
buf->buffer = ring_buffer_alloc(size, rb_flags);
if (!buf->buffer)
return -ENOMEM;
@@ -5775,8 +5973,10 @@ static int new_instance_create(const char *name)
goto out_free_tr;
ret = event_trace_add_tracer(tr->dir, tr);
- if (ret)
+ if (ret) {
+ debugfs_remove_recursive(tr->dir);
goto out_free_tr;
+ }
init_tracer_debugfs(tr, tr->dir);
@@ -5922,13 +6122,13 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
tr, &tracing_iter_fops);
trace_create_file("trace", 0644, d_tracer,
- (void *)&tr->trace_cpu, &tracing_fops);
+ tr, &tracing_fops);
trace_create_file("trace_pipe", 0444, d_tracer,
- (void *)&tr->trace_cpu, &tracing_pipe_fops);
+ tr, &tracing_pipe_fops);
trace_create_file("buffer_size_kb", 0644, d_tracer,
- (void *)&tr->trace_cpu, &tracing_entries_fops);
+ tr, &tracing_entries_fops);
trace_create_file("buffer_total_size_kb", 0444, d_tracer,
tr, &tracing_total_entries_fops);
@@ -5943,11 +6143,11 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
&trace_clock_fops);
trace_create_file("tracing_on", 0644, d_tracer,
- tr, &rb_simple_fops);
+ tr, &rb_simple_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
trace_create_file("snapshot", 0644, d_tracer,
- (void *)&tr->trace_cpu, &snapshot_fops);
+ tr, &snapshot_fops);
#endif
for_each_tracing_cpu(cpu)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 20572ed88c5..51b44483eb7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -224,6 +224,11 @@ enum {
extern struct list_head ftrace_trace_arrays;
+extern struct mutex trace_types_lock;
+
+extern int trace_array_get(struct trace_array *tr);
+extern void trace_array_put(struct trace_array *tr);
+
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 84b1e045fab..8354dc81ae6 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -26,7 +26,7 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
{
/* The ftrace function trace is allowed only for root. */
if (ftrace_event_is_function(tp_event) &&
- perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+ perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
return -EPERM;
/* No tracing, just counting, so no obvious leak */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 27963e2bf4b..001b349af93 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,12 +27,6 @@
DEFINE_MUTEX(event_mutex);
-DEFINE_MUTEX(event_storage_mutex);
-EXPORT_SYMBOL_GPL(event_storage_mutex);
-
-char event_storage[EVENT_STORAGE_SIZE];
-EXPORT_SYMBOL_GPL(event_storage);
-
LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_common_fields);
@@ -41,6 +35,23 @@ static LIST_HEAD(ftrace_common_fields);
static struct kmem_cache *field_cachep;
static struct kmem_cache *file_cachep;
+#define SYSTEM_FL_FREE_NAME (1 << 31)
+
+static inline int system_refcount(struct event_subsystem *system)
+{
+ return system->ref_count & ~SYSTEM_FL_FREE_NAME;
+}
+
+static int system_refcount_inc(struct event_subsystem *system)
+{
+ return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME;
+}
+
+static int system_refcount_dec(struct event_subsystem *system)
+{
+ return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME;
+}
+
/* Double loops, do not use break, only goto's work */
#define do_for_each_event_file(tr, file) \
list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
@@ -97,7 +108,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field = kmem_cache_alloc(field_cachep, GFP_TRACE);
if (!field)
- goto err;
+ return -ENOMEM;
field->name = name;
field->type = type;
@@ -114,11 +125,6 @@ static int __trace_define_field(struct list_head *head, const char *type,
list_add(&field->link, head);
return 0;
-
-err:
- kmem_cache_free(field_cachep, field);
-
- return -ENOMEM;
}
int trace_define_field(struct ftrace_event_call *call, const char *type,
@@ -349,8 +355,8 @@ static void __put_system(struct event_subsystem *system)
{
struct event_filter *filter = system->filter;
- WARN_ON_ONCE(system->ref_count == 0);
- if (--system->ref_count)
+ WARN_ON_ONCE(system_refcount(system) == 0);
+ if (system_refcount_dec(system))
return;
list_del(&system->list);
@@ -359,13 +365,15 @@ static void __put_system(struct event_subsystem *system)
kfree(filter->filter_string);
kfree(filter);
}
+ if (system->ref_count & SYSTEM_FL_FREE_NAME)
+ kfree(system->name);
kfree(system);
}
static void __get_system(struct event_subsystem *system)
{
- WARN_ON_ONCE(system->ref_count == 0);
- system->ref_count++;
+ WARN_ON_ONCE(system_refcount(system) == 0);
+ system_refcount_inc(system);
}
static void __get_system_dir(struct ftrace_subsystem_dir *dir)
@@ -379,7 +387,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir)
{
WARN_ON_ONCE(dir->ref_count == 0);
/* If the subsystem is about to be freed, the dir must be too */
- WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1);
+ WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
__put_system(dir->subsystem);
if (!--dir->ref_count)
@@ -393,17 +401,55 @@ static void put_system(struct ftrace_subsystem_dir *dir)
mutex_unlock(&event_mutex);
}
+static void remove_subsystem(struct ftrace_subsystem_dir *dir)
+{
+ if (!dir)
+ return;
+
+ if (!--dir->nr_events) {
+ debugfs_remove_recursive(dir->entry);
+ list_del(&dir->list);
+ __put_system_dir(dir);
+ }
+}
+
+static void *event_file_data(struct file *filp)
+{
+ return ACCESS_ONCE(file_inode(filp)->i_private);
+}
+
+static void remove_event_file_dir(struct ftrace_event_file *file)
+{
+ struct dentry *dir = file->dir;
+ struct dentry *child;
+
+ if (dir) {
+ spin_lock(&dir->d_lock); /* probably unneeded */
+ list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) {
+ if (child->d_inode) /* probably unneeded */
+ child->d_inode->i_private = NULL;
+ }
+ spin_unlock(&dir->d_lock);
+
+ debugfs_remove_recursive(dir);
+ }
+
+ list_del(&file->list);
+ remove_subsystem(file->system);
+ kmem_cache_free(file_cachep, file);
+}
+
/*
* __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
*/
-static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
- const char *sub, const char *event, int set)
+static int
+__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
+ const char *sub, const char *event, int set)
{
struct ftrace_event_file *file;
struct ftrace_event_call *call;
int ret = -EINVAL;
- mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
@@ -429,6 +475,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
ret = 0;
}
+
+ return ret;
+}
+
+static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
+ const char *sub, const char *event, int set)
+{
+ int ret;
+
+ mutex_lock(&event_mutex);
+ ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
mutex_unlock(&event_mutex);
return ret;
@@ -623,13 +680,23 @@ static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_file *file = filp->private_data;
+ struct ftrace_event_file *file;
+ unsigned long flags;
char *buf;
- if (file->flags & FTRACE_EVENT_FL_ENABLED) {
- if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)
+ mutex_lock(&event_mutex);
+ file = event_file_data(filp);
+ if (likely(file))
+ flags = file->flags;
+ mutex_unlock(&event_mutex);
+
+ if (!file)
+ return -ENODEV;
+
+ if (flags & FTRACE_EVENT_FL_ENABLED) {
+ if (flags & FTRACE_EVENT_FL_SOFT_DISABLED)
buf = "0*\n";
- else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
+ else if (flags & FTRACE_EVENT_FL_SOFT_MODE)
buf = "1*\n";
else
buf = "1\n";
@@ -643,13 +710,10 @@ static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_file *file = filp->private_data;
+ struct ftrace_event_file *file;
unsigned long val;
int ret;
- if (!file)
- return -EINVAL;
-
ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
if (ret)
return ret;
@@ -661,8 +725,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
switch (val) {
case 0:
case 1:
+ ret = -ENODEV;
mutex_lock(&event_mutex);
- ret = ftrace_event_enable_disable(file, val);
+ file = event_file_data(filp);
+ if (likely(file))
+ ret = ftrace_event_enable_disable(file, val);
mutex_unlock(&event_mutex);
break;
@@ -769,7 +836,7 @@ enum {
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ftrace_event_call *call = m->private;
+ struct ftrace_event_call *call = event_file_data(m->private);
struct ftrace_event_field *field;
struct list_head *common_head = &ftrace_common_fields;
struct list_head *head = trace_get_fields(call);
@@ -813,6 +880,11 @@ static void *f_start(struct seq_file *m, loff_t *pos)
loff_t l = 0;
void *p;
+ /* ->stop() is called even if ->start() fails */
+ mutex_lock(&event_mutex);
+ if (!event_file_data(m->private))
+ return ERR_PTR(-ENODEV);
+
/* Start by showing the header */
if (!*pos)
return (void *)FORMAT_HEADER;
@@ -827,7 +899,7 @@ static void *f_start(struct seq_file *m, loff_t *pos)
static int f_show(struct seq_file *m, void *v)
{
- struct ftrace_event_call *call = m->private;
+ struct ftrace_event_call *call = event_file_data(m->private);
struct ftrace_event_field *field;
const char *array_descriptor;
@@ -878,6 +950,7 @@ static int f_show(struct seq_file *m, void *v)
static void f_stop(struct seq_file *m, void *p)
{
+ mutex_unlock(&event_mutex);
}
static const struct seq_operations trace_format_seq_ops = {
@@ -889,7 +962,6 @@ static const struct seq_operations trace_format_seq_ops = {
static int trace_format_open(struct inode *inode, struct file *file)
{
- struct ftrace_event_call *call = inode->i_private;
struct seq_file *m;
int ret;
@@ -898,7 +970,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
return ret;
m = file->private_data;
- m->private = call;
+ m->private = file;
return 0;
}
@@ -906,19 +978,22 @@ static int trace_format_open(struct inode *inode, struct file *file)
static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
- struct ftrace_event_call *call = filp->private_data;
+ int id = (long)event_file_data(filp);
struct trace_seq *s;
int r;
if (*ppos)
return 0;
+ if (unlikely(!id))
+ return -ENODEV;
+
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
trace_seq_init(s);
- trace_seq_printf(s, "%d\n", call->event.type);
+ trace_seq_printf(s, "%d\n", id);
r = simple_read_from_buffer(ubuf, cnt, ppos,
s->buffer, s->len);
@@ -930,21 +1005,28 @@ static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_call *call;
struct trace_seq *s;
- int r;
+ int r = -ENODEV;
if (*ppos)
return 0;
s = kmalloc(sizeof(*s), GFP_KERNEL);
+
if (!s)
return -ENOMEM;
trace_seq_init(s);
- print_event_filter(call, s);
- r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+ mutex_lock(&event_mutex);
+ call = event_file_data(filp);
+ if (call)
+ print_event_filter(call, s);
+ mutex_unlock(&event_mutex);
+
+ if (call)
+ r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
kfree(s);
@@ -955,9 +1037,9 @@ static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_call *call;
char *buf;
- int err;
+ int err = -ENODEV;
if (cnt >= PAGE_SIZE)
return -EINVAL;
@@ -972,7 +1054,12 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
}
buf[cnt] = '\0';
- err = apply_event_filter(call, buf);
+ mutex_lock(&event_mutex);
+ call = event_file_data(filp);
+ if (call)
+ err = apply_event_filter(call, buf);
+ mutex_unlock(&event_mutex);
+
free_page((unsigned long) buf);
if (err < 0)
return err;
@@ -992,6 +1079,7 @@ static int subsystem_open(struct inode *inode, struct file *filp)
int ret;
/* Make sure the system still exists */
+ mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
list_for_each_entry(dir, &tr->systems, list) {
@@ -1007,6 +1095,7 @@ static int subsystem_open(struct inode *inode, struct file *filp)
}
exit_loop:
mutex_unlock(&event_mutex);
+ mutex_unlock(&trace_types_lock);
if (!system)
return -ENODEV;
@@ -1014,9 +1103,17 @@ static int subsystem_open(struct inode *inode, struct file *filp)
/* Some versions of gcc think dir can be uninitialized here */
WARN_ON(!dir);
+ /* Still need to increment the ref count of the system */
+ if (trace_array_get(tr) < 0) {
+ put_system(dir);
+ return -ENODEV;
+ }
+
ret = tracing_open_generic(inode, filp);
- if (ret < 0)
+ if (ret < 0) {
+ trace_array_put(tr);
put_system(dir);
+ }
return ret;
}
@@ -1027,16 +1124,23 @@ static int system_tr_open(struct inode *inode, struct file *filp)
struct trace_array *tr = inode->i_private;
int ret;
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
/* Make a temporary dir that has no system but points to tr */
dir = kzalloc(sizeof(*dir), GFP_KERNEL);
- if (!dir)
+ if (!dir) {
+ trace_array_put(tr);
return -ENOMEM;
+ }
dir->tr = tr;
ret = tracing_open_generic(inode, filp);
- if (ret < 0)
+ if (ret < 0) {
+ trace_array_put(tr);
kfree(dir);
+ }
filp->private_data = dir;
@@ -1047,6 +1151,8 @@ static int subsystem_release(struct inode *inode, struct file *file)
{
struct ftrace_subsystem_dir *dir = file->private_data;
+ trace_array_put(dir->tr);
+
/*
* If dir->subsystem is NULL, then this is a temporary
* descriptor that was made for a trace_array to enable
@@ -1143,6 +1249,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
static int ftrace_event_avail_open(struct inode *inode, struct file *file);
static int ftrace_event_set_open(struct inode *inode, struct file *file);
+static int ftrace_event_release(struct inode *inode, struct file *file);
static const struct seq_operations show_event_seq_ops = {
.start = t_start,
@@ -1170,7 +1277,7 @@ static const struct file_operations ftrace_set_event_fops = {
.read = seq_read,
.write = ftrace_event_write,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = ftrace_event_release,
};
static const struct file_operations ftrace_enable_fops = {
@@ -1188,7 +1295,6 @@ static const struct file_operations ftrace_event_format_fops = {
};
static const struct file_operations ftrace_event_id_fops = {
- .open = tracing_open_generic,
.read = event_id_read,
.llseek = default_llseek,
};
@@ -1247,6 +1353,15 @@ ftrace_event_open(struct inode *inode, struct file *file,
return ret;
}
+static int ftrace_event_release(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+
+ return seq_release(inode, file);
+}
+
static int
ftrace_event_avail_open(struct inode *inode, struct file *file)
{
@@ -1260,12 +1375,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file)
{
const struct seq_operations *seq_ops = &show_set_event_seq_ops;
struct trace_array *tr = inode->i_private;
+ int ret;
+
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
ftrace_clear_events(tr);
- return ftrace_event_open(inode, file, seq_ops);
+ ret = ftrace_event_open(inode, file, seq_ops);
+ if (ret < 0)
+ trace_array_put(tr);
+ return ret;
}
static struct event_subsystem *
@@ -1279,7 +1401,15 @@ create_new_subsystem(const char *name)
return NULL;
system->ref_count = 1;
- system->name = name;
+
+ /* Only allocate if dynamic (kprobes and modules) */
+ if (!core_kernel_data((unsigned long)name)) {
+ system->ref_count |= SYSTEM_FL_FREE_NAME;
+ system->name = kstrdup(name, GFP_KERNEL);
+ if (!system->name)
+ goto out_free;
+ } else
+ system->name = name;
system->filter = NULL;
@@ -1292,6 +1422,8 @@ create_new_subsystem(const char *name)
return system;
out_free:
+ if (system->ref_count & SYSTEM_FL_FREE_NAME)
+ kfree(system->name);
kfree(system);
return NULL;
}
@@ -1410,8 +1542,8 @@ event_create_dir(struct dentry *parent,
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg)
- trace_create_file("id", 0444, file->dir, call,
- id);
+ trace_create_file("id", 0444, file->dir,
+ (void *)(long)call->event.type, id);
#endif
/*
@@ -1436,33 +1568,16 @@ event_create_dir(struct dentry *parent,
return 0;
}
-static void remove_subsystem(struct ftrace_subsystem_dir *dir)
-{
- if (!dir)
- return;
-
- if (!--dir->nr_events) {
- debugfs_remove_recursive(dir->entry);
- list_del(&dir->list);
- __put_system_dir(dir);
- }
-}
-
static void remove_event_from_tracers(struct ftrace_event_call *call)
{
struct ftrace_event_file *file;
struct trace_array *tr;
do_for_each_event_file_safe(tr, file) {
-
if (file->event_call != call)
continue;
- list_del(&file->list);
- debugfs_remove_recursive(file->dir);
- remove_subsystem(file->system);
- kmem_cache_free(file_cachep, file);
-
+ remove_event_file_dir(file);
/*
* The do_for_each_event_file_safe() is
* a double loop. After finding the call for this
@@ -1591,6 +1706,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call,
int trace_add_event_call(struct ftrace_event_call *call)
{
int ret;
+ mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
ret = __register_event(call, NULL);
@@ -1598,11 +1714,13 @@ int trace_add_event_call(struct ftrace_event_call *call)
__add_event_to_tracers(call, NULL);
mutex_unlock(&event_mutex);
+ mutex_unlock(&trace_types_lock);
return ret;
}
/*
- * Must be called under locking both of event_mutex and trace_event_sem.
+ * Must be called under locking of trace_types_lock, event_mutex and
+ * trace_event_sem.
*/
static void __trace_remove_event_call(struct ftrace_event_call *call)
{
@@ -1611,14 +1729,47 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
destroy_preds(call);
}
+static int probe_remove_event_call(struct ftrace_event_call *call)
+{
+ struct trace_array *tr;
+ struct ftrace_event_file *file;
+
+#ifdef CONFIG_PERF_EVENTS
+ if (call->perf_refcount)
+ return -EBUSY;
+#endif
+ do_for_each_event_file(tr, file) {
+ if (file->event_call != call)
+ continue;
+ /*
+ * We can't rely on ftrace_event_enable_disable(enable => 0)
+ * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress
+ * TRACE_REG_UNREGISTER.
+ */
+ if (file->flags & FTRACE_EVENT_FL_ENABLED)
+ return -EBUSY;
+ break;
+ } while_for_each_event_file();
+
+ __trace_remove_event_call(call);
+
+ return 0;
+}
+
/* Remove an event_call */
-void trace_remove_event_call(struct ftrace_event_call *call)
+int trace_remove_event_call(struct ftrace_event_call *call)
{
+ int ret;
+
+ mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
down_write(&trace_event_sem);
- __trace_remove_event_call(call);
+ ret = probe_remove_event_call(call);
up_write(&trace_event_sem);
mutex_unlock(&event_mutex);
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
}
#define for_each_event(event, start, end) \
@@ -1703,6 +1854,16 @@ static void trace_module_add_events(struct module *mod)
struct ftrace_module_file_ops *file_ops = NULL;
struct ftrace_event_call **call, **start, **end;
+ if (!mod->num_trace_events)
+ return;
+
+ /* Don't add infrastructure for mods without tracepoints */
+ if (trace_module_has_bad_taint(mod)) {
+ pr_err("%s: module has bad taint, not creating trace events\n",
+ mod->name);
+ return;
+ }
+
start = mod->trace_events;
end = mod->trace_events + mod->num_trace_events;
@@ -1762,6 +1923,7 @@ static int trace_module_notify(struct notifier_block *self,
{
struct module *mod = data;
+ mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
switch (val) {
case MODULE_STATE_COMING:
@@ -1772,6 +1934,7 @@ static int trace_module_notify(struct notifier_block *self,
break;
}
mutex_unlock(&event_mutex);
+ mutex_unlock(&trace_types_lock);
return 0;
}
@@ -2188,12 +2351,8 @@ __trace_remove_event_dirs(struct trace_array *tr)
{
struct ftrace_event_file *file, *next;
- list_for_each_entry_safe(file, next, &tr->events, list) {
- list_del(&file->list);
- debugfs_remove_recursive(file->dir);
- remove_subsystem(file->system);
- kmem_cache_free(file_cachep, file);
- }
+ list_for_each_entry_safe(file, next, &tr->events, list)
+ remove_event_file_dir(file);
}
static void
@@ -2329,11 +2488,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
int event_trace_del_tracer(struct trace_array *tr)
{
- /* Disable any running events */
- __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
-
mutex_lock(&event_mutex);
+ /* Disable any running events */
+ __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
+
down_write(&trace_event_sem);
__trace_remove_event_dirs(tr);
debugfs_remove_recursive(tr->event_dir);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e1b653f7e1c..0a1edc694d6 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -631,17 +631,15 @@ static void append_filter_err(struct filter_parse_state *ps,
free_page((unsigned long) buf);
}
+/* caller must hold event_mutex */
void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
{
- struct event_filter *filter;
+ struct event_filter *filter = call->filter;
- mutex_lock(&event_mutex);
- filter = call->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_printf(s, "none\n");
- mutex_unlock(&event_mutex);
}
void print_subsystem_event_filter(struct event_subsystem *system,
@@ -1835,23 +1833,22 @@ static int create_system_filter(struct event_subsystem *system,
return err;
}
+/* caller must hold event_mutex */
int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{
struct event_filter *filter;
- int err = 0;
-
- mutex_lock(&event_mutex);
+ int err;
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable(call);
filter = call->filter;
if (!filter)
- goto out_unlock;
+ return 0;
RCU_INIT_POINTER(call->filter, NULL);
/* Make sure the filter is not being used */
synchronize_sched();
__free_filter(filter);
- goto out_unlock;
+ return 0;
}
err = create_filter(call, filter_string, true, &filter);
@@ -1878,8 +1875,6 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
__free_filter(tmp);
}
}
-out_unlock:
- mutex_unlock(&event_mutex);
return err;
}
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d21a7467008..d7d0b50b1b7 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __array
#define __array(type, item, len) \
do { \
+ char *type_str = #type"["__stringify(len)"]"; \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
- mutex_lock(&event_storage_mutex); \
- snprintf(event_storage, sizeof(event_storage), \
- "%s[%d]", #type, len); \
- ret = trace_define_field(event_call, event_storage, #item, \
+ ret = trace_define_field(event_call, type_str, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), filter_type); \
- mutex_unlock(&event_storage_mutex); \
if (ret) \
return ret; \
} while (0);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b19d065a28c..2aefbee93a6 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
struct trace_array_cpu *data;
unsigned long flags;
- if (likely(!tracer_enabled))
+ if (!tracer_enabled || !tracing_is_enabled())
return;
cpu = raw_smp_processor_id();
@@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
else
return;
- if (!tracer_enabled)
+ if (!tracer_enabled || !tracing_is_enabled())
return;
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9f46e98ba8f..64abc8ca928 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -90,7 +90,7 @@ static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
}
static int register_probe_event(struct trace_probe *tp);
-static void unregister_probe_event(struct trace_probe *tp);
+static int unregister_probe_event(struct trace_probe *tp);
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);
@@ -281,6 +281,8 @@ trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file)
static int
disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
+ struct ftrace_event_file **old = NULL;
+ int wait = 0;
int ret = 0;
mutex_lock(&probe_enable_lock);
@@ -314,10 +316,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
}
rcu_assign_pointer(tp->files, new);
-
- /* Make sure the probe is done with old files */
- synchronize_sched();
- kfree(old);
+ wait = 1;
} else
tp->flags &= ~TP_FLAG_PROFILE;
@@ -326,11 +325,25 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
disable_kretprobe(&tp->rp);
else
disable_kprobe(&tp->rp.kp);
+ wait = 1;
}
out_unlock:
mutex_unlock(&probe_enable_lock);
+ if (wait) {
+ /*
+ * Synchronize with kprobe_trace_func/kretprobe_trace_func
+ * to ensure disabled (all running handlers are finished).
+ * This is not only for kfree(), but also the caller,
+ * trace_remove_event_call() supposes it for releasing
+ * event_call related objects, which will be accessed in
+ * the kprobe_trace_func/kretprobe_trace_func.
+ */
+ synchronize_sched();
+ kfree(old); /* Ignored if link == NULL */
+ }
+
return ret;
}
@@ -398,9 +411,12 @@ static int unregister_trace_probe(struct trace_probe *tp)
if (trace_probe_is_enabled(tp))
return -EBUSY;
+ /* Will fail if probe is being used by ftrace or perf */
+ if (unregister_probe_event(tp))
+ return -EBUSY;
+
__unregister_trace_probe(tp);
list_del(&tp->list);
- unregister_probe_event(tp);
return 0;
}
@@ -679,7 +695,9 @@ static int release_all_trace_probes(void)
/* TODO: Use batch unregistration */
while (!list_empty(&probe_list)) {
tp = list_entry(probe_list.next, struct trace_probe, list);
- unregister_trace_probe(tp);
+ ret = unregister_trace_probe(tp);
+ if (ret)
+ goto end;
free_trace_probe(tp);
}
@@ -1312,11 +1330,15 @@ static int register_probe_event(struct trace_probe *tp)
return ret;
}
-static void unregister_probe_event(struct trace_probe *tp)
+static int unregister_probe_event(struct trace_probe *tp)
{
+ int ret;
+
/* tp->event is unregistered in trace_remove_event_call() */
- trace_remove_event_call(&tp->call);
- kfree(tp->call.print_fmt);
+ ret = trace_remove_event_call(&tp->call);
+ if (!ret)
+ kfree(tp->call.print_fmt);
+ return ret;
}
/* Make a debugfs interface for controlling probe points */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8f2ac73c7a5..322e1646107 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
+ unsigned long irq_flags;
+ int pc;
int syscall_nr;
int size;
@@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer,
- sys_data->enter_event->event.type, size, 0, 0);
+ sys_data->enter_event->event.type, size, irq_flags, pc);
if (!event)
return;
@@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
if (!filter_current_check_discard(buffer, sys_data->enter_event,
entry, event))
- trace_current_buffer_unlock_commit(buffer, event, 0, 0);
+ trace_current_buffer_unlock_commit(buffer, event,
+ irq_flags, pc);
}
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
@@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
+ unsigned long irq_flags;
+ int pc;
int syscall_nr;
syscall_nr = trace_get_syscall_nr(current, regs);
@@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
if (!sys_data)
return;
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer,
- sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
+ sys_data->exit_event->event.type, sizeof(*entry),
+ irq_flags, pc);
if (!event)
return;
@@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
if (!filter_current_check_discard(buffer, sys_data->exit_event,
entry, event))
- trace_current_buffer_unlock_commit(buffer, event, 0, 0);
+ trace_current_buffer_unlock_commit(buffer, event,
+ irq_flags, pc);
}
static int reg_event_syscall_enter(struct ftrace_event_file *file,
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 32494fb0ee6..6fd72b76852 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -70,7 +70,7 @@ struct trace_uprobe {
(sizeof(struct probe_arg) * (n)))
static int register_uprobe_event(struct trace_uprobe *tu);
-static void unregister_uprobe_event(struct trace_uprobe *tu);
+static int unregister_uprobe_event(struct trace_uprobe *tu);
static DEFINE_MUTEX(uprobe_lock);
static LIST_HEAD(uprobe_list);
@@ -164,11 +164,17 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou
}
/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
-static void unregister_trace_uprobe(struct trace_uprobe *tu)
+static int unregister_trace_uprobe(struct trace_uprobe *tu)
{
+ int ret;
+
+ ret = unregister_uprobe_event(tu);
+ if (ret)
+ return ret;
+
list_del(&tu->list);
- unregister_uprobe_event(tu);
free_trace_uprobe(tu);
+ return 0;
}
/* Register a trace_uprobe and probe_event */
@@ -181,9 +187,12 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
/* register as an event */
old_tp = find_probe_event(tu->call.name, tu->call.class->system);
- if (old_tp)
+ if (old_tp) {
/* delete old event */
- unregister_trace_uprobe(old_tp);
+ ret = unregister_trace_uprobe(old_tp);
+ if (ret)
+ goto end;
+ }
ret = register_uprobe_event(tu);
if (ret) {
@@ -256,6 +265,8 @@ static int create_trace_uprobe(int argc, char **argv)
group = UPROBE_EVENT_SYSTEM;
if (is_delete) {
+ int ret;
+
if (!event) {
pr_info("Delete command needs an event name.\n");
return -EINVAL;
@@ -269,9 +280,9 @@ static int create_trace_uprobe(int argc, char **argv)
return -ENOENT;
}
/* delete an event */
- unregister_trace_uprobe(tu);
+ ret = unregister_trace_uprobe(tu);
mutex_unlock(&uprobe_lock);
- return 0;
+ return ret;
}
if (argc < 2) {
@@ -283,8 +294,10 @@ static int create_trace_uprobe(int argc, char **argv)
return -EINVAL;
}
arg = strchr(argv[1], ':');
- if (!arg)
+ if (!arg) {
+ ret = -EINVAL;
goto fail_address_parse;
+ }
*arg++ = '\0';
filename = argv[1];
@@ -406,16 +419,20 @@ fail_address_parse:
return ret;
}
-static void cleanup_all_probes(void)
+static int cleanup_all_probes(void)
{
struct trace_uprobe *tu;
+ int ret = 0;
mutex_lock(&uprobe_lock);
while (!list_empty(&uprobe_list)) {
tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
- unregister_trace_uprobe(tu);
+ ret = unregister_trace_uprobe(tu);
+ if (ret)
+ break;
}
mutex_unlock(&uprobe_lock);
+ return ret;
}
/* Probes listing interfaces */
@@ -460,8 +477,13 @@ static const struct seq_operations probes_seq_op = {
static int probes_open(struct inode *inode, struct file *file)
{
- if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
- cleanup_all_probes();
+ int ret;
+
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ ret = cleanup_all_probes();
+ if (ret)
+ return ret;
+ }
return seq_open(file, &probes_seq_op);
}
@@ -968,12 +990,17 @@ static int register_uprobe_event(struct trace_uprobe *tu)
return ret;
}
-static void unregister_uprobe_event(struct trace_uprobe *tu)
+static int unregister_uprobe_event(struct trace_uprobe *tu)
{
+ int ret;
+
/* tu->event is unregistered in trace_remove_event_call() */
- trace_remove_event_call(&tu->call);
+ ret = trace_remove_event_call(&tu->call);
+ if (ret)
+ return ret;
kfree(tu->call.print_fmt);
tu->call.print_fmt = NULL;
+ return 0;
}
/* Make a trace interface for controling probe points */